Basic Monitoring for Hadoop Data Nodes
Here’s a basic script to monitor HDFS cluster disk space, temp directory space, and the number of data nodes that are up. This was plenty useful before we switched to Cloudera Manager.
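The script shells out to hdfs dfsadmin -report and picks two lines out of the cluster-wide summary at the top of that output. They look roughly like this (the numbers are illustrative, and the exact wording varies a little between Hadoop versions):

DFS Used%: 64.04%
Datanodes available: 17 (28 total, 11 dead)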
#!/usr/bin/env ruby
# Checks Hadoop and alerts if there is a change in data nodes
require 'yaml'
DFS_USED_PERCENT_THRESHOLD = 75
error_flag = 0
report = `hdfs dfsadmin -report`
# Datanodes available: 17 (28 total, 11 dead)
datanodes_report = report[/Data.*/]
persistence_report = {}
old_persistence_report = {}
dfs_used_percent = report[/DFS Used%.*/].split(' ')[-1].to_i
datanodes_total = datanodes_report[/\d+\stotal/].split(' ')[0]
datanodes_dead = datanodes_report[/\d+\sdead/].split(' ')[0]
datanodes_available = datanodes_report[/available:\s\d+/].split(' ')[-1]
persistence_report['datanodes_total'] = datanodes_total
persistence_report['datanodes_dead'] = datanodes_dead
persistence_report['datanodes_available'] = datanodes_available
report_file = "/etc/hadoop_report/hadoop_report.yml"
# Load the report from the previous run (stays an empty hash on the very first run)
old_persistence_report = YAML.load_file(report_file) if File.exist?(report_file)
# Persist the current report for the next run
File.open(report_file, "w") do |file|
  file.write persistence_report.to_yaml
end
# Node monitoring
puts "New Report"
puts persistence_report.each {|k,v| puts "#{k} = #{v}"}
p;p;
puts "Old Report"
puts old_persistence_report.each {|k,v| puts "#{k} = #{v}"}
# Storage monitoring
puts "Threshold = #{DFS_USED_PERCENT_THRESHOLD}%"
puts "DFS Used% = #{dfs_used_percent}%"
tmp_dir_report = `hdfs dfs -du -s /tmp`
puts "Temp Directory Space = #{tmp_dir_report.split(' ')[0].to_f/(1024)**4} TB"
if persistence_report != old_persistence_report
  puts "There was a change in the Number of Data Nodes available."
  error_flag = 1
end
if dfs_used_percent > DFS_USED_PERCENT_THRESHOLD
  puts "DFS Storage Space Used Threshold Exceeded"
  error_flag = 1
end
exit 1 if error_flag == 1
puts "Everything is OK"