The health check we have been using for HBase and HDFS, together with an alarm on remaining HDFS capacity, is easy to use:
#!/bin/bash
#
# Health check for an HBase cluster running on HDFS. Exit codes follow the
# Nagios plugin convention (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN).
#
# Checks, in order (the first failing check ends the script):
#   1. Run `hbase hbck` and `hadoop fsck /hbase`, append both reports to the
#      status log and search them for abnormal status keywords.
#   2. Scrape http://<host>:60010/master-status for dead region servers.
#   3. Scrape http://<host>:50070/dfshealth.jsp for "DFS Remaining%" and
#      compare it with the warning / critical thresholds.
#
# Environment:
#   HBASE_HOME, HADOOP_HOME   installation roots (may come from /etc/profile)
#   HADOOP_WEB_INTERFACE      externally reachable IP/host of the NameNode
#
# `set -e` / `set -u` are deliberately not enabled: /etc/profile is sourced
# below and may reference unset variables, and every exit code of this script
# must be one of the STATE_* values rather than an arbitrary command status.

# Directory containing this script (kept from the original script).
# shellcheck disable=SC2034
bin=$(cd -- "$(dirname -- "$0")" && pwd)

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
# shellcheck disable=SC2034
STATE_DEPENDENT=4

# shellcheck disable=SC1091
source /etc/profile

# Thresholds on the integer part of "DFS Remaining%".
DFS_REMAINING_WARNING=15
DFS_REMAINING_CRITICAL=5

# Matched case-sensitively on purpose: the tools print upper-case status
# keywords, while mixed-case words (e.g. fsck's "Corrupt blocks:" summary
# line) can appear in healthy output too -- verify against your versions.
ABNORMAL_QUERY='INCONSISTENT|CORRUPT|FAILED|Exception'

# External interface IP/host of the NameNode. Export it in the environment
# (or in /etc/profile), or hard-code it here.
HADOOP_WEB_INTERFACE="${HADOOP_WEB_INTERFACE:-}"

# Log file that every run appends its reports and verdicts to.
output=/data/logs/cluster-status

readonly STATE_OK STATE_WARNING STATE_CRITICAL STATE_UNKNOWN STATE_DEPENDENT
readonly DFS_REMAINING_WARNING DFS_REMAINING_CRITICAL ABNORMAL_QUERY output

#######################################
# Print a message on stdout (where Nagios reads plugin output) and exit
# with STATE_UNKNOWN.
# Arguments: $* - message text
#######################################
unknown() {
  printf 'UNKNOWN: %s\n' "$*"
  exit "$STATE_UNKNOWN"
}

#######################################
# Run hbck and fsck, log their reports and look for abnormal keywords.
# Only the report produced by THIS run is scanned; scanning the whole
# append-only log would keep the check CRITICAL forever after one failure.
# Globals: HBASE_HOME, HADOOP_HOME, ABNORMAL_QUERY, output (read)
# Outputs: appends to $output; on failure prints "(count) last match"
# Exits:   STATE_CRITICAL on a match, STATE_UNKNOWN if no report was produced
#######################################
check_cluster_report() {
  local report count last_entry

  report=$(
    "${HBASE_HOME}/bin/hbase" hbck
    "${HADOOP_HOME}/bin/hadoop" fsck /hbase
  )
  [[ -n "$report" ]] || unknown "hbck/fsck produced no output"
  printf '%s\n' "$report" >>"$output"

  # grep -c prints 0 (and returns 1) when nothing matches.
  count=$(grep -Ec -- "$ABNORMAL_QUERY" <<<"$report")
  if ((count == 0)); then
    echo "[OK] Cluster is healthy." >>"$output"
    return 0
  fi

  echo "[Abnormal] Cluster is abnormal!" >>"$output"
  # Report the last matching entry of the current report.
  last_entry=$(grep -E -- "$ABNORMAL_QUERY" <<<"$report" | tail -n 1)
  printf '(%s) %s\n' "$count" "$last_entry"
  exit "$STATE_CRITICAL"
}

#######################################
# Scrape the HBase master status page for dead region servers.
# Globals: HADOOP_WEB_INTERFACE, output (read)
# Outputs: verdict on stdout and appended to $output
# Exits:   STATE_CRITICAL if any dead server is listed, STATE_UNKNOWN if
#          the page cannot be fetched (an unreachable master must not be
#          reported as "healthy")
#######################################
check_region_servers() {
  local page dead

  page=$(curl -sf "http://${HADOOP_WEB_INTERFACE}:60010/master-status") \
    || unknown "cannot fetch HBase master status from ${HADOOP_WEB_INTERFACE}:60010"

  # Keep the part of the page between the "Dead Region Servers" heading and
  # the "Regions in Transition" heading, then pull the link texts out of it.
  # Headings are matched case-insensitively to tolerate page variations.
  dead=$(
    grep -i -A 500 'Dead Region Servers' <<<"$page" \
      | grep -i -B 500 'Regions in Transition' \
      | grep -Eo 'target="_blank">.*</a>' \
      | awk -F '>' '{print $2}' \
      | awk -F '<' '{print $1}'
  )

  if [[ -z "$dead" ]]; then
    echo "[OK] all regionservers are healthy."
    echo "[OK] all regionservers are healthy." >>"$output"
    return 0
  fi

  # One server per line -> single space-separated line, as in the log format.
  dead=${dead//$'\n'/ }
  {
    echo "[Abnormal] the dead regionserver list:"
    printf '%s\n' "$dead"
  } >>"$output"
  printf '[Abnormal] the dead regionserver list: %s\n' "$dead"
  exit "$STATE_CRITICAL"
}

#######################################
# Extract the "DFS Remaining%" value from the NameNode health page.
# Inputs:  HTML on stdin
# Outputs: the number (e.g. "85.35" or "85") on stdout, nothing if absent
#######################################
parse_dfs_remaining() {
  # Strip tags first so digits inside attributes are not mistaken for the
  # value, then take the first number following the label.
  sed -e 's/<[^>]*>/ /g' \
    | grep -Eio 'DFS Remaining%[^0-9]*[0-9]+(\.[0-9]+)?' \
    | head -n 1 \
    | grep -Eo '[0-9]+(\.[0-9]+)?$'
}

#######################################
# Compare the remaining DFS capacity with the thresholds and exit.
# Globals: HADOOP_WEB_INTERFACE, DFS_REMAINING_*, STATE_*, output (read)
# Outputs: status line on stdout; "DFS remaining%" line appended to $output
# Exits:   STATE_OK / STATE_WARNING / STATE_CRITICAL, or STATE_UNKNOWN when
#          the value cannot be fetched or parsed
#######################################
check_dfs_remaining() {
  local page remaining remaining_word remaining_int exit_status

  page=$(curl -sf "http://${HADOOP_WEB_INTERFACE}:50070/dfshealth.jsp") \
    || unknown "cannot fetch NameNode status from ${HADOOP_WEB_INTERFACE}:50070"

  remaining=$(parse_dfs_remaining <<<"$page")
  [[ "$remaining" =~ ^[0-9]+(\.[0-9]+)?$ ]] \
    || unknown "cannot parse DFS Remaining% from the NameNode status page"

  remaining_word="DFS remaining%: ${remaining}%"
  echo "$remaining_word" >>"$output"

  # Thresholds are integers: compare on the integer part only. The 10#
  # prefix keeps a leading zero from being read as octal.
  remaining_int=${remaining%%.*}
  if ((10#$remaining_int < DFS_REMAINING_CRITICAL)); then
    echo "Low DFS space. ${remaining_word}"
    exit_status=$STATE_CRITICAL
  elif ((10#$remaining_int < DFS_REMAINING_WARNING)); then
    echo "Low DFS space. ${remaining_word}"
    exit_status=$STATE_WARNING
  else
    echo "HBase check OK - DFS and HBase healthy. ${remaining_word}"
    exit_status=$STATE_OK
  fi
  exit "$exit_status"
}

#######################################
# Validate prerequisites, then run the three checks in order.
#######################################
main() {
  [[ -n "${HBASE_HOME:-}" && -x "${HBASE_HOME}/bin/hbase" ]] \
    || unknown "HBASE_HOME is not set or \$HBASE_HOME/bin/hbase is not executable"
  [[ -n "${HADOOP_HOME:-}" && -x "${HADOOP_HOME}/bin/hadoop" ]] \
    || unknown "HADOOP_HOME is not set or \$HADOOP_HOME/bin/hadoop is not executable"
  [[ -n "$HADOOP_WEB_INTERFACE" ]] \
    || unknown "HADOOP_WEB_INTERFACE is not set"
  { : >>"$output"; } 2>/dev/null \
    || unknown "cannot write to ${output}"

  check_cluster_report
  check_region_servers
  check_dfs_remaining
}

main "$@"
A simple utility script for HBase monitoring