Background: A hard disk failure on a production server caused a service outage and data loss.
Objective: Detect an abnormal hard disk state as early as possible.
Scenario: Use a custom Nagios script to monitor the drive (RAID) status.
Note: The following script only works on servers where the vendor RAID tool is already installed: hpacucli (HP) or MegaCli (Dell/IBM).
#!/bin/bash
# marc.wang 2014/06/17
#
# Passive Nagios check "check_raid": inspects the hardware RAID of the local
# machine with the vendor tool (hpacucli on HP, MegaCli on Dell/IBM) and
# submits the result to the Nagios server through send_nsca.
#
# Result line sent to send_nsca (fields separated by ';'):
#   <local address>;check_raid;<state>;<text>      state: 0 = OK, 2 = CRITICAL
#
# Environment:
#   NAGIOS_SERVER  NSCA host to report to (default: nagios.org)
#
# Exit status: 0 when the RAID is healthy, 2 on any problem that was reported.
# On hardware that is neither HP, Dell nor IBM the result is reported and the
# script ends with the status of the send_nsca pipeline.

set -u

export PATH="${PATH}:/usr/sbin/:/sbin/:/usr/bin/"

readonly NAGIOS_SERVER="${NAGIOS_SERVER:-nagios.org}"
readonly SEND_NSCA_BIN='/usr/local/nagios/bin/send_nsca'
readonly SEND_NSCA_CFG='/usr/local/nagios/etc/send_nsca.cfg'
readonly SERVICE_NAME='check_raid'
readonly MEGACLI64='/opt/MegaRAID/MegaCli/MegaCli64'
# Summed media + other error count at or above which the array is CRITICAL.
readonly ERROR_COUNT_LIMIT=2000

#######################################
# Forward a check result read from stdin to the Nagios server via send_nsca.
# Globals:   SEND_NSCA_BIN, SEND_NSCA_CFG, NAGIOS_SERVER (read)
# Returns:   exit status of send_nsca
#######################################
send_nsca_ssl_message() {
  "${SEND_NSCA_BIN}" -H "${NAGIOS_SERVER}" -d ';' -c "${SEND_NSCA_CFG}"
}

#######################################
# Build one ';'-delimited result line.
# Arguments: $1 - host address, $2 - state (0 or 2), $3 - message text
# Outputs:   the result line on stdout
#######################################
format_result() {
  printf '%s;%s;%s;%s\n' "$1" "${SERVICE_NAME}" "$2" "$3"
}

#######################################
# Format a result and submit it to Nagios.
# Arguments: $1 - host address, $2 - state, $3 - message text
#######################################
report() {
  format_result "$1" "$2" "$3" | send_nsca_ssl_message
}

#######################################
# Tell whether an rpm whose name contains $1 (case-insensitive) is installed.
# Arguments: $1 - package name fragment
# Returns:   0 if found, non-zero otherwise (also when rpm is unavailable)
#######################################
rpm_installed() {
  rpm -qa 2>/dev/null | grep -qi -- "$1"
}

#######################################
# Extract the first word of the first "Vendor" line of dmidecode output.
# Inputs:    dmidecode output on stdin
# Outputs:   vendor word (e.g. "Dell", "HP", "Hewlett-Packard") on stdout
#######################################
parse_vendor() {
  awk -F: '/Vendor/ && !seen { seen = 1; split($2, word, " "); print word[1] }'
}

#######################################
# Count physical drives whose status is not OK.
# Looks at the 4 lines following every "physicaldrive" line, strips blanks and
# counts the "Status:" lines that are not "Status:OK".
# Inputs:    "hpacucli ctrl all show config detail" output on stdin
# Outputs:   the count on stdout ("0" when every drive is OK)
#######################################
count_failed_hp_drives() {
  grep -A 4 'physicaldrive' \
    | sed 's/ //g' \
    | grep 'Status:' \
    | grep -cv 'Status:OK'
}

#######################################
# Sum every "Media Error Count" and "Other Error Count" value.
# Inputs:    "MegaCli -LdPdInfo" output on stdin
# Outputs:   the sum on stdout; nothing at all when no counter line was seen
#######################################
sum_megacli_error_counts() {
  awk -F: '
    /Media Error Count:|Other Error Count:/ { total += $2; seen = 1 }
    END { if (seen) print total }
  '
}

#######################################
# HP branch: evaluate the drives with hpacucli, report, and exit.
# Arguments: $1 - host address, $2 - vendor string as reported by dmidecode
#######################################
check_hp() {
  local host=$1 vendor=$2 detail rc failed

  if ! rpm_installed hpacucli; then
    report "${host}" 2 "${vendor} command hpacucli not found"
    exit 2
  fi

  # Run the tool once and keep both its output and its exit status.
  detail=$(hpacucli ctrl all show config detail 2>/dev/null)
  rc=$?
  if ((rc != 0)); then
    report "${host}" 2 'Check_raid_status: run command hpacucli error'
    exit 2
  fi

  failed=$(printf '%s\n' "${detail}" | count_failed_hp_drives)
  if [[ "${failed}" == "0" ]]; then
    report "${host}" 0 'Check_raid_status:ok'
    exit 0
  fi
  report "${host}" 2 'Check_raid_status:critical'
  exit 2
}

#######################################
# Dell/IBM branch: evaluate the error counters with MegaCli, report, and exit.
# Arguments: $1 - host address, $2 - vendor string as reported by dmidecode
#######################################
check_dell_ibm() {
  local host=$1 vendor=$2 megacli='MegaCli' info rc total

  # Prefer the 64-bit binary at its fixed install path; else rely on PATH.
  if [[ -f "${MEGACLI64}" ]]; then
    megacli="${MEGACLI64}"
  fi

  if ! rpm_installed megacli; then
    report "${host}" 2 "${vendor} command MegaCli not found"
    exit 2
  fi

  info=$("${megacli}" -LdPdInfo -a0 2>/dev/null)
  rc=$?
  total=$(printf '%s\n' "${info}" | sum_megacli_error_counts)

  if [[ -z "${total}" ]]; then
    # No counter line at all: the tool is missing or produced no data.
    report "${host}" 2 'check_raid_status:megacli command not found!'
    exit 2
  fi
  # A non-numeric sum is treated like an exceeded limit: alert rather than
  # silently pass.
  if ! [[ "${total}" =~ ^[0-9]+$ ]] || ((total >= ERROR_COUNT_LIMIT)); then
    report "${host}" 2 'Check_raid_status:critical'
    exit 2
  fi
  if ((rc != 0)); then
    report "${host}" 2 'Check_raid_status: run command MegaCli error'
    exit 2
  fi
  report "${host}" 0 'Check_raid_status:ok'
  exit 0
}

#######################################
# Entry point: identify the hardware vendor and run the matching check.
#######################################
main() {
  local host vendor vendor_key

  host=$(hostname -i 2>/dev/null | awk '{print $1}')
  vendor=$(dmidecode 2>/dev/null | parse_vendor)
  vendor_key=$(printf '%s' "${vendor}" | tr '[:upper:]' '[:lower:]')

  # An hpacucli process that is already running is reported as CRITICAL
  # instead of starting another one.
  if pgrep hpacucli >/dev/null 2>&1; then
    report "${host}" 2 'hpacucli command run not data.'
    exit 2
  fi

  case "${vendor_key}" in
    hp | hewlett-packard)
      check_hp "${host}" "${vendor}"
      ;;
    dell | ibm)
      check_dell_ibm "${host}" "${vendor}"
      ;;
    *)
      report "${host}" 2 'This machine is not ibm dell or hp!'
      ;;
  esac
}

main "$@"
This article is from the "Chinaops" blog; please keep this source link when reposting: http://cnops.blog.51cto.com/9374660/1551793
Monitoring hard disk RAID status with Nagios