[email protected]: ~$ cat warning.sh
#!/bin/bash
# Monitor system load, CPU, swap, root-disk usage and the number of logged-in
# users; e-mail an alert whenever a value crosses its threshold.
# Prerequisite: a working `mail` command (mail service installed).

# Alert recipient. The address in the published listing was replaced by an
# e-mail-obfuscation placeholder, so it must be supplied here or through the
# environment; the fallback is the local root mailbox.
Mail="${Mail:-root}"

# extract_inet_addr
# Read `ifconfig` output on stdin and print the first IPv4 address.
# Handles both the legacy "inet addr:1.2.3.4" layout and the current
# "inet 1.2.3.4" layout; "inet6" lines are ignored. Prints nothing when no
# IPv4 address is present.
extract_inet_addr() {
  awk '$1 == "inet" { sub(/^addr:/, "", $2); print $2; exit }'
}

# IP address of this server, used to identify it in the alert messages.
IP=$(LC_ALL=C ifconfig eth0 2>/dev/null | extract_inet_addr)
# eth0 may not exist (predictable interface names) or ifconfig may not be
# installed; fall back to the first address reported by `hostname -I`.
if [[ -z $IP ]]; then
  IP=$(hostname -I 2>/dev/null | awk '{print $1}')
fi
# 1. Monitor the system load; e-mail an alert when it is too high.

# per_core_load LOAD CORES
# Print LOAD divided by CORES with two decimals and a leading digit
# (e.g. "0.35"). A core count below 1 is treated as 1 so the division can
# never be by zero; a non-numeric LOAD counts as 0.
per_core_load() {
  awk -v load="$1" -v cores="$2" 'BEGIN {
    if (cores + 0 < 1) { cores = 1 }
    printf "%.2f\n", (load + 0) / cores
  }'
}

# load_alert_level AVG WARN
# Classify the per-core load AVG and print one word:
#   critical - AVG is 1.0 or more
#   warning  - AVG is above WARN but below 1.0
#   ok       - otherwise
load_alert_level() {
  awk -v avg="$1" -v warn="$2" 'BEGIN {
    if (avg + 0 >= 1.0) { print "critical" }
    else if (avg + 0 > warn + 0) { print "warning" }
    else { print "ok" }
  }'
}

# Total number of CPU cores.
cpu_num=$(nproc 2>/dev/null || grep -c '^processor' /proc/cpuinfo)
# 15-minute load average: always the last field of the `uptime` line, however
# the "up ..." part happens to be formatted.
load_15=$(LC_ALL=C uptime 2>/dev/null | awk '{print $NF}')
# 15-minute load average per core.
average_load=$(per_core_load "$load_15" "$cpu_num")
# Warning threshold for the per-core load (alert above 70% utilisation).
load_warn=0.70

case "$(load_alert_level "$average_load" "$load_warn")" in
  critical)
    echo "$IP server 15-minute system average load is $average_load, exceeding the alert value of 1.0, please handle it immediately!!!" \
      | mail -s "$IP server system load critical alarm!!!" "${Mail:-root}"
    ;;
  warning)
    echo "$IP server 15-minute system average load reached $average_load, exceeding the alert value of $load_warn, please handle it in time." \
      | mail -s "$IP server system load alarm" "${Mail:-root}"
    ;;
esac
# Progress marker: section 1 finished.
echo 1end
# 2. Monitor CPU usage; e-mail an alert when more than 80% is in use.

# cpu_idle_percent
# Read `top -b -n 1` output on stdin and print the integer part of the idle
# CPU percentage from the "Cpu(s)" summary line. The value is located by its
# "id" label rather than by field position, so both the old layout
# ("98.5%id") and the current one ("98.5 id", or "ni,100.0 id" with no
# separating space) are handled. Prints nothing if no such line is found.
cpu_idle_percent() {
  awk '/Cpu\(s\)/ {
    if (match($0, /[0-9.]+[ %]*id/)) {
      v = substr($0, RSTART, RLENGTH)
      sub(/[ %]*id$/, "", v)
      printf "%d\n", v
      exit
    }
  }'
}

# Current idle CPU percentage (integer part only).
cpu_idle=$(LC_ALL=C top -b -n 1 2>/dev/null | cpu_idle_percent)
echo "Free CPU: ${cpu_idle}%"
# Alert when less than 20% of the CPU is idle, i.e. usage is above 80%.
cpu_idle_warn=20
if [[ -n $cpu_idle ]] && (( cpu_idle < cpu_idle_warn )); then
  echo "$IP server CPU has only ${cpu_idle}% idle, the usage rate has exceeded 80%, please handle it in time." \
    | mail -s "$IP server CPU alarm" "${Mail:-root}"
fi
# Progress marker: section 2 finished.
echo 2end
# 3. Monitor swap usage; e-mail an alert when more than 80% is in use.

# swap_free_is_low TOTAL FREE [WARN_PCT]
# Succeed (status 0) when FREE is less than WARN_PCT percent (default 20) of
# TOTAL. Fails when TOTAL is 0 (no swap configured) or when either value is
# not a non-negative integer, so a missing reading never raises an alert.
# Pure integer arithmetic: no bc needed and no division by zero possible.
swap_free_is_low() {
  local total=$1 free=$2 warn_pct=${3:-20}
  [[ $total =~ ^[0-9]+$ && $free =~ ^[0-9]+$ ]] || return 1
  (( total > 0 && free * 100 < total * warn_pct ))
}

# Total and currently free swap in MiB, taken from a single `free -m` call
# (columns of the "Swap:" row: total, used, free).
swap_total=''
swap_free=''
read -r swap_total swap_free \
  < <(LC_ALL=C free -m 2>/dev/null | awk '/^Swap:/ { print $2, $4 }')

# Alert when less than 20% of swap remains free, i.e. usage is above 80%.
if swap_free_is_low "$swap_total" "$swap_free" 20; then
  echo "swapfree:${swap_free}M"
  echo "$IP server swap partition has only ${swap_free}M unused, less than 20% remaining, the utilization rate has exceeded 80%, please handle it in time." \
    | mail -s "$IP server memory alarm" "${Mail:-root}"
fi
# Progress marker: section 3 finished.
echo 3end
# 4. Monitor root-partition usage; e-mail an alert when more than 80% is used.

# disk_used_percent MOUNTPOINT
# Read `df -P` output on stdin and print the integer "use%" value of the
# filesystem mounted at MOUNTPOINT (percent sign removed). Prints nothing
# when the mount point is not listed.
disk_used_percent() {
  awk -v mnt="$1" 'NR > 1 && $NF == mnt {
    pct = $(NF - 1)
    sub(/%$/, "", pct)
    print pct
    exit
  }'
}

# Percentage of the root filesystem in use. `df -P /` is queried instead of
# grepping for /dev/sda1 so the check also works when the root filesystem
# lives on another device (LVM, NVMe, virtio, ...).
disk_root=$(LC_ALL=C df -P / 2>/dev/null | disk_used_percent /)
# Alert when more than 80% of the root partition is used.
disk_warn=80
if [[ $disk_root =~ ^[0-9]+$ ]] && (( disk_root > disk_warn )); then
  echo "$IP root partition usage has reached ${disk_root}%, exceeding ${disk_warn}%"
  echo "$IP server / root partition utilization has exceeded ${disk_warn}% and reached ${disk_root}%, please deal with it in time." \
    | mail -s "$IP server hard drive warning" "${Mail:-root}"
fi
# Progress marker: section 4 finished.
echo 4end
# 5. Monitor logged-in users; e-mail an alert when there are 3 or more.

# logged_in_users
# Read `uptime` output on stdin and print the number that precedes the word
# "user"/"users". The count is found by its label rather than by field
# position because the "up ..." part of the line changes shape ("up 5 min",
# "up 3:02", "up 12 days, 3:02"). Prints nothing if no count is found.
logged_in_users() {
  awk 'match($0, /[0-9]+ +users?/) {
    n = substr($0, RSTART, RLENGTH)
    sub(/ .*/, "", n)
    print n
    exit
  }'
}

# Number of users currently logged in.
users=$(LC_ALL=C uptime 2>/dev/null | logged_in_users)
# Alert threshold for the number of logged-in users.
users_warn=3
if [[ $users =~ ^[0-9]+$ ]] && (( users >= users_warn )); then
  echo "$IP number of users reached $users, please handle"
  echo "The number of $IP server users has reached $users, please deal with it in a timely manner." \
    | mail -s "Number of $IP server users alert" "${Mail:-root}"
fi
# Progress marker: section 5 finished.
echo 5end
~~~~~~~~~~~~~~~~~~~~~~~~~~
Second, add the script to the scheduled tasks (cron): it runs every 10 minutes and sends an e-mail immediately whenever an alert is raised (so at most one round of alert mail every 10 minutes).
Code example:
# crontab -e
*/10 * * * * /scripts/sys-warning.sh
# service crond restart
The script monitors the system load, CPU, memory (swap), hard disk and number of logged-in users, and sends an e-mail alert whenever one of them exceeds its alert threshold.