Scenario:
A two-node CentOS 6.3 cluster runs the Hortonworks Hadoop distribution and has been made highly available by following the official documentation. Because there is no fence-device support, however, the standby HA node is never told that the resources have been released when the primary node loses its network connection or power during failover tests.
The author therefore wrote a simple script that lets the standby node detect, through a few simple checks, that the primary node has lost its network or power, and then take over the resources, so that the cluster is highly available in the real sense.
Design questions:
How can network connectivity be determined?
How can the availability of the VIP (virtual IP) be judged?
How can a node detect its own failure?
How can split-brain be prevented as far as possible?
Implementation:
1. When none of the node's own IP addresses are up, the node reboots itself.
2. When the node finds that its own network is healthy, the VIP is offline, and the logs show that the other HA node has failed, it takes over the resources.
3. Repair of the Ganglia VIP binding (outside the main scope of this article).
The code is as follows:
#!/bin/bash
#
# Watchdog for a two-node cman/rgmanager HA cluster that has no fence device.
#
# Visible behaviour:
#   * waits until cman and rgmanager report "is running." (starting cman,
#     rgmanager, ricci and modclusterd when they are not);
#   * works out which cluster node is the local one (N_name) and which is the
#     peer (R_name);
#   * defines Node_reboot   - reboot this node when it has lost both the VIP
#                             and its own configured address;
#             free_resource - acknowledge a manual fence of the peer when the
#                             VIP is down but this node's network is healthy;
#             GGA           - make sure Ganglia gmond is bound to the VIP.
#
# Expected from /etc/sysconfig/hdp.conf (not shown here, so presumably):
#   Mlog - log file path, Ne - name of the network interface to inspect.
#
# NOTE(review): this script was reconstructed from a published listing whose
# whitespace, letter case and awk field numbers were mangled, and which is cut
# off inside GGA. Every value that could not be read from the listing is
# marked NOTE(review) below and must be confirmed before production use.

STD=$$

# Single-instance guard: count other processes whose command line contains
# this script's name, ignoring grep itself and our own PID.
# NOTE(review): the threshold of 2 presumably allows for the command
# substitution subshell, which shows up under the same command line.
Cron_if=$(ps aux | grep -- "$0" | grep -vE "grep|$STD" | /usr/bin/wc -l)
[[ "$Cron_if" -ge 2 ]] && exit 2

# NOTE(review): the listing has a start-up `sleep` whose duration was lost;
# 10 seconds is a placeholder, override with STARTUP_DELAY.
sleep "${STARTUP_DELAY:-10}"

VIP=${VIP:-192.168.1.198}
RE_PRD=${RE_PRD:-10}
SAFE_TIME=${SAFE_TIME:-60}
NMK=${NMK:-8}
NUL=/dev/null
# NOTE(review): the case of the format letters was mangled in the listing;
# hour:minute:second/year-month-day is assumed.
Date="/bin/date +%k:%M:%S/%Y-%m-%d"

# Append one timestamped line to the log. Falls back to stderr while Mlog is
# still unset, because an empty redirection target makes the write fail and
# (in the original `echo ... >> $Mlog && exit`) silently skipped the exit.
log_msg() {
  # shellcheck disable=SC2086  # $Date is "command + format" and must split
  printf '%s %s\n' "$($Date)" "$*" >>"${Mlog:-/dev/stderr}"
}

if [[ ! -f /etc/sysconfig/hdp.conf ]]; then
  log_msg "error:no such config fil."
  exit 1
fi
# Source the configuration before the remaining checks so that Mlog is known
# when they need to log.
# shellcheck disable=SC1091
. /etc/sysconfig/hdp.conf
Mlog=${Mlog:-/dev/stderr}

for init_script in /etc/init.d/cman /etc/init.d/rgmanager; do
  if [[ ! -f "$init_script" ]]; then
    log_msg "War: $(uname -n) Invalid HA node."
    exit 2
  fi
done

# Wait for the cluster stack. The status is sampled BEFORE any start attempt,
# so after starting a service the loop always sleeps SAFE_TIME and re-checks.
while :; do
  # stderr is appended (the listing shows a single '>', which would truncate
  # the log on every pass).
  RQE1=$(/etc/init.d/rgmanager status 2>>"$Mlog" | grep "is running.")
  RQE2=$(/etc/init.d/cman status 2>>"$Mlog" | grep "is running.")
  RQE3=$(/etc/init.d/ricci status 2>>"$Mlog" | grep "is running.")
  RQE4=$(/etc/init.d/modclusterd status 2>>"$Mlog" | grep "is running.")

  # cman has to be started before rgmanager.
  [[ -z "$RQE2" ]] && /etc/init.d/cman start &>"$NUL"
  [[ -z "$RQE1" ]] && /etc/init.d/rgmanager start &>"$NUL"
  [[ -z "$RQE3" ]] && /etc/init.d/ricci start &>"$NUL"
  [[ -z "$RQE4" ]] && /etc/init.d/modclusterd start &>"$NUL"

  if [[ -n "$RQE1" && -n "$RQE2" ]]; then
    break
  fi
  sleep "$SAFE_TIME"
done

# Cluster node names. The awk field number was lost in the listing, so the
# name="..." attribute is extracted explicitly instead of by position.
mapfile -t NODE < <(
  grep clusternode /etc/cluster/cluster.conf \
    | grep nodeid \
    | sed -n 's/.*<clusternode[^>]*[[:space:]]name="\([^"]*\)".*/\1/p'
)

# Classify each node: the one whose /etc/hosts address is configured on this
# machine is the local node (N_name); any other is the peer (R_name).
for i in "${NODE[@]}"; do
  NODE_IP=$(grep -- "$i" /etc/hosts | awk '{print $1; exit}')
  JUDG_VAR=
  if [[ -n "$NODE_IP" ]]; then
    # Match "inet <ip>/" literally so 192.168.1.1 does not match 192.168.1.10.
    JUDG_VAR=$(/sbin/ip a | grep -F -- "inet ${NODE_IP}/")
  fi
  if [[ -n "$JUDG_VAR" ]]; then
    N_name=$i
  else
    R_name=$i
  fi
done

#######################################
# Reboot this node when interface $Ne carries neither the VIP nor the address
# configured in its ifcfg file, i.e. the node itself is the faulty one.
# Globals:   Ne, VIP, NUL (read); W_VIP, Nic_file, PHY_IP_FILE, IP_FILE_IF,
#            KILL_PID (written)
# Arguments: none
# Returns:   0 when no reboot is needed or the check cannot be performed
#######################################
Node_reboot() {
  # Without an interface name or a configured address the check is
  # meaningless; an empty grep pattern must never lead to a reboot.
  [[ -n "$Ne" ]] || return 0

  W_VIP=$(/sbin/ip a show "$Ne" | grep -F -- "$VIP")
  Nic_file=/etc/sysconfig/network-scripts/ifcfg-$Ne
  # NOTE(review): field number lost in the listing; the value after '=' is
  # assumed.
  PHY_IP_FILE=$(grep IPADDR "$Nic_file" | awk -F= '{print $2; exit}')
  [[ -n "$PHY_IP_FILE" ]] || return 0
  IP_FILE_IF=$(/sbin/ifconfig "$Ne" | grep -F -- "$PHY_IP_FILE")

  if [[ -z "$W_VIP" && -z "$IP_FILE_IF" ]]; then
    # Kill the cluster daemons outright so they cannot hang the shutdown.
    # NOTE(review): field number lost in the listing; the PID column of
    # `ps aux` ($2) is assumed.
    mapfile -t KILL_PID < <(
      ps aux \
        | grep -E "rgmanager|fenced|dlm_controld|gfs_controld|corosync" \
        | grep -v grep \
        | awk '{print $2}'
    )
    for i in "${KILL_PID[@]}"; do
      kill -9 "$i" &>"$NUL"
    done
    /etc/init.d/rgmanager stop &>"$NUL"
    /etc/init.d/cman stop &>"$NUL"
    /sbin/reboot &>"$NUL"
  fi
}

#######################################
# Take over the resources when the VIP is unreachable although this node's
# network works: if the last rgmanager log line reports the peer DOWN and the
# last fenced log line reports the fence of the peer as failed, acknowledge
# the fence manually so rgmanager can relocate the services.
# Globals:   Ne, VIP, R_name, NUL, Mlog (read); DFGW, NFS_IP, P_CMD,
#            DOWN_LOG, FENCE_LOG (written)
# Arguments: none
#######################################
free_resource() {
  # Gateway of the route(s) that use interface $Ne (last column of
  # `route -n`), skipping the 0.0.0.0 "no gateway" entries.
  DFGW=$(/sbin/route -n \
    | awk -v nic="$Ne" '$NF == nic && $2 != "0.0.0.0" {print $2; exit}')
  # NOTE(review): field number lost in the listing; the host="..." attribute
  # of the <netfs> resource is assumed to be the NFS server.
  NFS_IP=$(grep netfs /etc/cluster/cluster.conf \
    | sed -n 's/.*[[:space:]]host="\([^"]*\)".*/\1/p' \
    | head -n 1)
  # -W (per-reply timeout) rather than -w (overall deadline): with a one
  # second deadline three replies can never arrive and ping always fails.
  P_CMD=(/bin/ping -c 3 -W 1)

  if ! "${P_CMD[@]}" "$VIP" &>"$NUL"; then
    if "${P_CMD[@]}" "$DFGW" &>"$NUL" \
      || "${P_CMD[@]}" "$R_name" &>"$NUL" \
      || "${P_CMD[@]}" "$NFS_IP" &>"$NUL"; then
      # Our own network is fine; check the VIP once more before acting.
      if ! "${P_CMD[@]}" "$VIP" &>"$NUL"; then
        DOWN_LOG=$(/usr/bin/tail -n 1 /var/log/cluster/rgmanager.log \
          | grep -- "$R_name DOWN")
        FENCE_LOG=$(tail -n 1 /var/log/cluster/fenced.log \
          | grep -- "$R_name failed")
        if [[ -n "$DOWN_LOG" && -n "$FENCE_LOG" ]]; then
          # fence_ack_manual asks for the word "absolutely" as confirmation.
          echo absolutely | /usr/sbin/fence_ack_manual "$R_name"
        fi
      fi
    else
      log_msg "Gateway: $DFGW and Ha_node: $R_name and Nfs: $NFS_IP offline!!"
    fi
  fi
}

#######################################
# Make sure Ganglia gmond is bound to the VIP.
# Globals:   VIP, NUL (read); RE, MGF, GFL (written)
# Arguments: none
# Returns:   0 immediately when gmond already has 4 sockets on the VIP
#######################################
GGA() {
  RE=$(netstat -anup | grep gmond | grep -F -- "$VIP" | /usr/bin/wc -l)
  [[ "$RE" -eq 4 ]] && return 0

  MGF=/etc/ganglia/gmond.conf
  RE=$(grep -A 2 -E "udp_send_channel|tcp_accept_channel" "$MGF" \
    | grep -F -- "$VIP" | /usr/bin/wc -l)
  if [[ "$RE" -ne 2 ]]; then
    # NOTE(review): the check above looks at udp_send_channel while the edit
    # below targets udp_accept_channel; reproduced as published - confirm
    # which section name is intended.
    sed -i "/^udp_accept_channel/a\ \bind = $VIP" "$MGF"
    sed -i "/^tcp_accept_channel/a\ \bind = $VIP" "$MGF"
  fi

  mapfile -t GFL < <(find /etc/ganglia/hdp -name "gmond.slave.conf")
  for g in "${GFL[@]}"; do
    if grep -- "bind = $VIP" "$g" &>"$NUL"; then
      continue
    fi
    # NOTE(review): the published listing is cut off at this point, in the
    # middle of a `sed -i "/\...` command that presumably adds the bind line
    # to "$g". The rest of GGA is not available and is not reconstructed.
  done
}

# NOTE(review): the published listing ends inside GGA; the code that calls
# Node_reboot, free_resource and GGA (and uses RE_PRD / NMK) is missing.
See more highlights of this column: http://www.bianceng.cn/OS/Linux/