Using Ansible to monitor a Storm cluster


1. My hosts configuration

# vim /etc/hosts


192.168.1.100 STORM_ZK1

192.168.1.101 STORM_ZK2

192.168.1.102 STORM_ZK3

192.168.1.103 Storm_nimbus

192.168.1.104 Storm_supervisor1

192.168.1.105 Storm_supervisor2

192.168.1.106 Storm_supervisor3

192.168.1.107 Storm_supervisor4

192.168.1.108 Storm_supervisor5

192.168.1.109 Storm_supervisor6
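
Everything below is driven by Ansible ad-hoc commands, so it is worth confirming first that the nimbus node can reach every host over key-based SSH. A minimal sketch, assuming SSH keys are already distributed; the inventory path /data/scripts/all_hosts.txt is only an example name used here:

# build a temporary inventory from the hostnames above and ping every host through Ansible
# (any host reported as UNREACHABLE needs its SSH key distribution fixed before the monitoring scripts can work)
grep -E 'STORM_ZK|Storm_' /etc/hosts | awk '{print $2}' > /data/scripts/all_hosts.txt
/usr/local/bin/ansible -i /data/scripts/all_hosts.txt all -m ping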

2. My Storm configuration

# vim /usr/local/storm/conf/storm.yaml


drpc.servers:
    - "Storm_supervisor1"
    - "Storm_supervisor2"
    - "Storm_supervisor3"

storm.zookeeper.servers:
    - "STORM_ZK1"
    - "STORM_ZK2"
    - "STORM_ZK3"

storm.local.dir: "/data/storm/workdir"

nimbus.host: "Storm_nimbus"
nimbus.thrift.port: 6627
nimbus.thrift.max_buffer_size: 1048576
nimbus.childopts: "-Xmx1024m"
nimbus.task.timeout.secs: 30
nimbus.supervisor.timeout.secs: 60
nimbus.monitor.freq.secs: 10
nimbus.cleanup.inbox.freq.secs: 600
nimbus.inbox.jar.expiration.secs: 3600
nimbus.task.launch.secs: 240
nimbus.reassign: true
nimbus.file.copy.expiration.secs: 600
nimbus.topology.validator: "backtype.storm.nimbus.DefaultTopologyValidator"

storm.zookeeper.port: 2181
storm.zookeeper.root: "/data/storm/zkinfo"
storm.cluster.mode: "distributed"
storm.local.mode.zmq: false

ui.port: 8080
ui.childopts: "-Xmx768m"

supervisor.slots.ports:
    - 6700
    - 6701
    - 6702
    - 6703
    - 6704
    - 6705
    - 6706
    - 6707
    - 6708
    - 6709
supervisor.childopts: "-Xmx2048m"
supervisor.worker.start.timeout.secs: 240
supervisor.worker.timeout.secs: 30
supervisor.monitor.frequency.secs: 3
supervisor.heartbeat.frequency.secs: 5
supervisor.enable: true

worker.childopts: "-Xmx4096m"

topology.max.spout.pending: 5000

storm.zookeeper.session.timeout: 5000
storm.zookeeper.connection.timeout: 3000
storm.zookeeper.retry.times: 6
storm.zookeeper.retry.interval: 2000
storm.zookeeper.retry.intervalceiling.millis: 30000

storm.thrift.transport: "backtype.storm.security.auth.SimpleTransportPlugin"
storm.messaging.transport: "backtype.storm.messaging.netty.Context"
storm.messaging.netty.server_worker_threads: 50
storm.messaging.netty.client_worker_threads: 50
storm.messaging.netty.buffer_size: 20971520
storm.messaging.netty.max_retries: 100
storm.messaging.netty.max_wait_ms: 1000
storm.messaging.netty.min_wait_ms: 100
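
The same storm.yaml has to be present on every node of the cluster. One way to push it out is an Ansible ad-hoc copy; this is only a sketch, reusing the example inventory file built in section 1:

# push the configuration file to every Storm node, keeping a backup of the previous copy
/usr/local/bin/ansible -i /data/scripts/all_hosts.txt all -m copy -a "src=/usr/local/storm/conf/storm.yaml dest=/usr/local/storm/conf/storm.yaml backup=yes"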

3. Nimbus node deployment

# vim /data/scripts/monitor_status_for_storm.sh


#!/bin/sh
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin
. /etc/profile

## Storm UI monitoring page address parameters
mon_srv_ipaddr="192.168.1.103"
mon_srv_port="8080"

## flag: was the UI port scanned successfully
scan_flag=0

## working base path
base_path="/data/scripts"

## Ansible inventory of failed Storm supervisor hosts
fail_supervisor_list="${base_path}/fail_supervisor.txt"

#---------------------------------------------------------------------------------------------------
## restart the Storm nimbus and UI services
function restart_storm_nimbus_server()
{
    [[ -n `ps aux | grep java | grep storm` ]] && kill -9 `ps aux | grep java | grep storm | awk '{print $2}'`
    nohup /usr/local/storm/bin/storm nimbus > /dev/null 2>&1 &
    nohup /usr/local/storm/bin/storm ui > /dev/null 2>&1 &
    sleep 30
}

#---------------------------------------------------------------------------------------------------
## 1. Check whether the monitoring page is reachable; restart nimbus if port 8080 cannot be reached
for ((i=0; i<3; i++)); do
    retval=`/usr/bin/nmap -n -sS -p ${mon_srv_port} ${mon_srv_ipaddr} | grep open`
    [[ -n "${retval}" ]] && { scan_flag=1; break; } || sleep 10
done

[[ ${scan_flag} -ne 1 ]] && restart_storm_nimbus_server

#---------------------------------------------------------------------------------------------------
## 2. Compare the supervisor list scraped from the monitoring page with the local list to find abnormal supervisors
curl -s http://${mon_srv_ipaddr}:${mon_srv_port}/ | sed 's/<td>/<td>\n/g' | awk -F'<' '/^Storm_/{print $1}' | awk '!/nimbus/{print}' | sort > ${base_path}/supervisor_list_from_page.txt

## if the data scraped from the Storm UI page is empty, the nimbus service itself is abnormal
[[ -z `sed '/^$/d' ${base_path}/supervisor_list_from_page.txt` ]] && restart_storm_nimbus_server

sort -nr ${base_path}/supervisor_list_from_page.txt ${base_path}/supervisor_list.txt | uniq -u > ${base_path}/supervisor_list_for_failed.txt

[[ -z `sed '/^$/d' ${base_path}/supervisor_list_for_failed.txt` ]] && rm -f ${base_path}/supervisor_list_for_failed.txt && exit 0

#---------------------------------------------------------------------------------------------------
## 3. Build the IP address list of the abnormal Storm supervisor hosts
echo "[fail_supervisor]" >> ${fail_supervisor_list}

for supervisor_nameaddr in `cat ${base_path}/supervisor_list_for_failed.txt`
do
    temp_ipaddr=`grep -w ${supervisor_nameaddr} /etc/hosts | grep -v '#' | awk '{print $1}' | tail -1`
    echo "${temp_ipaddr}" >> ${fail_supervisor_list}
    iplist="${iplist} ${temp_ipaddr}"
done

#---------------------------------------------------------------------------------------------------
## 4. Remotely restart the Storm supervisor service via Ansible
/usr/local/bin/ansible -i ${fail_supervisor_list} fail_supervisor -m shell -a "/data/scripts/restart_storm_service.sh"

rm -f ${fail_supervisor_list}
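
For reference, the inventory the script writes to ${fail_supervisor_list} is just a group header followed by IP addresses, so the same Ansible call can also be tested by hand. Example content only; the actual addresses depend on which supervisors are down:

# example /data/scripts/fail_supervisor.txt as generated by the script
[fail_supervisor]
192.168.1.105
192.168.1.107

# manual test of the same ad-hoc call the script issues
/usr/local/bin/ansible -i /data/scripts/fail_supervisor.txt fail_supervisor -m ping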

# vim /data/scripts/supervisor_list.txt


Storm_supervisor1

Storm_supervisor2

Storm_supervisor3

Storm_supervisor4

Storm_supervisor5

Storm_supervisor6
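
This baseline list has to stay in sync with /etc/hosts whenever supervisors are added or removed. One way to regenerate it, a sketch that assumes the hostname convention shown in section 1:

# rebuild the baseline supervisor list from /etc/hosts (skipping commented lines)
grep -i 'storm_supervisor' /etc/hosts | grep -v '#' | awk '{print $2}' | sort > /data/scripts/supervisor_list.txt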

# touch /var/run/check_storm.lock

# crontab -e

*/2 * * * * (flock --timeout=0 /var/run/check_storm.lock /data/scripts/monitor_status_for_storm.sh > /dev/null 2>&1)
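
The flock wrapper keeps overlapping runs from piling up if one execution of the monitor script takes longer than two minutes: with --timeout=0 a second invocation exits immediately instead of waiting for the lock. This behaviour can be checked by hand, assuming the lock file above exists:

# hold the lock in the background for a minute...
flock --timeout=0 /var/run/check_storm.lock sleep 60 &
# ...then a second attempt fails at once instead of queueing
flock --timeout=0 /var/run/check_storm.lock echo "got lock" || echo "lock busy, skipping this run"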

4. Supervisor node deployment

# vim /data/scripts/restart_storm_service.sh


#!/bin/sh
PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin
. /etc/profile

## kill any running Storm process, then start the supervisor again
[[ -n `ps aux | grep java | grep storm` ]] && kill -9 `ps aux | grep java | grep storm | awk '{print $2}'`

nohup /usr/local/storm/bin/storm supervisor > /dev/null 2>&1 &
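
The nimbus-side monitor calls this script remotely, so it has to exist and be executable on every supervisor beforehand. A sketch of pushing it out with Ansible, again reusing the example inventory from section 1:

# make sure the script directory exists on every supervisor, then copy the script and mark it executable
/usr/local/bin/ansible -i /data/scripts/all_hosts.txt 'Storm_supervisor*' -m file -a "path=/data/scripts state=directory"
/usr/local/bin/ansible -i /data/scripts/all_hosts.txt 'Storm_supervisor*' -m copy -a "src=/data/scripts/restart_storm_service.sh dest=/data/scripts/restart_storm_service.sh mode=0755"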

This article is from the "Life Ideal is perseverance" blog; please keep this source: http://sofar.blog.51cto.com/353572/1579897
