Collecting system metrics with Python and sending them to Kafka

Source: Internet
Author: User

This article shows how to write a Python script that collects system metrics and then uses the Kafka client library to send those metrics to Kafka. It is a practical example for anyone who works with Kafka.

Before running this example, download two Python libraries to the local machine: six and kafka-python.


cat Config_system_metrics.json

{

"env": {

"Site": "Cluster",

"Component": "Namenode",

"Metric_prefix": "System"

},

"Output": {

"Kafka": {

"topic": "System_metrics_cluster",

"Brokerlist": ["10.10.10.1:9092", "10.10.10.2:9092", "10.10.10.3:9092"]

}

}

}


cat System_metrics.py

#!/usr/bin/env python
"""Collect basic system metrics and publish them to a Kafka topic."""

import sys
import os
import json
import socket
import re
import time
import logging
import threading

# Directory holding this script; the bundled libraries are located from it.
# NOTE(review): the published listing is ambiguous about whether lib/ sits
# beside the script or one level up -- confirm against the real layout.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

# load six (bundled copy)
sys.path.append(os.path.join(_SCRIPT_DIR, 'lib', 'six'))
import six

# load kafka-python (bundled copy, legacy KafkaClient/SimpleProducer API)
sys.path.append(os.path.join(_SCRIPT_DIR, 'lib', 'kafka-python'))
from kafka import KafkaClient, SimpleProducer, SimpleConsumer

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(name)-12s %(levelname)-6s %(message)s',
                    datefmt='%m-%d %H:%M')

# Name of the JSON configuration file; Loadconfig resolves it against the
# current working directory.
Config_file = 'Config_system_metrics.json'



class Loadconfig(object):
    """Load the JSON configuration shared by every class in this script.

    After construction the parsed document is available as ``self.config``.
    """

    def __init__(self, config_file=None):
        """Read and parse the configuration file.

        Args:
            config_file: Optional path of the JSON file to load.  When
                omitted, the module-level ``Config_file`` name is used,
                resolved against the current working directory.

        Exits the process with status 1 (after printing a message) when the
        file cannot be opened or does not contain valid JSON.
        """
        if config_file is None:
            # Build the default path in a local name: assigning to
            # ``Config_file`` here would shadow the module-level constant
            # and raise UnboundLocalError.
            config_file = "./" + Config_file

        try:
            handle = open(config_file, 'r')
        except (IOError, OSError):
            print("Load config file %s Error!" % config_file)
            sys.exit(1)

        # ``with`` guarantees the file is closed even when parsing fails
        # and sys.exit() unwinds through this block.
        with handle:
            try:
                config_json = json.loads(handle.read())
            except (ValueError, IOError, OSError):
                print("Convert config file to Json format Error!")
                sys.exit(1)

        self.config = config_json



class Kafka(Loadconfig):
    """Thin wrapper around the legacy kafka-python client and producer."""

    def __init__(self):
        """Load the configuration and remember the broker list."""
        Loadconfig.__init__(self)
        # Key spelling follows the sample configuration file shown with
        # this script (Output -> Kafka -> Brokerlist).
        self.broker = self.config["Output"]["Kafka"]["Brokerlist"]

    def kafka_connect(self):
        """Open a client connection and create a producer on top of it.

        Returns:
            A ``(client, producer)`` tuple.
        """
        kc = KafkaClient(self.broker, timeout=30)
        # ``async`` is a reserved word from Python 3.7 on, so it cannot be
        # written as a literal keyword argument; pass it through a dict.
        producer = SimpleProducer(kc, batch_send=True, **{"async": False})
        return kc, producer

    def kafka_produce(self, producer, topic, kafka_json):
        """Send one already-serialised message to ``topic``.

        Sample of the decoded payload (one element of the list that is sent):
            {'timestamp': 1463710, 'host': 'xxx',
             'metric': 'system.nic.receivedbytes', 'value': '4739',
             'component': 'Namenode', 'Site': 'Apolloqa'}

        Args:
            producer: Producer returned by ``kafka_connect``.
            topic: Name of the destination topic.
            kafka_json: The message as a JSON string (text or bytes).
        """
        # The application is responsible for encoding messages; text is
        # encoded here so the call also works where str is not bytes.
        if not isinstance(kafka_json, bytes):
            kafka_json = kafka_json.encode("utf-8")
        producer.send_messages(topic, kafka_json)


class Metric(Loadconfig):
    """Base class for collectors; prepares the fields shared by all datapoints.

    Attributes set here:
        fqdn: Fully qualified host name of this machine.
        data: List that subclasses fill with datapoint dicts.
        datapoint: Template dict holding ``timestamp`` (milliseconds),
            ``host``, ``component`` and ``Site``.
    """

    def __init__(self):
        """Load the configuration and build the datapoint template.

        Exits the process with status 1 if the host name cannot be resolved.
        """
        Loadconfig.__init__(self)

        try:
            self.fqdn = socket.getfqdn()
        except Exception as e:
            print("Could not get hostname! %s" % e)
            sys.exit(1)

        self.data = []
        env = self.config['env']
        # Key spelling follows the sample configuration file shown with
        # this script (env -> Component / Site).
        self.datapoint = {
            "timestamp": int(round(time.time() * 1000)),
            "host": self.fqdn,
            "component": env['Component'],
            "Site": env['Site'],
        }



class Metric_uptime(Metric):
    """Collector for /proc/uptime, reported in days."""

    def __init__(self):
        """Read /proc/uptime once; the lines are parsed in metric_collect."""
        Metric.__init__(self)
        # NOTE(review): capitalisation is taken verbatim from the published
        # listing and may be an artefact -- confirm the expected metric names.
        self.demensions = ["Uptime.day", "Idletime.day"]
        pipe = os.popen('cat /proc/uptime')
        try:
            self.result = pipe.readlines()
        finally:
            pipe.close()
        self.data = []

    def metric_collect(self):
        """Convert the captured output into datapoints.

        Returns:
            ``self.data``, extended with one datapoint per dimension; each
            value is the number of seconds divided by 86400, rounded to two
            decimals and rendered as a string.
        """
        prefix = self.config['env']['Metric_prefix']
        for line in self.result:
            values = re.split(r"\s+", line.rstrip())
            for dimension, seconds in zip(self.demensions, values):
                self.datapoint["metric"] = prefix + '.' + 'uptime' + '.' + dimension
                self.datapoint["value"] = str(round(float(seconds) / 86400, 2))
                # Copy: the template dict is reused for every datapoint.
                self.data.append(self.datapoint.copy())
        return self.data


class Metric_loadavg(Metric):
    """Collector for the 1/5/15-minute load averages in /proc/loadavg."""

    def __init__(self):
        """Read /proc/loadavg once; the lines are parsed in metric_collect."""
        Metric.__init__(self)
        self.demensions = ['cpu.loadavg.1min', 'cpu.loadavg.5min', 'cpu.loadavg.15min']
        pipe = os.popen('cat /proc/loadavg')
        try:
            self.result = pipe.readlines()
        finally:
            pipe.close()
        self.data = []

    def metric_collect(self):
        """Convert the captured output into datapoints.

        Returns:
            ``self.data``, extended with one datapoint per dimension; values
            are the first three whitespace-separated fields, kept as strings.
        """
        prefix = self.config['env']['Metric_prefix']
        for line in self.result:
            values = re.split(r"\s+", line.strip())
            # zip stops after the three named dimensions, ignoring the
            # remaining fields on the line.
            for dimension, value in zip(self.demensions, values):
                self.datapoint["metric"] = prefix + '.' + 'loadavg' + '.' + dimension
                self.datapoint["value"] = value
                self.data.append(self.datapoint.copy())
        return self.data


class Metric_memory(Metric):
    """Collector that emits one datapoint per line of /proc/meminfo."""

    def __init__(self):
        """Read /proc/meminfo once; the lines are parsed in metric_collect."""
        Metric.__init__(self)
        pipe = os.popen('cat /proc/meminfo')
        try:
            self.result = pipe.readlines()
        finally:
            pipe.close()
        self.data = []

    def metric_collect(self):
        """Convert the captured output into datapoints.

        Each line is split on an optional colon followed by whitespace; the
        first field names the metric and the second is its value.

        Returns:
            ``self.data``, extended with one datapoint per parsable line.
        """
        prefix = self.config['env']['Metric_prefix']
        for line in self.result:
            fields = re.split(r":?\s+", line.rstrip())
            if len(fields) < 2:
                # Blank or malformed line: there is no value to report.
                continue
            # NOTE(review): the '.KB' suffix is appended to every entry,
            # including ones the kernel reports without a unit -- confirm.
            self.datapoint["metric"] = prefix + "." + 'memory' + '.' + fields[0] + '.KB'
            self.datapoint["value"] = fields[1]
            self.data.append(self.datapoint.copy())
        return self.data


class Metric_cputemp(Metric):
    """Collector for CPU temperature rows reported by ``ipmitool sdr``."""

    def __init__(self):
        """Run ipmitool (via sudo) once and keep the CPU temperature rows."""
        Metric.__init__(self)
        pipe = os.popen('sudo ipmitool sdr | grep Temp | grep CPU')
        try:
            self.result = pipe.readlines()
        finally:
            pipe.close()
        self.data = []

    def metric_collect(self):
        """Convert the captured rows into datapoints.

        Rows are split on '|'.  The first word of the first column names the
        sensor; the value is the second space-separated token of the second
        column (the first token is empty when the column starts with a space).
        # NOTE(review): assumes rows look like "CPU1 Temp | 45 degrees C | ok"
        # -- confirm against real ipmitool output.

        Returns:
            ``self.data``, extended with one datapoint per parsable row.
        """
        prefix = self.config['env']['Metric_prefix']
        for line in self.result:
            columns = re.split(r"\|", line.strip())
            if len(columns) < 2:
                continue
            reading = re.split(" ", columns[1])
            if len(reading) < 2:
                continue
            sensor = re.split(" ", columns[0])[0]
            self.datapoint["metric"] = prefix + "." + sensor + '.Temp'
            self.datapoint["value"] = reading[1]
            self.data.append(self.datapoint.copy())
        print(self.data)
        return self.data


class Metric_net(Metric):
    """Collector for per-interface counters from /proc/net/dev."""

    def __init__(self):
        """Read /proc/net/dev once; the lines are parsed in metric_collect."""
        Metric.__init__(self)
        # NOTE(review): capitalisation is taken verbatim from the published
        # listing and may be an artefact -- confirm the expected metric names.
        self.demensions = ['receivedbytes', 'receivedpackets', 'receivederrs', 'receiveddrop',
                           'transmitbytes', 'TransmitPackets', 'Transmiterrs', 'Transmitdrop']
        pipe = os.popen("cat /proc/net/dev")
        try:
            self.result = pipe.readlines()
        finally:
            pipe.close()
        self.data = []

    def metric_collect(self):
        """Convert the captured output into datapoints.

        The two header lines (which contain no ':') and the loopback
        interface ``lo`` are skipped.  For every other interface the first
        four receive counters and the first four transmit counters are
        reported, in the order given by ``self.demensions``.

        Returns:
            ``self.data``, extended with eight datapoints per interface.
        """
        prefix = self.config['env']['Metric_prefix']
        for line in self.result:
            # Split on the colon instead of on whitespace so the interface
            # name is found whether or not it is preceded/followed by spaces.
            name, colon, counters = line.partition(':')
            if not colon:
                continue
            interface = name.strip()
            if interface == 'lo':
                continue
            fields = counters.split()
            if len(fields) < 12:
                continue
            # Eight receive columns come first, so the transmit counters
            # start at index 8 of the per-interface fields.
            values = fields[0:4] + fields[8:12]
            for dimension, value in zip(self.demensions, values):
                self.datapoint["metric"] = prefix + "." + interface + "." + dimension
                self.datapoint["value"] = value
                self.data.append(self.datapoint.copy())
        print(self.data)
        return self.data



class Collect(Loadconfig):
    """Glue between the metric collectors and the Kafka producer."""

    def __init__(self):
        """Load the configuration and remember the destination topic."""
        Loadconfig.__init__(self)
        # Key spelling follows the sample configuration file shown with
        # this script (Output -> Kafka -> topic).
        self.topic = self.config['Output']['Kafka']['topic']

    def connect(self):
        """Create the Kafka wrapper and open its connection.

        Returns:
            The ``(client, producer)`` tuple from ``Kafka.kafka_connect``.
        """
        self.kafkaclient = Kafka()
        self.kc, self.producer = self.kafkaclient.kafka_connect()
        return self.kc, self.producer

    def send(self, kafka_producer, msg):
        """Serialise ``msg`` to JSON and publish it to the configured topic.

        ``connect`` must have been called first, because the Kafka wrapper
        it creates is used here.
        """
        self.kafkaclient.kafka_produce(kafka_producer, self.topic, json.dumps(msg))

    def close(self, kafka_producer, kafka_client):
        """Stop the producer and close the client; ``None`` values are skipped."""
        if kafka_producer is not None:
            kafka_producer.stop()
        if kafka_client is not None:
            kafka_client.close()

    def run(self, kafka_client, kafka_producer, metric):
        """Collect one metric family, publish it, then close the connection.

        Args:
            kafka_client: Client returned by ``connect``.
            kafka_producer: Producer returned by ``connect``.
            metric: Any object exposing ``metric_collect()``.
        """
        data = metric.metric_collect()
        self.send(kafka_producer, data)
        # NOTE(review): the caller passes the same client/producer to several
        # threads, so the first run() to finish closes the connection the
        # others are still using -- consider closing once after all threads
        # have completed.
        self.close(kafka_producer, kafka_client)


collector = Collect()

metrics = [Metric_uptime(), Metric_loadavg(), Metric_memory(), Metric_cputemp(), Metric_net()]

# Establish the TCP connection once; every worker thread shares it.
# NOTE(review): Collect.run closes the producer and client after sending, so
# whichever thread finishes first closes the shared connection -- confirm
# this is intended.
kafka_client, kafka_producer = collector.connect()

# One thread per metric family.
for metric in metrics:
    t = threading.Thread(target=collector.run, args=(kafka_client, kafka_producer, metric))
    t.start()


This article is from the "Linux operation and Maintenance" blog; reprinting is not permitted.

Collecting system metrics with Python and sending them to Kafka

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.