Python parses nginx logs and pushes to Open-falcon

Source: Internet
Author: User
Tags: stdin

#!/usr/bin/python
# -*- coding: utf-8 -*-
# No explanation; the code follows directly.
"""nginx access-log analysis for Open-Falcon.

Each run reads the part of an nginx access log that was written since the
previous run, aggregates per-URL request counts, status-class counts and the
average request time, adds a few host-level nginx gauges (TIME_WAIT and
ESTABLISHED connections, resident memory of the nginx processes) and pushes
everything to the local falcon-agent HTTP endpoint.
"""
import time
import datetime
import sys
import os
import os.path
import re
import json
import socket
import subprocess

try:
    import requests
except ImportError:
    # Keep the parsing half of the module importable without ``requests``;
    # pushFalcon() reports the missing dependency when it is actually needed.
    requests = None

# Endpoint name reported to Open-Falcon (hard-coded in the original script).
FALCON_ENDPOINT = "vm172-31-32-13.ksc.com"
# Push URL of the local falcon-agent.
FALCON_PUSH_URL = "http://127.0.0.1:1988/v1/push"
# Reporting interval, in seconds, declared for every pushed metric.
FALCON_STEP = 60

# (substring looked for in a statistics key, metric name pushed to Falcon)
_COUNT_METRICS = (
    ('wpsad.php_count', 'wpsad.php_count'),
    ('wpsad.php_2xx', 'wpsad.php_2xx_count'),
    ('wpsad.php_4xx', 'wpsad.php_4xx_count'),
    ('wpsad.php_5xx', 'wpsad.php_5xx_count'),
)


class nginxlog(object):
    """Incremental reader and aggregator for one nginx access log."""

    def __init__(self, log_file, interface_list, seek_file):
        """Store the configuration.

        log_file: path of the nginx access log.
        interface_list: request URLs to produce statistics for.
        seek_file: path of the temporary file that remembers the read cursor.
        """
        self.log_file = log_file
        self.interface_list = interface_list
        self.seek_file = seek_file

    def jsonformat(self, python_data):
        """Return python_data serialized as indented JSON text."""
        return json.dumps(python_data, indent=2)

    def hostname(self):
        """Return the fully qualified name of this host."""
        # The original also resolved the host IP but never used it; that
        # lookup could only add a failure mode, so it is not performed.
        return socket.getfqdn(socket.gethostname())

    def writeseek(self, seek):
        """Write the read cursor to the temporary file.

        Line 1 is a human-readable timestamp, line 2 the byte offset.
        """
        with open(self.seek_file, 'w') as f:
            f.write(time.strftime("%Y-%m-%d %H:%M:%S",
                                  time.localtime(time.time())) + '\n')
            f.write(str(seek) + "\n")

    def _read_seek(self):
        """Return the stored cursor, or 0 if it is missing or unreadable."""
        try:
            with open(self.seek_file) as f:
                return max(0, int(f.readlines()[1].strip()))
        except (IOError, OSError, IndexError, ValueError):
            # First run, deleted file or damaged content: start from the top.
            return 0

    def logread(self):
        """Return the log text written since the previous call.

        On the first run (or when the temporary file was deleted) the whole
        log is returned.  When the log is now shorter than the stored cursor
        it is assumed to have been rotated and is read from the beginning.
        The new cursor is persisted before returning.
        """
        seek_old = self._read_seek()
        # Binary mode keeps tell()/seek() offsets and the read length in
        # plain bytes.
        with open(self.log_file, 'rb') as f:
            f.seek(0, 2)  # whence 2: end of file
            seek_now = f.tell()
            if seek_now < seek_old:
                seek_old = 0  # log rotation
            f.seek(seek_old, 0)  # whence 0: offset from the start
            chunk = f.read(seek_now - seek_old)
        self.writeseek(seek_now)
        return chunk.decode('utf-8', 'replace')

    @staticmethod
    def _parse_line(line):
        """Split one access-log line into (request_url, status, request_time).

        The fields are taken by whitespace position, so this must be adapted
        if the nginx log format changes.  Expected format, e.g.:

        00.64.40.7 - - [05/Mar/2018:09:15:19 +0800] "GET /wpsad.php HTTP/1.0"
        200 0 "-" "Mozilla/5.0 ..." "114.255.44.143" 0.369

        Returns None for blank, truncated or otherwise unparsable lines.
        """
        fields = line.split()
        # Index 8 (status) must exist and the last field must be a separate,
        # later field.
        if len(fields) < 10:
            return None
        try:
            request_time = float(fields[-1].strip('"'))
        except ValueError:
            return None
        return fields[6], fields[8].strip('"'), request_time

    @staticmethod
    def _summarize(interface_item, records, time_ns, time_stamp, host_name):
        """Build the five statistics dicts for one URL.

        records is an iterable of _parse_line() tuples.  The result holds, in
        this order, the entries keyed <url>_count, <url>_avg_request_time,
        <url>_2xx, <url>_4xx and <url>_5xx.
        """
        hits = [rec for rec in records if rec[0] == interface_item]

        def status_class(prefix):
            return sum(1 for rec in hits if rec[1].startswith(prefix))

        avg_request_time = 0
        if hits:
            avg_request_time = sum(rec[2] for rec in hits) / len(hits)
        values = (
            ('_count', len(hits)),
            ('_avg_request_time', avg_request_time),
            ('_2xx', status_class('2')),
            ('_4xx', status_class('4')),
            ('_5xx', status_class('5')),
        )
        return [
            {
                'ns': time_ns,
                'clock': time_stamp,
                'host': host_name,
                'key': interface_item + suffix,
                'value': value,
            }
            for suffix, value in values
        ]

    def logstatistics(self):
        """Aggregate the newly written log lines for every configured URL.

        Returns a flat list with five dicts per URL (see _summarize), each
        with the keys 'ns', 'clock', 'host', 'key' and 'value'.
        """
        time_ns = datetime.datetime.now().microsecond
        time_stamp = int(time.time())
        host_name = self.hostname()
        # Read once, before the URL loop: logread() advances the cursor, so
        # calling it per URL would hand every URL after the first an empty
        # chunk.
        parsed = (self._parse_line(line)
                  for line in self.logread().split('\n'))
        records = [rec for rec in parsed if rec is not None]
        result_list = []
        for interface_item in self.interface_list:
            result_list.extend(self._summarize(
                interface_item, records, time_ns, time_stamp, host_name))
        return result_list


def _gauge(metric, value, ts, tags=""):
    """Return one Open-Falcon GAUGE item for the configured endpoint."""
    return {
        "endpoint": FALCON_ENDPOINT,
        "metric": metric,
        "timestamp": ts,
        "step": FALCON_STEP,
        "value": value,
        "counterType": "GAUGE",
        "tags": tags,
    }


def _build_url_metrics(return_data, ts):
    """Convert logstatistics() output into Falcon items.

    Count items come first, in input order, each tagged "url=<metric>".  The
    response-time item follows, and only if the input contains an
    average-request-time entry.
    """
    payload = []
    response_time = None
    for item in return_data:
        for needle, metric in _COUNT_METRICS:
            if needle in item['key']:
                payload.append(
                    _gauge(metric, item['value'], ts, "url=" + metric))
        if 'wpsad.php_avg_request_time' in item['key']:
            response_time = item['value']
    if response_time is not None:
        payload.append(_gauge('wpsad_response_time', response_time, ts))
    return payload


def _count_connections(state, port=80):
    """Count TCP connections on local port `port` in the given netstat state.

    Returns 0 when netstat is unavailable or reports an error.
    """
    try:
        proc = subprocess.Popen(["netstat", "-ant"],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() drains both pipes together; reading stdout and then
        # stderr one after the other can block when a pipe buffer fills up.
        out, err = proc.communicate()
    except OSError:
        return 0
    if err or proc.returncode != 0:
        return 0
    suffix = ":%d" % port
    count = 0
    for line in out.splitlines():
        fields = line.split()
        # Columns: proto, recv-q, send-q, local address, foreign address,
        # state.  Matching the local port exactly avoids counting unrelated
        # lines that merely contain "80" somewhere.
        if (len(fields) >= 6 and fields[3].endswith(suffix)
                and fields[5] == state):
            count += 1
    return count


def _nginx_rss_mb():
    """Return the summed VmRSS of all nginx processes, in whole MB.

    Returns 0 when pidof is unavailable or no nginx process is running.
    """
    try:
        proc = subprocess.Popen(["pidof", "nginx"],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        out, _ = proc.communicate()
    except OSError:
        return 0
    total_kb = 0
    for pid in out.split():
        try:
            with open(os.path.join("/proc/", pid, "status")) as f:
                for line in f:
                    if line.startswith("VmRSS"):
                        total_kb += int(line.split()[1])
        except (IOError, OSError):
            # The process exited between pidof and the read; skip it.
            continue
    return total_kb // 1024


def pushFalcon(return_data):
    """Push the URL statistics plus host-level nginx gauges to falcon-agent.

    return_data: the list produced by nginxlog.logstatistics().
    Returns the response object of the HTTP POST.
    Raises RuntimeError when the `requests` package is not installed.
    """
    if requests is None:
        raise RuntimeError("the 'requests' package is required to push "
                           "metrics to falcon-agent")
    ts = int(time.time())
    # nginx request status data and response time
    payload = _build_url_metrics(return_data, ts)
    # nginx concurrent connection counts
    payload.append(
        _gauge("nginx_timewait_num", _count_connections("TIME_WAIT"), ts))
    payload.append(
        _gauge("nginx_estab_num", _count_connections("ESTABLISHED"), ts))
    # memory occupied by the nginx processes
    payload.append(_gauge("nginx_mem", _nginx_rss_mb(), ts))
    # The timeout keeps an unresponsive agent from hanging the run forever.
    return requests.post(FALCON_PUSH_URL, data=json.dumps(payload),
                         timeout=10)


def main():
    """Analyse the ads access log and push the result to Open-Falcon."""
    # list of URLs to parse
    interface_list = ['/wpsad.php']
    # log file location
    log_file = "/data/logs/nginx/ads.access.log"
    # temporary file location
    seek_file = "/data/logs/nginx/ads_log_check_seek.tmp"
    nginx_log = nginxlog(log_file, interface_list, seek_file)
    return_data = nginx_log.logstatistics()
    pushFalcon(return_data)


if __name__ == '__main__':
    main()


Python parses nginx logs and pushes to Open-falcon

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months of usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.