#!/usr/bin/env python
# coding=utf-8
#------------------------------------------------------
# Name:       Nginx log analysis script
# Purpose:    This script is only used to parse Nginx access logs
# Version:    1.0
# Author:     leo
# Created:    2013-05-07
# Modified:   2013-05-07
# Copyright:  (c) LEO 2013
#------------------------------------------------------
"""Summarise an Nginx access log per client IP.

Usage: script NginxLogFilePath [Number]

For every remote host the script counts requests, bytes sent and a fixed
set of HTTP status codes, then prints the hosts ordered by request count
(ties broken by traffic), optionally limited to the first ``Number`` rows.

The code only uses single-argument ``print(...)`` calls and ``//`` integer
division so that it behaves the same on Python 2 and Python 3.
"""
import io
import sys
import time


class DisplayFormat(object):
    """Formatting and printing helpers for the report."""

    # One conversion per report column: IP, traffic, times, times%, then
    # the seven status-code counters.
    formatstring = '%-15s%-10s%-12s%8s%10s%10s%10s%10s%10s%10s%10s'

    # (bytes per unit, suffix), largest unit first.
    _size_units = (
        (1099511627776, 'T'),
        (1073741824, 'G'),
        (1048576, 'M'),
        (1024, 'K'),
    )

    def format_size(self, size):
        """Return *size* (a byte count) as a whole number plus unit suffix.

        The value is truncated, not rounded: 2047 bytes is ``'1K'``.
        Anything below 1024 (including negative values) is reported in
        bytes with a ``'B'`` suffix.
        """
        for factor, suffix in self._size_units:
            if size >= factor:
                return str(size // factor) + suffix
        return str(size) + 'B'

    def format_percent(self, part, whole):
        """Return ``part / whole`` as a percentage string of at most 5 chars.

        Fixed-point formatting is used on purpose: slicing ``str(float)``
        shows the mantissa of values that Python renders in scientific
        notation (``1.234e-05`` would be displayed as ``1.234``).
        A zero *whole* yields ``'0.000'`` instead of raising.
        """
        if not whole:
            return '0.000'
        return ('%.3f' % (float(part) / float(whole) * 100))[0:5]

    def transverse_line(self):
        """Print the horizontal separator line."""
        print(self.formatstring % (
            '-' * 15, '-' * 10, '-' * 12, '-' * 12, '-' * 10, '-' * 10,
            '-' * 10, '-' * 10, '-' * 10, '-' * 10, '-' * 10))

    def head(self):
        """Print the column header row."""
        print(self.formatstring % (
            'IP', 'Traffic', 'Times', 'Times%',
            '200', '404', '500', '403', '302', '304', '503'))

    def error_print(self):
        """Print the usage message and exit with status 1."""
        print('')
        print('Usage: ' + sys.argv[0] + ' NginxLogFilePath [Number]')
        print('')
        sys.exit(1)

    def execut_time(self):
        """Print the CPU time consumed by the script so far."""
        # time.clock() was removed in Python 3.8; it is only looked up
        # when process_time() does not exist (Python 2).
        clock = getattr(time, 'process_time', None) or time.clock
        print('')
        print('Script Execution Time: %.3f second' % clock())
        print('')


class Hostinfo(object):
    """Per-host counters: one per tracked status code, plus times and size."""

    # Status codes that get their own counter / report column.
    status_codes = ('200', '404', '500', '302', '304', '503', '403')
    # Every key of the counter dictionary.
    host_info = list(status_codes) + ['times', 'size']

    def __init__(self, host):
        """Create zeroed counters.

        *host* is accepted for interface compatibility but not stored;
        ``self.host`` holds the counter dictionary, as in the original code.
        """
        self.host = dict.fromkeys(self.host_info, 0)

    def increment(self, status_times_size, is_size):
        """Update one counter.

        * ``'times'``  -> request counter is incremented by 1
        * *is_size* true -> *status_times_size* is a byte count added to
          the ``'size'`` counter
        * otherwise -> *status_times_size* is a status-code key whose
          counter is incremented by 1 (``KeyError`` if it is not tracked)
        """
        if status_times_size == 'times':
            self.host['times'] += 1
        elif is_size:
            self.host['size'] += status_times_size
        else:
            self.host[status_times_size] += 1

    def get_value(self, value):
        """Return the counter stored under key *value*."""
        return self.host[value]


class Fileanalysis(object):
    """Parse access-log lines and aggregate them per remote host."""

    def __init__(self):
        """Start with an empty report and all grand totals at zero."""
        self.report_dict = {}
        self.total_request_times = 0
        self.total_traffic = 0
        self.total_200 = 0
        self.total_404 = 0
        self.total_500 = 0
        self.total_403 = 0
        self.total_302 = 0
        self.total_304 = 0
        self.total_503 = 0

    def split_eachline_todict(self, line):
        """Split one log line on whitespace and pick the fields of interest.

        Fields 0, 8 and 9 are used as remote host, status and bytes sent
        (presumably the nginx "combined" log layout; verify against the
        server's log_format). Raises ``IndexError`` when the line has
        fewer than ten whitespace-separated fields.
        """
        fields = line.split()
        return {
            'remote_host': fields[0],
            'status': fields[8],
            'bytes_sent': fields[9],
        }

    def generate_log_report(self, logfile):
        """Aggregate every line of *logfile* into ``self.report_dict``.

        *logfile* is any iterable of text lines. Lines that cannot be
        split into the expected fields are skipped. Returns the dict
        mapping remote host -> ``Hostinfo``.
        """
        for line in logfile:
            try:
                line_dict = self.split_eachline_todict(line)
            except (ValueError, IndexError):
                continue  # malformed or truncated line
            host = line_dict['remote_host']
            status = line_dict['status']

            host_info_obj = self.report_dict.get(host)
            if host_info_obj is None:
                host_info_obj = Hostinfo(host)
                self.report_dict[host] = host_info_obj

            host_info_obj.increment('times', False)
            # Compare against the status codes only: a bogus status field
            # reading "times" or "size" must not touch those counters.
            if status in Hostinfo.status_codes:
                host_info_obj.increment(status, False)
            try:
                bytes_sent = int(line_dict['bytes_sent'])
            except ValueError:
                bytes_sent = 0  # e.g. "-" when nothing was sent
            host_info_obj.increment(bytes_sent, True)
        return self.report_dict

    def return_sorted_list(self, true_dict):
        """Return ``[(host, counters_dict), ...]`` sorted for display.

        *true_dict* maps host -> ``Hostinfo`` (the result of
        ``generate_log_report``); it is left unmodified. Each host's
        counters are copied into a plain dict keyed like
        ``Hostinfo.host_info``. The list is ordered by request count,
        then traffic, both descending.

        Side effect: every host's counters are added to the ``total_*``
        attributes, so calling this twice on the same instance counts
        everything twice.
        """
        plain = {}
        for host_key, host_value in true_dict.items():
            stats = {}
            for field in Hostinfo.host_info:
                stats[field] = host_value.get_value(field)
            plain[host_key] = stats

            self.total_request_times += stats['times']
            self.total_traffic += stats['size']
            # Looping over the code list guarantees that no status total
            # is forgotten (403 used to be left out).
            for code in Hostinfo.status_codes:
                attr = 'total_' + code
                setattr(self, attr, getattr(self, attr) + stats[code])

        return sorted(
            plain.items(),
            key=lambda item: (item[1]['times'], item[1]['size']),
            reverse=True)


class Main(object):
    """Command-line driver."""

    def main(self):
        """Parse ``sys.argv``, analyse the log file and print the report.

        Exits with status 1 (through ``DisplayFormat.error_print``) on a
        wrong argument count, an invalid row count or an unreadable file.
        """
        display_format = DisplayFormat()
        arg_length = len(sys.argv)
        if arg_length not in (2, 3):
            display_format.error_print()
        infile_name = sys.argv[1]

        # Validate the optional row limit before touching the file.
        lines = 0
        if arg_length == 3:
            try:
                lines = int(sys.argv[2])
                if lines < 0:
                    raise ValueError(sys.argv[2])
            except ValueError:
                print('')
                print('Please enter a valid number!!')
                display_format.error_print()

        fileanalysis_obj = Fileanalysis()
        try:
            # errors='replace' keeps stray non-decodable bytes in a log
            # line from aborting the whole run.
            with io.open(infile_name, 'r', errors='replace') as infile:
                not_true_dict = fileanalysis_obj.generate_log_report(infile)
        except IOError as e:
            print('')
            print(e)
            display_format.error_print()

        log_report = fileanalysis_obj.return_sorted_list(not_true_dict)
        total_ip = len(log_report)
        if lines:
            log_report = log_report[0:lines]

        print('')
        total_traffic = display_format.format_size(
            fileanalysis_obj.total_traffic)
        total_request_times = fileanalysis_obj.total_request_times
        print('Total IP: %s   Total Traffic: %s   Total Request Times: %d'
              % (total_ip, total_traffic, total_request_times))
        print('')
        display_format.head()
        display_format.transverse_line()

        for host, stats in log_report:
            times = stats['times']
            times_percent = display_format.format_percent(
                times, total_request_times)
            print(display_format.formatstring % (
                host,
                display_format.format_size(stats['size']),
                times, times_percent,
                stats['200'], stats['404'], stats['500'], stats['403'],
                stats['302'], stats['304'], stats['503']))

        # The totals row only makes sense when every host is listed.
        if (not lines) or lines >= total_ip:
            display_format.transverse_line()
            print(display_format.formatstring % (
                total_ip, total_traffic, total_request_times, '100%',
                fileanalysis_obj.total_200,
                fileanalysis_obj.total_404,
                fileanalysis_obj.total_500,
                fileanalysis_obj.total_403,
                fileanalysis_obj.total_302,
                fileanalysis_obj.total_304,
                fileanalysis_obj.total_503))
        display_format.execut_time()


if __name__ == '__main__':
    main_obj = Main()
    main_obj.main()