#!/usr/bin/env python
# coding=utf-8
#------------------------------------------------------
# Name:      Apache Log Parsing script
# Purpose:   This script is only used to analyze Apache access log
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:  2013-5-4
# Copyright: (c) LEO 2013
#------------------------------------------------------
"""Summarize an Apache access log per client address.

Usage: script.py ApacheLogFilePath [Number]

For every remote host the report shows total traffic, request count, share
of all requests and the number of 200/404/500/403/302/304/503 responses.
Hosts are ordered by request count, then traffic, both descending.  The
optional Number limits the output to the top N hosts.

The code only uses constructs that behave the same on Python 2.7 and
Python 3 (single-argument ``print(...)``, ``//`` division, ``io.open``).
"""
import io
import sys
import time


# Console formatting helpers.
class DisplayFormat(object):
    """Format values and print the fixed-width report lines."""

    # (threshold, suffix) pairs, largest first: the first threshold that the
    # size reaches decides the unit.
    _UNITS = (
        (1099511627776, 'T'),
        (1073741824, 'G'),
        (1048576, 'M'),
        (1024, 'K'),
    )

    # Field widths used by ``formatstring``; the rule line is derived from
    # them so it can never be wider than its column.
    _COLUMN_WIDTHS = (15, 10, 12, 8, 10, 10, 10, 10, 10, 10, 10)

    # One space between fields keeps neighbours apart even when a value
    # fills its whole field (e.g. a 15-character IPv4 address).
    formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

    def format_size(self, size):
        """Return *size* (an integer byte count) as a short string.

        The value is floor-divided by the largest unit it reaches and
        suffixed with ``T``, ``G``, ``M`` or ``K``; anything below 1024 is
        returned unchanged with a ``B`` suffix.
        """
        for threshold, suffix in self._UNITS:
            if size >= threshold:
                # Floor division keeps the whole-number output the script
                # produced under Python 2 integer division.
                return str(size // threshold) + suffix
        return str(size) + 'B'

    def transverse_line(self):
        """Print a horizontal rule matching the report columns."""
        print(self.formatstring % tuple('-' * w for w in self._COLUMN_WIDTHS))

    def head(self):
        """Print the column headers."""
        print(self.formatstring % ('IP', 'Traffic', 'Times', 'Times%',
                                   '200', '404', '500', '403',
                                   '302', '304', '503'))

    def error_print(self):
        """Print the usage message and exit with status 1."""
        print('')
        print('Usage: ' + sys.argv[0] + ' ApacheLogFilePath [Number]')
        print('')
        sys.exit(1)

    def execut_time(self):
        """Print the processor time consumed by the script so far."""
        # time.clock() was removed in Python 3.8; process_time() is its
        # replacement.  Fall back to clock() only where process_time()
        # does not exist (Python 2).
        timer = getattr(time, 'process_time', None)
        if timer is None:
            timer = time.clock
        print('')
        print('Script Execution Time: %.3f second' % timer())
        print('')


# Per-host counters.
class Hostinfo(object):
    """Counters collected for a single remote host."""

    # Status codes that get their own report column.
    status_codes = ('200', '404', '500', '302', '304', '503', '403')

    # Every counter key kept per host: the status codes plus the request
    # count ('times') and the byte total ('size').
    host_info = ['200', '404', '500', '302', '304', '503', '403',
                 'times', 'size']

    def __init__(self, host):
        """Create zeroed counters.

        *host* is accepted for interface compatibility only; the counters
        are stored in ``self.host`` and the address itself is kept by the
        caller as the dictionary key.
        """
        self.host = dict.fromkeys(self.host_info, 0)

    def increment(self, status_times_size, is_size):
        """Update one counter.

        * ``increment('times', False)`` adds 1 to the request count.
        * ``increment(n, True)`` adds the integer *n* to the byte total.
        * ``increment(code, False)`` adds 1 to the counter for *code*.
        """
        if status_times_size == 'times':
            self.host['times'] += 1
        elif is_size:
            self.host['size'] += status_times_size
        else:
            self.host[status_times_size] += 1

    def get_value(self, value):
        """Return the counter stored under the key *value*."""
        return self.host[value]


# Log parsing and aggregation.
class Fileanalysis(object):
    """Parse log lines into per-host counters and overall totals."""

    def __init__(self):
        """Start with an empty report and all totals at zero."""
        self.report_dict = {}
        self.total_request_times = 0
        self.total_traffic = 0
        self.total_200 = 0
        self.total_404 = 0
        self.total_500 = 0
        self.total_403 = 0
        self.total_302 = 0
        self.total_304 = 0
        self.total_503 = 0

    def split_eachline_todict(self, line):
        """Split one log line on whitespace and pick out three fields.

        The first token is taken as the remote host, the second-to-last as
        the status code and the last as the byte count.
        NOTE(review): this matches lines that end with ``status bytes``
        (common log format); lines with trailing referer/user-agent fields
        would need different indices -- confirm against the logs in use.

        Raises IndexError when the line has no tokens.
        """
        split_line = line.split()
        return {
            'remote_host': split_line[0],
            'status': split_line[-2],
            'bytes_sent': split_line[-1],
        }

    def generate_log_report(self, logfile):
        """Aggregate every line of *logfile* (any iterable of strings).

        Returns ``self.report_dict``, mapping host -> ``Hostinfo``.
        Lines that cannot be split into the expected fields are skipped;
        a byte count that is not an integer (e.g. ``-``) counts as 0.
        """
        for line in logfile:
            try:
                line_dict = self.split_eachline_todict(line)
            except IndexError:
                # Blank or truncated line: nothing to count.
                continue
            host = line_dict['remote_host']
            status = line_dict['status']

            host_info_obj = self.report_dict.get(host)
            if host_info_obj is None:
                host_info_obj = Hostinfo(host)
                self.report_dict[host] = host_info_obj

            host_info_obj.increment('times', False)
            # Compare against the status codes only: 'times' and 'size'
            # are counter keys too, and a stray token equal to one of
            # them must not be mistaken for a status.
            if status in Hostinfo.status_codes:
                host_info_obj.increment(status, False)

            try:
                bytes_sent = int(line_dict['bytes_sent'])
            except ValueError:
                bytes_sent = 0
            host_info_obj.increment(bytes_sent, True)
        return self.report_dict

    def return_sorted_list(self, true_dict):
        """Accumulate the totals and return the hosts in report order.

        Each ``Hostinfo`` value in *true_dict* is replaced in place by a
        plain dict of its counters (so the method is meant to be called
        once per report), the ``total_*`` attributes are increased
        accordingly, and a list of ``(host, counters)`` pairs sorted by
        request count, then traffic, both descending, is returned.
        """
        for host_key in list(true_dict):
            host_value = true_dict[host_key]
            row = dict((key, host_value.get_value(key))
                       for key in Hostinfo.host_info)
            true_dict[host_key] = row

            self.total_request_times += row['times']
            self.total_traffic += row['size']
            # Driving the totals from the status list guarantees that no
            # column is left out (the 403 total used to be skipped).
            for code in Hostinfo.status_codes:
                attr = 'total_' + code
                setattr(self, attr, getattr(self, attr) + row[code])

        return sorted(true_dict.items(),
                      key=lambda item: (item[1]['times'], item[1]['size']),
                      reverse=True)


class Main(object):
    """Command-line entry point."""

    def _parse_args(self, display_format):
        """Return ``(log_path, lines)`` from ``sys.argv``.

        ``lines`` is 0 when no limit was given.  A wrong argument count or
        a limit that is not a non-negative integer prints the usage text
        and exits.
        """
        arg_length = len(sys.argv)
        if arg_length not in (2, 3):
            display_format.error_print()

        lines = 0
        if arg_length == 3:
            try:
                lines = int(sys.argv[2])
                if lines < 0:
                    raise ValueError(sys.argv[2])
            except ValueError:
                print('')
                print('Please Enter A Valid Number!!')
                display_format.error_print()
        return sys.argv[1], lines

    def _print_report(self, display_format, analysis, log_report,
                      total_ip, lines):
        """Print the summary line, the per-host table and the totals row."""
        total_traffic = display_format.format_size(analysis.total_traffic)
        total_request_times = analysis.total_request_times

        print('')
        print('Total IP: %s   Total Traffic: %s   Total Request Times: %d'
              % (total_ip, total_traffic, total_request_times))
        print('')
        display_format.head()
        display_format.transverse_line()

        for host, row in log_report:
            times = row['times']
            # Every listed host has at least one request, so the total
            # is never zero here.
            times_percent = 100.0 * times / total_request_times
            print(display_format.formatstring % (
                host, display_format.format_size(row['size']),
                times, '%.2f' % times_percent,
                row['200'], row['404'], row['500'], row['403'],
                row['302'], row['304'], row['503']))

        # The totals row belongs under the table only when every host is
        # listed, which includes a limit larger than the host count.
        if not lines or lines >= total_ip:
            display_format.transverse_line()
            print(display_format.formatstring % (
                total_ip, total_traffic, total_request_times, '100%',
                analysis.total_200, analysis.total_404,
                analysis.total_500, analysis.total_403,
                analysis.total_302, analysis.total_304,
                analysis.total_503))

    def main(self):
        """Parse the arguments, analyze the log and print the report."""
        display_format = DisplayFormat()
        infile_name, lines = self._parse_args(display_format)

        analysis = Fileanalysis()
        try:
            # errors='replace' keeps a stray non-UTF-8 byte in a request
            # line from aborting the run; only ASCII fields are read.
            with io.open(infile_name, 'r', encoding='utf-8',
                         errors='replace') as infile:
                not_true_dict = analysis.generate_log_report(infile)
        except IOError as e:
            print('')
            print(e)
            display_format.error_print()

        log_report = analysis.return_sorted_list(not_true_dict)
        total_ip = len(log_report)
        if lines:
            log_report = log_report[:lines]

        self._print_report(display_format, analysis, log_report,
                           total_ip, lines)
        display_format.execut_time()


if __name__ == '__main__':
    main_obj = Main()
    main_obj.main()