Sharing a Python script for analyzing Apache access logs (Python)

Source: Internet
Author: User
Tags print print apache access log apache log
#!/usr/bin/env python3
# coding=utf-8
# ------------------------------------------------------
# Name:      Apache log parsing script
# Purpose:   This script is only used to analyze an Apache access log
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:  2013-5-4
# Copyright: (c) LEO 2013
# ------------------------------------------------------
import sys
import time


class DisplayFormat(object):
    """Formatting and printing helpers for the report table."""

    # One column per field: IP, traffic, times, times%, then the seven
    # status-code counters.  Columns are separated by a single space so that
    # a full-width value (e.g. a 15-character IPv4 address) cannot run into
    # the next column.
    formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

    # Widths of the columns above; used to draw the separator line.
    _COLUMN_WIDTHS = (15, 10, 12, 8, 10, 10, 10, 10, 10, 10, 10)

    # (divisor, suffix) pairs, largest unit first.
    _UNITS = (
        (1099511627776, 'T'),
        (1073741824, 'G'),
        (1048576, 'M'),
        (1024, 'K'),
    )

    def format_size(self, size):
        """Return *size* (a byte count) as a string with a B/K/M/G/T suffix.

        The value is truncated to a whole number of the chosen unit, e.g.
        ``format_size(1536) == '1K'``.
        """
        for divisor, suffix in self._UNITS:
            if size >= divisor:
                # Floor division keeps the integer result the report expects.
                return str(size // divisor) + suffix
        return str(size) + 'B'

    def transverse_line(self):
        """Print a horizontal separator line matching the table columns."""
        print(self.formatstring
              % tuple('-' * width for width in self._COLUMN_WIDTHS))

    def head(self):
        """Print the table header."""
        print(self.formatstring % ('IP', 'Traffic', 'Times', 'Times%',
                                   '200', '404', '500', '403',
                                   '302', '304', '503'))

    def error_print(self):
        """Print the usage message and terminate with exit status 1."""
        print()
        print('Usage: ' + sys.argv[0] + ' ApacheLogFilePath [Number]')
        print()
        sys.exit(1)

    def execut_time(self):
        """Print the CPU time consumed by the script so far."""
        print()
        print('Script Execution Time: %.3f second' % time.process_time())
        print()


class HostInfo(object):
    """Per-client counters: one per tracked status code, plus times and size."""

    host_info = ['200', '404', '500', '302', '304', '503', '403',
                 'times', 'size']

    def __init__(self, host):
        # ``host`` is accepted for interface compatibility only; callers key
        # their own dictionary by host name.  ``self.host`` holds the counters.
        self.host = dict.fromkeys(self.host_info, 0)

    def increment(self, status_times_size, is_size):
        """Update one counter.

        * ``status_times_size == 'times'``: add 1 to the request counter.
        * ``is_size`` true: ``status_times_size`` is a byte count that is
          added to the ``'size'`` counter.
        * otherwise: ``status_times_size`` is a status-code key whose
          counter is incremented by 1.
        """
        if status_times_size == 'times':
            self.host['times'] += 1
        elif is_size:
            self.host['size'] += status_times_size
        else:
            self.host[status_times_size] += 1

    def get_value(self, value):
        """Return the counter stored under the key *value*."""
        return self.host[value]


class FileAnalysis(object):
    """Parse an access log and aggregate per-host and overall statistics."""

    # Status codes for which an overall ``total_<code>`` attribute is kept.
    _STATUS_CODES = ('200', '404', '500', '403', '302', '304', '503')

    def __init__(self):
        """Start with an empty report and all totals at zero."""
        self.report_dict = {}
        self._reset_totals()

    def _reset_totals(self):
        """Set every overall counter back to zero."""
        self.total_request_times = 0
        self.total_traffic = 0
        for code in self._STATUS_CODES:
            setattr(self, 'total_' + code, 0)

    def split_eachline_todict(self, line):
        """Split one log line into its host, status and byte-count fields.

        The first whitespace-separated token is the remote host; the last
        two tokens are taken as status and bytes sent.
        NOTE(review): this matches lines that end with "status bytes"
        (Common Log Format); lines with trailing referer/user-agent fields
        would be misread -- confirm the expected log format.

        Raises:
            IndexError: if the line has no tokens.
        """
        split_line = line.split()
        return {
            'remote_host': split_line[0],
            'status': split_line[-2],
            'bytes_sent': split_line[-1],
        }

    def generate_log_report(self, logfile):
        """Read *logfile* (an iterable of lines) and count requests per host.

        Lines that cannot be split into fields are skipped.  A byte count
        that is not an integer (e.g. ``-``) is counted as 0.

        Returns:
            dict mapping host name to its ``HostInfo`` object.
        """
        for line in logfile:
            try:
                line_dict = self.split_eachline_todict(line)
            except (ValueError, IndexError):
                continue
            host = line_dict['remote_host']
            status = line_dict['status']

            host_info_obj = self.report_dict.get(host)
            if host_info_obj is None:
                host_info_obj = HostInfo(host)
                self.report_dict[host] = host_info_obj

            host_info_obj.increment('times', False)
            # Only the status codes listed in HostInfo.host_info are tracked.
            if status in host_info_obj.host_info:
                host_info_obj.increment(status, False)

            try:
                bytes_sent = int(line_dict['bytes_sent'])
            except ValueError:
                bytes_sent = 0
            host_info_obj.increment(bytes_sent, True)
        return self.report_dict

    def return_sorted_list(self, true_dict):
        """Compute the overall totals and return the hosts sorted for display.

        Args:
            true_dict: mapping of host name to ``HostInfo`` as returned by
                ``generate_log_report``.  It is not modified.

        Returns:
            list of ``(host, counters)`` tuples, where ``counters`` is a
            plain dict with the status-code keys plus ``'times'`` and
            ``'size'``, sorted by (times, size) in descending order.

        The ``total_*`` attributes are recomputed from scratch on each call,
        so calling this method more than once does not double-count.
        """
        self._reset_totals()
        plain = {}
        for host_key, host_value in true_dict.items():
            counters = {code: host_value.get_value(code)
                        for code in self._STATUS_CODES}
            counters['times'] = host_value.get_value('times')
            counters['size'] = host_value.get_value('size')
            plain[host_key] = counters

            self.total_request_times += counters['times']
            self.total_traffic += counters['size']
            # Every tracked status code contributes to its total, 403 included.
            for code in self._STATUS_CODES:
                attr = 'total_' + code
                setattr(self, attr, getattr(self, attr) + counters[code])

        return sorted(plain.items(),
                      key=lambda t: (t[1]['times'], t[1]['size']),
                      reverse=True)


class Main(object):
    """Command-line entry point."""

    def main(self, argv=None):
        """Analyze the log named on the command line and print the report.

        Args:
            argv: argument vector ``[prog, logfile, [number]]``; defaults to
                ``sys.argv``.  ``number`` limits the output to that many
                hosts (0 or omitted means all hosts).

        Exits with status 1 (via ``DisplayFormat.error_print``) on a wrong
        argument count, an invalid number, or an unreadable file.
        """
        if argv is None:
            argv = sys.argv
        display_format = DisplayFormat()

        if len(argv) not in (2, 3):
            display_format.error_print()

        lines = 0
        if len(argv) == 3:
            try:
                lines = int(argv[2])
                if lines < 0:
                    # A negative count would silently slice from the end.
                    raise ValueError(argv[2])
            except ValueError:
                print()
                print('Please Enter A Valid Number !!')
                display_format.error_print()

        file_analysis_obj = FileAnalysis()
        try:
            # errors='replace' keeps a stray undecodable byte in the log from
            # aborting the whole run; ``with`` closes the file on every path.
            with open(argv[1], 'r', errors='replace') as infile:
                not_true_dict = file_analysis_obj.generate_log_report(infile)
        except IOError as e:
            print()
            print(e)
            display_format.error_print()

        log_report = file_analysis_obj.return_sorted_list(not_true_dict)
        total_ip = len(log_report)
        if lines:
            log_report = log_report[:lines]

        print()
        total_traffic = display_format.format_size(
            file_analysis_obj.total_traffic)
        total_request_times = file_analysis_obj.total_request_times
        print('Total IP: %s   Total Traffic: %s   Total Request Times: %d'
              % (total_ip, total_traffic, total_request_times))
        print()
        display_format.head()
        display_format.transverse_line()

        for host, counters in log_report:
            times = counters['times']
            # total_request_times >= times >= 1 for every listed host.
            times_percent = float(times) / float(total_request_times) * 100
            print(display_format.formatstring % (
                host,
                display_format.format_size(counters['size']),
                times,
                '%.2f' % times_percent,
                counters['200'], counters['404'], counters['500'],
                counters['403'], counters['302'], counters['304'],
                counters['503'],
            ))

        # The totals row only makes sense when every host is listed.
        if not lines or lines >= total_ip:
            display_format.transverse_line()
            print(display_format.formatstring % (
                total_ip, total_traffic, total_request_times, '100%',
                file_analysis_obj.total_200, file_analysis_obj.total_404,
                file_analysis_obj.total_500, file_analysis_obj.total_403,
                file_analysis_obj.total_302, file_analysis_obj.total_304,
                file_analysis_obj.total_503,
            ))
        display_format.execut_time()


if __name__ == '__main__':
    main_obj = Main()
    main_obj.main()

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.