Sharing a Python script that analyzes Apache access logs (Python, Apache)
#!/usr/bin/env python
# coding=utf-8
# ----------------------------------------------------
# Name:      Apache Log Analysis script
# Purpose:   This script is only used to analyze Apache access logs
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:  2013-5-4
# Copyright: (c) LEO 2013
# ----------------------------------------------------
#
# Usage: script.py ApacheLogFilePath [Number]
#
# NOTE: every print below is written as ``print(<single string>)`` so the
# file parses identically under Python 2.6+ and Python 3 without needing a
# ``from __future__`` import.

import sys
import time


class displayFormat(object):
    """Formatting and printing helpers for the report."""

    # One row of the report: IP, traffic, request count, request share,
    # then one column per tracked HTTP status code (11 fields in total).
    formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

    def format_size(self, size):
        """Return *size* (a byte count) as a string with a unit suffix.

        The value is floor-divided by the largest unit that fits, so
        1536 bytes is rendered as '1K' (integer division, as in the
        original Python 2 script).
        """
        KB = 1024
        MB = 1048576
        GB = 1073741824
        TB = 1099511627776
        if size >= TB:
            return str(size // TB) + 'T'
        if size >= GB:
            return str(size // GB) + 'G'
        if size >= MB:
            return str(size // MB) + 'M'
        if size >= KB:
            return str(size // KB) + 'K'
        return str(size) + 'B'

    def transverse_line(self):
        """Print a horizontal separator row."""
        print(self.formatstring % ('-' * 15, '-' * 10, '-' * 12, '-' * 12,
                                   '-' * 10, '-' * 10, '-' * 10, '-' * 10,
                                   '-' * 10, '-' * 10, '-' * 10))

    def head(self):
        """Print the column header row."""
        print(self.formatstring % ('IP', 'Traffic', 'Times', 'Times%',
                                   '200', '404', '500', '403',
                                   '302', '304', '503'))

    def error_print(self):
        """Print the usage message and exit with status 1."""
        print('Usage: ' + sys.argv[0] + ' ApacheLogFilePath [Number]')
        print('')
        sys.exit(1)

    def execut_time(self):
        """Print the processor time consumed by the script so far."""
        # time.clock() was removed in Python 3.8; process_time() is its
        # replacement. Fall back to clock() only on interpreters that
        # predate process_time (Python 2).
        cpu_clock = getattr(time, 'process_time', None) or time.clock
        print('Script Execution Time: %.3f second' % cpu_clock())
        print('')


class hostInfo(object):
    """Per-host counters: one per tracked status code, plus the request
    count ('times') and the number of bytes sent ('SIZE')."""

    host_info = ['200', '404', '500', '403', '302', '304', '503',
                 'times', 'SIZE']

    def __init__(self, host):
        # *host* is accepted for interface compatibility; the counters
        # themselves are keyed only by the names in host_info.
        self.host = dict.fromkeys(self.host_info, 0)

    def increment(self, status_times_size, is_size):
        """Update one counter.

        When *is_size* is true, *status_times_size* is a byte count that
        is added to 'SIZE'. Otherwise it names the counter ('times' or a
        status code) to increase by one.
        """
        if is_size:
            self.host['SIZE'] += status_times_size
        else:
            self.host[status_times_size] += 1

    def get_value(self, value):
        """Return the current value of the counter named *value*."""
        return self.host[value]


class fileAnalysis(object):
    """Parses an access log and aggregates statistics per remote host."""

    # Status codes that get their own column and their own total_<code>
    # attribute.
    _status_codes = ('200', '404', '500', '403', '302', '304', '503')

    def __init__(self):
        """Start with an empty report and all totals at zero."""
        self.report_dict = {}
        self.total_request_times = 0
        self.total_traffic = 0
        self.total_200 = 0
        self.total_404 = 0
        self.total_500 = 0
        self.total_403 = 0
        self.total_302 = 0
        self.total_304 = 0
        self.total_503 = 0

    def split_eachline_todict(self, line):
        """Split one log line on whitespace and return its key fields.

        The first token is taken as the remote host, the second-to-last
        as the status and the last as the bytes sent.

        Raises IndexError when the line has no tokens at all.
        """
        split_line = line.split()
        return {
            'remote_host': split_line[0],
            'status': split_line[-2],
            'bytes_sent': split_line[-1],
        }

    def generate_log_report(self, logfile):
        """Aggregate every line of *logfile* into per-host hostInfo objects.

        *logfile* is any iterable of lines. Lines that cannot be split
        into fields are skipped. Returns self.report_dict, which maps
        host -> hostInfo.
        """
        for line in logfile:
            try:
                line_dict = self.split_eachline_todict(line)
            except (ValueError, IndexError):
                continue
            host = line_dict['remote_host']
            status = line_dict['status']

            host_info_obj = self.report_dict.get(host)
            if host_info_obj is None:
                host_info_obj = hostInfo(host)
                self.report_dict[host] = host_info_obj

            host_info_obj.increment('times', False)
            # Only the tracked status codes are counted; membership is
            # tested against the codes alone so that a token that happens
            # to equal 'times' or 'SIZE' cannot corrupt those counters.
            if status in self._status_codes:
                host_info_obj.increment(status, False)
            try:
                bytes_sent = int(line_dict['bytes_sent'])
            except ValueError:
                # A non-numeric size field (e.g. '-') counts as 0 bytes.
                bytes_sent = 0
            host_info_obj.increment(bytes_sent, True)
        return self.report_dict

    def return_sorted_list(self, true_dict):
        """Add *true_dict*'s counters to the totals and return sorted rows.

        *true_dict* maps host -> hostInfo and is left unmodified. The
        result is a list of (host, counters_dict) tuples ordered by
        request count, then traffic, both descending.
        """
        plain = {}
        for host_key, host_value in true_dict.items():
            times = host_value.get_value('times')
            size = host_value.get_value('SIZE')
            self.total_request_times += times
            self.total_traffic += size

            row = {'times': times, 'SIZE': size}
            # Looping over the code list keeps every code in sync; the
            # hand-written version forgot to accumulate total_403.
            for code in self._status_codes:
                count = host_value.get_value(code)
                row[code] = count
                attr = 'total_' + code
                setattr(self, attr, getattr(self, attr) + count)
            plain[host_key] = row

        return sorted(plain.items(),
                      key=lambda t: (t[1]['times'], t[1]['SIZE']),
                      reverse=True)


class Main(object):
    """Command-line entry point."""

    def main(self):
        """Parse sys.argv, analyze the log file and print the report.

        argv[1] is the log file path; the optional argv[2] limits the
        report to that many hosts (0 or omitted means all hosts).
        """
        display_format = displayFormat()
        arg_length = len(sys.argv)
        if arg_length not in (2, 3):
            display_format.error_print()

        try:
            infile = open(sys.argv[1], 'r')
        except IOError as e:
            print(str(e))
            display_format.error_print()

        fileAnalysis_obj = fileAnalysis()
        # The with-block closes the file on every path, including the
        # SystemExit raised by error_print() for an invalid number.
        with infile:
            lines = 0
            if arg_length == 3:
                try:
                    lines = int(sys.argv[2])
                    if lines < 0:
                        raise ValueError(sys.argv[2])
                except ValueError:
                    print('Please Enter A Valid Number !!')
                    display_format.error_print()
            not_true_dict = fileAnalysis_obj.generate_log_report(infile)

        log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
        total_ip = len(log_report)
        if lines:
            log_report = log_report[0:lines]

        print('')
        total_traffic = display_format.format_size(
            fileAnalysis_obj.total_traffic)
        total_request_times = fileAnalysis_obj.total_request_times
        print('Total IP: %s   Total Traffic: %s   Total Request Times: %d'
              % (total_ip, total_traffic, total_request_times))
        print('')
        display_format.head()
        display_format.transverse_line()

        for host, counters in log_report:
            times = counters['times']
            times_percent = float(times) / float(total_request_times) * 100
            print(display_format.formatstring % (
                host,
                display_format.format_size(counters['SIZE']),
                times,
                # Fixed-point formatting; slicing str(float) misreports
                # values that render in scientific notation.
                '%.2f' % times_percent,
                counters['200'], counters['404'], counters['500'],
                counters['403'], counters['302'], counters['304'],
                counters['503']))

        # The totals row is only meaningful when every host is listed.
        if (not lines) or lines >= total_ip:
            display_format.transverse_line()
            print(display_format.formatstring % (
                total_ip, total_traffic, total_request_times, '100%',
                fileAnalysis_obj.total_200, fileAnalysis_obj.total_404,
                fileAnalysis_obj.total_500, fileAnalysis_obj.total_403,
                fileAnalysis_obj.total_302, fileAnalysis_obj.total_304,
                fileAnalysis_obj.total_503))

        display_format.execut_time()


if __name__ == '__main__':
    main_obj = Main()
    main_obj.main()