Sharing a Python script that analyzes Apache access logs (python, apache)

Source: Internet
Author: User
Tags: apache access log, apache log

Sharing a Python script that analyzes Apache access logs (python, apache)

#!/usr/bin/env python
# coding=utf-8
# ------------------------------------------------------------
# Name:      Apache Log Analysis script
# Purpose:   This script is only used to analyze Apache access logs
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:  2013-5-4
# Copyright: (c) LEO 2013
# ------------------------------------------------------------
"""Summarise an Apache access log per client IP.

Usage: script.py ApacheLogFilePath [Number]

For every remote host the report shows the transferred bytes, the number
of requests, the share of all requests and a count for each tracked HTTP
status code. Hosts are listed busiest first; the optional ``Number``
limits how many hosts are printed.

Every ``print`` call takes exactly one argument so the script behaves the
same under Python 3 and under the Python 2 it was originally written for.
"""
import io
import sys
import time

# Status codes that get their own report column, in column order.
STATUS_CODES = ('200', '404', '500', '403', '302', '304', '503')


# This class is used to print the report in a fixed format.
class displayFormat(object):
    """Console formatting helpers for the report."""

    # One specifier per report column: IP, traffic, request count,
    # request share, then one column per entry of STATUS_CODES (11 total).
    formatstring = ('%-15s %-10s %-12s %8s '
                    '%10s %10s %10s %10s %10s %10s %10s')

    def format_size(self, size):
        """Return ``size`` (bytes) as a whole number with a unit suffix.

        The value is truncated, not rounded: 1535 bytes is reported as
        ``'1K'``. Anything below 1024 (including 0) is reported in bytes.
        """
        KB = 1024
        MB = 1048576
        GB = 1073741824
        TB = 1099511627776
        # Largest unit first so the first match is the right one.
        for limit, suffix in ((TB, 'T'), (GB, 'G'), (MB, 'M'), (KB, 'K')):
            if size >= limit:
                # Floor division keeps the integer output on Python 3 too.
                return str(size // limit) + suffix
        return str(size) + 'B'

    def transverse_line(self):
        """Print the horizontal rule; dash runs match the column widths."""
        print(self.formatstring % (
            ('-' * 15, '-' * 10, '-' * 12, '-' * 8)
            + ('-' * 10,) * len(STATUS_CODES)))

    def head(self):
        """Print the column headers."""
        print(self.formatstring % (
            ('IP', 'Traffic', 'Times', 'Times%') + STATUS_CODES))

    def error_print(self):
        """Print the usage message and exit with status 1."""
        print('Usage: ' + sys.argv[0] + ' ApacheLogFilePath [Number]')
        print('')
        sys.exit(1)

    def execut_time(self):
        """Print the CPU time the script has consumed so far."""
        # time.clock() was removed in Python 3.8; process_time() is its
        # replacement. Fall back to clock() only where it is missing.
        timer = getattr(time, 'process_time', None) or time.clock
        print("Script Execution Time: %.3f second" % timer())
        print('')


# This class holds the counters collected for one host.
class hostInfo(object):
    """Per-host counters: one per tracked status code, plus totals."""

    # Keys of the counter dictionary: status codes, request count, bytes.
    host_info = list(STATUS_CODES) + ['times', 'SIZE']

    def __init__(self, host):
        # ``host`` is accepted for interface compatibility but unused; the
        # ``host`` attribute is the counter dictionary, all zero at start.
        self.host = {}.fromkeys(self.host_info, 0)

    def increment(self, status_times_size, is_size):
        """Update one counter.

        ``'times'`` adds one request. When ``is_size`` is true,
        ``status_times_size`` is a byte count added to ``'SIZE'``.
        Otherwise it is a status-code key whose counter is bumped by one.
        """
        if status_times_size == 'times':
            self.host['times'] += 1
        elif is_size:
            self.host['SIZE'] = self.host['SIZE'] + status_times_size
        else:
            self.host[status_times_size] += 1

    def get_value(self, value):
        """Return the counter stored under key ``value``."""
        return self.host[value]


# This class is used to analyze the log file.
class fileAnalysis(object):
    """Collect per-host statistics and overall totals from log lines."""

    def __init__(self):
        """Start with an empty report and all totals at zero."""
        self.report_dict = {}
        self.total_request_times = 0
        self.total_traffic = 0
        self.total_200 = 0
        self.total_404 = 0
        self.total_500 = 0
        self.total_403 = 0
        self.total_302 = 0
        self.total_304 = 0
        self.total_503 = 0

    def split_eachline_todict(self, line):
        """Split one log line on whitespace and pick out three fields.

        The first token is taken as the remote host and the last two as
        status and bytes sent, so the line must END with those two fields.
        Raises IndexError when the line has fewer than two tokens.
        """
        split_line = line.split()
        split_dict = {
            'remote_host': split_line[0],
            'status': split_line[-2],
            'bytes_sent': split_line[-1],
        }
        return split_dict

    def generate_log_report(self, logfile):
        """Read every line of ``logfile`` and update the per-host counters.

        ``logfile`` is any iterable of text lines. Lines that cannot be
        split into the expected fields are skipped. Returns the mapping of
        host -> hostInfo, which is also kept on ``self.report_dict``.
        """
        for line in logfile:
            try:
                line_dict = self.split_eachline_todict(line)
                host = line_dict['remote_host']
                status = line_dict['status']
            except (ValueError, IndexError):
                continue

            if host not in self.report_dict:
                host_info_obj = hostInfo(host)
                self.report_dict[host] = host_info_obj
            else:
                host_info_obj = self.report_dict[host]

            host_info_obj.increment('times', False)
            # Only real status codes count here; testing against host_info
            # would also let the bookkeeping keys 'times'/'SIZE' through.
            if status in STATUS_CODES:
                host_info_obj.increment(status, False)
            try:
                bytes_sent = int(line_dict['bytes_sent'])
            except ValueError:
                # A non-numeric size field (e.g. '-') counts as zero bytes.
                bytes_sent = 0
            host_info_obj.increment(bytes_sent, True)
        return self.report_dict

    def return_sorted_list(self, true_dict):
        """Compute the totals and return the hosts sorted busiest first.

        ``true_dict`` maps host -> hostInfo. The result is a list of
        ``(host, counters)`` tuples where ``counters`` is a plain dict
        keyed like ``hostInfo.host_info``, ordered by request count and
        then traffic, both descending. ``true_dict`` is not modified, and
        the totals are recomputed from scratch on every call.
        """
        self.total_request_times = 0
        self.total_traffic = 0
        for code in STATUS_CODES:
            setattr(self, 'total_' + code, 0)

        rows = {}
        for host_key, host_value in true_dict.items():
            row = dict((key, host_value.get_value(key))
                       for key in hostInfo.host_info)
            rows[host_key] = row
            self.total_request_times += row['times']
            self.total_traffic += row['SIZE']
            # Looping over every tracked code keeps all status totals in
            # step (total_403 used to be left out of the accumulation).
            for code in STATUS_CODES:
                attr = 'total_' + code
                setattr(self, attr, getattr(self, attr) + row[code])

        sorted_list = sorted(
            rows.items(),
            key=lambda t: (t[1]['times'], t[1]['SIZE']),
            reverse=True)
        return sorted_list


class Main(object):
    """Command-line entry point."""

    def main(self):
        """Parse ``sys.argv``, analyse the log file and print the report."""
        display_format = displayFormat()
        arg_length = len(sys.argv)
        if arg_length not in (2, 3):
            display_format.error_print()

        infile_name = sys.argv[1]
        lines = 0  # 0 means "print every host"
        if arg_length == 3:
            try:
                lines = int(sys.argv[2])
                if lines < 0:
                    raise ValueError(sys.argv[2])
            except ValueError:
                print('')
                print("Please Enter A Valid Number !!")
                display_format.error_print()

        fileAnalysis_obj = fileAnalysis()
        try:
            # errors='replace' keeps undecodable bytes in a request line
            # from aborting the run; ``with`` guarantees the file is closed.
            with io.open(infile_name, 'r', errors='replace') as infile:
                not_true_dict = fileAnalysis_obj.generate_log_report(infile)
        except IOError as e:
            print('')
            print(e)
            display_format.error_print()

        log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
        total_ip = len(log_report)
        if lines:
            log_report = log_report[0:lines]

        print('')
        total_traffic = display_format.format_size(
            fileAnalysis_obj.total_traffic)
        total_request_times = fileAnalysis_obj.total_request_times
        print('Total IP: %s  Total Traffic: %s  Total Request Times: %d'
              % (total_ip, total_traffic, total_request_times))
        print('')
        display_format.head()
        display_format.transverse_line()

        for host, counters in log_report:
            times = counters['times']
            times_percent = (float(times)
                             / float(total_request_times)) * 100
            print(display_format.formatstring % (
                (host,
                 display_format.format_size(counters['SIZE']),
                 times,
                 # Fixed-point formatting; slicing str(float) mangles
                 # values that print in exponent notation.
                 '%.2f' % times_percent)
                + tuple(counters[code] for code in STATUS_CODES)))

        # The totals row only makes sense when every host is listed.
        if (not lines) or lines >= total_ip:
            display_format.transverse_line()
            print(display_format.formatstring % (
                (total_ip, total_traffic, total_request_times, '100%')
                + tuple(getattr(fileAnalysis_obj, 'total_' + code)
                        for code in STATUS_CODES)))
        display_format.execut_time()


if __name__ == '__main__':
    main_obj = Main()
    main_obj.main()

Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of this page confuses you, please write us an email; we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.