My boss asked for daily visit counts for a particular URL, but the system was never built to collect such statistics. Since we use Nginx for load balancing and it keeps access logs, I decided to derive the numbers by analyzing those logs. The approach works in the end, even though the efficiency may not be great and the logic not fully polished. I am a beginner — please be kind; this post is meant for sharing and discussion.
The script content is:
#!/usr/bin/python
# _*_ coding: utf-8 _*_
"""Per-day URL visit statistics derived from an Nginx access log.

Logic:
  1. Copy the target log to a temp file (never analyse the live log in place).
  2. Scan the temp file and, for every date found, create one access-log
     file for that date.
  3. For each line, splice the matched date into the access-URL regex.
  4. Append every matching line to the result file for its date; finally
     count the lines of each per-date file and write the totals to
     count_save.txt.

Result: daily URL visit counts, plus the detailed access lines for each day.
"""
import os
import random
import re
import shutil
import sys
import time

# Directory where the per-date result files and the summary are written.
SAVE_RESULT = "/users/liuli/desktop/access"
# The Nginx access log to analyse.
LOG_FILE_SOURCE = "/users/liuli/desktop/test.log"

# Nginx log_format, recorded alongside the results for reference.
# NOTE(review): reconstructed from a garbled source — confirm against nginx.conf.
LOG_FORMAT = ('$remote_addr - $remote_user [$time_local] "$request" '
              '$status $body_bytes_sent "$http_referer" '
              '"$http_user_agent" $http_x_forwarded_for "$upstream_addr" '
              '"$upstream_status" "$upstream_response_time" "$request_time"')

# Leading "ip - - " part of an access line.
FORWORD_REGREX = r"^([0-9]{1,3}\.){3}[0-9]{1,3}\ -\ -\ "
# Date fragment, e.g. "[07/Jun/2016" (bracket, day, month abbreviation, year).
DAY_REGREX = r"(\[)([0-3])([0-9])(/)(\w{3})(/)(\d{4})"
# Anything between the date and the request method.
CONN_REGREX = r"([\s\S]*)"
# The request being counted: GET/POST against the "Tserv" URL.
# NOTE(review): reconstructed — confirm the exact URL path being counted.
COUNT_REGREX = r"((GET)|(POST))(\ )(/)(Tserv)(\ )(HTTP)([\s\S]*)"


def copyfiles(sourcefile, targetfile):
    """Copy *sourcefile* to *targetfile* and (re)create the result directory.

    Quirk preserved from the original: resetting SAVE_RESULT to an empty
    directory happens here, as a side effect of the copy.
    """
    with open(sourcefile, "rb") as src, open(targetfile, "wb") as dst:
        dst.write(src.read())
    # Start every run from an empty result directory.
    if os.path.exists(SAVE_RESULT):
        shutil.rmtree(SAVE_RESULT)
    os.makedirs(SAVE_RESULT)


def count_lines(path):
    """Return the number of lines in *path* as a string; exit on I/O error."""
    try:
        with open(path, "r") as fp:
            return str(len(fp.readlines()))
    except IOError as e:
        print(e)
        sys.exit(1)


def iscontain(regrex, strings):
    """Return the text matched by *regrex* inside *strings*; exit on no match.

    AttributeError is raised by ``.group()`` when ``search`` returns None.
    """
    try:
        pattern = re.compile(regrex, re.S)
        return pattern.search(strings).group()
    except AttributeError as e:
        print(e)
        sys.exit(1)


def get_today_month3():
    """Return the three-letter abbreviation of the current month, e.g. 'Jun'."""
    return str(time.strftime('%B', time.localtime()))[0:3]


def get_today_day():
    """Return today's day of month as a zero-padded string, e.g. '07'."""
    return str(time.strftime('%d', time.localtime()))


def write_to_file(path, strings):
    """Append *strings* to *path*, creating the file if it does not exist."""
    mode = "a" if os.path.isfile(path) else "w"
    try:
        with open(path, mode) as f:
            f.write(strings)
    except IOError as e:
        print(e)


def main():
    """Run the analysis: split the log per date, then summarise line counts."""
    # Random suffix so concurrent runs do not clash on the temp file.
    tmp_log = "/tmp/nginx_counter_tmp_" + str(random.randrange(10086, 1008611)) + ".log"
    # Never analyse the source log in place — work on a copy.
    copyfiles(LOG_FILE_SOURCE, tmp_log)

    # Keep a copy of the expected Nginx log format next to the results.
    write_to_file(SAVE_RESULT + "/log_format.txt", LOG_FORMAT)

    day_pattern = re.compile(DAY_REGREX, re.S)
    with open(tmp_log, "r") as f:
        for line in f:
            day_match = day_pattern.search(line)
            if day_match is None:
                continue  # line carries no recognizable date
            day = day_match.group()  # e.g. "[07/Jun/2016"
            # Prefix a backslash so the date's leading "[" is treated
            # literally when spliced into the full-line regex.
            full_pattern = re.compile(
                FORWORD_REGREX + "\\" + day + CONN_REGREX + COUNT_REGREX, re.S)
            hit = full_pattern.search(line)
            if hit is None:
                continue  # not a request we are counting
            # "[07/Jun/2016" -> "07_Jun_2016.txt"
            out_name = (day.replace("[", "").replace("/", "_")
                           .replace("]", "").replace(":", "_") + ".txt")
            write_to_file(SAVE_RESULT + "/" + out_name, str(hit.group()))

    os.remove(tmp_log)

    # Summary: one "<file> lines <count>" row per result file ("traffic").
    for name in os.listdir(SAVE_RESULT):
        write_to_file(SAVE_RESULT + "/count_save.txt",
                      name + " lines " + count_lines(SAVE_RESULT + "/" + name) + "\n")


if __name__ == "__main__":
    main()
Summary: using Python to generate per-day visit statistics from an Nginx access log.