I put this script together as a quick favor for colleagues in our company's R&D department who needed to process Nginx logs. It works well enough and its performance is acceptable: a 1 GB file can be processed in under a minute. It is shared here for your reference.
#!/usr/bin/env python
import fileinput
import os
import re
# Directory containing the nginx access logs to analyze.
logfile = r"/data/log20140904/nginx/192.168.1.50"

# Regex fragments, one per log field. Each fragment starts with "?P<name>"
# and is wrapped in parentheses when the full pattern is assembled below,
# turning it into a named group. The pattern is compiled with re.VERBOSE, so
# the newlines inside the triple-quoted fragments are ignored by the regex
# engine.
ip = r"?P<ip>[\d.]*"
timep = r"""?P<time>\[[^\[\]]*\]"""
request = r"""?P<request>\"
[^\"]*\"
"""
status = r"?P<status>\d+"
bodybytessent = r"?P<bodybytessent>\d+"
refer = r"""?P<refer>
[^\"]*\"[^\"]*\"
"""
# Must be \S (a run of non-space characters): with \s the group could only
# match whitespace and the assembled pattern would reject any line whose
# user-agent field starts right after the single separating space.
# As a consequence this group captures only the first space-delimited token
# of the user agent; the remainder (up to the next double quote) lands in
# the "forwardr" group.
useragent = r"""?P<useragent>
\S*
"""
forwardr = r"""?P<forwardr>
[^\"]*
"""
# The two fragments below are defined but not included in the compiled
# pattern.
request_time = r"""?P<request_time>
[^\"]*
"""
response_time = r"""?P<response_time>
[^\"]*
"""

# Full line pattern. Under re.VERBOSE unescaped whitespace is ignored, so
# every literal space between fields is written as "\ ". The pattern
# requires the two fields after the client address to be literally "- -".
p = re.compile(
    r"(%s)\ -\ -\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)"
    % (ip, timep, request, status, bodybytessent, refer, useragent, forwardr),
    re.VERBOSE,
)
# Read every log file in a directory and write the extracted fields to
# per-file .txt reports.
def _extract_fields(groups):
    """Return ``(client_ip, timestamp, path, referrer)`` for one matched line.

    ``groups`` is the tuple returned by ``match.groups()``; indexes 0, 1, 2
    and 5 are read (address, bracketed time, quoted request, quoted referrer).
    """
    client_ip = groups[0]
    # "[04/Sep/2014:10:00:00 +0800]" -> "04/Sep/2014:10:00:00"
    timestamp = groups[1].split()[0][1:]
    # '"GET /path HTTP/1.1"' -> "/path". A malformed request such as '"-"'
    # has no second token; keep the raw field instead of raising IndexError.
    request_parts = groups[2].split()
    path = request_parts[1] if len(request_parts) > 1 else groups[2]
    # Keep whatever follows the first ":" ('"http://host/x"' -> '//host/x"').
    # A referrer without a colon (e.g. '"-"') is kept unchanged instead of
    # raising IndexError.
    _scheme, colon, remainder = groups[5].partition(':')
    referrer = remainder if colon else groups[5]
    return client_ip, timestamp, path, referrer


def logfiledir(filedir, pattern=None):
    """Parse each regular file in ``filedir`` and write a ``.txt`` summary.

    For every directory entry the output path ``<filedir>/<stem>.txt`` is
    printed, where ``<stem>`` is the entry name up to its first ".". Regular
    files are then read line by line:

    * lines starting with ``10.168.`` are printed and not written;
    * lines matching ``pattern`` produce one output line
      ``"<ip> <time> <path> <referrer>"``;
    * all other lines are ignored.

    Args:
        filedir: Directory containing the log files.
        pattern: Compiled regex whose groups 0, 1, 2 and 5 hold the client
            address, bracketed timestamp, quoted request and quoted
            referrer. Defaults to the module-level pattern ``p``.

    NOTE: entries sharing a stem (``a.log`` and ``a.log.1``) map to the same
    output file, so the later one overwrites the earlier one's report.
    NOTE(review): under Python 3 the files are decoded with the default
    encoding; confirm the logs cannot contain undecodable bytes.
    """
    if pattern is None:
        pattern = p

    for name in os.listdir(filedir):
        source_path = os.path.join(filedir, name)
        backfile = os.path.join(filedir, name.split('.')[0] + '.' + 'txt')
        print(backfile)

        if not os.path.isfile(source_path):
            continue
        if backfile == source_path:
            # The input already is "<stem>.txt" (for instance a report left
            # by a previous run). Opening it for writing would truncate it
            # before it is read, so leave it alone.
            continue

        # Iterate the file lazily: logs can be gigabytes in size.
        with open(source_path, 'r') as src, open(backfile, 'w') as dst:
            for line in src:
                if line.startswith('10.168.'):
                    # Internal address: echo the line, do not record it.
                    print(line.rstrip('\n'))
                    continue
                match = pattern.match(line)
                if match is None:
                    continue
                dst.write('%s %s %s %s\n' % _extract_fields(match.groups()))
# When run as a script, process the configured log directory.
if __name__ == "__main__":
    logfiledir(logfile)
Parsing and processing Nginx logs with Python