This article describes how to use a Python script to count how many times each IP address appears in a log file. Note that the script only works for log files in which every line begins with the client IP address. The supported format and the full script are shown below.
Applicable log formats:
106.45.185.214 - - [06/Aug/2014:07:38:59 +0800] "GET / HTTP/1.0" 200 10 "-" "-"
171.104.119.22 - - [06/Aug/2014:08:55:01 +0800] "GET / HTTP/1.0" 200 10 "-" "-"
27.31.238.242 - - [06/Aug/2014:09:43:19 +0800] "GET / HTTP/1.0" 200 10 "-" "-"
218.65.202.131 - - [06/Aug/2014:10:33:59 +0800] "GET / HTTP/1.0" 200 10 "-" "-"
The lines above are nginx access-log entries. This program only applies to this kind of format, in which each line begins with the IP address, followed by a space and the remaining fields.
Example code:
import io
import sys


class Log:
    """Count how often each leading IP address appears in a log file.

    Every non-blank line of the log is expected to start with the client
    IP address followed by whitespace (as in the nginx access-log lines
    this script was written for).

    Args:
        filename: Path of the log file to read.
        dic: Mutable mapping of IP -> hit count.  It is updated in place,
            so counts already present in it are added to.
        count: How many of the most frequent IPs to print.  Anything
            accepted by ``int()`` works (the script passes the raw
            command-line string).
    """

    def __init__(self, filename, dic, count):
        self.filename = filename
        self.dic = dic
        self.count = count

    def parse(self):
        """Tally the first field of every line and print the top entries.

        Results are printed as ``IP:<address> Total Times: <hits>``, most
        frequent first; ties keep the order in which the IPs were first
        seen.  ``self.dic`` is updated with the tallies.

        Raises:
            IOError/OSError: if the log file cannot be opened.
            ValueError: if ``self.count`` is not convertible to ``int``.
        """
        # io.open behaves the same on Python 2 and 3 (the old ``file()``
        # builtin no longer exists); undecodable bytes are replaced so a
        # stray non-UTF-8 request line cannot abort the whole run.
        with io.open(self.filename, encoding="utf-8", errors="replace") as handle:
            for line in handle:
                fields = line.split(None, 1)
                if not fields:
                    # Blank line: there is no IP to count.
                    continue
                ip = fields[0]
                # Use the instance's mapping, not a module-level global.
                self.dic[ip] = self.dic.get(ip, 0) + 1

        ranked = sorted(self.dic.items(), key=lambda item: item[1], reverse=True)
        limit = int(self.count)
        if limit >= 0:
            # A negative limit never stopped the original loop, i.e. it
            # printed everything; only non-negative limits truncate.
            ranked = ranked[:limit]
        for ip, hits in ranked:
            print("IP:%s Total Times: %s" % (ip, hits))


if __name__ == "__main__":
    if len(sys.argv) < 3:
        print('usage:log.py log.log toptimes\nexample log.py log.log 20\ncode by iswin')
        # Non-zero status so callers can detect the usage error.
        sys.exit(1)
    dic = {}
    log = Log(sys.argv[1], dic, sys.argv[2])
    log.parse()