Below is a Python implementation of incremental log scanning (picking up only content appended since the previous scan). It should be a useful reference — I hope it helps.
The example code is as follows:
import os
import pickle
import re
import time


class LogIncScaner(object):
    """Incrementally scan a log file for regex matches.

    The scanner persists its last read position and the file's mtime in a
    small pickled "seek file", so repeated calls to scan() only examine the
    bytes appended since the previous call.
    """

    def __init__(self, log_file, reg_ex, seek_file='/tmp/log-inc-scan.seek.temp'):
        """
        :param log_file: path of the log file to watch
        :param reg_ex: regular-expression string applied to newly appended text
        :param seek_file: path used to persist the scan state between runs
        """
        self.log_file = log_file
        self.reg_ex = reg_ex
        self.seek_file = seek_file

    def scan(self):
        """Scan content appended since the last scan.

        :return: list of re.Match objects found in the new content, or an
                 empty list when the file has not changed.

        NOTE(review): the mtime guard has second-level granularity — content
        appended within the same second as the previous scan may be skipped.
        """
        seek = self._get_seek()
        file_mtime = os.path.getmtime(self.log_file)
        if file_mtime <= seek['time']:
            # Not modified since the last scan; just refresh the stored mtime.
            print('file mtime not change since last scan')
            seek['time'] = file_mtime
            self._dump_seek(seek)
            return []
        file_size = os.path.getsize(self.log_file)
        if file_size <= seek['position']:
            # No new bytes (the file may even have been truncated/rotated).
            print('file size not change since last scan')
            seek['position'] = file_size
            self._dump_seek(seek)
            return []
        print('file changed, start to scan')
        with open(self.log_file, 'rb') as logfd:
            logfd.seek(seek['position'], os.SEEK_SET)
            # Decode so the str pattern can be applied (a str pattern cannot
            # be matched against bytes); replace undecodable bytes instead of
            # crashing on a partially written log line.
            data = logfd.read().decode('utf-8', errors='replace')
            matchs = list(re.finditer(self.reg_ex, data))
            seek = {'time': time.time(), 'position': logfd.tell()}
        print(seek)
        self._dump_seek(seek)
        return matchs

    def _get_seek(self):
        """Load the persisted scan state, or return a fresh one."""
        seek = {'time': time.time(), 'position': 0}
        if os.path.exists(self.seek_file):
            with open(self.seek_file, 'rb') as seekfd:
                try:
                    seek = pickle.load(seekfd)
                except (pickle.PickleError, EOFError):
                    # Corrupt or empty seek file: fall back to a fresh state.
                    pass
        print(seek)
        return seek

    def _dump_seek(self, seek):
        """Persist the scan state dict with pickle."""
        with open(self.seek_file, 'wb') as seekfd:
            pickle.dump(seek, seekfd)

    def reset_seek(self):
        """Reset the state so the next scan() re-reads the whole file.

        Bug fix: the original stored time.time() here, which made the very
        next scan() skip the file (its mtime was <= the stored timestamp);
        storing 0 guarantees the first scan after a reset sees everything.
        """
        self._dump_seek({'time': 0, 'position': 0})


if __name__ == "__main__":
    scaner = LogIncScaner('/var/log/messages',
                          r'(\w+ \d+ \d+:\d+:\d+).+?exception')
    scaner.reset_seek()
    while True:
        for match in scaner.scan():
            print('found at: ' + match.group(1) + ' content: ' + match.group(0))
        time.sleep(5)