We often run into this situation in data operations: we need to analyze log data generated by some environment, but colleagues at large companies often do not have privileges on the server, so we need to import the data into MySQL automatically. That scenario is what this article is based on.
Article Focus:
1. Log files are read line by line, so that reads do not occupy memory even when the log data grows very large (see the first sketch after this list).
2. The database uses a long-lived connection for the write operations; long MySQL connections are covered in our earlier post "Python MySQL Long Connection" (http://blog.csdn.net/wzm112/article/details/7745835), and the second sketch after this list shows the idea.
3. Log data is consolidated into a single logging directory; I use log/ as the logging directory in my actual production environment, so it is not adjusted here.
4. A table with the same name as the log file is created automatically in the specified database, and the data import runs automatically. You can of course improve on this and define rules of your own in advance, such as which file suffixes get imported and how the data is stored.
5. Logs are imported incrementally: reading resumes from the last increment point, and that position is recorded in a companion file named after the log file with a .seek suffix (see the first sketch after this list).
6. A scheduled deletion task runs at three o'clock every night (according to the timestamp recorded in log_time_log.time) and removes the log files (.txt) together with their .seek files (which record the read position).
7. The whole operation works on plain files; here the log files use the .txt suffix, which everyone can adjust to their own situation.
8. Execution: run this file as a daemon, hung on the server with nohup (see the command after this list).
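Points 1 and 5 work together: reading line by line keeps memory flat, and persisting the byte offset lets the next run resume where this one stopped. A minimal standalone sketch of the idea ('app.txt' is an illustrative file name, not one from the post):

import os

def read_new_lines(log_path):
    seek_path = '%s.seek' % log_path
    offset = 0
    if os.path.exists(seek_path):              # restore the saved offset
        with open(seek_path) as f:
            offset = int(f.read() or 0)
    with open(log_path) as f:
        f.seek(offset)                         # skip what was already imported
        for line in f:                         # one line at a time: memory stays
            offset += len(line)                # flat no matter how big the log is
            yield line
    with open(seek_path, 'w') as f:            # persist the new offset
        f.write(str(offset))

for line in read_new_lines('app.txt'):
    pass  # hand each line to the importer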
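The libs.mysql.MySQL wrapper itself is not shown in this post (the long-connection details are in the linked article). For orientation, here is a minimal sketch of what such a wrapper might look like with the MySQLdb driver; this is an assumption, not the author's actual class:

# Sketch of a long-connection wrapper (assumption: the real
# libs.mysql.MySQL class is not shown in the post and may differ).
import MySQLdb

class MySQL:
    def __init__(self, host, user, passwd, db):
        self.conn = MySQLdb.connect(host=host, user=user, passwd=passwd,
                                    db=db, charset='utf8')

    def query(self, sql):
        # ping(True) asks the driver to reconnect if the server has dropped
        # the idle connection (wait_timeout); that reconnect-on-demand is the
        # core of keeping one long-lived connection alive
        self.conn.ping(True)
        cur = self.conn.cursor()
        cur.execute(sql)
        self.conn.commit()
        cur.close()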
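For point 8, assuming the script below is saved as read_log.py (an illustrative name), hanging it on the server with nohup looks like this:

nohup python read_log.py > /dev/null 2>&1 &

nohup keeps the process alive after the terminal closes, and the trailing & puts it in the background, so the while True loop keeps polling after you log out.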
#!/usr/bin/env python
# coding=utf-8
import os, sys, time, traceback
from libs.mysql import MySQL
from libs.config import _config  # load the config file (config parsing lives there)

reload(sys)
sys.setdefaultencoding('utf8')


class read_log:

    def __init__(self):
        # locate the project root
        self.dir_path = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
        self.logDbConfig = _config['db_config']  # read the database configuration
        self.logDb = None
        self.get_log_conn()  # connect to the database

    def get_log_conn(self):
        host = self.logDbConfig['host']
        user = self.logDbConfig['user']
        passwd = self.logDbConfig['passwd']
        db = self.logDbConfig['db']
        self.logDb = MySQL(host, user, passwd, db)

    def write_time_log(self, itime=0):
        # record the time of the last cleanup
        f = open(os.path.join(self.dir_path, 'log_time_log.time'), 'w')
        f.write(str(itime))
        f.close()

    def read_time_log(self):
        # read the recorded cleanup time back
        _f_name = os.path.join(self.dir_path, 'log_time_log.time')
        if os.path.exists(_f_name):
            f = open(_f_name, 'r')
            result = f.read()
            f.close()
            if result:
                return long(result)
        return 0

    def delete_txt_file(self):
        # delete the .txt log files and their .seek companion files
        for filename in self.read_dir():
            idir = '%s/log' % self.dir_path
            fname = '%s/%s' % (idir, filename)
            try:
                os.remove(fname)
                os.remove('%s.%s' % (fname, 'seek'))
            except:
                pass

    def run(self):
        # main loop
        while True:
            self.read_file_content()  # write the logs into the database
            # clean the logs up at 3 a.m. every night
            if self.read_time_log() == 0 and int(time.strftime('%H', time.localtime(time.time()))) == 3:
                self.delete_txt_file()
                self.write_time_log(int(time.time()))
            elif int(time.strftime('%H', time.localtime(time.time()))) == 3 and \
                 time.strftime('%y%m%d', time.localtime(time.time())) != \
                 time.strftime('%y%m%d', time.localtime(self.read_time_log())):
                self.delete_txt_file()
                self.write_time_log(int(time.time()))
            time.sleep(60)  # loop once a minute; adjust the interval to your own situation

    def create_table(self, table_name=''):
        # create the table structure the log rows are written into
        # (the column lengths below are assumptions: the published listing lost them)
        if table_name:
            sql = """CREATE TABLE IF NOT EXISTS `log_%s` (
                `id` int(11) NOT NULL AUTO_INCREMENT,
                `task_id` varchar(50) NOT NULL DEFAULT '0',
                `entId` varchar(50) NOT NULL DEFAULT '0',
                `source` varchar(50) NOT NULL DEFAULT '0',
                `data` longtext NOT NULL,
                `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (`id`)
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=1""" % table_name
            self.logDb.query(sql)

    def write_seek(self, filename='', seek=0):
        # persist the current seek position
        f = open(os.path.join(self.dir_path, '%s.seek' % filename), 'w')
        f.write(str(seek))
        f.close()

    def read_seek(self, filename='', seek=0):
        # read the file's saved seek position
        _f_name = os.path.join(self.dir_path, '%s.seek' % filename)
        if os.path.exists(_f_name):
            f = open(_f_name, 'r')
            result = f.read()
            f.close()
            if result:
                return long(result)
        return 0

    def read_dir(self):
        # list the .txt files in the log directory (the .seek files are skipped)
        print self.dir_path
        idir = '%s/log' % self.dir_path
        print idir
        return [row for row in os.listdir(idir) if 'txt' in row and 'seek' not in row]

    def read_file_content(self):
        # read every qualifying file into its corresponding table
        for filename in self.read_dir():
            idir = '%s/log' % self.dir_path
            fname = '%s/%s' % (idir, filename)
            if len(filename) > 4:
                table_name = filename[:-4]
                self.create_table(table_name)          # create table
                self._read_content(fname, table_name)  # read file

    def _read_content(self, filename='', table=''):
        # helper: read a single file and write it row by row into the database
        file_seek = self.read_seek(filename)
        f = open(filename, 'r')
        f.seek(file_seek)  # seek to where the last read stopped and continue from there
        while True:
            _row = f.readline()                   # one line per iteration
            file_seek += len(_row)                # advance the seek position
            self.write_seek(filename, file_seek)  # persist the new seek position
            try:
                # only handle dict-style rows; skip this check if your log format is fixed
                if _row.find('{') == 0:
                    result_row = eval(_row)  # string to dict
                    task_id = result_row.get('msgId', '')
                    entid = result_row.get('entId', '')
                    source = result_row.get('source', '')
                    data = _row.replace("\\", "\\\\").replace("'", "\\'")
                    sql = "INSERT INTO `log_%s` (`task_id`,`entId`,`source`,`data`) " \
                          "VALUES ('%s','%s','%s','%s')" % (table, task_id, entid, source, data)
                    self.logDb.query(sql)
            except:
                pass
            if not _row:
                break
        f.close()


_read_log = read_log()
if __name__ == '__main__':
    _read_log.run()
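One closing design note, offered as an aside rather than as part of the original script: eval() will execute any Python expression that shows up in a log line, so if you control the log format, ast.literal_eval is a safer drop-in that only accepts plain literals:

import ast
result_row = ast.literal_eval(_row)  # raises ValueError instead of executing code

Similarly, passing the values to the driver as query parameters instead of interpolating them into the SQL string would make the manual backslash and quote escaping unnecessary.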