Features:
1. Design and create the database
2. Parse Excel files with Python
3. Read and parse the file names with Python
4. Store the parsed data in the database
One: Set up the database
Based on the requirements, two tables are created so that the parsed data can be stored in the database. The code is as follows:
import sqlite3


def CreateDatabase(db_path='check.db'):
    """Create the two result tables in the SQLite database if they are missing.

    ``tb_check`` stores one row per checked item and ``tb_score`` stores one
    score row per imported file.  Both statements use
    ``CREATE TABLE IF NOT EXISTS``, so calling the function repeatedly is
    harmless.

    Args:
        db_path: Path of the SQLite database file.  Defaults to ``check.db``
            in the current working directory.
    """
    cn = sqlite3.connect(db_path)
    try:
        cn.execute('''CREATE TABLE IF NOT EXISTS tb_check (
            ID integer PRIMARY KEY AUTOINCREMENT,
            number integer,
            item text,
            reference text,
            summary text,
            object text,
            method text,
            condition text,
            value text,
            result text,
            score text,
            remarks text,
            province text,
            time text);''')
        cn.execute('''CREATE TABLE IF NOT EXISTS tb_score (
            ID integer PRIMARY KEY AUTOINCREMENT,
            province text,
            time text,
            filetype text,
            score integer);''')
        cn.commit()
    finally:
        # Always release the database file handle, even if a statement fails.
        cn.close()


if __name__ == '__main__':
    CreateDatabase()
Two: Using Python to parse Excel
The xlrd module is used to parse Excel files in Python. Its relevant features are described below:
1. Import
import xlrd
2. Reading data
data = xlrd.open_workbook('file.xls')
3. Functions
(1) Get a sheet by index
table = data.sheets()[0]
table = data.sheet_by_index(0)
(2) Get a sheet by name
table = data.sheet_by_name(u'Sheet1')
(3) Get the values (as a list) of an entire row or an entire column
table.row_values(i)
table.col_values(i)
(4) Get the number of rows and columns
nrows = table.nrows
ncols = table.ncols
(5) Loop over the rows
for i in range(nrows):
    print(table.row_values(i))
(6) Cells
cell_A1 = table.cell(0, 0).value
(7) Using the row and column index
cell_A1 = table.cell(0, 0).value
Practice Code:
from datetime import date, datetime

import xlrd
import xlwt


def read_excel():
    """Print every cell of the first sheet of ``file.xls`` from row index 6 on.

    Cells are printed one per line, row by row and left to right.  The raw
    cell value is printed rather than ``value.encode('utf-8')`` because xlrd
    returns numbers as floats, which have no ``encode`` method.
    """
    # Open the workbook.
    workbook = xlrd.open_workbook(r'file.xls')
    # Look up the first sheet through its name.
    sheet_name = workbook.sheet_names()[0]
    sheet = workbook.sheet_by_name(sheet_name)
    # Print the content of each row, starting at row index 6.
    for i in range(6, sheet.nrows):
        for j in range(sheet.ncols):
            print(sheet.cell(i, j).value)


if __name__ == '__main__':
    read_excel()
Three: Reading and parsing the file names with Python
To tell the data of the individual files apart, the fields encoded in each file name must also be stored in the database. The code for parsing the file names is as follows:
import os


def getfilelist(dir, wildcard, recursion):
    """Collect the data files under *dir* and split their names into fields.

    File names are expected to look like ``<time>-<province>-<type>``; the
    first three ``-``-separated fields are returned alongside each file.
    Files with fewer than three fields are skipped instead of aborting the
    scan with an ``IndexError``.

    Side effect: the process working directory is changed to *dir*, so the
    returned relative file names can be opened directly by the caller.

    Args:
        dir: Directory to scan.
        wildcard: Whitespace-separated list of accepted file-name endings,
            for example ``".xls"``.
        recursion: Truthy to descend into sub-directories as well.

    Returns:
        A tuple ``(fileList, check_time, check_province, file_type)`` of four
        parallel lists.  ``fileList`` holds paths relative to *dir* (the bare
        file name for files directly inside it).  ``file_type`` is the third
        name field as-is, so for a three-field name it still carries the
        file extension.
    """
    root = os.path.abspath(dir)
    # Change directory exactly once, to the top-level folder; resolving the
    # path first keeps a relative *dir* valid after the change.
    os.chdir(root)
    # An empty wildcard accepts every file, since ``name.endswith('')`` is true.
    exts = tuple(wildcard.split()) or ('',)

    fileList = []
    check_time = []
    check_province = []
    file_type = []

    def _scan(folder):
        # Sorted listing gives a deterministic result order.
        for name in sorted(os.listdir(folder)):
            fullname = os.path.join(folder, name)
            if os.path.isdir(fullname):
                if recursion:
                    _scan(fullname)
                continue
            if not name.endswith(exts):
                continue
            fields = name.split('-')
            if len(fields) < 3:
                # Not named <time>-<province>-<type>: nothing to extract.
                continue
            fileList.append(os.path.relpath(fullname, root))
            check_time.append(fields[0])
            check_province.append(fields[1])
            file_type.append(fields[2])

    _scan(root)
    return fileList, check_time, check_province, file_type
Encoding problems come up when these fields are used later, so they must be transcoded first. The transcoding function is as follows:
# Transcoding function
def changecode(name):
    """Convert a GBK-encoded byte string to a UTF-8 encoded byte string.

    Args:
        name: GBK-encoded bytes, or text that is already decoded.

    Returns:
        The UTF-8 encoded bytes for byte-string input.  Already-decoded text
        has no byte encoding to convert and is returned unchanged.

    Raises:
        UnicodeDecodeError: If byte-string input is not valid GBK.
    """
    if isinstance(name, bytes):
        return name.decode('GBK').encode('UTF-8')
    return name
Four: Parsing an Excel file and storing it in SQLite
For the database connection, the SQLite support that ships with Python is used. It is relatively simple, so it is not covered in detail here; if you have questions about using SQLite from Python, I personally recommend the Rookie Tutorial.
The following code parses an Excel file and stores it in the database, including the checks on the cell contents:
def readexcel(filename, cn, check_province, check_time, filetype):
    """Import the first sheet of one Excel workbook into the database.

    Rows are read from row index 7 onward.  The value at row 7, column 1
    acts as a marker: every row whose column 1 equals it is treated as a
    heading row, its column 0 becomes the current check item, and the row
    itself is not stored.  Every other row is inserted into ``tb_check``
    (its first ten cells plus province, time and the current check item).
    Column 7 holds the answer: "Yes" and "No" count toward the score, and
    "Other" aborts the import of this file.

    When at least one row was stored, a percentage score
    (answered rows / stored rows * 100, rounded to two decimals) is inserted
    into ``tb_score``.

    NOTE(review): the ``tb_check`` column names used by the INSERT below
    (field, type, content, ...) do not match the columns declared by the
    ``CREATE TABLE`` statement shown earlier in this article (number,
    reference, summary, ...).  One of the two must be adjusted before this
    statement can succeed -- confirm the intended schema.

    Args:
        filename: Path of the workbook to read.
        cn: Open ``sqlite3`` connection.  It is committed on success and
            rolled back on an aborted import, so any uncommitted work the
            caller left on it is affected as well.
        check_province: Province value stored with every row.
        check_time: Time value stored with every row.
        filetype: File-type value stored with the score row.
    """
    # Read the workbook and take its first sheet.
    workbook = xlrd.open_workbook(filename)
    sheet = workbook.sheet_by_index(0)

    # The loop reads row 7 and columns 0-9; refuse sheets that are too small
    # instead of failing with an IndexError part-way through.
    if sheet.nrows <= 7 or sheet.ncols < 10:
        print("!!! Failed to import '%s': unexpected sheet layout" % filename)
        return

    check_item = 'a'
    item_count = 0
    score = 0
    second = sheet.cell(7, 1).value
    for i in range(7, sheet.nrows):
        # Raw cell values are used as-is: sqlite3 binds text and numbers
        # directly, and numeric cells have no ``encode`` method.
        row = sheet.row_values(i)
        if row[1] == second:
            check_item = row[0]
            continue

        answer = row[7]
        if answer == "Other":
            print("!!! Failed to import '%s'" % filename)
            print("!!! Please Choose the Answer for '%s'--------" % filename)
            # Discard the rows already inserted for this file so a rejected
            # workbook is not left half-imported.
            cn.rollback()
            return
        if answer == "Yes" or answer == "No":
            score += 1

        # Placeholders let sqlite3 quote the values, so cells containing
        # quote characters can neither break nor alter the statement.
        cn.execute(
            "INSERT INTO tb_check (item, field, type, content, "
            "attribute, checkpoint, remarks, answer, description, "
            "suggestion, province, time, style) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            list(row[:10]) + [check_province, check_time, check_item],
        )
        item_count += 1

    if item_count != 0:
        # 100.0 forces true division; integer division would truncate the
        # per-item weight (e.g. 100 / 3 == 33 under Python 2).
        score = round(score * 100.0 / item_count, 2)
        cn.execute(
            "INSERT INTO tb_score (province, time, filetype, score) "
            "VALUES (?, ?, ?, ?)",
            (check_province, check_time, filetype, score),
        )
        print("Successful for '%s'--------" % filename)
    cn.commit()
Integrate the above features:
def importdata(path):
    """Import every ``.xls`` file found under *path* into ``check.db``.

    The database is created (if needed) and opened in the working directory
    that is current when the function is called.  The files are then located
    with ``getfilelist`` (recursively) and imported one by one with
    ``readexcel``.

    Args:
        path: Directory containing the Excel files to import.
    """
    # Database
    CreateDatabase()
    database = sqlite3.connect("check.db")
    try:
        # Accepted file type
        wildcard = ".xls"
        # Parallel lists: file names, times, provinces, file types.
        files, times, provinces, file_types = getfilelist(path, wildcard, 1)
        for filename, check_time, province, ftype in zip(
                files, times, provinces, file_types):
            check_province = changecode(province)
            file_type = changecode(ftype)
            readexcel(filename, database, check_province, check_time,
                      file_type)
    finally:
        # Release the database file even if one of the imports raises.
        database.close()


if __name__ == '__main__':
    import sys

    if len(sys.argv) != 2:
        print("Wrong Parameters")
    else:
        importdata(sys.argv[1])
Python parses the Excel file and stores it in the SQLite database