一、建立資料庫
根據需求建立資料庫,建立了兩個表,並保證了可以將資料存放區到已有的資料庫中,代碼如下:
import sqlite3def createDataBase():cn = sqlite3.connect('check.db')cn.execute('''CREATE TABLE IF NOT EXISTS TB_CHECK(ID integer PRIMARY KEY AUTOINCREMENT,NUMBER INTEGER,ITEM TEXT,REFERENCE TEXT,SUMMARY TEXT,OBJECT TEXT,METHOD TEXT,CONDITION TEXT,VALUE TEXT,RESULT TEXT,SCORE TEXT,REMARKS TEXT,PROVINCE TEXT,TIME TEXT);''')cn.execute('''CREATE TABLE IF NOT EXISTS TB_SCORE(ID integer PRIMARY KEY AUTOINCREMENT,PROVINCE TEXT,TIME TEXT,FILETYPE TEXT,SCORE INTEGER);''')if __name__ == '__main__':createDataBase()
二、使用Python解析excel
Python中的xlrd模組用來解析excel。
相關功能介紹如下:
1. 匯入
2. 讀取資料
data = xlrd.open_workbook('file.xls')
3. 功能
(1) 通過索引擷取
table = data.sheet()[0]table = data.sheet_by_index(0)
(2)通過名稱擷取
table = data.sheet_by_name(u'sheet1')
(3)擷取整行和整列的值(數組)
table.row_values(i)table.col_values(i)
(4)擷取行數和列數
nrows = table.nrowsncols = table.ncols
(5)迴圈行列表資料
for i in range(nrows):print table.row_values(i)
(6)儲存格
cell_A1 = table.cell(0,0).value
(7)使用行列索引
cell_A1 = table.cell(0,0).value
練習代碼:
import xlrdimport xlwtfrom datetime import date,datetimedef read_excel():# 開啟檔案workbook = xlrd.open_workbook(r'file.xls')# 擷取所有sheetsheet_name = workbook.sheet_names()[0]sheet = workbook.sheet_by_name(sheet_name)#擷取一行的內容for i in range(6,sheet.nrows):for j in range(0,sheet.ncols):print sheet.cell(i,j).value.encode('utf-8')if __name__ == '__main__':read_excel()
三、Python讀取檔案名稱並解析
為了將各個檔案的資料加以區分,需要將檔案名稱中標誌性欄位入庫,解析檔案的代碼如下:
import osdef getFileList(dir,wildcard,recursion): os.chdir(dir) fileList = [] check_province = [] check_time = [] file_type = [] exts = wildcard.split(" ") files = os.listdir(dir) for name in files: fullname=os.path.join(dir,name) if(os.path.isdir(fullname) & recursion): getFileList(fullname,wildcard,recursion) else: for ext in exts: if(name.endswith(ext)): fileList.append(name) check_province.append(name.split('-')[1]) check_time.append(name.split('-')[0]) file_type.append(name.split('-')[2]) return fileList,check_time,check_province,file_type
在接下來的使用中 會遇到編碼問題 所以在使用這些欄位時需要先轉碼,編寫轉碼函數如下:
#轉碼函數def changeCode(name): name = name.decode('GBK') name = name.encode('UTF-8') return name
四、解析excel檔案並將其儲存到sqlite
Python串連資料庫 選取了Python內建的sqlite資料庫 相對簡單 在此不做太多介紹 如果大家對Python操作sqlite有疑惑的話 個人推薦菜鳥教程~
下面是解析excel檔案並存入資料庫,其中包含了判斷儲存格內容:
def readExcel(filename,cn,check_province,check_time,FileType): #讀取 workbook = xlrd.open_workbook(filename) # 擷取sheet sheet_name = workbook.sheet_names()[0] sheet = workbook.sheet_by_name(sheet_name) check_Item = 'a' itemCount = 0 score = 0 second = sheet.cell(7,1).value.encode('utf-8') for i in range(7,sheet.nrows): if sheet.cell(i,1).value.encode('utf-8') == second: check_Item = sheet.cell(i,0).value.encode('utf-8') continue temp = [] for j in range(0,sheet.ncols): temp.append(sheet.cell(i,j).value.encode('utf-8')) answer = sheet.cell(i,7).value.encode('utf-8') if answer == "yes" or answer == "no": score = score + 1 if answer == "other": print "!!!Failed to import'%s'" % (filename) print "!!!Please Choose an Right Answer for '%s'--------"%(filename) break else: cn.execute("insert into TB_CHECK (ITEM,FIELD,TYPE,CONTENT," "ATTRIBUTE,CHECKPOINT,REMARKS,ANSWER,DESCRIPTION," "SUGGESTION,PROVINCE,TIME,STYLE) " "values('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')" ""%(temp[0],temp[1],temp[2],temp[3],temp[4],temp[5],temp[6],temp[7],temp[8],temp[9],check_province,check_time,check_Item)) itemCount = itemCount + 1 if itemCount != 0: score = round(score * (100 / itemCount), 2) cn.execute("insert into TB_SCORE (PROVINCE,TIME,FILETYPE,SCORE) " "values('%s','%s','%s','%.2f')"%(check_province,check_time,FileType,score)) print "Successful for'%s'--------" % (filename) cn.commit()
整合上述功能:
def importData(path): # 資料庫 createDataBase() database = sqlite3.connect("check.db") #檔案類型 wildcard = ".xls" list = getFileList(path,wildcard,1) nfiles = len(list[0]) #檔案名稱 file = list[0] #時間 time = list[1] #省份 province = list[2] # #檔案類型 FileType = list[3] for count in range(0,nfiles): filename = file[count] check_province = changeCode(province[count]) check_time = time[count] File_type = changeCode(FileType[count]) readExcel(filename,database,check_province,check_time,File_type)if __name__ == '__main__': if len(sys.argv) != 2: print "Wrong Parameters" else: path = sys.argv[1] importData(path)
總結
以上就是這篇文章的全部內容了,希望本文的內容對大家學習或者使用python能有所協助,如果有疑問大家可以留言交流。