python parsing Excel
Company background: omitted — the author (LZ, the original poster) is too lazy to write it up.
Motivation: the boss asked for an export of the records in the company database that meet certain criteria. The source data is about 4 million records, of which about 700,000 meet the conditions.
Goal: select the qualifying records and write them to an Excel file.
Without further ado ("Cui Hua, bring out the code!"), here is the code:
The author's Python fundamentals are not strong — only enough to write simple scripts — so treat this as a record of the learning journey. The script uses openpyxl because it supports the Excel 2010 (.xlsx) format.
# coding=utf-8
"""Export qualifying candidate profiles from MongoDB into an .xlsx workbook.

A profile qualifies when it has a full name, at least one contact channel
(e-mail or phone number), and one of the ``filters`` keywords in either its
work experience or its desired industries.

NOTE(review): this listing was reconstructed from a copy in which line breaks
and identifier/key casing had been lost.  The MongoDB field names used below
(``workExperiences``, ``contactEmail``, ...) are assumed to be camelCase like
the ``customerProfiles`` collection name -- confirm against the real schema.
"""
import datetime
import logging
# The imports below are kept from the original listing; nothing in this
# excerpt uses them (presumably the workbook was mailed out afterwards).
import smtplib
import time
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

logger = logging.getLogger(__name__)

# The company database is MongoDB (don't ask the author why).
MONGO_HOST = 'beta-mongo01'
DB_NAME = 'core'

# Filter keywords (guess what line of business we are in...).
filters = [
    "Banking", "financial", "Securities", "Consulting", "bank", "Finance",
    "trusts", "funds", "Futures", "leases", "investments", "insurance",
    "accounting", "audits", "investment banks", "brokers", "equity", "risks",
    "finances", "wealth", "assets",
]

# A running counter starts at INITIAL_COUNT and is bumped once per qualifying
# profile.  Only profiles whose counter value lies in
# (EXPORT_AFTER_COUNT, EXPORT_UNTIL_COUNT] are written, each to the sheet row
# equal to its counter value, so the rows above the first exported one stay
# blank.  NOTE(review): this looks like one batch of a multi-file export (the
# file name carries "(2)") -- confirm before changing the layout.
INITIAL_COUNT = 2
EXPORT_AFTER_COUNT = 200002
EXPORT_UNTIL_COUNT = 500002

HEADERS = (
    "number", "CustomerId", "name", "gender", "location", "Mailbox", "phone",
    "formerly employed Position", "formerly employed Company",
    "desired industry", "Working year", "CV update Time", "resume Source",
)

COLUMN_WIDTHS = (
    ("A", 10.0), ("B", 25.0), ("C", 10.0), ("D", 10.0), ("E", 15.0),
    ("F", 20.0), ("G", 15.0), ("H", 25.0), ("I", 35.0), ("J", 35.0),
    ("K", 15.0), ("L", 15.0), ("M", 10.0),
)

# Defined by the original script but never used: the workbook is saved
# relative to the current working directory.
file_dir = '/usr/src/python'


def _matches_filter(text):
    """Return True when any keyword in ``filters`` occurs in *text*."""
    return any(keyword in text for keyword in filters)


def _has_value(value):
    """Return True unless *value* is the empty string or None."""
    return value != "" and value is not None


def _text(value):
    """Format *value* as text the way ``'%s' % value`` does (None -> 'None')."""
    return u"%s" % (value,)


def getfiltersbyexperiences(experiences):
    """Return True when the work-experience text contains a filter keyword."""
    return _matches_filter(experiences)


def getexperieces(row):
    """Return True when the profile's work experience matches the filters.

    The position and organization of every ``workExperiences`` entry are
    concatenated and searched.  A profile whose work experience is missing
    or malformed never matches.
    """
    try:
        experiences = "".join(
            job["position"] + job["organization"]
            for job in row["workExperiences"]
        )
    except (KeyError, TypeError):
        return False
    return getfiltersbyexperiences(experiences)


def getfiltersbyexpect(expect):
    """Return True when the desired-industry text contains a filter keyword."""
    return _matches_filter(expect)


def getexpect(row):
    """Return True when the profile's desired industries match the filters.

    A profile whose ``expect.expectIndustry`` list is missing or malformed
    never matches.
    """
    try:
        expect = "".join(row["expect"]["expectIndustry"])
    except (KeyError, TypeError):
        return False
    return getfiltersbyexpect(expect)


def getfullname(row):
    """Return True when the profile has a non-empty ``fullName``."""
    try:
        return _has_value(row["fullName"])
    except (KeyError, TypeError):
        return False


def getemail(row):
    """Return True when the profile has a non-empty contact e-mail."""
    try:
        return _has_value(row["descriptions"]["contactEmail"])
    except (KeyError, TypeError):
        return False


def getphone(row):
    """Return True when the profile has a non-empty contact phone number."""
    try:
        return _has_value(row["descriptions"]["contactPhoneNumber"])
    except (KeyError, TypeError):
        return False


def _qualifies(row):
    """Return True when *row* passes the name, contact and keyword checks."""
    return bool(
        getfullname(row)
        and (getemail(row) or getphone(row))
        and (getexperieces(row) or getexpect(row))
    )


def _work_history(row, key):
    """Join *key* of every work-experience entry, each followed by '/'.

    Returns '' when the work experience is missing or malformed.
    """
    try:
        return "".join(job[key] + "/" for job in row["workExperiences"])
    except (KeyError, TypeError):
        return ""


def _expected_industries(row):
    """Concatenate the desired industries; '' when missing or malformed."""
    try:
        # The original checked the list under a different key
        # ("expectedIndustry") than the one it read from ("expectIndustry"),
        # so the lookup failed and this column always came out empty.
        return "".join(row["expect"]["expectIndustry"])
    except (KeyError, TypeError):
        return ""


def _record(row, count):
    """Build the 13 cell values (columns A..M) for one qualifying profile."""
    descriptions = row.get("descriptions") or {}
    return [
        _text(count - 1),
        _text(row.get("_id", "")),
        _text(row.get("fullName", "")),
        _text(row.get("gender", "")),
        _text(descriptions.get("city", "")),
        _text(descriptions.get("contactEmail", "")),
        _text(descriptions.get("contactPhoneNumber", "")),
        _text(_work_history(row, "position")),
        _text(_work_history(row, "organization")),
        _text(_expected_industries(row)),
        _text(descriptions.get("workLife", "")) + u" ",
        # Keep only the first 11 characters of the timestamp text.
        _text(row.get("updateTime", "2015-05-05 00:00:00"))[0:11],
        _text(row.get("source", "")),
    ]


def main():
    """Query MongoDB, write the qualifying profiles and save the workbook."""
    # Third-party imports are local so the filter helpers above stay
    # importable on machines without openpyxl/pymongo installed.
    from openpyxl.workbook import Workbook
    from pymongo import MongoClient

    now = datetime.datetime.now()
    rows = MongoClient(MONGO_HOST)[DB_NAME].customerProfiles.find()

    # Create a workbook; its first sheet receives the data.
    wb = Workbook()
    ws = wb.worksheets[0]
    ws.title = u"CV data"
    for column, title in enumerate(HEADERS, 1):
        ws.cell(row=1, column=column).value = title

    count = INITIAL_COUNT
    for row in rows:
        if not _qualifies(row):
            continue
        count += 1
        if count <= EXPORT_AFTER_COUNT:
            continue
        try:
            for column, value in enumerate(_record(row, count), 1):
                ws.cell(row=count, column=column).value = value
        except Exception:
            # Best effort, as in the original: one bad profile must not
            # abort a long export, but the failure is no longer silent.
            logger.warning(
                "could not write profile %s to row %d",
                row.get("_id"), count, exc_info=True,
            )
        else:
            print(count)
        # ">=" rather than "==" so the stop condition cannot be stepped over.
        if count >= EXPORT_UNTIL_COUNT:
            break

    # Adjust the column widths.
    for letter, width in COLUMN_WIDTHS:
        ws.column_dimensions[letter].width = width

    file_name = now.strftime("%y%m%d") + "(2).xlsx"
    wb.save(filename=file_name)


if __name__ == "__main__":
    main()
Below are several other approaches collected from the internet, recorded here for reference ...
Python code for reading and writing Excel files:
Use xlrd to read Excel files and xlwt to generate them (xlwt lets you control the formatting of cells). However, xlrd can only read and xlwt can only create new files: neither can modify an existing Excel file. To modify an existing file, use the xlutils module. The pyExcelerator module is similar to xlwt and can also be used to generate Excel files.
# coding=utf-8
#######################################################
# filename: test_xlrd.py
# author: defias
# date: xxxx-xx-xx
# function: read data from an Excel file
#######################################################
import xlrd

# Open a workbook.
workbook = xlrd.open_workbook('E:\\code\\python\\testdata.xls')

# Fetch the names of all sheets.
worksheets = workbook.sheet_names()
print('worksheets is %s' % worksheets)

# Locate Sheet1 by name.
worksheet1 = workbook.sheet_by_name(u'Sheet1')
# Alternatively, get it by index order:
#     worksheet1 = workbook.sheets()[0]
# or:
#     worksheet1 = workbook.sheet_by_index(0)

# To walk over every sheet object:
#     for worksheet_name in worksheets:
#         worksheet = workbook.sheet_by_name(worksheet_name)

# Walk over all rows of Sheet1.
num_rows = worksheet1.nrows
for curr_row in range(num_rows):
    row = worksheet1.row_values(curr_row)
    print('row%s is %s' % (curr_row, row))

# Walk over all columns of Sheet1.
num_cols = worksheet1.ncols
for curr_col in range(num_cols):
    col = worksheet1.col_values(curr_col)
    print('col%s is %s' % (curr_col, col))

# Walk over all cells of Sheet1.
for rown in range(num_rows):
    for coln in range(num_cols):
        cell = worksheet1.cell_value(rown, coln)
        # print() with one argument behaves the same on Python 2 and 3.
        print(cell)

# Other ways to read a cell:
#     cell = worksheet1.cell(rown, coln).value
#     print(cell)
# or:
#     cell = worksheet1.row(rown)[coln].value
#     print(cell)
# or:
#     cell = worksheet1.col(coln)[rown].value
#     print(cell)
# Get the type of the value in a cell
# (0 empty, 1 string, 2 number, 3 date, 4 boolean, 5 error):
#     cell_type = worksheet1.cell_type(rown, coln)
#     print(cell_type)
# coding=utf-8
#######################################################
# filename: test_xlwt.py
# author: defias
# date: xxxx-xx-xx
# function: create a new Excel file and write data to it
#######################################################
import xlwt

# Create the workbook and sheet objects.
workbook = xlwt.Workbook()  # note: the W of Workbook is upper-case
sheet1 = workbook.add_sheet('Sheet1', cell_overwrite_ok=True)
sheet2 = workbook.add_sheet('Sheet2', cell_overwrite_ok=True)

# Write data into the sheets.
sheet1.write(0, 0, 'This should overwrite1')
sheet1.write(0, 1, 'Aaaaaaaaaaaa')
sheet2.write(0, 0, 'This should Overwrite2')
# NOTE(review): the row/column arguments of this call were missing in the
# source listing (write() requires them); (1, 2) is a placeholder -- adjust
# as needed.
sheet2.write(1, 2, 'bbbbbbbbbbbbb')

# ----------- Using a style -----------------------------------
# Initialise the style:
#     style = xlwt.XFStyle()
# Create a font for the style:
#     font = xlwt.Font()
#     font.name = 'Times New Roman'
#     font.bold = True
# Set the style's font:
#     style.font = font
# Write using the style:
#     sheet1.write(0, 1, 'some bold times text', style)

# Save the Excel file; an existing file with the same name is overwritten.
workbook.save('E:\\code\\python\\test2.xls')
print('Create Excel file complete!')
# coding=utf-8
#######################################################
# filename: test_xlutils.py
# author: defias
# date: xxxx-xx-xx
# function: write data into an existing Excel file
#######################################################
import xlrd
import xlutils.copy

# Open the workbook read-only, then make a writable copy of it.
rb = xlrd.open_workbook('e:\\code\\python\\test1.xls')
wb = xlutils.copy.copy(rb)

# Get the sheet object from the copy: a sheet obtained through
# sheet_by_index() has no write() method.
ws = wb.get_sheet(0)

# Write data.
ws.write(1, 1, 'changed!')

# Add a sheet.
wb.add_sheet('sheetnnn2', cell_overwrite_ok=True)

# Saving under the same name overwrites the file, which is how the Excel
# file gets "modified"; unmodified content stays the same.
wb.save('e:\\code\\python\\test1.xls')
# coding=utf-8
#######################################################
# filename: test_pyexcelerator_read.py
# author: defias
# date: xxxx-xx-xx
# function: read data from an Excel file
#######################################################
import pyExcelerator

# parse_xls returns a list with one item per sheet.  Each item is a 2-tuple
# (sheet name, cell data), where the cell data is a dictionary keyed by the
# cell index (i, j).  A cell without data has no entry in the dictionary.
sheets = pyExcelerator.parse_xls('E:\\code\\python\\testdata.xls')
print(sheets)
# coding=utf-8
#######################################################
# filename: test_pyexcelerator.py
# author: defias
# date: xxxx-xx-xx
# function: create a new Excel file and write data to it
#######################################################
import pyExcelerator

# Create the workbook and sheet objects.
wb = pyExcelerator.Workbook()
ws = wb.add_sheet(u'first page')

# Set up the style.
myfont = pyExcelerator.Font()
myfont.name = u'Times New Roman'
myfont.bold = True
mystyle = pyExcelerator.XFStyle()
mystyle.font = myfont

# Write data using the style.
ws.write(0, 0, u'Ni hao el Paso!', mystyle)

# Save the Excel file; an existing file with the same name is overwritten.
wb.save('E:\\code\\python\\mini.xls')
print('Create Excel file complete!')
Python parsing Excel