#-*-Coding:utf-8-*-"" "Created on Wed Nov 11:18:16 2016@author:administrator" "" Import Xlrdimport Pandas,numpyimport Matplotlib.pyplot as Pltimport pandas as pd# parameter # report time reportdate= "2016/12/22" #省份admin = "Zhejiang" filename= " 2016 the 1th issue of Guangxi Drug quality announcement (total 37th period) does not meet. xlsx "year=" 2014 1 "#文件名file =" Pharmaceutical Quality Bulletin (total 5th period) "Url=" www.sda.gov.cn/WS01/CL0090/105223. HTML "#假冒设置为空值fake =" "Data = Xlrd.open_workbook (fileName) Table = data.sheets () [0] #获取行数和列数nrows = Table.nrowsncols = table.ncols# convert Excel data to list of listed elements list_columns=[] #A类, store tags containing keywords # store drug names related to Chinese list_title_names=[' drug name ', ' Inspection name ']# Storage marking production enterprises related Chinese list_title_manufacturers=[' marking production units ', ' marking the production enterprise '] #存储药标示批号相关中文list_title_pitchnos =[' marked lot number ', ' Lot number ']# Storage drug identification number of the relevant Chinese list_title_specifications=[' marking specifications ', ' specifications '] #存储不合格项目相关中文list_title_unpasseditems =[' does not meet the project ', ' unqualified items ']# Storage Validity period relevant Chinese list_title_validperiods=[' valid to '] #存储来源相关中文list_title_sources =[' inspection source ', ' source ' #存储检验单位相关中文list_title_ testoffices=[' Inspection Unit '] #存储检验物质相关中文list_title_testobjectives =[' test material '] #存储检验依据相关中文list_title_regulations =[' test basis ']# list_title_notices=[' remarks ' in the storage and storage notes #Bclass, storing column information list_name=[]list_manufacturer=[]list_pichno=[]list_specification=[]list_unpasseditem=[]list_validperiod =[]list_reportdate=[]list_source=[]list_testoffice=[]list_testobjective=[]list_regulation=[]list_year=[]list_ File=[]list_url=[]list_notice=[]list_fake=[] #把excel数据转换为列为元素的列表 function def list_columns (table,ncols): For I in range ( Ncols): List1=table.col_values (i) list_columns.append (list1) return list_columns #获取list_columnInforma tion data #list_columns is a list #list_title store column label information that transforms Excel data into column elements, such as List_title_unpasseditems#list_ Columninformation represents the related column information that was searched for, such as List_namesdef list_columninformation (list_columns,list_title_names,list_ columninformation): For I in range (len (list_columns)): #获取列数据 List_column=list_columns[i] #title表示 Title, such as product name Title=list_column[0] Print ("title:", title) if title in List_title_names:print (" Find title ") List_columninformation=list_column[1:] Return list_columninformation Else:list_columninformation=[']* (nrows-1) return list_columninformation #用于获取报告时间, URLs, filenames, etc. Column Information def list_repeat_columninformation (nrows,value): list_repeat_columninformation=[value]* (nrows-1) return List_r Epeat_columninformation #获取各列数据list_columns =list_columns (table,ncols) list_name=list_columninformation (list_ Columns,list_title_names,list_name) list_manufacturer=list_columninformation (list_columns,list_title_ Manufacturers,list_manufacturer) list_pichno=list_columninformation (list_columns,list_title_pitchnos,list_ Pichno) list_specification=list_columninformation (list_columns,list_title_specifications,list_specification) List_unpasseditem=list_columninformation (List_columns,list_title_unpasseditems,list_unpasseditem) list_ Validperiod=list_columninformation (List_columns,list_title_validperiods,list_validperiod) list_reportdate=List_ Repeat_columninformation (nrows,reportdate) list_source=list_columninformation (list_columns,list_title_sources, List_source) list_testoffice=list_columninformation (List_columns,list_title_testoffices,list_testoffice) list_testobjective =list_columninformation (list_columns,list_title_testobjectives,list_testobjective) list_regulation=List_ Columninformation (list_columns,list_title_regulations,list_regulation) list_adminadd=list_repeat_ Columninformation (nrows,admin) list_year=list_repeat_columninformation (nrows,year) list_file=List_repeat_ Columninformation (Nrows,file) list_url=list_repeat_columninformation (nrows,url) list_notice=list_ Columninformation (List_columns,list_title_notices,list_notice) list_fake=list_repeat_columninformation (nrows, " ) #pandas字典写入excel会有标题排序问题, so replace DATA=PD with a,b,c and other sequential letters. DataFrame ({' A ': List_name, ' B ': List_manufacturer, ' C ': List_pichno, ' d ': list_specification, ' E ': List_unpasseditem, ' F ': List_validperiod, ' G ': list_reportdate, ' h ': list_source, ' I ' : List_testoffice, ' j ': list_testobjective, ' K ': List_regUlation, ' l ': list_adminadd, ' m ': list_year, ' n ': list_file, ' o ': List_url, ' pme_notice ': List _notice, ' Q ': List_fake}) Data.to_excel ("Ypzl_normal.xlsx")
YPZL Drug Quality Unqualified database-excel automatic typesetting