python實戰之實現excel讀取、統計、寫入
背景
映像領域內的一個國內會議快要召開了,要發各種邀請郵件,之後要錄入、統計郵件回複(參會還是不參會等)。如此重要的任務,老師就託付給我了。ps: 統計回複郵件的時候,能知道誰參會或誰不參會。
而我主要的任務,除了錄入郵件回複,就是統計理事和普通會員的參會情況了(參會的、不參會的、沒回複的)。錄入郵件回複資訊沒辦法只能人工操作,但如果統計也要人工的話,那工作量就太大了(比如在上百人的列表中搜尋另外上百人在不在此列表中。。),於是就想到了用python來幫忙,花兩天時間不斷修改,寫了6個版本。。。 摘要 version_1基本實現了excel讀取、統計、顯示功能,但問題也有不少,像顯示出來後還要自已複製、粘貼到excel表,而且set中還有nan這樣的bug。 version_2相比較version_1而言,此版本用set代替list,可以自動去重。 version_3解決了set中出現nan的bug,而且還加入的excel寫入的功能,但一次只能寫入一張表,所以要運行兩次才能寫入兩張表(sheet)。 version_4的改進在於將version_3中寫入兩張表格的操作,整合在一個程式裡,只需要運行一次便可寫入兩張表,但也總是會寫入兩張表,萬一你只想寫入一張表呢。。 version_5相對之前版本的最大改進在於將程式模組化,更具可讀性了; 對修複set中出現nan的方法也進行了改進和簡化; 而且可以自由控制寫入多少張表了。 version_final相比較version_5,修複了一個bug,之前需要先驗知識,現在更通用一點(prep函數取代了set2list函數)。 version_1
基本實現了excel讀取、統計、顯示功能,但問題也有不少,像顯示出來後還要自已複製、粘貼到excel表,而且set中還有nan這樣的值。
#version_1import osimport numpy as npimport pandas as pdos.chdir('C:\\Users\\dell\\Desktop\\0711任務')print(os.getcwd())data = pd.read_excel('for_python.xlsx','Sheet2')return_set = set(data['回執名單'])demand_set = set(data['理事名單'])answer_list = []unanswer_list = []for each in demand_set: if each in return_set: answer_list.append(each) else: unanswer_list.append(each)notattend_set = set(data['回執名單'][-15:])nt = []for each in notattend_set: if each in answer_list: nt.append(each)def disp(ll, cap, num = True): print(cap) if num: for i, each in enumerate(ll): print(i+1,each) else: for each in enumerate(ll): print(each)disp(answer_list,'\n理事回執名單')disp(unanswer_list,'\n理事未回執名單')disp(nt,'\n理事回執說不參加名單')
version_2
相比較上一個版本,此版本用set代替list,可以自動去重。
#version_2import osimport numpy as npimport pandas as pdos.chdir('C:\\Users\\dell\\Desktop\\0711任務')print(os.getcwd())data = pd.read_excel('for_python.xlsx','Sheet2')return_set = set(data['回執名單'])demand_set = set(data['理事名單'])answer_set = set([]) #理事回執名單unanswer_set = set([]) #理事未回執名單for each in demand_set: if each in return_set: answer_set.add(each) else: unanswer_set.add(each)notattend_set = set(data['回執名單'][-17:])nt = set([]) #理事回執說不參加名單for each in notattend_set: if each in answer_set: nt.add(each)ans_att_set = answer_set - nt #理事回執參加名單def disp(ss, cap, num = False): print(cap) if num: for i, each in enumerate(ss): print(i+1,each) else: for each in ss: print(each)#disp(answer_set,'\n理事回執名單')disp(ans_att_set,'\n理事回執說參加名單')disp(nt,'\n理事回執說不參加名單')disp(unanswer_set,'\n理事未回執名單')print(len(ans_att_set),len(nt),len(unanswer_set))
version_3
此版本解決了set中出現nan的bug,而且還加入的excel寫入的功能,但一次只能寫入一張表,所以要運行兩次才能寫入兩張表(sheet)。 step_1
import osimport numpy as npimport pandas as pdos.chdir('C:\\Users\\dell\\Desktop')print('work_directory: ', os.getcwd())data = pd.read_excel('理事與會員名單.xlsx','理事與會員名單')#1.載入excel,得到三個名單ans_attend_set = set(data['回執參加']) #回執參會名單N = len(ans_attend_set)ans_notatt_idx = [i for i in range(N) if type(data['回執不參加'][i]) == np.float][0]ans_notatt_set = set(data['回執不參加'][:ans_notatt_idx])#回執不參會名單concil_idx = [i for i in range(N) if type(data['理事名單'][i]) == np.float][0]concil_set = set(data['理事名單'][:concil_idx]) #理事名單#2.統計理事參會情況concil_attend_set = set([]) #理事回執參會名單concil_notatt_set = set([]) #理事回執不參會名單concil_notans_set = set([]) #理事未回執名單for each in concil_set: if each in ans_attend_set: concil_attend_set.add(each) elif each in ans_notatt_set: concil_notatt_set.add(each) else: concil_notans_set.add(each)#3. 顯示結果def disp(ss, cap, num = True): #ss: 名單集合 #cap: 開頭描述 print(cap,'({})'.format(len(ss))) for i in range(np.ceil(len(ss)/5).astype(int)): pre = i * 5 nex = (i+1) * 5 #調整顯示格式 dd = '' for each in list(ss)[pre:nex]: if len(each) == 2: dd = dd + ' ' + each elif len(each) == 3: dd = dd + ' ' + each else: dd = dd + '' + each print('{:3.0f} -{:3.0f} {}'.format(i*5+1,(i+1)*5,dd))disp(concil_attend_set,'\n參會理事')disp(concil_notatt_set,'\n不參會理事')disp(concil_notans_set,'\n未回執理事')#4. 將理事參會情況,寫入exceldf = pd.DataFrame(list(concil_attend_set),columns = ['參會理事'])df['']=pd.DataFrame([''])df['序號1'] = pd.DataFrame(np.arange(len(concil_notatt_set))+1)df['不參會理事'] = pd.DataFrame(list(concil_notatt_set))df['_']=pd.DataFrame([''])df['序號2'] = pd.DataFrame(np.arange(len(concil_notans_set))+1)df['未回執理事'] = pd.DataFrame(list(concil_notans_set))df.index = df.index + 1df.to_excel('理事和會員回執統計.xlsx', sheet_name='理事回執統計')print('\n\n寫入excel成功~~')
step_2
import osimport numpy as npimport pandas as pdos.chdir('C:\\Users\\dell\\Desktop')print('work_directory: ', os.getcwd())data = pd.read_excel('理事與會員名單.xlsx','理事與會員名單')#1.載入excel,得到三個名單ans_attend_set = set(data['回執參加']) #回執參會名單N = len(ans_attend_set)ans_notatt_idx = [i for i in range(N) if type(data['回執不參加'][i]) == np.float][0]ans_notatt_set = set(data['回執不參加'][:ans_notatt_idx])#回執不參會名單mem_idx = [i for i in range(N) if type(data['被推薦人'][i]) == np.float][0]mem_set = set(data['被推薦人'][:mem_idx]) #被推薦為會員代表名單#2.統計會員參會情況mem_attend_set = set([]) #回執參會會員mem_notatt_set = set([]) #回執不參會會員mem_notans_set = set([]) #未回執會員for each in mem_set: if each in ans_attend_set: mem_attend_set.add(each) elif each in ans_notatt_set: mem_notatt_set.add(each) else: mem_notans_set.add(each)#3. 顯示結果def disp(ss, cap, num = True): #ss: 名單集合 #cap: 開頭描述 print(cap,'({})'.format(len(ss))) for i in range(np.ceil(len(ss)/5).astype(int)): pre = i * 5 nex = (i+1) * 5 #調整顯示格式 dd = '' for each in list(ss)[pre:nex]: if len(each) == 2: dd = dd + ' ' + each elif len(each) == 3: dd = dd + ' ' + each else: dd = dd + '' + each print('{:3.0f} -{:3.0f} {}'.format(i*5+1,(i+1)*5,dd))disp(mem_attend_set,'\n參會會員')disp(mem_notatt_set,'\n不參會會員')disp(mem_notans_set,'\n未回執會員')#4. 將會員參會情況,寫入excelif len(mem_attend_set) > len(mem_notans_set): print('#1') L = len(mem_attend_set) mem_notans_list = list(mem_notans_set) mem_notans_list.extend([''] * (L - len(mem_notans_set))) mem_attend_list = list(mem_attend_set)else: print('#2') L = len(mem_notans_set) mem_attend_list = list(mem_attend_set) mem_attend_list.extend([''] * (L - len(mem_attend_set))) mem_notans_list = list(mem_notans_set) df = pd.DataFrame(mem_attend_list,columns = ['參會會員'])df['']=pd.DataFrame([''])if len(mem_notatt_set) == 0: df['序號1'] = np.NaN df['不參會會員'] = np.NaNelse: df['序號1'] = pd.DataFrame(np.arange(len(mem_notatt_set))+1) df['不參會會員'] = pd.DataFrame(list(mem_notatt_set))df['_']=pd.DataFrame([''])df['序號2'] = pd.DataFrame(np.arange(len(mem_notans_set))+1)df['未回執會員'] = pd.DataFrame(mem_notans_list)df.index = df.index + 1df0 = pd.read_excel('理事和會員回執統計.xlsx',sheet_name='理事回執統計')writer = pd.ExcelWriter('理事和會員回執統計.xlsx')df0.to_excel(writer, sheet_name='理事回執統計')df.to_excel(writer, sheet_name='會員回執統計')writer.save()print('\n\n寫入excel成功~~')
version_4
version_4的改進在於將version_3中寫入兩張表格的操作,整合在一個程式裡,只需要運行一次便可寫入兩張表,也總是會寫入兩張表。問題是要是你只想寫入一張表呢。。
import osimport numpy as npimport pandas as pdos.chdir('C:\\Users\\dell\\Desktop')print('work_directory: ', os.getcwd())loadfile_sheet = ['理事與會員名單.xlsx','理事與會員名單']columns = ['回執參加','回執不參加','理事','會員']savefile_sheet = ['理事和會員回執統計.xlsx','理事回執統計','會員回執統計']display = [1,1]def main(loadfile_sheet,columns,savefile_sheet,display): #1. 載入excel,得到名單 data = pd.read_excel(loadfile_sheet[0],loadfile_sheet[1]) def first_nan_index(pd): for i, each in enumerate(pd): if type(each) == np.float: return i return i idx = first_nan_index(data[columns[0]]) ans_attend_set = set(data[columns[0]][:idx])#回執參會名單 idx = first_nan_index(data[columns[1]]) ans_notatt_set = set(data[columns[1]][:idx])#回執不參會名單 idx = first_nan_index(data[columns[2]]) concil_set = set(data[columns[2]][:idx])#理事名單 idx = first_nan_index(data[columns[3]]) mem_set = set(data[columns[3]][:idx])#會員名單 #2. 統計參會情況 concil_attend_set = set([]) #回執參會理事 concil_notatt_set = set([]) #回執不參會理事 concil_notans_set = set([]) #未回執理事 for each in concil_set: if each in ans_attend_set: concil_attend_set.add(each) elif each in ans_notatt_set: concil_notatt_set.add(each) else: concil_notans_set.add(each) mem_attend_set = set([]) #回執參會會員 mem_notatt_set = set([]) #回執不參會會員 mem_notans_set = set([]) #未回執會員 for each in mem_set: if each in ans_attend_set: mem_attend_set.add(each) elif each in ans_notatt_set: mem_notatt_set.add(each) else: mem_notans_set.add(each) #3. 是否顯示中間結果 def disp(ss, cap, num = True): #ss: 名單集合 #cap: 開頭描述 print(cap,'({})'.format(len(ss))) for i in range(np.ceil(len(ss)/5).astype(int)): pre = i * 5 nex = (i+1) * 5 #調整顯示格式 dd = '' for each in list(ss)[pre:nex]: if len(each) == 2: dd = dd + ' ' + each elif len(each) == 3: dd = dd + ' ' + each else: dd = dd + '' + each print('{:3.0f} -{:3.0f} {}'.format(i*5+1,(i+1)*5,dd)) if display[0]: disp(concil_attend_set,'\n參會理事') disp(concil_notatt_set,'\n不參會理事') disp(concil_notans_set,'\n未回執理事') if display[1]: disp(mem_attend_set,'\n參會會員') disp(mem_notatt_set,'\n不參會會員') disp(mem_notans_set,'\n未回執會員') #4. 寫入excel def trans_pd(df,ss,cap,i=1): if len(ss) == 0: df['序號{}'.format(i)] = np.NaN df[cap] = np.NaN else: df['序號{}'.format(i)] = pd.DataFrame(np.arange(len(ss))+1) df[cap] = pd.DataFrame(list(ss)) df['_'*i]=pd.DataFrame(['']) return df def set2list(mem_attend_set,mem_notans_set): if len(mem_attend_set) > len(mem_notans_set): L = len(mem_attend_set) mem_notans_list = list(mem_notans_set) mem_notans_list.extend([''] * (L - len(mem_notans_set))) mem_attend_list = list(mem_attend_set) else: L = len(mem_notans_set) mem_attend_list = list(mem_attend_set) mem_attend_list.extend([''] * (L - len(mem_attend_set))) mem_notans_list = list(mem_notans_set) return mem_attend_list,mem_notans_list mem_attend_list, mem_notans_list = set2list(mem_attend_set, mem_notans_set) df1 = pd.DataFrame(mem_attend_list,columns = ['參會會員']) df1['']=pd.DataFrame(['']) df1 = trans_pd(df1,mem_notatt_set,'不參會會員') df1 = trans_pd(df1,mem_notans_set,'未回執會員',2) df1.index = df1.index + 1 concil_attend_list, concil_notans_list = set2list(concil_attend_set, concil_notans_set) df2 = pd.DataFrame(concil_attend_list,columns = ['參會理事']) df2['']=pd.DataFrame(['']) df2 = trans_pd(df2,concil_notatt_set,'不參會理事') df2 = trans_pd(df2,concil_notans_list,'未回執理事',2) df2.index = df2.index + 1 writer = pd.ExcelWriter(savefile_sheet[0]) df2.to_excel(writer, sheet_name=savefile_sheet[1]) df1.to_excel(writer, sheet_name=savefile_sheet[2]) writer.save() print('\n\n寫入excel成功~~')if __name__ == '__main__': main(loadfile_sheet,columns,savefile_sheet,display)
version_5
version_5對修複set中出現nan的方法進行了改進和簡化; 而且將程式模組化,更具可讀性; 可以自由控制寫入多少張表了。
import osimport numpy as npimport pandas as pdos.chdir('C:\\Users\\dell\\Desktop')print('work_directory: ', os.getcwd())loadfile_sheet = ['理事與會員名單.xlsx','理事與會員名單']common_columns = ['回執參加','回執不參加']concerned_columns = ['理事','會員']disp_columns = ['參會','不參會','未回執']savefile_sheet = ['理事和會員回執統計.xlsx','理事回執統計','會員回執統計']def disp(ss, cap, num = True): #ss: 名單集合 #cap: 開頭描述 print(cap,'({})'.format(len(ss))) for i in range(np.ceil(len(ss)/5).astype(int)): pre = i * 5 nex = (i+1) * 5 #調整顯示格式 dd = '' for each in list(ss)[pre:nex]: if len(each) == 2: dd = dd + ' ' + each elif len(each) == 3: dd = dd + ' ' + each else: dd = dd + '' + each print('{:3.0f} -{:3.0f} {}'.format(i*5+1,(i+1)*5,dd))def trans_pd(df,ss,cap,i=1): df['_'*i]=pd.DataFrame(['']) if len(ss) == 0: df['序號{}'.format(i)] = np.NaN df[cap] = np.NaN else: df['序號{}'.format(i)] = pd.DataFrame(np.arange(len(ss))+1) df[cap] = pd.DataFrame(list(ss)) return dfdef set2list(ss1,ss2): if len(ss1) > len(ss2): L = len(ss1) ss2_list = list(ss2) ss2_list.extend([''] * (L - len(ss2))) ss1_list = list(ss1) else: L = len(ss2) ss1_list = list(ss1) ss1_list.extend([''] * (L - len(ss1))) ss2_list = list(ss2) return ss1_list,ss2_list def get_df(loadfile_sheet,common_columns,concerned_column,disp_columns, display = True): #1. 載入excel data = pd.read_excel(loadfile_sheet[0],loadfile_sheet[1]) common_set1 = set(data[common_columns[0]]) common_set1.discard(np.NaN) common_set2 = set(data[common_columns[1]]) common_set2.discard(np.NaN) concerned_set = set(data[concerned_column]) concerned_set.discard(np.NaN) #2. 統計 concerned_in_set_1 = set([]) concerned_in_set_2 = set([]) concerned_in_no_set = set([]) for each in concerned_set: if each in common_set1: concerned_in_set_1.add(each) elif each in common_set2: concerned_in_set_2.add(each) else: concerned_in_no_set.add(each) #3. 顯示 if display: disp(concerned_in_set_1,'\n'+disp_columns[0]+concerned_column) disp(concerned_in_set_2,'\n'+disp_columns[1</