python actual implementation Excel reads, counts, writes
background
Image in the field of a domestic conference is about to be convened, to send a variety of invitations to mail, and then to input, statistics mail reply (participants or not to attend, etc.). The teacher entrusted me with such an important task. PS: When the statistics reply to the mail, you can know who will attend or who does not attend.
And my main task, in addition to the entry email reply, is the statistical directors and ordinary members of the Conference (participants, not participants, did not reply). Input mail reply information can not only manual operation, but if the statistics are also manual, the workload is too large (such as the list of hundreds of people to search for the other hundred people in the table). ), so I thought of using Python to help, spend two days to constantly modify, wrote 6 versions ... Summary version_1 Basic Implementation of Excel reading, statistics, display functions, but there are many problems, such as the display after the copy, paste to Excel table, and set there are Nan such a bug. Version_2 compared to version_1, this version uses set instead of list, which can be automatically weighed. version_3 resolves a nan bug in set and also joins the Excel write feature, but writes only one table at a time, so you have to run it two times to write two tables (sheet). Version_4 's improvement is to write the two tables in the Version_3, integrated in a program, only need to run once to write two tables, but also always write two tables, in case you just want to write a table. The biggest improvement in the previous version of Version_5 is that the program is modular and more readable; The method of repairing the presence of Nan in set is also improved and simplified. And it's free to control how many sheets are written. Version_final compared to version_5, fixed a bug, prior knowledge required before, now more general point (Prep function replaces the set2list function). version_1
The basic implementation of Excel reading, statistics, display functions, but there are many problems, as shown after the copy, paste into the Excel table, and set there are Nan such values.
#version_1
import OS
import NumPy as NP
import pandas as PD
Os.chdir (' c:\\users\\dell\\desktop\\ 0711 task ')
print (OS.GETCWD ())
data = Pd.read_excel (' for_python.xlsx ', ' Sheet2 ')
Return_set = set (data[') Receipt list '])
Demand_set = Set (data[' governing list '))
answer_list = []
unanswer_list = [] For each in
Demand_set:
if each of Return_set:
answer_list.append (each)
else:
unanswer_list.append (each)
Notattend_set = set (data[' receipts list '][-15:])
NT = [] for each in
Notattend_set:
If all in Answer_list:
Nt.append (each)
Def disp (ll, caps, num = True):
print (CAP)
if num:
for I, each in enumerate (LL):
print (I+1,each)
else:
for all in Enumerate (LL):
print (each)
disp (answer_list, ' \ n Governing Receipts list ')
disp (unanswer_list, ' \ n Board of Directors ' non-receipt list ')
disp (NT, ' \ n Board of Directors said no to participate in the list ')
version_2
Compared to the previous version, this version uses set instead of list, which can be automatically weighed.
#version_2 import OS import numpy as NP import pandas as PD Os.chdir (' c:\\users\\dell\\desktop\\ 0711 task ') print (OS.GETCWD ()) data = Pd.read_excel (' for_python.xlsx ', ' Sheet2 ') Return_set = set (data[' receipts list ')) Demand_set = Set (data[' Board of Directors ']) Answer_set = set ([]) #理事回执名单 unanswer_set = set ([]) #理事未回执名单 for all in demand_set:if all in Return_set:answer_set.add (each) else:unanswer_set.add (each) Notattend_set = set (data[' receipts list '][-17: ] NT = set ([]) #理事回执说不参加名单 for each in notattend_set:if the Answer_set:nt.add (each) Ans_att_set = an
Swer_set-nt #理事回执参加名单 def disp (SS, cap, num = False): print (CAP) if num:for I, each in enumerate (ss): Print (I+1,each) else:for each into ss:print (each) #disp (answer_set, ' \ n Board of Directors ' receipts list ') disp (a Ns_att_set, ' \ n Board of directors said to participate in the list ' disp (NT, ' \ n/a Board of directors said not to participate in the list ') disp (unanswer_set, ' \ n/a Board of directors ' receipts list ') print (Len (ans_att_set), Len (NT), Len (unanswer_set))
Version_3
This version resolves a nan bug in set and also joins the Excel write feature, but only one table can be written at a time, so you can write two tables (sheet) by running two times. step_1
Import OS import numpy as NP import pandas as PD os.chdir (' C:\\users\\dell\\desktop ') print (' Work_directory: ', OS.GETCWD ( ) data = Pd.read_excel (' Directors and members list. xlsx ', ' Directors and Members ' list ') #1. Load Excel, get three list ans_attend_set = set (data[' receipt participates ') #回执参会名单 N = Len (A Ns_attend_set) Ans_notatt_idx = [I for I in range (N) if type (data[' receipt does not participate in '][i]) = = Np.float][0] Ans_notatt_set = set (data[ ' Receipt does not participate '][:ans_notatt_idx] #回执不参会名单 concil_idx = [I for I in range (N) if type (data[' List of directors '][i]) = = Np.float][0] Concil_set = Set (data[' Board of Directors '][:CONCIL_IDX]) #理事名单 #2. Participation of statistical Directors Concil_attend_set = set ([]) #理事回执参会名单 Concil_notatt_set = Set ([]) #理事回执不参会名单 concil_notans_set = set ([]) #理事未回执名单 for each in concil_set:if each in ans_attend_set:c Oncil_attend_set.add (each) elif each of Ans_notatt_set:concil_notatt_set.add (each) else:concil_n Otans_set.add (each) #3. Display result Def disp (SS, cap, num = True): #ss: List set #cap: Opening description print (Cap, ' ({}) '. Format (len (ss))) for I in rangE (Np.ceil (LEN (ss)/5). Astype (int)): Pre = i * 5 NEX = (i+1) * 5 #调整显示格式 dd = ' for Each in list (ss) [Pre:nex]: If Len (each) = = 2:DD = dd + "+ each Elif len" (ea ch) = = 3:DD = dd + ' + each ELSE:DD = dd + ' + each print (' {: 3.0 F}-{:3.0f} {} '. Format (i*5+1, (i+1) *5,dd) disp (concil_attend_set, ' \ n participants ') disp (concil_notatt_set, ' \ n not attending member ') DISP ( Concil_notans_set, ' \ n/No receipts ') #4. will be director of the event, written to excel df = PD. Dataframe (List (concil_attend_set), columns = [' participants ']) df[']=PD. Dataframe ([']) df[' serial number 1 '] = PD. Dataframe (Np.arange (len (concil_notatt_set)) +1) df[' no participants '] = PD. Dataframe (List (concil_notatt_set)) df[' _ ']=pd. Dataframe ([']) df[' serial number 2 '] = PD. Dataframe (Np.arange (len (concil_notans_set)) +1) df[' no receipt director '] = PD. Dataframe (List (concil_notans_set)) Df.index = Df.index + 1 Df.to_excel (' governing and membership receipts statistics. xlsx ', Sheet_name= ' governing receipt statistics ') print ('
N\n Write Excel Success ~ ~ ')
step_2
Import OS import numpy as NP import pandas as PD os.chdir (' C:\\users\\dell\\desktop ') print (' Work_directory: ', OS.GETCWD ( ) data = Pd.read_excel (' Directors and members list. xlsx ', ' Directors and Members ' list ') #1. Load Excel, get three list ans_attend_set = set (data[' receipt participates ') #回执参会名单 N = Len (A Ns_attend_set) Ans_notatt_idx = [I for I in range (N) if type (data[' receipt does not participate in '][i]) = = Np.float][0] Ans_notatt_set = set (data[ ' Receipt does not participate '][:ans_notatt_idx] #回执不参会名单 mem_idx = [I for I in range (N) if type (data[' recommended person '][i]) = = Np.float][0] Mem_set = set ( data[' recommended person '][:mem_idx] #被推荐为会员代表名单 #2. Statistical membership Attendance Mem_attend_set = set ([]) #回执参会会员 mem_notatt_set = set ([]) # Receipt does not participate member Mem_notans_set = set ([]) #未回执会员 for each in Mem_set:if each in Ans_attend_set:mem_attend_set.add (ea CH) Elif Each of the Ans_notatt_set:mem_notatt_set.add (each) Else:mem_notans_set.add #3. Display result Def disp (SS, cap, num = True): #ss: List set #cap: Opening description print (Cap, ' ({}) '. Format (len (ss))) for I in range (LEN (ss)/5). Astype (Np.ceil) (iNT): Pre = i * 5 NEX = (i+1) * 5 #调整显示格式 dd = ' For each in list (ss) [Pre:nex]: If Len (each) = = 2:DD = dd + ' + elif len (each) = = 3:dd = dd + ' + each ELSE:DD = dd + ' + each print (' {: 3.0f}-{:3.0f} {} '. Format (i*5+1, (i+1) *5,DD) disp (mem_attend_set, ' \ n attendees ') disp (Mem_notatt_set, ' no Attendees ') disp (mem_notans_set, ' \ n ' no receipts ') #4. Member attendees are written to Excel if Len (Mem_attend_set) > Len (mem_notans_set): Print (' #1 ') L = Len (mem_attend_set) Mem_notan S_list = List (mem_notans_set) mem_notans_list.extend (['] * (L-len (Mem_notans_set))) Mem_attend_list = List (mem_ Attend_set) else:print (' #2 ') L = Len (mem_notans_set) mem_attend_list = List (Mem_attend_set) Mem_attend_li St.extend (['] * (L-len (mem_attend_set)) mem_notans_list = List (mem_notans_set) df = PD. Dataframe (mem_attend_list,columns = [' Participant member ']) df[']=PD. DAtaframe ([']) If Len (mem_notatt_set) = = 0:df[' Serial number 1 ' = NP. NaN df[' not attending member '] = NP. NaN else:df[' serial number 1 '] = PD. Dataframe (Np.arange (mem_notatt_set) +1) df[' not participating member '] = PD. Dataframe (List (mem_notatt_set)) df[' _ ']=pd. Dataframe ([']) df[' serial number 2 '] = PD. Dataframe (Np.arange (len (mem_notans_set)) +1) df[' not receipt member '] = PD. Dataframe (mem_notans_list) Df.index = df.index + 1 df0 = Pd.read_excel (' Governing and membership receipts statistics. xlsx ', sheet_name= ' governing receipts statistics ') writer = PD.
Excelwriter (' governing and membership receipts statistics. xlsx ') Df0.to_excel (writer, sheet_name= ' governing receipts Statistics ') Df.to_excel (writer, sheet_name= ' member receipts statistics ')
Writer.save () print (' \ n \ nplease write Excel Success ~ ~ ')
Version_4
The improvement of Version_4 is to write two tables in the Version_3, integrated in a program, only need to run once to write two tables, and always write two tables. The question is, if you only want to write a table.
Import OS import numpy as NP import pandas as PD os.chdir (' C:\\users\\dell\\desktop ') print (' Work_directory: ', OS.GETCWD ( ) Loadfile_sheet = [' Directors and members list. xlsx ', ' Directors and Members ' list '] columns = [' Receipt participation ', ' receipt not participating ', ' ' director ', ' member '] savefile_sheet = [' directors ' and membership receipts statistics. xlsx ', ' Governing receipts statistics ', ' Member receipts statistics '] display = [1,1] def main (loadfile_sheet,columns,savefile_sheet,display): #1. Load Excel, get list of data = Pd.read_excel (loadfile_sheet[0],loadfile_sheet[1]) def first_nan_index (PD): For I, EAC h in Enumerate (PD): If Type (all) = = Np.float:return I return i idx = First_nan_ Index (Data[columns[0]]) Ans_attend_set = set (Data[columns[0]][:idx]) #回执参会名单 idx = First_nan_index (data[columns[1)] Ans_notatt_set = set (Data[columns[1]][:idx]) #回执不参会名单 idx = First_nan_index (data[columns[2)]) Concil_set = s
ET (Data[columns[2]][:idx]) #理事名单 idx = First_nan_index (data[columns[3]]) Mem_set = set (DATA[COLUMNS[3]][:IDX)) #会员名单 #2. Statistical attendance Concil_attend_set = Set ([]) #回执参会理事 concil_notatt_set = set ([]) #回执不参会理事 concil_notans_set = set ([]) #未回执理事 for all in Concil_set:
If each of Ans_attend_set:concil_attend_set.add (each) elif each in Ans_notatt_set:
Concil_notatt_set.add (each) else:concil_notans_set.add (each) Mem_attend_set = set ([]) #回执参会会员 Mem_notatt_set = set ([]) #回执不参会会员 mem_notans_set = set ([]) #未回执会员 for all in mem_set:if all in Ans_at
Tend_set:mem_attend_set.add (each) elif each of ans_notatt_set:mem_notatt_set.add (each) Else:mem_notans_set.add (each) #3. Show intermediate result def disp (SS, cap, num = True): #ss: List collection #cap: Opening description print (Cap, ' ({}) '. Format (l
En (ss))) for I in Range (Np.ceil (len (ss)/5). Astype (int)): Pre = i * 5 NEX = (i+1) * 5
#调整显示格式 dd = ' For each in list (ss) [Pre:nex]: If Len (each) = = 2:DD = dd + ' + elif len (each) = 3: DD = dd + ' + each ELSE:DD = dd + ' + each print (' {: 3.0f}-{:3.0 f} {} '. Format (i*5+1, (i+1) *5,dd)) if display[0]: disp (concil_attend_set, ' \ n participants ') disp (concil_notatt_s ET, ' \ n ' not a member of the Council ') disp (concil_notans_set, ' \ n ' no receipt member ') if DISPLAY[1]: disp (mem_attend_set, ' \ n participants ') d ISP (Mem_notatt_set, ' \ n ' disp member ') (Mem_notans_set, ' \ n ' no receipts ') #4. Write Excel def trans_pd (df,ss,cap,i=1): If Len (ss) = = 0:df[' ordinal {} '. Format (i)] = NP. NaN Df[cap] = NP. NaN else:df[' ordinal {} '. Format (i)] = PD. Dataframe (Np.arange (len (ss)) +1) Df[cap] = PD. Dataframe (List (ss)) df[' _ ' *i]=pd. Dataframe ([']) return DF def set2list (mem_attend_set,mem_notans_set): If Len (mem_attend_set) > Le
N (mem_notans_set): L = Len (mem_attend_set) mem_notans_list = List (mem_notans_set) mem_notans_list.extend (['] *
(L-len (Mem_notans_set)) Mem_attend_list = List (mem_attend_set) else:l = Len (mem_notans_set) mem_attend_list = Lis T (mem_attend_set) Mem_attend_list.extend (['] * (L-len (mem_attend_set)) Mem_notans_list = List ( Mem_notans_set) return mem_attend_list,mem_notans_list mem_attend_list, mem_notans_list = Set2list (mem_attend _set, mem_notans_set) df1 = PD. Dataframe (mem_attend_list,columns = [' Participant member ']) df1[']=PD. Dataframe ([']) df1 = TRANS_PD (Df1,mem_notatt_set, ' not participating member ') DF1 = TRANS_PD (Df1,mem_notans_set, ' No receipts ', 2) df1.in Dex = Df1.index + 1 concil_attend_list, concil_notans_list = Set2list (Concil_attend_set, concil_notans_set) DF2 = Pd. Dataframe (concil_attend_list,columns = [' Participant member ']) df2[']=PD. Dataframe ([']) DF2 = TRANS_PD (Df2,concil_notatt_set, ' not attendingDirector ') DF2 = TRANS_PD (df2,concil_notans_list, ' no return director ', 2) Df2.index = Df2.index + 1 writer = pd. Excelwriter (Savefile_sheet[0]) Df2.to_excel (writer, sheet_name=savefile_sheet[1]) Df1.to_excel (writer, sheet_name= SAVEFILE_SHEET[2] Writer.save () print (' \ n \ nthe write Excel success ~ ~ ') if __name__ = = ' __main__ ': Main (Loadfile_sheet,colu
Mns,savefile_sheet,display)
version_5
Version_5 improved and simplified the method to fix the presence of Nan in set; And the program is modular, more readable; You are free to control how many tables are written.
Import OS import numpy as NP import pandas as PD os.chdir (' C:\\users\\dell\\desktop ') print (' Work_directory: ', OS.GETCWD ( ) Loadfile_sheet = [' Directors and members list. xlsx ', ' Directors and Members list '] Common_columns = [' Receipt participation ', ' receipts not participating '] concerned_columns = [' trustee ', ' member '] Disp_co Lumns = [' Attendees ', ' No attendees ', ' no receipts '] Savefile_sheet = [' Directors and members ' receipts statistics. xlsx ', ' governing receipts statistics ', ' Membership receipts statistics '] def disp (SS, cap, num = True): #ss: The list collection #cap: The beginning describes print (cap, ' ({}) '. Format (len (ss))) for I in Range (Np.ceil (len (ss)/5). Astype (int)): PR E = i * 5 NEX = (i+1) * 5 #调整显示格式 dd = ' For each in list (ss) [Pre:nex]: if Le n (each) = = 2:DD = dd + "+ elif len (each) = = 3:DD = dd + ' + ea ch ELSE:DD = dd + ' + each print (' {: 3.0f}-{:3.0f} {} '. Format (i*5+1, (i+1) *5,dd)) de F trans_pd (Df,ss,cap,i=1): df[' _ ' *i]=pd. Dataframe ([']) If Len (ss) = = 0:df[' ordinal {} '. Format (i)] = NP. NaN Df[cap] = NP.
NaN Else:df[' serial number {} '. Format (i)] = PD. Dataframe (Np.arange (len (ss)) +1) Df[cap] = PD.
Dataframe (List (ss)) return DF def set2list (ss1,ss2): If Len (SS1) > Len (SS2): L = Len (SS1) Ss2_list = List (SS2) ss2_list.extend (['] * (L-len (SS2))) Ss1_list = List (SS1) else:l = Len (SS2) ss1_list = List (SS1) ss1_list.extend (['] * (L-len (SS1))) Ss2_list = List (SS2) return Ss1_list,ss2_list def get_df (loadfile_sheet,common_columns,concerned_column,disp_columns, display = True): #1.
Load Excel data = Pd.read_excel (loadfile_sheet[0],loadfile_sheet[1]) Common_set1 = set (Data[common_columns[0]) Common_set1.discard (NP. NaN) Common_set2 = set (Data[common_columns[1]]) Common_set2.discard (NP. NaN) Concerned_set = set (Data[concerned_column]) Concerned_set.discard (NP. NaN) #2. Statistic concerned_in_set_1 = set ([]) concerned_in_set_2 = set ([]) Concerned_in_no_set = set ([]For each in concerned_set:if the Common_set1:concerned_in_set_1.add (each) elif EAC h in Common_set2:concerned_in_set_2.add (each) Else:concerned_in_no_set.add #3 . Displays if Display:disp (concerned_in_set_1, ' \ n ' +disp_columns[0]+concerned_column) disp (concerned_in_set_2, ' \ n ' +disp_columns[1</