#-*-coding:utf-8-*-Importsysreload (SYS) sys.setdefaultencoding ("UTF8")defLoad_data_set (): Data_set= [ ['Beer','Baby Diapers','Shorts'] , ['Baby Diapers','Shorts'] , ['Baby Diapers','Milk'] , ['Beer','Baby Diapers','Shorts'] , ['Beer','Milk'] , ['Baby Diapers','Milk'] , ['Beer','Milk'] , ['Beer','Baby Diapers','Milk','Shorts'] , ['Beer','Baby Diapers','Milk'] ] returnData_setdefcreate_c1 (data_set): C1=set () forTinchData_set: forIteminchT:item_set=Frozenset ([item]) C1.add (item_set)returnC1defIs_apriori (Ck_item, Lksub1): forIteminchCk_item:sub_ck= Ck_item-Frozenset ([item])ifSub_ck not inchLksub1:returnFalsereturnTruedefCreate_ck (Lksub1, k): Ck=set () Len_lksub1=Len (Lksub1) List_lksub1=list (LKSUB1) forIinchRange (LEN_LKSUB1): forJinchRange (1, len_lksub1): L1=list (List_lksub1[i]) L2=list (List_lksub1[j]) L1.sort () L2.sort ()ifL1[0:k-2] = = L2[0:k-2]: Ck_item= List_lksub1[i] |List_lksub1[j]ifIs_apriori (Ck_item, Lksub1): Ck.add (Ck_item)returnCkdefGenerate_lk_by_ck (Data_set, Ck, Min_support, support_data): Lk=set () Item_count= {} forTinchData_set: forIteminchCk:ifItem.issubset (t):ifItem not inchItem_count:item_count[item]= 1Else: Item_count[item]+ = 1T_num=float (len (data_set)) forIteminchItem_count:if(Item_count[item]/t_num) >=Min_support:Lk.add (item) Support_data[item]= Item_count[item]/T_numreturnLkdefgenerate_l (Data_set, K, min_support): Support_data={} C1=create_c1 (data_set) L1=Generate_lk_by_ck (Data_set, C1, Min_support, support_data) Lksub1=l1.copy () L=[] L.append (LKSUB1) forIinchRange (2, k+1): Ci=Create_ck (Lksub1, i) Li=Generate_lk_by_ck (Data_set, Ci, Min_support, support_data) Lksub1=li.copy () l.append (LKSUB1)returnL, Support_datadefgenerate_big_rules (L, Support_data, min_conf): Big_rule_list=[] sub_set_list= [] forIinchRange (0, Len (L)): forFreq_setinchL[i]: forSub_setinchsub_set_list:ifSub_set.issubset (Freq_set): Conf= Support_data[freq_set]/support_data[freq_set-Sub_set] Big_rule= (Freq_set-Sub_set, Sub_set, conf)ifConf >= 
min_conf andBig_rule not inchbig_rule_list:big_rule_list.append (big_rule) sub_set_list.append (freq_set) returnbig_rule_listif __name__=="__main__": """Test"""Data_set=Load_data_set () L, Support_data= Generate_l (Data_set, k=3, min_support=0.2) Big_rules_list= Generate_big_rules (L, Support_data, min_conf=0.7) forLkinchL:Print "="*50Print "Frequent"+ STR (len list (Lk) [0]) +"-itemsets\t\tsupport" Print "="*50 forFreq_setinchLk:PrintFreq_set, Support_data[freq_set]Print Print "Big Rules" forIteminchbig_rules_list:PrintItem[0],"=", Item[1],"conf:", Item[2]
Output:
Implementing association rule analysis in Python with the Apriori algorithm