This article describes the algorithm for extracting frequent itemsets from an FP-tree.
See also: Introduction to the FP-growth algorithm; Python implementation of the FP-growth algorithm; Python implementation of FP-tree construction for the FP-growth algorithm.
Source code of tree_miner.py:
# coding=utf-8
import copy
import itertools


class tree_miner(object):
    """Mine frequent itemsets from an already constructed FP-tree.

    The tree object is expected to expose ``root``; every node is expected
    to expose ``data``, ``count``, ``parent`` and a ``children`` mapping
    (these are the only attributes this class reads).  The root node is
    recognised by ``data == 'null'``.
    """

    # Value stored in ``data`` of the tree's root node.
    _ROOT_DATA = 'null'

    def __init__(self, tree=None, min_sup=-1, headertable=None):
        """Store the minimum support count and mine ``tree`` immediately.

        :param tree: a constructed FP-tree; when ``None`` nothing is mined
            (previously this raised ``AttributeError``).
        :param min_sup: minimum support count.
        :param headertable: head-node table of ``tree``: maps each item to
            the list of tree nodes that carry that item.
        """
        self.min_sup = min_sup
        if tree is not None:
            self.tree_mining(tree=tree, headertable=headertable)

    def tree_mining(self, tree, a=None, headertable=None):
        """Recursively mine ``tree`` and print every itemset that is found.

        :param tree: the (conditional) FP-tree to mine.
        :param a: list of suffix itemsets accumulated so far (alpha in the
            FP-growth pseudo-code); ``None`` means no suffix yet.
        :param headertable: head-node table belonging to ``tree``.
        :returns: ``None``; results are printed through ``showresult``.
        """
        suffixes = [] if a is None else a
        headertable = {} if headertable is None else headertable

        # Follow the tree downwards for as long as it does not branch,
        # remembering the count of every node met on the way.
        path_counts = {}
        node = tree.root
        while len(node.children) == 1:
            node = next(iter(node.children.values()))
            path_counts.setdefault(node.data, node.count)

        if not node.children:
            # The walk reached a leaf, so the whole tree is a single path.
            self.showresult(
                self.getl(items=path_counts, min_sup=self.min_sup, a=suffixes)
            )
            return

        # Imported here rather than at module level so that the helper
        # methods stay importable without the tree-construction module.
        import tree_builder

        for item in headertable:
            # Build the suffix list (beta) handed to the conditional tree.
            # NOTE(review): each old suffix is passed on both with and
            # without ``item``; kept as in the original -- confirm intended.
            if suffixes:
                extended = []
                for elem in suffixes:
                    if elem != []:
                        temp = copy.copy(elem)
                        extended.append(temp)
                        extended.append([item] + temp)
            else:
                extended = [[item]]

            # Conditional pattern bases of ``item`` and their counts.
            pattem, counts = self.findpattembase(item, headertable)

            # Build the conditional FP-tree for ``item``; the builder fills
            # ``myheadertable`` with the head-node table of the new tree.
            myheadertable = {}
            condition_builder = tree_builder.Tree_builder(
                routines=pattem, counts=counts, headertable=myheadertable
            )
            if condition_builder.tree.root.children:
                self.tree_mining(
                    tree=condition_builder.tree,
                    a=extended,
                    headertable=myheadertable,
                )
        return

    def findpattembase(self, item, headertable):
        """Collect the conditional pattern bases of ``item``.

        :param item: the item whose pattern bases are wanted.
        :param headertable: head-node table; ``headertable[item]`` is the
            list of tree nodes carrying ``item``.
        :returns: ``(bases, counts)`` where ``bases[i]`` is a tuple of the
            ancestors' ``data`` values (nearest ancestor first, root
            excluded) and ``counts[i]`` is the count of the node it was
            derived from.  Nodes hanging directly off the root are skipped.
        """
        bases = []
        counts = []
        for item_node in headertable[item]:
            ancestor = item_node.parent
            if ancestor.data == self._ROOT_DATA:
                # Direct child of the root: its prefix path is empty.
                continue
            prefix = []
            while ancestor.data != self._ROOT_DATA:
                prefix.append(ancestor.data)
                ancestor = ancestor.parent
            bases.append(tuple(prefix))
            counts.append(item_node.count)
        return bases, counts

    def showresult(self, result=None):
        """Print every mined itemset as ``(items...) : count``.

        :param result: list of lists; the last element of each inner list
            is the support count, the elements before it are the items.
            Empty inner lists are ignored.  The input is not modified.
        """
        for elem in result or []:
            if not elem:
                continue
            print("{} : {}".format(tuple(elem[:-1]), elem[-1]))
        return

    def combiner(self, myList, n):
        """Return all ``n``-element combinations of ``myList`` as lists.

        Combinations keep the order of ``myList``.  ``n == 0`` yields
        ``[[]]``; ``n`` larger than ``len(myList)`` yields ``[]``.
        """
        return [list(combo) for combo in itertools.combinations(myList, n)]

    def findminimum(self, items, elem):
        """Return the smallest ``items[x]`` over all ``x`` in ``elem``.

        :raises ValueError: if ``elem`` is empty.
        :raises KeyError: if an element of ``elem`` is missing in ``items``.
        """
        return min(items[member] for member in elem)

    def getl(self, items, min_sup=-1, a=None):
        """Generate the frequent itemsets of a single-path tree.

        :param items: mapping ``node data -> count`` for the nodes on the
            single path.
        :param min_sup: minimum support count; combinations whose smallest
            count is below it are dropped.
        :param a: list of suffix itemsets; empty suffixes are ignored.  If
            no non-empty suffix is given, the combinations themselves are
            returned (previously nothing was returned in that case).
        :returns: list of ``combination + suffix + [support]`` lists, with
            larger combinations first.
        """
        suffixes = [suffix for suffix in (a or []) if suffix] or [[]]

        nodes = list(items)
        combos = []
        for size in range(1, len(nodes) + 1):
            combos += self.combiner(myList=nodes, n=size)

        final_result = []
        for combo in reversed(combos):
            support = self.findminimum(items, combo)
            if support < min_sup:
                continue
            for suffix in suffixes:
                # Build a fresh list per suffix: the original extended
                # ``combo`` in place, corrupting later suffixes' results.
                final_result.append(combo + list(suffix) + [support])
        return final_result
Copyright notice: please credit the source when reprinting. Thank you!
Mining frequent itemsets with the FP-growth algorithm: a Python implementation