Implementing a pruning-free decision tree in Python
The full implementation is below. It supports three attribute-selection criteria (ID3, C4.5, and Gini), handles integer-valued features only, and performs no pruning; after a subtree is built, its split attribute is returned to the parent's attribute list so sibling subtrees can reuse it.

"""Make your own decision tree: no pruning."""

import math

import numpy as np
import pydot


class Node:
    def __init__(self, data):
        self.attr = -1    # attribute this node splits on
        self.sons = {}    # child nodes, keyed by attribute value
        self.ans = None   # predicted class; set only on leaf nodes
        self.data = data  # indices of the training samples at this node

    def is_leaf(self):
        return self.ans is not None

    def __str__(self):
        return "node_cnt:{} {}".format(
            len(self.data), "ans:%d" % self.ans if self.ans is not None else "")


class DecisionTree:
    def __init__(self, x, y, criteria="ID3"):
        self.x = np.array(x)
        self.y = np.array(y)
        if (not np.issubdtype(self.x.dtype, np.integer)
                or not np.issubdtype(self.y.dtype, np.integer)):
            raise ValueError("this decision tree can only handle integer types")
        self.num_class = len(set(y))
        self.num_attr = len(x[0])
        self.criteria = criteria
        self.root = self._build(list(range(len(self.y))),
                                list(range(self.num_attr)))

    def _split(self, data, attr):
        # partition the sample indices in `data` by their value of attribute `attr`
        xset = {}
        for i in data:
            v = self.x[i][attr]
            xset.setdefault(v, []).append(i)
        return xset

    def _build_table(self, data):
        # count samples along three dimensions: attribute, attribute value, class
        table = [{} for _ in range(self.num_attr)]
        for i in data:
            for attr in range(self.num_attr):
                v, c = self.x[i][attr], self.y[i]
                table[attr].setdefault(v, {}).setdefault(c, 0)
                table[attr][v][c] += 1
        return table

    def _id3(self, table):
        # ID3 score from the count table: equals the node size times the
        # information gain, plus a constant that is the same for every attribute
        aloga = 0
        rlogr = 0
        for v in table:
            r = 0
            for c in table[v]:
                aloga += table[v][c] * math.log(table[v][c])
                r += table[v][c]
            rlogr += r * math.log(r)
        return aloga - rlogr

    def _c45(self, table, tlogt, slogs):
        # C4.5 score: a shifted gain ratio (gain / split information, minus 1),
        # which ranks attributes the same way the gain ratio does
        aloga = 0
        rlogr = 0
        for v in table:
            r = 0
            for c in table[v]:
                aloga += table[v][c] * math.log(table[v][c])
                r += table[v][c]
            rlogr += r * math.log(r)
        return (tlogt - aloga) / (1 if len(table) == 1 else rlogr - slogs)

    def _gini(self, table):
        # Gini score: sum over values of (sum of squared class counts) / count;
        # maximizing it minimizes the weighted Gini impurity of the split
        gain = 0
        for v in table:
            a2 = 0
            r = 0
            for c in table[v]:
                a2 += table[v][c] ** 2
                r += table[v][c]
            gain += a2 / r
        return gain

    def _c45_tlogt(self, data):
        # per-node constants used only by C4.5 (ID3 and Gini do not need them)
        slogs = len(data) * math.log(len(data))
        cnt = {}
        for i in data:
            y = self.y[i]
            cnt[y] = cnt.get(y, 0) + 1
        tlogt = 0
        for c in cnt:
            tlogt += cnt[c] * math.log(cnt[c])
        return tlogt, slogs

    def _select_attr(self, data, attrs):
        # pick the attribute with the best score under the chosen criterion
        table = self._build_table(data)
        ans_attr, ans_gain = None, None
        for attr in attrs:
            if self.criteria == "ID3":
                gain = self._id3(table[attr])
            elif self.criteria == "Gini":
                gain = self._gini(table[attr])
            elif self.criteria == "C45":
                tlogt, slogs = self._c45_tlogt(data)
                gain = self._c45(table[attr], tlogt, slogs)
            else:
                raise ValueError(
                    "unknown criteria {}; the 3 supported criteria are "
                    "ID3, C45 and Gini".format(self.criteria))
            if ans_gain is None or gain > ans_gain:
                ans_gain = gain
                ans_attr = attr
        return ans_attr

    def _all_same(self, array):
        # True if every element of `array` equals the first one
        first = array[0]
        for i in array:
            if i != first:
                return False
        return True

    def _build(self, data, attrs):
        node = Node(data)
        # make a leaf when all labels agree or no attributes are left
        # (in the latter case the first sample's label is used, no majority vote)
        if self._all_same(self.y[data]) or not attrs:
            node.ans = self.y[data[0]]
            return node
        node.attr = self._select_attr(data, attrs)
        # print(node.attr, "selected attr")
        attrs.remove(node.attr)
        xset = self._split(data, node.attr)
        for v in xset:
            node.sons[v] = self._build(xset[v], attrs)
        attrs.append(node.attr)  # give the attribute back to the parent node
        return node

    def predict(self, data_x):
        def _predict_one(x):
            node = self.root
            while not node.is_leaf():
                value = x[node.attr]
                if value in node.sons:
                    node = node.sons[value]
                else:
                    break  # unseen attribute value: no path to follow
            if node.is_leaf():
                return node.ans
            return None  # no answer for this sample
        return np.array(list(map(_predict_one, data_x)))

    def get_node_count(self):
        def dfs(node):
            cnt = 1
            for v in node.sons:
                cnt += dfs(node.sons[v])
            return cnt
        return dfs(self.root)

    def export_graphviz(self):
        g = pydot.Dot(graph_type="digraph")

        def dfs(node, parent, label):
            if hasattr(dfs, "nodeid"):
                dfs.nodeid += 1
            else:
                dfs.nodeid = 0
            me = pydot.Node(str(dfs.nodeid), label=str(node))
            g.add_node(me)
            if parent is not None:
                g.add_edge(pydot.Edge(parent, me, label=label))
            for k, v in node.sons.items():
                dfs(v, me, "attr{}={}".format(node.attr, k))

        dfs(self.root, None, "")
        g.write("haha.jpg", prog="dot", format="jpg")


if __name__ == '__main__':
    # criterion selection: "ID3", "Gini" or "C45"
    gain_f = "ID3"
    x = np.array([[0, 3, 0], [0, 2, 1], [1, 1, 2], [1, 2, 2], [2, 3, 0], [2, 1, 1]])
    y = np.array([0, 0, 1, 2, 0, 1])
    tree = DecisionTree(x, y, gain_f)
    ans = tree.predict(x)
    print(ans)
    cnt = np.count_nonzero(y == ans)
    print('correct count, accuracy:', cnt, cnt / len(x))
    print('unanswered count:', len([1 for i in range(len(ans)) if ans[i] is None]))
    print('total node count:', tree.get_node_count())
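As a sanity check, the textbook information gain can be computed directly and compared against the tree's choice: the _id3 score above equals the node size times the information gain plus a constant that is the same for every attribute at a node, so both rank attributes identically. The sketch below is mine, not part of the tree code; entropy and info_gain are hypothetical helper names, and the data is the toy set from the __main__ block.

import numpy as np

def entropy(labels):
    # Shannon entropy (in nats) of an integer label array
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return float(-np.sum(p * np.log(p)))

def info_gain(x, y, attr):
    # textbook information gain of splitting on column `attr`
    gain = entropy(y)
    for v in np.unique(x[:, attr]):
        mask = x[:, attr] == v
        gain -= mask.mean() * entropy(y[mask])
    return gain

x = np.array([[0, 3, 0], [0, 2, 1], [1, 1, 2], [1, 2, 2], [2, 3, 0], [2, 1, 1]])
y = np.array([0, 0, 1, 2, 0, 1])
for attr in range(x.shape[1]):
    print("attr", attr, "gain:", info_gain(x, y, attr))

The attribute with the largest printed gain should be the one stored in tree.root.attr when the tree is built with the "ID3" criterion.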
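One practical note on export_graphviz: pydot only builds the DOT description of the graph and shells out to the Graphviz dot program to render it, so the Graphviz binaries must be installed and on the PATH, in addition to pip install pydot. A minimal usage sketch, assuming the class and toy data defined above:

# assumes Graphviz is installed (e.g. apt-get install graphviz) and on the PATH
tree = DecisionTree(x, y, "Gini")  # any of ID3 / C45 / Gini works here
tree.export_graphviz()             # writes haha.jpg to the working directory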