Python Implementation Decision Tree

Source: Internet
Author: User
Tags: id3

A decision tree implemented in Python, without pruning

"""Make your own decision tree: no pruning.

A small classification tree for integer-coded categorical features.  Three
attribute-selection criteria are supported: ``id3`` (information gain),
``c45`` (gain ratio) and ``gini`` (Gini index).
"""
import itertools
import math
from collections import Counter, defaultdict

import numpy as np

try:
    import pydot
except ImportError:  # pydot is only required by DecisionTree.export_graphviz
    pydot = None


class Node:
    """One node of the decision tree.

    Attributes:
        attr: index of the attribute this node splits on (-1 until chosen).
        sons: mapping of attribute value -> child ``Node``.
        ans: predicted label; only leaves carry one, so ``ans is not None``
            is what marks a node as a leaf.
        data: list of training-sample indices that reach this node.
    """

    def __init__(self, data):
        self.attr = -1
        self.sons = {}
        self.ans = None
        self.data = data

    def is_leaf(self):
        """Return True when this node carries an answer."""
        return self.ans is not None

    def __str__(self):
        # Compare against None explicitly: label 0 is a valid answer and
        # must not be dropped from the description.
        answer = "ans:%d" % self.ans if self.ans is not None else ""
        return "node_cnt:{} {}".format(len(self.data), answer)


class DecisionTree:
    """Unpruned decision tree over integer-valued features and labels.

    Args:
        x: 2-D array-like of integers, one row per sample.
        y: 1-D array-like of integer class labels, one per row of ``x``.
        creteria: attribute-selection criterion, one of ``"id3"``, ``"c45"``
            or ``"gini"`` (case-insensitive).

    Raises:
        ValueError: if the input is empty or mis-shaped, is not of an
            integer dtype, or the criterion is unknown.
    """

    SUPPORTED_CRITERIA = ("id3", "c45", "gini")

    def __init__(self, x, y, creteria="id3"):
        self.x = np.array(x)
        self.y = np.array(y)
        if self.x.ndim != 2 or self.x.shape[0] == 0 or self.x.shape[1] == 0:
            raise ValueError("x must be a non-empty 2-D array of samples")
        if self.y.ndim != 1 or len(self.y) != len(self.x):
            raise ValueError("y must be 1-D with one label per row of x")
        # np.issubdtype accepts every integer width; the removed np.int
        # alias is not needed.
        if not (np.issubdtype(self.x.dtype, np.integer)
                and np.issubdtype(self.y.dtype, np.integer)):
            raise ValueError(
                "The decision tree here can only handle integer types")

        self.creteria = str(creteria).lower()
        if self.creteria not in self.SUPPORTED_CRITERIA:
            raise ValueError(
                "Unknown criterion {!r}; supported criteria are "
                "id3, c45, gini".format(creteria))

        self.num_class = len(set(self.y.tolist()))
        self.num_attr = self.x.shape[1]
        self.root = self._build(list(range(len(self.y))),
                                list(range(self.num_attr)))

    def _split(self, X, attr):
        """Group the sample indices in ``X`` by their value of ``attr``."""
        groups = defaultdict(list)
        for i in X:
            groups[self.x[i][attr]].append(i)
        return dict(groups)

    def _build_table(self, X, attrs):
        """Count samples per (attribute, attribute value, class).

        Returns a list indexed by attribute; entry ``attr`` maps
        value -> {class: count}.  Only the attributes in ``attrs`` are filled.
        """
        table = [{} for _ in range(self.num_attr)]
        for i in X:
            label = self.y[i]
            for attr in attrs:
                by_class = table[attr].setdefault(self.x[i][attr], {})
                by_class[label] = by_class.get(label, 0) + 1
        return table

    @staticmethod
    def _count_logs(table):
        """Return (sum a*log a over cells, sum r*log r over value totals)."""
        aloga = 0.0
        rlogr = 0.0
        for by_class in table.values():
            total = 0
            for count in by_class.values():
                aloga += count * math.log(count)
                total += count
            rlogr += total * math.log(total)
        return aloga, rlogr

    def _id3(self, table):
        """ID3 score: equals -n * H(Y | attr), so larger is better."""
        aloga, rlogr = self._count_logs(table)
        return aloga - rlogr

    def _c45(self, table, tlogt, slogs):
        """C4.5 gain ratio: information gain divided by split information.

        An attribute with a single value cannot split the data and has zero
        split information; it is ranked below every real split.
        """
        if len(table) == 1:
            return float("-inf")
        aloga, rlogr = self._count_logs(table)
        split_info = slogs - rlogr              # n * H(attr)
        if split_info <= 0:
            return float("-inf")
        gain = (slogs - tlogt) + (aloga - rlogr)  # n * (H(Y) - H(Y | attr))
        return gain / split_info

    def _gini(self, table):
        """Gini score: sum over values of (sum of squared counts) / total.

        Maximising this minimises the weighted Gini impurity.
        """
        gain = 0.0
        for by_class in table.values():
            squares = 0
            total = 0
            for count in by_class.values():
                squares += count ** 2
                total += count
            gain += squares / total
        return gain

    def _c45_tlogt(self, data):
        """Return ``(tlogt, slogs)`` for the samples in ``data``.

        ``tlogt`` is the sum of c*log c over class counts and ``slogs`` is
        n*log n.  Only the C4.5 criterion needs them.
        """
        slogs = len(data) * math.log(len(data))
        counts = Counter(self.y[i] for i in data)
        tlogt = sum(c * math.log(c) for c in counts.values())
        return tlogt, slogs

    def _select_attr(self, X, attrs):
        """Return the attribute in ``attrs`` with the best score on ``X``."""
        table = self._build_table(X, attrs)
        if self.creteria == "id3":
            score = self._id3
        elif self.creteria == "gini":
            score = self._gini
        elif self.creteria == "c45":
            # These depend only on the samples, so compute them once rather
            # than once per candidate attribute.
            tlogt, slogs = self._c45_tlogt(X)

            def score(attr_table):
                return self._c45(attr_table, tlogt=tlogt, slogs=slogs)
        else:
            raise ValueError(
                "Unknown criterion {!r}; supported criteria are "
                "id3, c45, gini".format(self.creteria))

        best_attr, best_gain = None, None
        for attr in attrs:
            gain = score(table[attr])
            if best_gain is None or gain > best_gain:
                best_attr, best_gain = attr, gain
        return best_attr

    def _all_same(self, array):
        """Return True when every element of ``array`` equals the first."""
        first = array[0]
        return all(item == first for item in array)

    def _build(self, data, attrs):
        """Recursively build the subtree for the sample indices ``data``.

        ``attrs`` lists the attributes still available for splitting; it is
        not modified.
        """
        node = Node(data)
        labels = self.y[data]
        if not attrs or self._all_same(labels):
            # With no attribute left the labels may still be mixed, so take
            # the majority label instead of whichever sample comes first.
            node.ans = Counter(labels.tolist()).most_common(1)[0][0]
            return node

        node.attr = self._select_attr(data, attrs)
        remaining = [a for a in attrs if a != node.attr]
        for value, subset in self._split(data, node.attr).items():
            node.sons[value] = self._build(subset, remaining)
        return node

    def predict(self, data_x):
        """Predict a label for each row of ``data_x``.

        Returns a NumPy array with one entry per row; an entry is ``None``
        when the row holds an attribute value never seen at that point of
        the tree during training.
        """
        def predict_one(x):
            node = self.root
            while not node.is_leaf():
                child = node.sons.get(x[node.attr])
                if child is None:
                    return None  # no answer
                node = child
            return node.ans

        return np.array([predict_one(x) for x in data_x])

    def get_node_count(self):
        """Return the total number of nodes (internal nodes and leaves)."""
        def count(node):
            return 1 + sum(count(child) for child in node.sons.values())

        return count(self.root)

    def export_graphviz(self, path="haha.jpg", fmt="jpg"):
        """Render the tree with Graphviz ``dot`` and write it to ``path``.

        Args:
            path: output file name.
            fmt: output format handed to Graphviz.

        Raises:
            ImportError: if the optional ``pydot`` package is not installed.
        """
        if pydot is None:
            raise ImportError("export_graphviz requires the 'pydot' package")

        graph = pydot.Dot(graph_type="digraph")
        node_ids = itertools.count()

        def dfs(node, parent, label):
            me = pydot.Node(str(next(node_ids)), label=str(node))
            graph.add_node(me)
            if parent is not None:
                graph.add_edge(pydot.Edge(parent, me, label=label))
            for value, child in node.sons.items():
                dfs(child, me, "attr{}={}".format(node.attr, value))

        dfs(self.root, None, "")
        graph.write(path, prog="dot", format=fmt)


# Backward-compatible alias for the original (misspelled) class name.
DicisionTree = DecisionTree


def main():
    """Train on a tiny data set and report training accuracy."""
    # Gain function selection: id3, gini, c45
    gain_f = "id3"
    x = np.array([[0, 3, 0], [0, 2, 1], [1, 1, 2],
                  [1, 2, 2], [2, 3, 0], [2, 1, 1]])
    y = np.array([0, 0, 1, 2, 0, 1])
    tree = DecisionTree(x, y, gain_f)
    ans = tree.predict(x)
    print(ans)
    cnt = np.count_nonzero(y == ans)
    print("correct number, correct rate", cnt, cnt / len(x))
    # predict() reports an unanswerable sample as None.
    print("number of uncertainties", sum(1 for a in ans if a is None))
    print("Total number of nodes", tree.get_node_count())


if __name__ == "__main__":
    main()

Python Implementation Decision Tree

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.