"""Train, evaluate, save and reload a small MLlib decision-tree classifier.

The script builds nine one-feature LabeledPoint samples, splits them roughly
80/20 into training and test sets, trains a binary DecisionTree classifier on
the training part, prints the model, runs a few sample predictions, reports
the test error, and finally saves the model to disk and loads it back.
"""
from numpy import array
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.tree import DecisionTree, DecisionTreeModel
from pyspark import SparkContext

sc = SparkContext(appName="Pythondecisiontreeclassificationexample")

# Toy dataset: each LabeledPoint is (label, [single feature value]).
data = [
    LabeledPoint(0.0, [0.0]),
    LabeledPoint(1.0, [1.0]),
    LabeledPoint(0.0, [-2.0]),
    LabeledPoint(0.0, [-1.0]),
    LabeledPoint(0.0, [-3.0]),
    LabeledPoint(1.0, [4.0]),
    LabeledPoint(1.0, [4.5]),
    LabeledPoint(1.0, [4.9]),
    LabeledPoint(1.0, [3.0]),
]

all_data = sc.parallelize(data)
# The split is random and unseeded, so the two parts differ between runs and
# the test part can be empty with only nine samples.
(trainingData, testData) = all_data.randomSplit([0.8, 0.2])

# model = DecisionTree.trainClassifier(sc.parallelize(data), 2, {})
model = DecisionTree.trainClassifier(
    trainingData,
    numClasses=2,
    categoricalFeaturesInfo={},  # empty dict: no feature is declared categorical
    impurity='gini',             # must be lowercase; 'Gini' is not an accepted value
    maxDepth=5,
    maxBins=32,
)

print(model)
print(model.toDebugString())

# Sample predictions on single points and on an RDD of points. The results are
# not stored or printed here; they are only visible in an interactive session.
model.predict(array([1.0]))
model.predict(array([0.0]))
rdd = sc.parallelize([[1.0], [0.0]])
model.predict(rdd).collect()

# Evaluate on the held-out part: pair each true label with its prediction.
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)

# Index into the (label, prediction) pair instead of using a tuple parameter,
# which is a syntax error on Python 3.
mismatches = labelsAndPredictions.filter(lambda pair: pair[0] != pair[1]).count()
test_count = testData.count()
# Guard against an empty test split, which would otherwise divide by zero.
testErr = mismatches / float(test_count) if test_count else float('nan')

print('Test Error =' + str(testErr))
print('learned Classification Tree model:')
print(model.toDebugString())

# Save and load model
# NOTE(review): save() is expected to fail if the target directory already
# exists - confirm, and remove the directory before re-running if so.
model.save(sc, "./mydecisiontreeclassificationmodel")
sameModel = DecisionTreeModel.load(sc, "./mydecisiontreeclassificationmodel")
My Spark Python decision tree example