Public Static voidMain (string[] args) {sparkconf sparkconf=Newsparkconf (). Setappname ("Regression"). Setmaster ("Local[2]"); Javasparkcontext SC=NewJavasparkcontext (sparkconf); Javardd<String> data = Sc.textfile ("/home/yurnom/lpsa.txt"); Javardd<LabeledPoint> parseddata = Data.map (Line-{string[] parts= Line.split (","); Double[] ds = Arrays.stream (Parts[1].split ("") . Maptodouble (Double::p arsedouble). ToArray (); return NewLabeledpoint (Double.parsedouble (parts[0]), Vectors.dense (DS)); }). cache (); intnumiterations = 100;//Number of iterationsLinearregressionmodel model =Linearregressionwithsgd.train (Parseddata.rdd (), numiterations); Ridgeregressionmodel Model1=Ridgeregressionwithsgd.train (Parseddata.rdd (), numiterations); Lassomodel Model2=Lassowithsgd.train (Parseddata.rdd (), numiterations); Print (parseddata, model); Print (parseddata, model1); Print (parseddata, model2); //predicting a new data method Double[] D =New Double[]{1.0, 1.0, 2.0, 1.0, 3.0,-1.0, 1.0,-2.0}; Vector v=Vectors.dense (d); System.out.println (Model.predict (v)); System.out.println (Model1.predict (v)); System.out.println (Model2.predict (v));} Public Static voidPrint (javardd<labeledpoint>parseddata, Generalizedlinearmodel model) {Javapairrdd<double, double> valuesandpreds = Parseddata.maptopair (Point- { DoublePrediction = Model.predict (Point.features ());//predicting training data with models return NewTuple2<>(Point.label (), prediction); }); Double MSE= Valuesandpreds.maptodouble ((tuple2<double, double> t), Math.pow (T._1 ()-t._2 (), 2)). mean ();//calculates the mean of the squared value of the difference between the predicted value and the actual valueSystem.out.println (Model.getclass (). 
GetName () + "training Mean squared Error =" +MSE);} Run result Linearregressionmodel training Mean Squared Error= 6.206807793307759Ridgeregressionmodel Training Mean Squared Error= 6.416002077543526Lassomodel Training Mean Squared Error= 6.972349839013683prediction of linear:0.805390219777772prediction of Ridge:1.0907608111865237prediction of Lasso:0.18652645118913225
Test data:
-0.4307829,-1.63735562648104 -2.00621178480549 -1.86242597251066 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
-0.1625189,-1.98898046126935 -0.722008756122123 -0.787896192088153 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
-0.1625189,-1.57881887548545 -2.1887840293994 1.36116336875686 -1.02470580167082 -0.522940888712441 -0.863171185425945 0.342627053981254 -0.155348103855541
Reference:
http://blog.selfup.cn/747.html
Linear regression with Spark MLlib