Data set
House.csv
Data overview
Code
package org.apache.spark.examples.examplesforml

import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}

import scala.util.Random

/**
 * Date: 2018.10.15
 * Description: 7-6 linear regression algorithm, house price forecast.
 * Data set: House.csv
 *
 * Reads a semicolon-separated CSV, shuffles the rows by a random key, assembles the
 * "Square" column into a feature vector, fits a linear regression against "Price"
 * on an 80/20 random split, and prints the predictions for the test split together
 * with the number of iterations the optimizer ran.
 */
object Linear {

  /** Input file used when no path is passed on the command line. */
  private val DefaultCsvPath: String =
    "D:\\ machine learning algorithm prepares \\7-6 linear regression-Forecast rate \\house.csv"

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the CSV path, otherwise [[DefaultCsvPath]] is used
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[*]").setAppName("Linearregression")
    // The session builder creates (or reuses) the SparkContext from `conf`,
    // so no explicit SparkContext needs to be constructed here.
    val spark = SparkSession.builder().config(conf).getOrCreate()

    try {
      import spark.implicits._

      val csvPath = args.headOption.getOrElse(DefaultCsvPath)
      val file = spark.read
        .format("CSV")
        .option("Header", "true") // treat the first line as the header row
        .option("Sep", ";") // field separator
        .load(csvPath)

      // Every CSV column is read as a string (no schema inference is requested),
      // so both columns are parsed to Double. The extra random column is only a
      // sort key used to shuffle the rows.
      val random = new Random()
      val data = file
        .select("Square", "Price")
        .map { row =>
          (row.getAs[String](0).toDouble, row.getAs[String](1).toDouble, random.nextDouble())
        }
        .toDF("Square", "Price", "Rand")
        .sort("Rand")
      data.show()

      // Spark ML estimators expect the inputs packed into a single vector column.
      val assembler = new VectorAssembler()
        .setInputCols(Array("Square"))
        .setOutputCol("Features")
      val dataSet = assembler.transform(data)

      // Fixed seed keeps the 80/20 split reproducible for a given row order.
      val Array(train, test) = dataSet.randomSplit(Array(0.8, 0.2), 1234L)
      train.show()
      println(test.count())

      // NOTE(review): the iteration cap was missing in the source text; 10 is assumed
      // (it is the value paired with regParam 0.3 / elasticNetParam 0.8 in the Spark
      // documentation example). Confirm against the original article.
      val regression = new LinearRegression()
        .setMaxIter(10)
        .setRegParam(0.3)
        .setElasticNetParam(0.8)

      val model = regression
        .setLabelCol("Price")
        .setFeaturesCol("Features")
        .fit(train)
      model.transform(test).show()

      val s = model.summary.totalIterations
      println(s"ITER: ${s}")
    } finally {
      // Release the local Spark resources even when reading or fitting fails.
      spark.stop()
    }
  }
}
Output:
Mastering the Spark Machine Learning Library - 07.6 - Using linear regression to forecast house prices