1.people.txt:
soyo8, 35
Small week, 30
Xiao Hua, 19
soyo,88
/**
 * Created by Soyo on 17-10-10.
 * Converts an RDD to a DataFrame by defining the schema programmatically.
 */
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SparkSession}

/**
 * Reads a comma-separated text file of `name,age` records, attaches a schema
 * built at runtime from a space-separated list of column names, registers the
 * resulting DataFrame as a temporary view and prints two queries against it.
 */
object rdd_to_dataframe2 {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath =
    "file:///home/soyo/Desktop/spark Programming test data/people.txt"

  /**
   * Program entry point.
   *
   * @param args optional; `args(0)`, when present, overrides the default input path
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().getOrCreate()
    try {
      val inputPath = args.headOption.getOrElse(DefaultInputPath)
      val peopleRDD = spark.sparkContext.textFile(inputPath)

      // Column names are given as one space-separated string; every column is
      // declared as a nullable string, so `age` is NOT parsed into a number.
      val schemaString = "name age"
      val fields = schemaString
        .split(" ")
        .map(fieldName => StructField(fieldName, StringType, nullable = true))
      val schema = StructType(fields)

      // Split each line on commas and keep only lines that have at least a
      // name and an age, so a blank or truncated line does not abort the job
      // with an ArrayIndexOutOfBoundsException.
      val rowRDD = peopleRDD
        .map(_.split(","))
        .filter(_.length >= 2)
        .map(cols => Row(cols(0), cols(1).trim))

      val peopleDF = spark.createDataFrame(rowRDD, schema)
      peopleDF.createOrReplaceTempView("People2")

      val results = spark.sql("SELECT * from People2")
      results.show()
      // Group on the exact column name declared in the schema above.
      results.groupBy("age").count().show()
    } finally {
      // Release the session even if reading or querying fails.
      spark.stop()
    }
  }
}
Results:
+----------+---+
|      name|age|
+----------+---+
|     soyo8| 35|
|Small week| 30|
|  Xiao Hua| 19|
|      soyo| 88|
+----------+---+
+---+-----+
|age|count|
+---+-----+
| 30|    1|
| 35|    1|
| 19|    1|
| 88|    1|
+---+-----+
Spark SQL: converting an RDD to a DataFrame (method two: specifying the schema programmatically)