This post tests writing the data of an RDD to a MySQL database via the Spark SQL external data sources JDBC implementation.
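To reproduce the walkthrough below in spark-shell, the MySQL connector jar has to be visible to both the driver and the executors, since the executors open the JDBC connections that perform the writes. A minimal sketch, with the jar version and path as placeholders for your environment:

bin/spark-shell --jars /path/to/mysql-connector-java-5.1.34-bin.jar --driver-class-path /path/to/mysql-connector-java-5.1.34-bin.jar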
Important APIs in jdbc.scala:
/**
 * Save this RDD to a JDBC database at `url` under the table name `table`.
 * This will run a `CREATE TABLE` and a bunch of `INSERT INTO` statements.
 * If you pass `true` for `allowExisting`, it will drop any table with the
 * given name; if you pass `false`, it will throw if the table already
 * exists.
 */
def createJDBCTable(url: String, table: String, allowExisting: Boolean)

/**
 * Save this RDD to a JDBC database at `url` under the table name `table`.
 * Assumes the table already exists and has a compatible schema. If you
 * pass `true` for `overwrite`, it will `TRUNCATE` the table before
 * performing the `INSERT`.
 *
 * The table must already exist on the database. It must have a schema
 * that is compatible with the schema of this RDD; inserting the rows of
 * the RDD in order via the simple statement
 * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail.
 */
def insertIntoJDBC(url: String, table: String, overwrite: Boolean)
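Read together: createJDBCTable both creates the table and loads the rows, while insertIntoJDBC only writes into a table that already exists. A minimal sketch of how the two calls pair up, assuming a SchemaRDD srdd and a MySQL url like the ones in the walkthrough below:

// Hedged sketch of the three write paths described in the scaladoc above.
srdd.createJDBCTable(url, "person", false) // CREATE TABLE + INSERTs; throws if person already exists
srdd.insertIntoJDBC(url, "person", false)  // append rows to the existing person table
srdd.insertIntoJDBC(url, "person", true)   // TRUNCATE person, then INSERT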
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.apache.spark.sql.jdbc._

val sqlContext = new SQLContext(sc)
import sqlContext._

// data preparation
val url = "jdbc:mysql://hadoop000:3306/test?user=root&password=root"
val arr2x2 = Array[Row](Row.apply("dave", 42), Row.apply("mary", 222))
val arr1x2 = Array[Row](Row.apply("fred", 3))
val schema2 = StructType(StructField("name", StringType) :: StructField("id", IntegerType) :: Nil)
val arr2x3 = Array[Row](Row.apply("dave", 42, 1), Row.apply("mary", 222, 2))
val schema3 = StructType(StructField("name", StringType) :: StructField("id", IntegerType) :: StructField("seq", IntegerType) :: Nil)

================================ CREATE ================================
// create table person and load the 2x2 data set
val srdd = sqlContext.applySchema(sc.parallelize(arr2x2), schema2)
srdd.createJDBCTable(url, "person", false)
sqlContext.jdbcRDD(url, "person").collect.foreach(println)
[dave,42]
[mary,222]

========================== CREATE with overwrite ==========================
// create table person2 with the three-column data set
val srdd = sqlContext.applySchema(sc.parallelize(arr2x3), schema3)
srdd.createJDBCTable(url, "person2", false)
sqlContext.jdbcRDD(url, "person2").collect.foreach(println)
[mary,222,2]
[dave,42,1]

// allowExisting = true drops and recreates person2 with the new schema
val srdd2 = sqlContext.applySchema(sc.parallelize(arr1x2), schema2)
srdd2.createJDBCTable(url, "person2", true)
sqlContext.jdbcRDD(url, "person2").collect.foreach(println)
[fred,3]

======================= CREATE then INSERT to append =======================
val srdd = sqlContext.applySchema(sc.parallelize(arr2x2), schema2)
val srdd2 = sqlContext.applySchema(sc.parallelize(arr1x2), schema2)
srdd.createJDBCTable(url, "person3", false)
sqlContext.jdbcRDD(url, "person3").collect.foreach(println)
[mary,222]
[dave,42]

// overwrite = false appends to the existing rows
srdd2.insertIntoJDBC(url, "person3", false)
sqlContext.jdbcRDD(url, "person3").collect.foreach(println)
[mary,222]
[dave,42]
[fred,3]

====================== CREATE then INSERT to truncate ======================
val srdd = sqlContext.applySchema(sc.parallelize(arr2x2), schema2)
val srdd2 = sqlContext.applySchema(sc.parallelize(arr1x2), schema2)
srdd.createJDBCTable(url, "person4", false)
sqlContext.jdbcRDD(url, "person4").collect.foreach(println)
[dave,42]
[mary,222]

// overwrite = true truncates person4 before inserting
srdd2.insertIntoJDBC(url, "person4", true)
sqlContext.jdbcRDD(url, "person4").collect.foreach(println)
[fred,3]

====================== Incompatible INSERT to append ======================
// inserting a three-column RDD into a two-column table fails
val srdd = sqlContext.applySchema(sc.parallelize(arr2x2), schema2)
val srdd2 = sqlContext.applySchema(sc.parallelize(arr2x3), schema3)
srdd.createJDBCTable(url, "person5", false)
srdd2.insertIntoJDBC(url, "person5", true)
java.sql.SQLException: Column count doesn't match value count at row 1
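The schema-mismatch failure above surfaces as a plain java.sql.SQLException on the driver side, so a caller can guard the write like any other JDBC operation. A minimal sketch under that assumption (the try/catch wrapper is illustrative, not part of the original test):

import java.sql.SQLException

// Illustrative guard: insertIntoJDBC propagates the underlying JDBC error,
// so an incompatible insert can be caught and reported.
try {
  srdd2.insertIntoJDBC(url, "person5", true)
} catch {
  case e: SQLException => println("insert into person5 failed: " + e.getMessage)
}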