package cn.spark.study.core.mycode_dataFrame;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

import scala.Tuple2;
public class Jdbcdatasource {
public static void Main (string[] args) {
sparkconf conf = new sparkconf ()
. Setappname ("Jdbcdatasource")
. Setmaster ("local")
;
Javasparkcontext sc = new Javasparkcontext (conf);
SqlContext sqlcontext = new SqlContext (SC);
map<string,string> options = new hashmap<string,string> ();
Options.put ("url", "Jdbc:mysql://127.0.0.1:3306/testdb");
Options.put ("DBTable", "Student_infos");
Loading table information
DataFrame studentinfosdf = Sqlcontext.read (). Format ("JDBC")
. options (Options). Load ();
Options.put ("DBTable", "student_scores");
DataFrame STUDENTSCOREDF = Sqlcontext.read (). Format ("JDBC")
. options (Options). Load ();
Javapairrdd<string, Tuple2<integer, integer>> Result_rdd = Studentinfosdf.javardd (). MapToPair (New Pairfunction<row, String, integer> () {
@Override
Public tuple2<string, integer> call (Row row) throws Exception {
return new tuple2<string, integer> (row.getstring (0), integer.valueof (String.valueof (Row.get (1)));
}
})
. Join (Studentscoredf.javardd (). Maptopair (New Pairfunction<row, String, integer> () {
@Override
Public tuple2<string, integer> call (Row row) throws Exception {
return new tuple2<string, integer> (row.getstring (0), integer.valueof (String.valueof (Row.get (1)));
}
}));
Result_rdd.foreach (New voidfunction<tuple2<string,tuple2<integer,integer>>> () {
//
@Override
public void Call (Tuple2<string, Tuple2<integer, integer>> tuple)
Throws Exception {
System.out.println (tuple._1 + ":" + tuple._2._1 + ":" + tuple._2._2);
//}
//});
Javardd<row> result_rdd_row= Result_rdd.map (New function<tuple2<string,tuple2<integer,integer> Row> () {
@Override
Public Row Call (tuple2<string, Tuple2<integer, integer>> tuple)
Throws Exception {
Return Rowfactory.create (tuple._1,tuple._2._1,tuple._2._2);
}
});
list<structfield> list = new arraylist<structfield> ();
List.add (Datatypes.createstructfield ("name", Datatypes.stringtype, true));
List.add (Datatypes.createstructfield ("Age", Datatypes.integertype, true));
List.add (Datatypes.createstructfield ("Score", Datatypes.integertype, true));
Structtype st = Datatypes.createstructtype (list);
DataFrame RESULT_DF = Sqlcontext.createdataframe (Result_rdd_row, ST);
Result_df.javardd (). foreach (new voidfunction<row> () {
Private static final long serialversionuid = 1L;
@Override
public void call (Row row) throws Exception {
String sql = "INSERT into Good_student_infos values ("
+ "'" + string.valueof (row.getstring (0)) + "',"
+ integer.valueof (string.valueof (Row.get (1)) + ","
+ integer.valueof (string.valueof (Row.get (2)) + ")";
SYSTEM.OUT.PRINTLN ("sql:" + SQL);
Class.forName ("Com.mysql.jdbc.Driver");
Connection conn = null;
Statement stmt = null;
try {
conn = Drivermanager.getconnection ("Jdbc:mysql://127.0.0.1:3306/testdb", "", "" ");
stmt = Conn.createstatement ();
Stmt.execute (SQL);
} catch (Exception e) {
E.printstacktrace ();
}finally
{
if (stmt! = null)
{
Stmt.close ();
}
IF (conn! = null)
{
Conn.close ();
}
}
}
});
}
}
Database preparation (run against MySQL before starting the job):
-- CREATE TABLE student_info (name VARCHAR(255), age INTEGER);
-- CREATE TABLE student_scores (name VARCHAR(255), score INTEGER);
-- INSERT INTO student_info VALUES ('Leo', 18), ('Marry', 17), ('Jack', 19);
-- INSERT INTO student_scores VALUES ('Leo', 100), ('Marry', 99), ('Jack', 60);
-- ALTER TABLE student_info RENAME TO Student_infos;
-- CREATE TABLE Good_student_infos (name VARCHAR(255), age INTEGER, score INTEGER);
Result table: Spark reads the two source tables over JDBC, joins them on name,
and writes the joined (name, age, score) rows into Good_student_infos.