Before learning any Spark technology, make sure you first understand Spark correctly; as a guide, see: Understanding Spark Correctly.
The following example uses the Spark RDD Java API to read data from a relational database. It uses a local embedded Derby database, but any relational database such as MySQL or Oracle would work the same way:
package com.twq.javaapi.java7;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.rdd.JdbcRDD;

import java.io.Serializable;
import java.sql.*;

/**
 * Demonstrates reading rows from a relational database with {@link JdbcRDD}.
 *
 * <p>Uses a local embedded Derby database so the example is self-contained;
 * any JDBC-accessible database (MySQL, Oracle, ...) works the same way.
 */
public class JavaJdbcRDDSuite implements Serializable {

    /**
     * Creates the test database and populates table FOO with five rows
     * (DATA = 2, 4, 6, 8, 10).
     *
     * @throws ClassNotFoundException if the Derby embedded driver is not on the classpath
     * @throws SQLException for any database error other than "table already exists"
     */
    public static void prepareData() throws ClassNotFoundException, SQLException {
        // Load the embedded Derby driver; ";create=true" creates the DB on first use.
        Class.forName("org.apache.derby.jdbc.EmbeddedDriver");
        Connection connection =
            DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb;create=true");
        try {
            // Table FOO: ID is an auto-increment primary key column, DATA an integer column.
            Statement create = connection.createStatement();
            try {
                create.execute(
                    "CREATE TABLE FOO(" +
                    "ID INTEGER NOT NULL GENERATED ALWAYS AS IDENTITY " +
                    "(START WITH 1, INCREMENT BY 1), " +
                    "DATA INTEGER)");
            } finally {
                create.close();
            }
            // Insert the rows 2, 4, 6, 8, 10 via a parameterized statement.
            PreparedStatement insert =
                connection.prepareStatement("INSERT INTO FOO(DATA) VALUES(?)");
            try {
                for (int i = 1; i <= 5; i++) {
                    insert.setInt(1, i * 2);
                    insert.executeUpdate();
                }
            } finally {
                insert.close();
            }
        } catch (SQLException e) {
            // SQLState X0Y32 = "table already exists"; safe to ignore on repeated runs.
            if (!"X0Y32".equals(e.getSQLState())) {
                throw e;
            }
        } finally {
            connection.close();
        }
    }

    /**
     * Shuts down the embedded Derby database cleanly.
     *
     * @throws SQLException if shutdown fails abnormally
     */
    public static void shutdownDB() throws SQLException {
        try {
            DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb;shutdown=true");
        } catch (SQLException e) {
            // Derby signals a *normal* single-database shutdown with SQLState 08006;
            // rethrow anything else.
            // https://db.apache.org/derby/docs/10.2/ref/rrefexcept71493.html
            if (!"08006".equals(e.getSQLState())) {
                throw e;
            }
        }
    }

    public static void main(String[] args) throws Exception {
        JavaSparkContext sc = new JavaSparkContext("local", "JavaAPISuite");

        // Prepare the data.
        prepareData();

        // Build the JdbcRDD: the query MUST contain two '?' placeholders that Spark
        // fills with per-partition lower/upper bounds (here: 1..5, one partition).
        JavaRDD<Integer> rdd = JdbcRDD.create(
            sc,
            new JdbcRDD.ConnectionFactory() {
                @Override
                public Connection getConnection() throws SQLException {
                    return DriverManager.getConnection("jdbc:derby:target/JavaJdbcRDDSuiteDb");
                }
            },
            "SELECT DATA FROM FOO WHERE ? <= ID AND ID <= ?",
            1, 5, 1,
            new Function<ResultSet, Integer>() {
                @Override
                public Integer call(ResultSet r) throws Exception {
                    // Map each row to its single integer column.
                    return r.getInt(1);
                }
            }
        );

        // Result: [2, 4, 6, 8, 10]
        System.out.println(rdd.collect());

        shutdownDB();
        sc.stop();
    }
}
For more information about the RDD API, refer to: Spark Core RDD API Rationale.
Spark 2.x In-Depth Series, Part 6: Reading from a Relational Database with the RDD Java API and JdbcRDD