Previously used Sqoop to complete data extraction from the generated HDFS data store to Oracle's database: Sqoop Extract statement:
Sqoop export--connect "Jdbc:oracle:thin: @ip:p ort:sid"--username user name--password password--table sid. Table name--export-dir Hdfs://na MESERVICE1/USER/XXX (HDFs address)--fields-terminated-by "\001"--null-non-string "--null-string" '-M 10;
由于项目需求我们现在要完成在代码中省城所需字段之后,直接回写到oracle中,因为数据量每天都很大,用实例或者List存有很大的局限性,可能会出现内存异常等不可预料的东西,所以我通过缓存器机制来存储数据,然后进行生成结果的临时表直接回写(后面做的hbase接口封装批量提交也比较类似) 废话不多说直接上代码: 1、建立缓存实体 package usi.java.oracle;
/**
- @author HK
@date 2011-2-15 06:45:57
*/
public class Cache {
Private String key;
private Object value;
Private long TimeOut;
private Boolean expired;
Public Cache () {
Super ();
}
Public Cache (String key, String value, Long TimeOut, Boolean expired) {
This.key = key;
This.value = value;
This.timeout = TimeOut;
this.expired = expired;
}
Public String GetKey () {
Return key;
}
Public long gettimeout () {
return timeOut;
}
Public Object GetValue () {
return value;
}
public void Setkey (string string) {
key = string;
}
public void SetTimeOut (long l) {
TimeOut = l;
}
public void SetValue (Object object) {
Value = object;
}
public boolean isexpired () {
return expired;
}
public void setexpired (Boolean b) {
expired = b;
}
}
2. Setting up the cache controller
Package usi.java.oracle;
Import Java.util.Date;
Import Java.util.HashMap;
/**
}
3, establish the need to export data objects
Package usi.java.oracle;
Public class Taskall {
private String Mme_eid;
Private String Mme_editor;
Private String Entitytype_eid;
Private String Project_eid;
Private String Resource_eid;
Public String Getmme_eid () {
return mme_eid;
}
public void Setmme_eid (String mme_eid) {
This.mme_eid = Mme_eid;
}
Public String Getmme_editor () {
return mme_editor;
}
public void Setmme_editor (String mme_editor) {
This.mme_editor = mme_editor;
}
Public String Getentitytype_eid () {
return entitytype_eid;
}
public void Setentitytype_eid (String entitytype_eid) {
This.entitytype_eid = Entitytype_eid;
}
Public String Getproject_eid () {
return project_eid;
}
public void Setproject_eid (String project_eid) {
This.project_eid = Project_eid;
}
Public String Getresource_eid () {
return resource_eid;
}
public void Setresource_eid (String resource_eid) {
This.resource_eid = Resource_eid;
}
}
5, the execution of logical body, write back data, batch submission
Package usi.java.oracle;
Import java.sql.Connection;
Import Java.sql.DriverManager;
Import java.sql.PreparedStatement;
Import Java.sql.ResultSet;
Import java.util.List;
Import org.apache.spark.SparkConf;
Import Org.apache.spark.SparkContext;
Import Org.apache.spark.sql.DataFrame;
Import Org.apache.spark.sql.Row;
Import Org.apache.spark.sql.hive.HiveContext;
public class Redict_to_171ora {
public static void Main (string[] args) {
sparkconf sc = new sparkconf (). Setappname ("Redict_to_171ora");
Sparkcontext JSC = new Sparkcontext (SC);
Hivecontext HC = new Hivecontext (JSC);
String hivesql1= "Select T.mme_eid,t.mme_editor,t.entitytype_eid,t.project_eid,t.resource_eid from Usi_odso.c_ Taskall t limit 150000 ";
DataFrame redict_to_171ora= Hc.sql (HIVESQL1); Redict_to_171ora.registertemptable ("HIVESQL1"); List<row> Collect=redict_to_171ora.javardd (). Collect (); int o=0; for (Row lists:collect) {taskall task=new taskall (); Task.setmme_eid (lists.getstring (0)); Task.setmme_editor (lists.getstring (1)); Task.setentitytype_eid (lists.getstring (2)); Task.setproject_eid (Lists.getstring (3)); Task.setresource_eid (Lists.getstring (4)); Cachemanager.putcontent (o+ "", Task, 30000000); o++; /* SYSTEM.OUT.PRINTLN (Lists.size ()); System.out.println (lists.getstring (0)); System.out.println (lists.getstring (1)); System.out.println (lists.getstring (2)); System.out.println (Lists.getstring (3)); System.out.println (Lists.getstring (4)); */} SYSTEM.OUT.PRINTLN (O); Connection con = null;//Create a database connection PreparedStatement pre = null;//Create a precompiled statement object, usually with this instead of statement//resultset result = null;//CreatedA result set object try {class.forname ("oracle.jdbc.driver.OracleDriver");//Load the Oracle driver System. Out.println ("Start trying to connect to the database!") "); String url = "Jdbc:oracle:" + "thin: @ip: 1521:sid";//127.0.0.1 is a native address, XE is the default database name for Lite Oracle String user = "user";// Username, system default account name String password = "password";//The password you set to install is con = drivermanager.getconnection (URL, user , password);//Get Connection System.out.println ("Connection succeeded! "); String sql = "INSERT into C_taskall_test (Mme_eid,mme_editor,entitytype_eid,project_eid,resource_eid) VALUES (?,?,?,?, ?)";/ /precompiled statement, "? "represents the parameter pre = con.preparestatement (SQL);//instantiates the precompiled statement for (int i=0;i<o;i++) {//for (Row lis Ts:collect) {//String sql = "INSERT INTO C_taskall_test (mme_eid,mme_editor,entitytype_eid,project _eid,resource_eid) VALUES (' "+task.getmme_eid () +" ', ' "+task.getmme_editor () +" ', ' "+task.getentitytype_eid () +" ', ' "+ Task.getproject_eid () + "', '" +task.getresOurce_eid () + "')";//Pre-compiled statements, "? "Delegate parameter//pre.setstring (1," Samsung ");//Set parameter, preceding 1 indicates index of parameter, not index of column name in table Taskall task= (Taskall) cachemanager.getcontent ("" +i). GetValue (); Pre.setstring (1, Task.getmme_eid ()); Pre.setstring (2, Task.getmme_editor ()); Pre.setstring (3, Task.getentitytype_eid ()); Pre.setstring (4, Task.getproject_eid ()); Pre.setstring (5, Task.getresource_eid ()); Pre.addbatch (); if (i%20000==0) {//can be set in different sizes, such as 50,100,500,1000 etc. pre.executebatch (); Con.commit (); Pre.clearbatch (); System.out.println (Value of "I" +i); }//result = Pre.executequery ();//execute the query, note that no additional arguments are required in brackets} pre.executebatch (); Con.commit (); Pre.clearbatch (); System.out.println (Value of "I" +i); /* if (result! = NULL) Result.close (); */if (pre! = null) pre.close (); /* while (Result.next ())//When the result set is not empty System.out.println ("Usernum:" + result.getstring ("Usern Um ") +" Flow: "+ result.getstring (" Flow ")); */} catch (Exception e) { E.printstacktrace (); Finally {try {//} closes several of the above objects, because if you do not close it will affect performance and consume resources/ Note The order of closing, the last used first close/* if (result! = null) result.close (); */if (pre! = nul L) pre.close (); if (con! = null) con.close (); SYSTEM.OUT.PRINTLN ("The database connection is closed! "); } catch (Exception e) {e.printstacktrace (); } } }
}
Direct write-back of the Sparksql data stream directly with CacheManager without using the Sqoop process