Writing a Spark SQL data stream directly back to Oracle with a CacheManager, without using the Sqoop process

Source: Internet
Author: User
Tags object object sqoop

Previously used Sqoop to complete data extraction from the generated HDFS data store to Oracle's database: Sqoop Extract statement:
sqoop export --connect "jdbc:oracle:thin:@ip:port:sid" --username username --password password --table sid.tablename --export-dir hdfs://nameservice1/user/xxx --fields-terminated-by "\001" --null-non-string '' --null-string '' -m 10;

 由于项目需求我们现在要完成在代码中生成所需字段之后,直接回写到oracle中,因为数据量每天都很大,用实例或者List存有很大的局限性,可能会出现内存异常等不可预料的东西,所以我通过缓存器机制来存储数据,然后进行生成结果的临时表直接回写(后面做的hbase接口封装批量提交也比较类似) 废话不多说直接上代码: 1、建立缓存实体 package usi.java.oracle;

/**

    • @author HK
    • @date 2011-2-15 06:45:57
      */
      public class Cache {
      Private String key;
      private Object value;
      Private long TimeOut;
      private Boolean expired;
      Public Cache () {
      Super ();
      }

      Public Cache (String key, String value, Long TimeOut, Boolean expired) {
      This.key = key;
      This.value = value;
      This.timeout = TimeOut;
      this.expired = expired;
      }

      Public String GetKey () {
      Return key;
      }

      Public long gettimeout () {
      return timeOut;
      }

      Public Object GetValue () {
      return value;
      }

      public void Setkey (string string) {
      key = string;
      }

      public void SetTimeOut (long l) {
      TimeOut = l;
      }

      public void SetValue (Object object) {
      Value = object;
      }

      public boolean isexpired () {
      return expired;
      }

      public void setexpired (Boolean b) {
      expired = b;
      }
      }

2. Setting up the cache controller
package usi.java.oracle;

import java.util.Date;
import java.util.HashMap;

/**

  • @author HK
  • @date 2011-2-15 09:40:00
    */
    public class CacheManager {

    private static HashMap Cachemap = new HashMap ();

    /**

    • This was singleton so private constructor is used.
      */
      Private CacheManager () {
      Super ();
      }

      /**

    • Returns cache item from HashMap
    • @param key
    • @return Cache
      */
      Private synchronized static Cache GetCache (String key) {
      Return (Cache) cachemap.get (key);
      }

      /**

    • Looks at the HashMap if a cache item exists or not
    • @param key
    • @return Cache
      */
      Private synchronized Static Boolean Hascache (String key) {
      return Cachemap.containskey (key);
      }

      /**

    • Invalidates all cache
      */
      Public synchronized static void Invalidateall () {
      Cachemap.clear ();
      }

      /**

    • Invalidates a single cache item
    • @param key
      */
      Public synchronized static void invalidate (String key) {
      Cachemap.remove (key);
      }

      /**

    • Adds new Item to cache HashMap
    • @param key
    • @return Cache
      */
      Private synchronized static void Putcache (String key, Cache object) {
      Cachemap.put (Key, object);
      }

      /**

    • Reads a cache item ' s content
    • @param key
    • @return
      */
      public static Cache getcontent (String key) {
      if (Hascache (key)) {
      Cache cache = GetCache (key);
      if (cacheexpired (cache)) {
      Cache.setexpired (TRUE);
      }
      return cache;
      } else {
      return null;
      }
      }

      /**

    • @param key
    • @param content
    • @param ttl
      */
      public static void Putcontent (String key, Object content, long TTL) {
      Cache cache = new cache ();
      Cache.setkey (key);
      Cache.setvalue (content);
      Cache.settimeout (ttl + new Date (). GetTime ());
      Cache.setexpired (FALSE);
      Putcache (key, cache);
      }

      /* @modelguid {172828d6-3ab2-46c4-96e2-e72b34264031} /
      private static Boolean cacheexpired (cache cache) {
      if (cache = = null) {
      return false;
      }
      Long Milisnow = new Date (). GetTime ();
      Long Milisexpire = Cache.gettimeout ();
      if (Milisexpire < 0) {//Cache never Expires
      return false;
      } else if (Milisnow >= milisexpire) {
      return true;
      } else {
      return false;
      }
      }

}

3. Define the data object to be exported
package usi.java.oracle;

Public class Taskall {
private String Mme_eid;
Private String Mme_editor;
Private String Entitytype_eid;
Private String Project_eid;
Private String Resource_eid;
Public String Getmme_eid () {
return mme_eid;
}
public void Setmme_eid (String mme_eid) {
This.mme_eid = Mme_eid;
}
Public String Getmme_editor () {
return mme_editor;
}
public void Setmme_editor (String mme_editor) {
This.mme_editor = mme_editor;
}
Public String Getentitytype_eid () {
return entitytype_eid;
}
public void Setentitytype_eid (String entitytype_eid) {
This.entitytype_eid = Entitytype_eid;
}
Public String Getproject_eid () {
return project_eid;
}
public void Setproject_eid (String project_eid) {
This.project_eid = Project_eid;
}
Public String Getresource_eid () {
return resource_eid;
}
public void Setresource_eid (String resource_eid) {
This.resource_eid = Resource_eid;
}

}
4. The main logic: write the data back to Oracle with batched commits

package usi.java.oracle;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.hive.HiveContext;

public class Redict_to_171ora {
public static void Main (string[] args) {
sparkconf sc = new sparkconf (). Setappname ("Redict_to_171ora");
Sparkcontext JSC = new Sparkcontext (SC);
Hivecontext HC = new Hivecontext (JSC);
String hivesql1= "Select T.mme_eid,t.mme_editor,t.entitytype_eid,t.project_eid,t.resource_eid from Usi_odso.c_ Taskall t limit 150000 ";

    DataFrame redict_to_171ora= Hc.sql (HIVESQL1);           Redict_to_171ora.registertemptable ("HIVESQL1");    List<row> Collect=redict_to_171ora.javardd (). Collect ();    int o=0;        for (Row lists:collect) {taskall task=new taskall ();        Task.setmme_eid (lists.getstring (0));        Task.setmme_editor (lists.getstring (1));        Task.setentitytype_eid (lists.getstring (2));        Task.setproject_eid (Lists.getstring (3));        Task.setresource_eid (Lists.getstring (4));        Cachemanager.putcontent (o+ "", Task, 30000000);    o++;     /* SYSTEM.OUT.PRINTLN (Lists.size ());     System.out.println (lists.getstring (0));     System.out.println (lists.getstring (1));     System.out.println (lists.getstring (2));     System.out.println (Lists.getstring (3));        System.out.println (Lists.getstring (4)); */} SYSTEM.OUT.PRINTLN (O);  Connection con = null;//Create a database connection PreparedStatement pre = null;//Create a precompiled statement object, usually with this instead of statement//resultset result = null;//CreatedA result set object try {class.forname ("oracle.jdbc.driver.OracleDriver");//Load the Oracle driver System. Out.println ("Start trying to connect to the database!")            ");  String url = "Jdbc:oracle:" + "thin: @ip: 1521:sid";//127.0.0.1 is a native address, XE is the default database name for Lite Oracle String user = "user";// Username, system default account name String password = "password";//The password you set to install is con = drivermanager.getconnection (URL, user , password);//Get Connection System.out.println ("Connection succeeded!            "); String sql = "INSERT into C_taskall_test (Mme_eid,mme_editor,entitytype_eid,project_eid,resource_eid) VALUES (?,?,?,?, ?)";/ /precompiled statement, "? 
"represents the parameter pre = con.preparestatement (SQL);//instantiates the precompiled statement for (int i=0;i<o;i++) {//for (Row lis Ts:collect) {//String sql = "INSERT INTO C_taskall_test (mme_eid,mme_editor,entitytype_eid,project _eid,resource_eid) VALUES (' "+task.getmme_eid () +" ', ' "+task.getmme_editor () +" ', ' "+task.getentitytype_eid () +" ', ' "+ Task.getproject_eid () + "', '" +task.getresOurce_eid () + "')";//Pre-compiled statements, "?            "Delegate parameter//pre.setstring (1," Samsung ");//Set parameter, preceding 1 indicates index of parameter, not index of column name in table            Taskall task= (Taskall) cachemanager.getcontent ("" +i). GetValue ();            Pre.setstring (1, Task.getmme_eid ());            Pre.setstring (2, Task.getmme_editor ());            Pre.setstring (3, Task.getentitytype_eid ());            Pre.setstring (4, Task.getproject_eid ());            Pre.setstring (5, Task.getresource_eid ());             Pre.addbatch ();                  if (i%20000==0) {//can be set in different sizes, such as 50,100,500,1000 etc. pre.executebatch ();                  Con.commit ();                 Pre.clearbatch ();            System.out.println (Value of "I" +i);                  }//result = Pre.executequery ();//execute the query, note that no additional arguments are required in brackets} pre.executebatch ();                  Con.commit ();                 Pre.clearbatch ();          System.out.println (Value of "I" +i);         /* if (result! = NULL)       Result.close (); */if (pre! 
= null) pre.close (); /* while (Result.next ())//When the result set is not empty System.out.println ("Usernum:" + result.getstring ("Usern            Um ") +" Flow: "+ result.getstring (" Flow ")); */} catch (Exception e) {        E.printstacktrace ();  Finally {try {//} closes several of the above objects, because if you do not close it will affect performance and consume resources/ Note The order of closing, the last used first close/* if (result! = null) result.close (); */if (pre! = nul                L) pre.close ();                if (con! = null) con.close (); SYSTEM.OUT.PRINTLN ("The database connection is closed!            ");            } catch (Exception e) {e.printstacktrace (); }        }    }

}

Direct write-back of the Sparksql data stream directly with CacheManager without using the Sqoop process

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.