Operating HBase with Spark


import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.client._
import org.apache.spark.SparkContext
import scala.collection.JavaConversions._

/**
 * HBase 1.0.0 new API: basic CRUD operation example
 */
object HBaseNewAPI {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "SparkHBase")

    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("hbase.zookeeper.quorum", "master")

    // Creating a Connection is heavyweight work; it is thread-safe and is the entry point for operating HBase
    val conn = ConnectionFactory.createConnection(conf)
    // Get the Admin object from the Connection (equivalent to the former HBaseAdmin)
    val admin = conn.getAdmin

    // The table this example operates on
    val userTable = TableName.valueOf("user")

    // Create the "user" table
    val tableDescr = new HTableDescriptor(userTable)
    tableDescr.addFamily(new HColumnDescriptor("basic".getBytes))
    println("Creating table `user`.")
    if (admin.tableExists(userTable)) {
      admin.disableTable(userTable)
      admin.deleteTable(userTable)
    }
    admin.createTable(tableDescr)
    println("Done!")

    try {
      // Get the "user" table
      val table = conn.getTable(userTable)
      try {
        // Prepare to insert a row with key id001
        val p = new Put("id001".getBytes)
        // Specify column and value for the Put operation (the former Put.add method is deprecated)
        p.addColumn("basic".getBytes, "name".getBytes, "wuchong".getBytes)
        // Submit
        table.put(p)

        // Query one row
        val g = new Get("id001".getBytes)
        val result = table.get(g)
        val value = Bytes.toString(result.getValue("basic".getBytes, "name".getBytes))
        println("GET id001: " + value)

        // Scan data
        val s = new Scan()
        s.addColumn("basic".getBytes, "name".getBytes)
        val scanner = table.getScanner(s)
        try {
          for (r <- scanner) {
            println("Found row: " + r)
            println("Found value: " + Bytes.toString(r.getValue("basic".getBytes, "name".getBytes)))
          }
        } finally {
          // Make sure the scanner is closed
          scanner.close()
        }

        // Delete a row, in a way similar to Put
        val d = new Delete("id001".getBytes)
        d.addColumn("basic".getBytes, "name".getBytes)
        table.delete(d)
      } finally {
        if (table != null) table.close()
      }
    } finally {
      conn.close()
    }
  }
}
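To compile and run the example above, the project needs the Spark core and HBase client artifacts on the classpath. The following sbt snippet is a minimal sketch; the exact artifact versions are assumptions and should be matched to the Spark and HBase releases actually running on your cluster.

// build.sbt (sketch) -- versions are assumptions, adjust to your environment
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"   % "1.3.1" % "provided",
  "org.apache.hbase" %  "hbase-client" % "1.0.0",
  "org.apache.hbase" %  "hbase-common" % "1.0.0",
  // hbase-server provides TableInputFormat/TableOutputFormat used in the next example
  "org.apache.hbase" %  "hbase-server" % "1.0.0"
)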

import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.{Base64, Bytes}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.client._

/**
 * Spark reads and writes HBase
 */
object SparkOnHBase {

  def convertScanToString(scan: Scan) = {
    val proto = ProtobufUtil.toScan(scan)
    Base64.encodeBytes(proto.toByteArray)
  }

  def main(args: Array[String]) {
    val sc = new SparkContext("local", "SparkOnHBase")

    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("hbase.zookeeper.quorum", "master")

    // ====== Save an RDD to HBase ========
    // step 1: JobConf setup
    val jobConf = new JobConf(conf, this.getClass)
    jobConf.setOutputFormat(classOf[TableOutputFormat])
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, "user")

    // step 2: map the RDD to the table
    // The schema of an HBase table generally looks like:
    //   row   cf:col_1   cf:col_2
    // while in Spark we operate on an RDD of tuples such as (1, "lilei", 14), (2, "hanmei", 18).
    // We need to convert RDD[(uid: Int, name: String, age: Int)] into RDD[(ImmutableBytesWritable, Put)],
    // so we define a convert function to do this conversion work
    def convert(triple: (Int, String, Int)) = {
      val p = new Put(Bytes.toBytes(triple._1))
      p.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes(triple._2))
      p.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("age"), Bytes.toBytes(triple._3))
      (new ImmutableBytesWritable, p)
    }

    // step 3: read RDD data from somewhere and convert
    val rawData = List((1, "lilei", 14), (2, "hanmei", 18), (3, "someone", 38))
    val localData = sc.parallelize(rawData).map(convert)

    // step 4: use `saveAsHadoopDataset` to save the RDD to HBase
    localData.saveAsHadoopDataset(jobConf)
    // =================================

    // ====== Load an RDD from HBase ========
    // Use `newAPIHadoopRDD` to load an RDD from HBase:
    // read data directly from HBase and turn it into an RDD[K, V] that Spark can operate on

    // Set the table name to query
    conf.set(TableInputFormat.INPUT_TABLE, "user")

    // Add a filter: age must be greater than or equal to 18
    val scan = new Scan()
    scan.setFilter(new SingleColumnValueFilter("basic".getBytes, "age".getBytes,
      CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(18)))
    conf.set(TableInputFormat.SCAN, convertScanToString(scan))

    val usersRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    val count = usersRDD.count()
    println("Users RDD Count: " + count)
    usersRDD.cache()

    // Traverse and print the results
    usersRDD.foreach { case (_, result) =>
      val key = Bytes.toInt(result.getRow)
      val name = Bytes.toString(result.getValue("basic".getBytes, "name".getBytes))
      val age = Bytes.toInt(result.getValue("basic".getBytes, "age".getBytes))
      println("Row key: " + key + " Name: " + name + " Age: " + age)
    }
    // =================================
  }
}
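The write path above uses the old-style org.apache.hadoop.hbase.mapred.TableOutputFormat together with a JobConf. If you prefer to stay on the new MapReduce API throughout, the same RDD[(ImmutableBytesWritable, Put)] can be written with saveAsNewAPIHadoopDataset. The snippet below is a sketch, not part of the original example; it assumes the conf and localData values defined in the code above are in scope.

// Alternative write path (sketch): new-API TableOutputFormat instead of the mapred one.
// Assumes `conf` (HBaseConfiguration) and `localData: RDD[(ImmutableBytesWritable, Put)]` from above.
import org.apache.hadoop.hbase.mapreduce.{TableOutputFormat => NewTableOutputFormat}
import org.apache.hadoop.mapreduce.Job

val job = Job.getInstance(conf)
job.setOutputFormatClass(classOf[NewTableOutputFormat[ImmutableBytesWritable]])
job.getConfiguration.set(NewTableOutputFormat.OUTPUT_TABLE, "user")
localData.saveAsNewAPIHadoopDataset(job.getConfiguration)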


Original gist: https://gist.github.com/wuchong/95630f80966d07d7453b#file-hbasenewapi-scala

Related: http://wuchong.me/blog/2015/04/04/spark-on-yarn-cluster-deploy/
