import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HColumnDescriptor, HTableDescriptor, TableName, HBaseConfiguration}
import org.apache.hadoop.hbase.client._
import org.apache.spark.SparkContext
import scala.collection.JavaConversions._

/**
 * HBase 1.0.0 new API, CRUD basic operation code example
 */
object HBaseNewAPI {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "SparkHBase")

    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("hbase.zookeeper.quorum", "master")

    // Creating a Connection is heavyweight work; it is thread-safe and is the entry point for all HBase operations
    val conn = ConnectionFactory.createConnection(conf)
    // Get the Admin object from the Connection (equivalent to the old HBaseAdmin)
    val admin = conn.getAdmin
    // The table name this example operates on
    val userTable = TableName.valueOf("user")

    // Create the user table
    val tableDescr = new HTableDescriptor(userTable)
    tableDescr.addFamily(new HColumnDescriptor("basic".getBytes))
    println("Creating table 'user'.")
    if (admin.tableExists(userTable)) {
      admin.disableTable(userTable)
      admin.deleteTable(userTable)
    }
    admin.createTable(tableDescr)
    println("Done!")

    try {
      // Get the user table
      val table = conn.getTable(userTable)
      try {
        // Prepare to insert a row with key id001
        val p = new Put("id001".getBytes)
        // Specify column and value for the Put (the old Put.add method is deprecated)
        p.addColumn("basic".getBytes, "name".getBytes, "wuchong".getBytes)
        // Submit
        table.put(p)

        // Query a single row
        val g = new Get("id001".getBytes)
        val result = table.get(g)
        val value = Bytes.toString(result.getValue("basic".getBytes, "name".getBytes))
        println("GET id001 : " + value)

        // Scan data
        val s = new Scan()
        s.addColumn("basic".getBytes, "name".getBytes)
        val scanner = table.getScanner(s)
        try {
          for (r <- scanner) {
            println("Found row: " + r)
            println("Found value: " + Bytes.toString(r.getValue("basic".getBytes, "name".getBytes)))
          }
        } finally {
          // Make sure the scanner is closed
          scanner.close()
        }

        // Delete a row, in a way similar to Put
        val d = new Delete("id001".getBytes)
        d.addColumn("basic".getBytes, "name".getBytes)
        table.delete(d)
      } finally {
        if (table != null) table.close()
      }
    } finally {
      conn.close()
    }
  }
}
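To compile and run the example above, the Spark core and HBase client libraries have to be on the classpath. The following build.sbt is only a minimal sketch: the project name, Scala version, and the Spark/HBase versions shown (1.3.1 and 1.0.0) are assumptions and should be adjusted to match your cluster.

// Minimal sbt build sketch -- the versions below are assumptions, align them with your cluster
name := "spark-hbase-example"          // hypothetical project name

scalaVersion := "2.10.4"

libraryDependencies ++= Seq(
  // Spark core, marked "provided" because spark-submit supplies it at runtime
  "org.apache.spark" %% "spark-core" % "1.3.1" % "provided",
  // Client-side HBase API: Connection, Table, Put, Get, Scan, Delete, ...
  "org.apache.hbase" % "hbase-client" % "1.0.0",
  // Common types and utilities such as TableName, HBaseConfiguration, Bytes
  "org.apache.hbase" % "hbase-common" % "1.0.0",
  // TableInputFormat / TableOutputFormat used in the Spark example below live in hbase-server
  "org.apache.hbase" % "hbase-server" % "1.0.0"
)

After packaging, the program can be started with spark-submit (or simply run as-is, since the SparkContext above uses the "local" master) against a running HBase and ZooKeeper instance.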
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.{Base64, Bytes}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.SparkContext
import org.apache.hadoop.hbase.client._

/**
 * Spark reads and writes HBase
 */
object SparkOnHBase {

  // Serialize a Scan into the Base64-encoded string that TableInputFormat expects
  def convertScanToString(scan: Scan) = {
    val proto = ProtobufUtil.toScan(scan)
    Base64.encodeBytes(proto.toByteArray)
  }

  def main(args: Array[String]) {
    val sc = new SparkContext("local", "SparkOnHBase")

    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("hbase.zookeeper.quorum", "master")

    // ====== Save RDD to HBase ========
    // step 1: JobConf setup
    val jobConf = new JobConf(conf, this.getClass)
    jobConf.setOutputFormat(classOf[TableOutputFormat])
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, "user")

    // step 2: map the RDD to the table
    // The schema of the table in HBase is generally:
    //   *row   cf:col_1   cf:col_2*
    // while in Spark we operate on an RDD of tuples such as (1, "lilei", 14), (2, "hanmei", 18).
    // We need to convert *RDD[(uid: Int, name: String, age: Int)]* into *RDD[(ImmutableBytesWritable, Put)]*;
    // the convert function below does this conversion.
    def convert(triple: (Int, String, Int)) = {
      val p = new Put(Bytes.toBytes(triple._1))
      p.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes(triple._2))
      p.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("age"), Bytes.toBytes(triple._3))
      (new ImmutableBytesWritable, p)
    }

    // step 3: read RDD data from somewhere and convert it
    val rawData = List((1, "lilei", 14), (2, "hanmei", 18), (3, "someone", 38))
    val localData = sc.parallelize(rawData).map(convert)

    // step 4: use saveAsHadoopDataset to save the RDD to HBase
    localData.saveAsHadoopDataset(jobConf)
    // =================================

    // ====== Load RDD from HBase ========
    // Use newAPIHadoopRDD to load an RDD from HBase:
    // read data directly from HBase and turn it into an RDD[K, V] that Spark can operate on directly.
    // Set the table name of the query
    conf.set(TableInputFormat.INPUT_TABLE, "user")

    // Add a filter condition: age must be greater than or equal to 18
    val scan = new Scan()
    scan.setFilter(new SingleColumnValueFilter("basic".getBytes, "age".getBytes,
      CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(18)))
    conf.set(TableInputFormat.SCAN, convertScanToString(scan))

    val usersRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    val count = usersRDD.count()
    println("Users RDD Count:" + count)
    usersRDD.cache()

    // Traverse and print the results
    usersRDD.foreach { case (_, result) =>
      val key = Bytes.toInt(result.getRow)
      val name = Bytes.toString(result.getValue("basic".getBytes, "name".getBytes))
      val age = Bytes.toInt(result.getValue("basic".getBytes, "age".getBytes))
      println("Row key:" + key + " Name:" + name + " Age:" + age)
    }
    // =================================
  }
}
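The write path above uses the old org.apache.hadoop.hbase.mapred API together with saveAsHadoopDataset. A frequently used alternative is the newer org.apache.hadoop.hbase.mapreduce.TableOutputFormat combined with saveAsNewAPIHadoopDataset. The sketch below shows that variant under the same assumptions as the examples above (a 'user' table with a 'basic' column family); the object name SparkWriteHBaseNewAPI and the sample rows are made up for illustration.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.SparkContext

object SparkWriteHBaseNewAPI {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "SparkWriteHBaseNewAPI")

    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("hbase.zookeeper.quorum", "master")
    conf.set(TableOutputFormat.OUTPUT_TABLE, "user")

    // The new API is configured through a Hadoop Job; Spark only needs its Configuration
    val job = Job.getInstance(conf)
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])

    // Sample rows (uid, name, age) -- invented for illustration
    val rawData = List((4, "lucy", 20), (5, "lily", 25))

    // Same conversion idea as before: (Int, String, Int) => (ImmutableBytesWritable, Put)
    val rdd = sc.parallelize(rawData).map { case (uid, name, age) =>
      val p = new Put(Bytes.toBytes(uid))
      p.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("name"), Bytes.toBytes(name))
      p.addColumn(Bytes.toBytes("basic"), Bytes.toBytes("age"), Bytes.toBytes(age))
      (new ImmutableBytesWritable, p)
    }

    // saveAsNewAPIHadoopDataset takes the Configuration, not the Job itself
    rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)

    sc.stop()
  }
}

Either way works; the main practical difference is whether the old (mapred) or new (mapreduce) Hadoop output format API drives the writes.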
Source: https://gist.github.com/wuchong/95630f80966d07d7453b#file-hbasenewapi-scala
http://wuchong.me/blog/2015/04/04/spark-on-yarn-cluster-deploy/
Spark Operations on HBase