A Basic HBase BulkLoad

Source: Internet
Author: User
Tags: date, now, iterable
To keep the MapReduce architecture clear and easy to extend later, both the Map and Reduce stages are retained. Note: when writing HFiles, the qualifiers of each row must be written in sorted order.
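Concretely, the HFile writer compares each cell against the previous one and rejects any cell that does not sort after it (the error is along the lines of "Added a key not lexically larger than previous"). A minimal illustration of the comparator HBase applies, with hypothetical row and qualifier values:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

// Illustration only: HFile writers require cells in ascending key order
// (row, family, qualifier). KeyValue.COMPARATOR is the comparator HBase
// uses internally; "q1" must be written before "q2".
public class QualifierOrderDemo {
    public static void main(String[] args) {
        byte[] row = Bytes.toBytes("r1");
        byte[] fam = Bytes.toBytes("f");
        KeyValue q1 = new KeyValue(row, fam, Bytes.toBytes("q1"), 0L, Bytes.toBytes("v1"));
        KeyValue q2 = new KeyValue(row, fam, Bytes.toBytes("q2"), 0L, Bytes.toBytes("v2"));
        // Negative result: q1 sorts before q2, so it must be written first.
        System.out.println(KeyValue.COMPARATOR.compare(q1, q2));
    }
}

This is why the reducer below collects the columns in a TreeMap before emitting KeyValues.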

Mapper:

import com.google.common.base.Strings;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import yeepay.util.HBaseUtil;

public class LoadMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context) {
        try {
            String line = value.toString();
            if (Strings.isNullOrEmpty(line)) {
                return;
            }
            // Each input line must contain exactly 9 tab-separated fields.
            String[] arr = line.split("\t", 9);
            if (arr.length != 9) {
                throw new RuntimeException("line.split() length != 9");
            }
            // Build the HBase row key from the first field and emit the whole line.
            String k1 = arr[0];
            ImmutableBytesWritable keyH = new ImmutableBytesWritable(HBaseUtil.getRowKey(k1));
            context.write(keyH, new Text(line));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
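HBaseUtil.getRowKey is a project-local helper that the source does not include. A minimal sketch, assuming it spreads sequential business keys across regions by prefixing them with a short hash (a common bulk-load pattern; the real implementation may differ):

import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.MD5Hash;

// Hypothetical stand-in for yeepay.util.HBaseUtil (not shown in the source).
// Assumption: the row key is the business key prefixed with a short hash so
// that monotonically increasing IDs do not all land in one region.
public class HBaseUtil {
    public static byte[] getRowKey(String id) {
        String prefix = MD5Hash.getMD5AsHex(Bytes.toBytes(id)).substring(0, 4);
        return Bytes.toBytes(prefix + "_" + id);
    }
}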

Reducer:

import com.google.common.base.Splitter;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;

public class LoadReducer extends Reducer<ImmutableBytesWritable, Text, ImmutableBytesWritable, KeyValue> {

    // Column qualifiers for the 9 tab-separated input fields, in input order.
    final static String[] fields = new String[]{
            "ID", "A_ACCOUNT_ID", "A_TRX_ID", "P_ID", "P_TRXORDER_ID",
            "P_FRP_ID", "O_PRODUCTCAT", "O_RECEIVER_ID", "O_REQUESTID"};

    @Override
    public void reduce(ImmutableBytesWritable rowkey, Iterable<Text> values, Context context)
            throws java.io.IOException, InterruptedException {
        try {
            // Only the first value per row key is loaded; duplicates are ignored.
            Text vv = values.iterator().next();
            String vs = vv.toString();
            Splitter splitter = Splitter.on("\t").limit(9);
            Iterator<String> iterator = splitter.split(vs).iterator();
            int i = 0;
            // KeyValues must be written in qualifier order, so collect the
            // columns in a TreeMap, which iterates in sorted key order.
            Map<String, String> map = new TreeMap<String, String>();
            while (iterator.hasNext()) {
                map.put(fields[i++], iterator.next());
            }
            for (Map.Entry<String, String> entry : map.entrySet()) {
                KeyValue kv = new KeyValue(rowkey.copyBytes(), Bytes.toBytes("f"),
                        entry.getKey().getBytes(), 0L, entry.getValue().getBytes());
                context.write(rowkey, kv);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
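A variant worth knowing: if the mapper emits KeyValue objects directly, HFileOutputFormat.configureIncrementalLoad (in the old, pre-1.0 API used here) plugs in HBase's own KeyValueSortReducer, which sorts each row's cells for you, so no hand-rolled TreeMap is needed. A sketch of that approach, reusing the "f" family and an illustrative "c1" qualifier:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Sketch: emit KeyValues from the map side; with KeyValue as the map output
// value class, configureIncrementalLoad selects
// org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer automatically.
public class DirectKvMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws java.io.IOException, InterruptedException {
        String[] arr = value.toString().split("\t", 9);
        ImmutableBytesWritable rowkey = new ImmutableBytesWritable(Bytes.toBytes(arr[0]));
        // Qualifier "c1" is illustrative only.
        context.write(rowkey, new KeyValue(rowkey.copyBytes(), Bytes.toBytes("f"),
                Bytes.toBytes("c1"), 0L, Bytes.toBytes(arr[1])));
    }
}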
Job & BulkLoad
package yeepay.load;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import yeepay.util.HdfsUtil;
import yeepay.util.YeepayConstant;

import java.util.Date;

public abstract class AbstractJobBulkLoad {

    public static Configuration conf = HBaseConfiguration.create();

    public void run(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("please set input dir and table name");
            System.exit(-1);
            return;
        }
        String txtPath = args[0];
        String tableName = args[1];
        Job job = new Job(conf, "txt2HBase");
        HTable htable = null;
        try {
            htable = new HTable(conf, tableName); // target table
            // The number of reducers is derived from the number of regions;
            // each reducer covers one region's row-key range.
            HFileOutputFormat.configureIncrementalLoad(job, htable);
            htable.close();
            job.setJarByClass(AbstractJobBulkLoad.class);
            FileSystem fs = FileSystem.get(conf);
            System.out.println("input file: " + txtPath);
            Path inputFile = new Path(txtPath);
            if (!fs.exists(inputFile)) {
                System.err.println("inputFile " + txtPath + " not exist.");
                throw new RuntimeException("inputFile " + txtPath + " not exist.");
            }
            FileInputFormat.addInputPath(job, inputFile);
            job.setMapperClass(getMapperClass());
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(Text.class);
            job.setInputFormatClass(TextInputFormat.class);
            job.setReducerClass(getReducerClass());
            Date now = new Date();
            Path output = new Path("/output/" + tableName + "/" + now.getTime());
            System.out.println(output);
            FileOutputFormat.setOutputPath(job, output);
            job.waitForCompletion(true);
            // Execute the BulkLoad: open up permissions so the HBase user can
            // move the generated HFiles, then hand them to the RegionServers.
            HdfsUtil.chmod(conf, output.toString());
            HdfsUtil.chmod(conf, output + "/" + YeepayConstant.COMMON_FAMILY);
            htable = new HTable(conf, tableName);
            new LoadIncrementalHFiles(conf).doBulkLoad(output, htable);
            htable.close();
            System.out.println("HFile data load success!");
            System.out.println(getJobName() + " end!");
        } catch (Throwable t) {
            throw new RuntimeException(t);
        }
    }

    protected abstract Class getMapperClass();
    protected abstract Class getReducerClass();
    protected abstract String getTableName();
    protected abstract String getJobName();
}
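HdfsUtil.chmod is another project-local helper not shown in the source. The LoadIncrementalHFiles step runs as the HBase user, which must be able to read and move the generated HFiles, so a plausible minimal sketch simply widens the permissions on the output path and the files directly under it:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;

// Hypothetical stand-in for yeepay.util.HdfsUtil (not shown in the source).
// Assumption: it opens up the job output so the HBase user can move the HFiles.
public class HdfsUtil {
    public static void chmod(Configuration conf, String dir) throws Exception {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(dir);
        FsPermission all = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
        fs.setPermission(path, all);
        // Also open up the entries directly under the path (e.g. the HFiles).
        for (FileStatus st : fs.listStatus(path)) {
            fs.setPermission(st.getPath(), all);
        }
    }
}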

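To tie everything together, a concrete job only has to name the mapper, reducer, table and job (all of the names below are illustrative, since the source only shows the abstract base class):

package yeepay.load;

// Illustrative concrete subclass; class, table and job names are assumptions.
public class TrxBulkLoadJob extends AbstractJobBulkLoad {
    protected Class getMapperClass()  { return LoadMapper.class; }
    protected Class getReducerClass() { return LoadReducer.class; }
    protected String getTableName()   { return "trx"; }
    protected String getJobName()     { return "trx-bulkload"; }

    public static void main(String[] args) throws Exception {
        new TrxBulkLoadJob().run(args);
    }
}

It would be launched with something like: hadoop jar bulkload.jar yeepay.load.TrxBulkLoadJob /input/trx.txt trx (jar name and paths are again assumptions).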