To keep the MapReduce architecture clear, both the Map and Reduce structures are retained, which also makes subsequent extension easier. Note: when writing HFiles, the column qualifiers must be emitted in sorted order.
Mapper:
import com.google.common.base.Strings;import org.apache.hadoop.hbase.io.ImmutableBytesWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;import yeepay.util.HBaseUtil;public class LoadMapper extends Mapper
{ protected void map(LongWritable key, Text value, Context context) { try { String line = value.toString(); if (Strings.isNullOrEmpty(line)) { return; } String[] arr = line.split("\t", 9); if (arr.length != 9) { throw new RuntimeException("line.splite() not == 9"); } if (arr.length < 1) { return; } String k1 = arr[0]; ImmutableBytesWritable keyH = new ImmutableBytesWritable(HBaseUtil.getRowKey(k1)); context.write(keyH, new Text(line)); } catch (Exception e) { throw new RuntimeException(e); } }}
Reducer:
Import com. google. common. base. splitter; import org. apache. hadoop. hbase. keyValue; import org. apache. hadoop. hbase. io. immutableBytesWritable; import org. apache. hadoop. hbase. util. bytes; import org. apache. hadoop. io. text; import org. apache. hadoop. mapreduce. reducer; import java. util. iterator; import java. util. map; import java. util. treeMap; public class LoadReducer extends Reducer
{Final static String [] fileds = new String [] {"ID", "A_ACCOUNT_ID", "A_TRX_ID", "P_ID", "P_TRXORDER_ID", "P_FRP_ID ", "O_PRODUCTCAT", "O_RECEIVER_ID", "O_REQUESTID"}; @ Override public void reduce (ImmutableBytesWritable rowkey, Iterable
Values, Context context) throws java. io. IOException, InterruptedException {// super. setID (stringArray [0]); // this. a_ACCOUNT_ID = stringArray [1]; // this. a_TRX_ID = stringArray [2]; // this. p_ID = stringArray [3]; // this. p_TRXORDER_ID = stringArray [4]; // this. p_FRP_ID = stringArray [5]; // this. o_PRODUCTCAT = stringArray [6]; // this. o_RECEIVER_ID = stringArray [7]; // this. o_REQUESTID = stringArray [8]; try {Text vv = values. iterator (). next (); String vs = vv. toString (); Splitter splitter = Splitter. on ("\ t "). limit (9); Iterable
Iterable = splitter. split (vs); Iterator
Iterator = iterable. iterator (); // String [] arr =. split ("\ t", 9); int I = 0; // Put put = new Put (rowkey. get ();/*** values must be written in order. */Map
Map = new TreeMap
(); While (iterator. hasNext () {map. put (fileds [I ++], iterator. next () ;}for (Map. Entry
Entry: map. entrySet () {KeyValue kv = new KeyValue (rowkey. copyBytes (), Bytes. toBytes ("f"), entry. getKey (). getBytes (), 0L, entry. getValue (). getBytes (); context. write (rowkey, kv) ;}} catch (Exception e) {new RuntimeException (e );}}}
Job & BulkLoad
package yeepay.load;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import yeepay.util.HdfsUtil;
import yeepay.util.YeepayConstant;

import java.util.Date;

/**
 * Template for a "text file -> HFile -> bulk load" job.
 *
 * <p>Runs a MapReduce job that writes HFiles under
 * {@code /output/<table>/<timestamp>}, then hands them to
 * {@link LoadIncrementalHFiles} for an incremental bulk load into the
 * target table. Subclasses supply the mapper/reducer classes and job name.
 */
public abstract class AbstractJobBulkLoad {

    public static Configuration conf = HBaseConfiguration.create();

    /**
     * @param args args[0] = input text path on HDFS, args[1] = target HBase table
     * @throws Exception if the MR job or the bulk load fails
     */
    public void run(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("please set input dir");
            System.exit(-1);
            return;
        }
        String txtPath = args[0];
        String tableName = args[1];
        Job job = new Job(conf, "txt2HBase");
        HTable htable = null;
        try {
            htable = new HTable(conf, tableName); // target table
            // Reducer count and key ranges are derived from the table's regions.
            HFileOutputFormat.configureIncrementalLoad(job, htable);
            htable.close();
            htable = null;
            job.setJarByClass(AbstractJobBulkLoad.class);

            FileSystem fs = FileSystem.get(conf);
            System.out.println("input file: " + txtPath);
            Path inputFile = new Path(txtPath);
            if (!fs.exists(inputFile)) {
                System.err.println("inputFile " + txtPath + " not exist.");
                throw new RuntimeException("inputFile " + txtPath + " not exist.");
            }
            FileInputFormat.addInputPath(job, inputFile);
            // NOTE(review): the original left these two calls commented out,
            // presumably relying on subclasses to set the mapper/reducer —
            // confirm before uncommenting, since configureIncrementalLoad
            // already installs the reducer it needs.
            // job.setMapperClass(getMapperClass());
            // job.setReducerClass(getReducerClass());
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(Text.class);
            job.setInputFormatClass(TextInputFormat.class);

            Date now = new Date();
            Path output = new Path("/output/" + tableName + "/" + now.getTime());
            System.out.println(output);
            FileOutputFormat.setOutputPath(job, output);

            // BUG FIX: the original ignored the job result and bulk-loaded
            // whatever partial output existed even when the job failed.
            if (!job.waitForCompletion(true)) {
                throw new RuntimeException("MapReduce job failed for table " + tableName);
            }

            // Loosen permissions so the HBase region servers can move the HFiles.
            HdfsUtil.chmod(conf, output.toString());
            HdfsUtil.chmod(conf, output + "/" + YeepayConstant.COMMON_FAMILY);
            htable = new HTable(conf, tableName);
            new LoadIncrementalHFiles(conf).doBulkLoad(output, htable);
            htable.close();
            htable = null;
            System.out.println("HFile data load success!");
            System.out.println(getJobName() + " end!");
        } catch (Throwable t) {
            throw new RuntimeException(t);
        } finally {
            // BUG FIX: the original leaked the HTable whenever an exception
            // was thrown between open and close.
            if (htable != null) {
                try {
                    htable.close();
                } catch (Exception ignored) {
                    // best-effort cleanup; the primary failure is already propagating
                }
            }
        }
    }

    /** Mapper class to use for this job. */
    protected abstract Class getMapperClass();

    /** Reducer class to use for this job. */
    protected abstract Class getReducerClass();

    /** Target HBase table name. */
    protected abstract String getTableName();

    /** Human-readable job name for logging. */
    protected abstract String getJobName();
}