Our project needs MapReduce jobs that read from and write to HBase, and the API that ships in HBase's own jar handles this, which saves a lot of development time. Below is the material I found on the official website, which I'd like to share.
Address: http://hbase.apache.org/book/mapreduce.example.html
Description: TableMapper reads data out of HBase and TableReducer writes data into HBase. They can be used together or separately.
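For orientation, these are roughly the shapes of the two base classes (signatures only, not runnable on their own; note that in the 0.94-era API the reducer's fixed output value type is Writable rather than Mutation):

// TableMapper fixes the input types: the key is the row (ImmutableBytesWritable)
// and the value is the Result scanned for that row; you choose the output types.
public abstract class TableMapper<KEYOUT, VALUEOUT>
    extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> {
}

// TableReducer fixes the output value type to a mutation (a Put or Delete) that
// TableOutputFormat applies to the target table; you choose the input types.
public abstract class TableReducer<KEYIN, VALUEIN, KEYOUT>
    extends Reducer<KEYIN, VALUEIN, KEYOUT, Mutation> {
}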
(1) Read example
public static class MyMapper extends TableMapper<Text, Text> {

    public void map(ImmutableBytesWritable row, Result value, Context context)
            throws InterruptedException, IOException {
        // process data for the row from the Result instance.
    }
}
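The map body is left empty in the book's example. A sketch of what it might do, assuming a hypothetical column family "cf" with qualifier "attr1" (and remembering that this particular job emits nothing, since the driver below uses NullOutputFormat):

byte[] b = value.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr1")); // hypothetical family/qualifier
if (b != null) {
    String attr1 = Bytes.toString(b);
    // side effects only (counters, logging, external writes); the job's output format is Null
    context.getCounter("example", "rowsWithAttr1").increment(1);
}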
Configuration config = HBaseConfiguration.create();
Job job = new Job(config, "ExampleRead");
job.setJarByClass(MyReadJob.class); // class that contains mapper

Scan scan = new Scan();
scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false);  // don't set to true for MR jobs
// set other scan attrs...

TableMapReduceUtil.initTableMapperJob(
    tableName,      // input HBase table name
    scan,           // Scan instance to control CF and attribute selection
    MyMapper.class, // mapper
    null,           // mapper output key
    null,           // mapper output value
    job);
job.setOutputFormatClass(NullOutputFormat.class); // because we aren't emitting anything from mapper

boolean b = job.waitForCompletion(true);
if (!b) {
    throw new IOException("error with job!");
}
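The "// set other scan attrs..." line above is where you narrow the scan so the mapper only sees the data it needs. A sketch of common settings using standard Scan methods, with hypothetical family and row-key values:

scan.addFamily(Bytes.toBytes("cf"));                         // scan only this column family
scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("attr1")); // ...or only a single column instead
scan.setStartRow(Bytes.toBytes("row-000"));                  // hypothetical key range
scan.setStopRow(Bytes.toBytes("row-999"));
scan.setTimeRange(0L, System.currentTimeMillis());           // limit by cell timestamp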
(2) Read/write example
public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put> {

    public void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
        // this example is just copying the data from the source table...
        context.write(row, resultToPut(row, value));
    }

    private static Put resultToPut(ImmutableBytesWritable key, Result result) throws IOException {
        Put put = new Put(key.get());
        for (KeyValue kv : result.raw()) {
            put.add(kv);
        }
        return put;
    }
}
Configuration config = HBaseConfiguration.create();
Job job = new Job(config, "ExampleReadWrite");
job.setJarByClass(MyReadWriteJob.class); // class that contains mapper

Scan scan = new Scan();
scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false);  // don't set to true for MR jobs
// set other scan attrs

TableMapReduceUtil.initTableMapperJob(
    sourceTable,    // input table
    scan,           // Scan instance to control CF and attribute selection
    MyMapper.class, // mapper class
    null,           // mapper output key
    null,           // mapper output value
    job);
TableMapReduceUtil.initTableReducerJob(
    targetTable,    // output table
    null,           // reducer class
    job);
job.setNumReduceTasks(0);

boolean b = job.waitForCompletion(true);
if (!b) {
    throw new IOException("error with job!");
}
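Note that nothing actually reduces here: with a null reducer class and setNumReduceTasks(0), this is a map-only job, and the Puts the mapper emits go straight to the target table via TableOutputFormat. The resultToPut helper above uses the 0.94-era KeyValue API; as a sketch, the same helper against the newer Cell API (HBase 0.96+, where raw() became rawCells()) would look like:

private static Put resultToPut(ImmutableBytesWritable key, Result result) throws IOException {
    Put put = new Put(key.get());
    for (Cell cell : result.rawCells()) { // rawCells() replaces raw() in 0.96+
        put.add(cell);                    // carry the cell into the new Put unchanged
    }
    return put;
}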
(3) Summary (counting) example
public static class MyMapper extends TableMapper<Text, IntWritable> {

    private final IntWritable ONE = new IntWritable(1);
    private Text text = new Text();

    public void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
        String val = new String(value.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr1")));
        text.set(val); // we can only emit Writables...
        context.write(text, ONE);
    }
}
public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int i = 0;
        for (IntWritable val : values) {
            i += val.get();
        }
        Put put = new Put(Bytes.toBytes(key.toString()));
        put.add(Bytes.toBytes("cf"), Bytes.toBytes("count"), Bytes.toBytes(i)); // count stored as a 4-byte int
        context.write(null, put);
    }
}
Configuration config = HBaseConfiguration.create();
Job job = new Job(config, "ExampleSummary");
job.setJarByClass(MySummaryJob.class); // class that contains mapper and reducer

Scan scan = new Scan();
scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false);  // don't set to true for MR jobs
// set other scan attrs

TableMapReduceUtil.initTableMapperJob(
    sourceTable,       // input table
    scan,              // Scan instance to control CF and attribute selection
    MyMapper.class,    // mapper class
    Text.class,        // mapper output key
    IntWritable.class, // mapper output value
    job);
TableMapReduceUtil.initTableReducerJob(
    targetTable,          // output table
    MyTableReducer.class, // reducer class
    job);
job.setNumReduceTasks(1); // at least one, adjust as required

boolean b = job.waitForCompletion(true);
if (!b) {
    throw new IOException("error with job!");
}
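The snippets above omit the surrounding driver class. A minimal sketch of one way to wrap this job in a standard Tool/ToolRunner main class (the class name and argument convention are assumptions, not from the book):

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MySummaryJob extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // args[0] = source table, args[1] = target table (an assumed convention)
        // ... build the Scan and Job exactly as in the driver code above ...
        return 0; // e.g. return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(HBaseConfiguration.create(), new MySummaryJob(), args);
        System.exit(exitCode);
    }
}

ToolRunner parses generic Hadoop options (-D, -conf, etc.) before your own arguments, which makes the job easier to tune from the command line.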
(4) Hybrid example: read from HBase, write the summarized results to files on HDFS.
public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int i = 0;
        for (IntWritable val : values) {
            i += val.get();
        }
        context.write(key, new IntWritable(i));
    }
}
Configuration config = HBaseConfiguration.create();
Job job = new Job(config, "ExampleSummaryToFile");
job.setJarByClass(MySummaryFileJob.class); // class that contains mapper and reducer

Scan scan = new Scan();
scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false);  // don't set to true for MR jobs
// set other scan attrs

TableMapReduceUtil.initTableMapperJob(
    sourceTable,       // input table
    scan,              // Scan instance to control CF and attribute selection
    MyMapper.class,    // mapper class
    Text.class,        // mapper output key
    IntWritable.class, // mapper output value
    job);
job.setReducerClass(MyReducer.class); // reducer class
job.setNumReduceTasks(1);             // at least one, adjust as required
FileOutputFormat.setOutputPath(job, new Path("/tmp/mr/mySummaryFile")); // adjust directories as required

boolean b = job.waitForCompletion(true);
if (!b) {
    throw new IOException("error with job!");
}
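Since this job uses the default file output format (TextOutputFormat), the counts end up as plain text lines of "key TAB count" in part files (e.g. part-r-00000) under /tmp/mr/mySummaryFile. Depending on your Hadoop version it can also be worth declaring the reducer's output types explicitly in the driver; a possible addition:

job.setOutputKeyClass(Text.class);          // reducer output key
job.setOutputValueClass(IntWritable.class); // reducer output value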