package com.bank.service;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Queries all data from a specified column family of an HBase table and outputs it to HDFS.
 * @author Mengyao
 *
 */
public class ReadHbase extends Configured implements Tool {

    private static String tableName;
    private static String outputDir;

    static class ReadHbaseMapper extends TableMapper<Text, Text> {
        private static Text k = new Text();
        private static Text v = new Text();

        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            // Concatenate every qualifier:value pair of the "info" column family into one comma-separated line
            StringBuffer sb = new StringBuffer("");
            for (java.util.Map.Entry<byte[], byte[]> val : value.getFamilyMap(Bytes.toBytes("info")).entrySet()) {
                String str = new String(val.getValue());
                if (str != null) {
                    sb.append(new String(val.getKey()));
                    sb.append(":");
                    sb.append(str);
                    sb.append(",");
                }
            }
            String line = sb.toString();
            k.set(key.get());
            // Drop the trailing comma and emit: rowkey -> "qualifier:value,qualifier:value,..."
            v.set(line.substring(0, line.length() - 1));
            context.write(k, v);
        }
    }
    static class ReadHbaseReduce extends Reducer<Text, Text, Text, Text> {
        private Text result = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> value, Context context) throws IOException, InterruptedException {
            // Identity reduce: write each mapped line straight through with its row key
            for (Text val : value) {
                result.set(val);
                context.write(key, result);
            }
        }
    }
    @Override
    public int run(String[] arg0) throws Exception {
        tableName = arg0[0];
        outputDir = arg0[1];
        Job job = Job.getInstance(getConf(), ReadHbase.class.getSimpleName());
        job.setJarByClass(ReadHbase.class);
        job.setReducerClass(ReadHbaseReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path(outputDir));
        // Use the HBase table as the map input: scan the whole table and feed each row to ReadHbaseMapper
        TableMapReduceUtil.initTableMapperJob(tableName, new Scan(), ReadHbaseMapper.class, Text.class, Text.class, job);
        TableMapReduceUtil.addDependencyJars(job);
        return job.waitForCompletion(true) ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        String[] otherArgs = new GenericOptionsParser(args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: " + ReadHbase.class.getSimpleName() + " <tableName> <outputDir>");
            System.exit(2);
        }
        Configuration conf = HBaseConfiguration.create();
        // ZooKeeper quorum and client port of the HBase cluster
        conf.set("hbase.zookeeper.quorum", "h5:2181,h6:2181,h7:2181");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("dfs.socket.timeout", "3600000");
        int status = ToolRunner.run(conf, new ReadHbase(), otherArgs);
        System.exit(status);
    }
}
Querying HBase tables with MapReduce: outputting all data of a column family to HDFS (1)
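Before submitting the job, it can help to check what the mapper will actually see. Below is a minimal sketch, assuming an HBase 1.0+ client API (ConnectionFactory and Table are not available in older releases): it scans the same "info" column family with the plain client API and prints the same rowkey -> "qualifier:value,..." lines the mapper emits. The class name ScanInfoFamily and the default table name "t_user" are only placeholders; the ZooKeeper quorum is copied from the job above and should be replaced with your own.

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Standalone check: scan the "info" column family of a table and print
 * the same lines the MapReduce job above writes to HDFS.
 */
public class ScanInfoFamily {

    public static void main(String[] args) throws Exception {
        // "t_user" is only an example table name
        String tableName = args.length > 0 ? args[0] : "t_user";
        Configuration conf = HBaseConfiguration.create();
        // Same quorum as the job above; adjust to your cluster
        conf.set("hbase.zookeeper.quorum", "h5:2181,h6:2181,h7:2181");

        Scan scan = new Scan();
        // Restrict the scan to the column family the job reads
        scan.addFamily(Bytes.toBytes("info"));

        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf(tableName));
             ResultScanner scanner = table.getScanner(scan)) {
            for (Result row : scanner) {
                StringBuilder sb = new StringBuilder();
                for (Map.Entry<byte[], byte[]> cell : row.getFamilyMap(Bytes.toBytes("info")).entrySet()) {
                    sb.append(Bytes.toString(cell.getKey())).append(":")
                      .append(Bytes.toString(cell.getValue())).append(",");
                }
                // rowkey <TAB> qualifier:value,qualifier:value,...
                System.out.println(Bytes.toString(row.getRow()) + "\t"
                        + (sb.length() > 0 ? sb.substring(0, sb.length() - 1) : ""));
            }
        }
    }
}

The job itself is submitted in the usual way with hadoop jar, passing the table name and an HDFS output directory that does not yet exist (FileOutputFormat fails if it does), for example: hadoop jar readhbase.jar com.bank.service.ReadHbase t_user /output/readhbase (jar name and paths here are only illustrative).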