Import data from HDFS to HBase

The MapReduce job below reads tab-separated log lines from HDFS; the mapper builds a row key from the second field plus a formatted timestamp, and the reducer writes each record into the HBase table wlan_log through TableOutputFormat.
package hbase;

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
public class BatchImport {

    public static void main(String[] args) throws Exception {
        final Configuration configuration = new Configuration();
        // Set the ZooKeeper quorum so the job can locate HBase
        configuration.set("hbase.zookeeper.quorum", "hadoop1");
        // Set the target HBase table name
        configuration.set(TableOutputFormat.OUTPUT_TABLE, "wlan_log");
        // Increase the timeout to keep HBase from exiting on slow DFS sockets
        configuration.set("dfs.socket.timeout", "180000");

        final Job job = new Job(configuration, "HBaseBatchImport");

        job.setMapperClass(BatchImportMapper.class);
        job.setReducerClass(BatchImportReducer.class);
        // Set the map output types; the reduce output types are handled by TableOutputFormat
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        // Do not set an output path; set the output format to TableOutputFormat instead
        job.setOutputFormatClass(TableOutputFormat.class);

        FileInputFormat.setInputPaths(job, "hdfs://hadoop1:9000/HTTP*");

        job.waitForCompletion(true);
    }
    static class BatchImportMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        SimpleDateFormat dateformat1 = new SimpleDateFormat("yyyyMMddHHmmss");
        Text v2 = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            final String[] splited = value.toString().split("\t");
            try {
                // The first field is a timestamp in milliseconds
                final Date date = new Date(Long.parseLong(splited[0].trim()));
                final String dateFormat = dateformat1.format(date);
                // Row key = second field + ":" + formatted time
                String rowKey = splited[1] + ":" + dateFormat;
                // Prepend the row key to the original line so the reducer can split them apart
                v2.set(rowKey + "\t" + value.toString());
                context.write(key, v2);
            } catch (NumberFormatException e) {
                // Count lines whose first field is not a parseable timestamp
                final Counter counter = context.getCounter("BatchImport", "ErrorFormat");
                counter.increment(1L);
                System.out.println("error " + splited[0] + " " + e.getMessage());
            }
        }
    }
    static class BatchImportReducer extends TableReducer<LongWritable, Text, NullWritable> {
        @Override
        protected void reduce(LongWritable key, Iterable<Text> values, Context context)
                throws java.io.IOException, InterruptedException {
            for (Text text : values) {
                final String[] splited = text.toString().split("\t");
                // splited[0] is the row key built in the mapper
                final Put put = new Put(Bytes.toBytes(splited[0]));
                put.add(Bytes.toBytes("cf"), Bytes.toBytes("date"), Bytes.toBytes(splited[1]));
                // To store the other fields, call put.add(...) once per column
                context.write(NullWritable.get(), put);
            }
        }
    }
}
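TableOutputFormat writes into an existing table, so wlan_log with column family cf must exist before the job runs. The following is a minimal sketch using the old-style HBaseAdmin API from the same HBase generation as the code above; the class name CreateWlanLogTable is made up here, and the quorum address simply mirrors the job configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateWlanLogTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop1");
        HBaseAdmin admin = new HBaseAdmin(conf);
        // Create the table only if it is not already there
        if (!admin.tableExists("wlan_log")) {
            HTableDescriptor desc = new HTableDescriptor("wlan_log");
            desc.addFamily(new HColumnDescriptor("cf"));
            admin.createTable(desc);
        }
        admin.close();
    }
}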
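Once the import job finishes, a short scan can spot-check that rows arrived with the expected row keys. This sketch is not part of the original job; it assumes the same quorum and table names, and the class name ScanWlanLog is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanWlanLog {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop1");
        HTable table = new HTable(conf, "wlan_log");
        ResultScanner scanner = table.getScanner(new Scan());
        int n = 0;
        // Print the first ten rows: row key plus the cf:date column
        for (Result result : scanner) {
            System.out.println(Bytes.toString(result.getRow()) + " -> "
                    + Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("date"))));
            if (++n >= 10) break;
        }
        scanner.close();
        table.close();
    }
}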