HBase BULK INSERT API

Source: Internet
Author: User
Tags: bulk insert

1. Data format a.txt:

13631579850661372623050300-fd-07-a4-72-b8:cmcc 120.196.100.82I02.c.aliimg.com 2427 248124681 200
1363157995052138265441015c-0e-8b-c7-f1-e0:cmcc 120.197.40.44 0 264 0 200
13631579910761392643565620-10-7a-28-cc-0a:cmcc 120.196.100.992 4 132 1512 200
1363154400022139262511065c-0e-8b-8b-b1-50:cmcc 120.197.40.44 0 240 0 200
13631579930441821157596194-71-ac-cd-e6-18:cmcc-easy 120.196.100.99Iface.qiyi.com Video website15 12 1527 2106 200
1363157995074841384135c-0e-8b-8c-e8-20:7daysinn 120.197.40.4122.72.52.12 2016 41161432 200
136315799305513560439658C4-17-fe-ba-de-d9:cmcc 120.196.100.9918 15 1116 954 200
1363157995033159201332575c-0e-8b-c7-ba-20:cmcc 120.197.40.4sug.so.360.cn Information security20 20 3156 2936 200
13631579830191371919941968-a1-b7-03-07-b1:cmcc-easy 120.196.100.824 0 240 0 200
1363157984041136605779915c-0e-8b-92-5c-20:cmcc-easy 120.197.40.4S19.cnzz.com Site statistics24 9 6960 690 200
1363157973098150136858585c-0e-8b-c7-f7-90:cmcc 120.197.40.4Rank.ie.sogou.com Search engine28 27 3659 3538 200
136315798602915989002119E8-99-c4-4e-93-e0:cmcc-easy 120.196.100.99Www.umeng.com Site statistics3 3 1938 180 200
136315799209313560439658C4-17-fe-ba-de-d9:cmcc 120.196.100.9915 9 918 4938 200
1363157986041134802531045c-0e-8b-c7-fc-80:cmcc-easy 120.197.40.43 3 180 180 200
1363157984040136028465655c-0e-8b-8b-b6-00:cmcc 120.197.40.42052.flash2-http.qq.com Integrated Portal15 12 1938 2910 200
13631579950931392231446600-fd-07-a2-ec-ba:cmcc 120.196.100.82img.qfc.cn 1212 30083720 200
1363157982040135024688235c-0a-5b-6a-0b-d4:cmcc-easy 120.196.100.99Y0.ifengimg.com Integrated Portal57 102 7335 110349 200
13631579860721832017338284-25-db-4f-10-1a:cmcc-easy 120.196.100.99Input.shouji.sogou.com Search engine21st 18 9531 2412 200
13631579900431392505741300-1f-64-e1-e6-9a:cmcc 120.196.100.55T3.baidu.com Search engine69 63 11058 48243 200
13631579880721376077871000-fd-07-a4-7b-08:cmcc 120.196.100.822 2 120 120 200
13631579850791382307000120-7c-8f-70-68-1f:cmcc 120.196.100.996 3 360 180 200
13631579850691360021750200-1f-64-e2-e8-b1:cmcc 120.196.100.5518 138 1080 186852 200


2. Create the table in the HBase shell: create 'WLAN', 'CF' (the table name must match the table name configured in the code in step 3)


3. Code

package com.utils;


import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;


public class Hbaseimport {
Static class Batchmapper extends Mapper<longwritable, Text, longwritable, text>{
@Override
protected void Map (longwritable key, Text value,
Mapper<longwritable, Text, longwritable, Text> Context context)
Throws IOException, Interruptedexception {
String line = value.tostring ();
string[] splited = line.split ("\ t");
SimpleDateFormat Simpledateformatimpledateformat = new SimpleDateFormat ("Yyyymmddhhmmss");
String format = Simpledateformatimpledateformat.format (New Date (Long.parselong (Splited[0].trim ())));
String rowkey=splited[1]+ "_" +format;
Text V2s = new text ();
V2s.set (rowkey+ "\ t" +line);
Context.write (key, V2S);
}
}
Static class Batchreducer extends Tablereducer<longwritable, Text, nullwritable>{
Private String family= "CF";//Column family


@Override
protected void reduce (longwritable arg0, iterable<text> V2s,
Reducer<longwritable, Text, nullwritable, Mutation> Context context)
Throws IOException, Interruptedexception {
for (Text v2:v2s) {
string[] splited = v2.tostring (). Split ("\ t");
String RowKey = splited[0];
Put put = new put (rowkey.getbytes ());
Put.add (Family.getbytes (), "Raw". GetBytes (), v2.tostring (). GetBytes ());
Put.add (Family.getbytes (), "Reporttime". GetBytes (), splited[1].getbytes ());
Put.add (Family.getbytes (), "Msisdn". GetBytes (), splited[2].getbytes ());
Put.add (Family.getbytes (), "Apmac". GetBytes (), splited[3].getbytes ());
Put.add (Family.getbytes (), "Acmac". GetBytes (), splited[4].getbytes ());
Put.add (Family.getbytes (), "host". GetBytes (), splited[5].getbytes ());
Put.add (Family.getbytes (), "SiteType". GetBytes (), splited[6].getbytes ());
Put.add (Family.getbytes (), "Uppacknum". GetBytes (), splited[7].getbytes ());
Put.add (Family.getbytes (), "Downpacknum". GetBytes (), splited[8].getbytes ());
Put.add (Family.getbytes (), "Uppayload". GetBytes (), splited[9].getbytes ());
Put.add (Family.getbytes (), "Downpayload". GetBytes (), splited[10].getbytes ());
Put.add (Family.getbytes (), "Httpstatus". GetBytes (), splited[11].getbytes ());
Context.write (Nullwritable.get (), put);
}
}
}
private static final String TableName = "Waln_log";
public static void Main (string[] args) throws Exception {
Configuration conf = hbaseconfiguration.create ();
Conf.set ("Hbase.zookeeper.quorum", "192.168.80.20,192.168.80.21,192.168.80.22");
Conf.set ("Hbase.rootdir", "hdfs://cluster/hbase");
Conf.set ("Hbase.rootdir", "hdfs://192.168.80.20:9000/hbase");
Conf.set (tableoutputformat.output_table, TableName);

Job Job = new Job (conf, HBaseImport.class.getSimpleName ());
Tablemapreduceutil.adddependencyjars (Job);
Job.setjarbyclass (Hbaseimport.class);

Job.setmapperclass (Batchmapper.class);
Job.setreducerclass (Batchreducer.class);

Job.setmapoutputkeyclass (Longwritable.class);
Job.setmapoutputvalueclass (Text.class);

Job.setinputformatclass (Textinputformat.class);
Job.setoutputformatclass (Tableoutputformat.class);

Fileinputformat.setinputpaths (Job, "Hdfs://192.168.80.20:9000/data");
System.out.println ("xxxxxxx1xxxxxxxx");
Job.waitforcompletion (TRUE);
}
}



Copyright notice: This is an original article by the blogger and may not be reproduced without the blogger's permission.

HBase BULK INSERT API

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.