Implementing custom data types in Hadoop


A clearer explanation of custom data types is given at http://book.douban.com/annotation/17067489/.

Take WordCount as an example

Define your own data type: the Http class

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class Http implements WritableComparable<Http> {

    private String value;

    public Http() {
    }

    public Http(String value) {
        setValue(value);
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    public void readFields(DataInput in) throws IOException {
        value = in.readUTF();
    }

    public void write(DataOutput out) throws IOException {
        out.writeUTF(value);
    }

    public int compareTo(Http http) {
        return value.compareTo(http.value);
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + ((value == null) ? 0 : value.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof Http))
            return false;
        Http other = (Http) obj;
        return this.value.equals(other.value);
    }

    @Override
    public String toString() {
        return value;
    }
}
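Since Hadoop ships keys and values between tasks as raw bytes, it is worth confirming that write() and readFields() are symmetric before plugging the type into a job. The following is a minimal sketch of such a round-trip check using Hadoop's DataOutputBuffer and DataInputBuffer; the HttpRoundTripCheck class is a hypothetical helper, not part of the original example.

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class HttpRoundTripCheck {

    // Serialize one Http instance and read it back into another,
    // confirming that write()/readFields() preserve the value.
    public static void main(String[] args) throws IOException {
        Http original = new Http("01a55");

        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);

        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());

        Http copy = new Http();
        copy.readFields(in);

        System.out.println(original.equals(copy));   // expected: true
    }
}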

Write the WordCount program

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCountEntry {

    public static class TokenizerMapper extends Mapper<LongWritable, Http, Http, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Http word = new Http();

        public void map(LongWritable key, Http value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.setValue(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Http, IntWritable, Http, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Http key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Path input = new Path(args[0]);
        Path output = new Path(args[1]);
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCountEntry.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Http.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, output);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
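Because Http is used as the map output key, Hadoop sorts it in the shuffle by deserializing each key and calling Http.compareTo(), which happens automatically for any WritableComparable. As an optional refinement that is not part of the original program, a comparator can also be declared for the key type; the HttpComparator below is a hypothetical sketch, and overriding its byte-level compare() would be the next step if deserializing keys during the sort ever became a bottleneck.

import org.apache.hadoop.io.WritableComparator;

// Optional: a comparator for the Http key type. With the (Http.class, true)
// constructor, the base class instantiates and deserializes both keys and
// its default compare() delegates to Http.compareTo(), so no further code
// is required here.
public class HttpComparator extends WritableComparator {

    public HttpComparator() {
        super(Http.class, true);
    }
}

In the driver it would be registered with job.setSortComparatorClass(HttpComparator.class) before submitting the job.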

Write MRUnit test cases to test the MapReduce program

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Before;
import org.junit.Test;

import com.geo.dmp.WordCountEntry.IntSumReducer;
import com.geo.dmp.WordCountEntry.TokenizerMapper;

public class WordCountEntryTest {

    private MapDriver<LongWritable, Http, Http, IntWritable> mapDriver;
    private ReduceDriver<Http, IntWritable, Http, IntWritable> reduceDriver;

    @Before
    public void setUpBeforeClass() throws Exception {
        TokenizerMapper tm = new TokenizerMapper();
        mapDriver = MapDriver.newMapDriver(tm);
        IntSumReducer isr = new IntSumReducer();
        reduceDriver = ReduceDriver.newReduceDriver(isr);
    }

    @Test
    public void tokenizerMapperTest() {
        mapDriver.withInput(new LongWritable(), new Http("01a55\tABLSD"));
        mapDriver.withOutput(new Http("01a55"), new IntWritable(1));
        mapDriver.withOutput(new Http("ABLSD"), new IntWritable(1));
        mapDriver.runTest();
    }

    @Test
    public void intSumReducerTest() {
        List<IntWritable> values = new ArrayList<IntWritable>();
        values.add(new IntWritable(1));
        values.add(new IntWritable(1));
        reduceDriver.withInput(new Http("01a55"), values);
        reduceDriver.withOutput(new Http("01a55"), new IntWritable(2));
        reduceDriver.runTest();
    }
}
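MRUnit also provides a MapReduceDriver that runs the mapper and reducer together, so the Http keys pass through MRUnit's in-memory shuffle and are grouped and ordered by Http.compareTo(). The class below is a hypothetical companion test, not part of the original article, assuming the same TokenizerMapper and IntSumReducer classes as above.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.junit.Before;
import org.junit.Test;

import com.geo.dmp.WordCountEntry.IntSumReducer;
import com.geo.dmp.WordCountEntry.TokenizerMapper;

// Hypothetical end-to-end test: map and reduce are driven together so the
// custom Http key is exercised through MRUnit's shuffle and sort.
public class WordCountPipelineTest {

    private MapReduceDriver<LongWritable, Http, Http, IntWritable, Http, IntWritable> mapReduceDriver;

    @Before
    public void setUp() {
        mapReduceDriver = MapReduceDriver.newMapReduceDriver(new TokenizerMapper(), new IntSumReducer());
    }

    @Test
    public void wordCountPipelineTest() {
        mapReduceDriver.withInput(new LongWritable(), new Http("01a55 01a55 ABLSD"));
        // Output keys arrive in sorted order, so "01a55" precedes "ABLSD".
        mapReduceDriver.withOutput(new Http("01a55"), new IntWritable(2));
        mapReduceDriver.withOutput(new Http("ABLSD"), new IntWritable(1));
        mapReduceDriver.runTest();
    }
}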

 
