Last Update:2014-12-23
Source: Internet
Author: User
Keywords
Rita
http
value
data type
Hadoop: implementing a custom data type. Blog category: Hadoop, MRUnit, custom data types.
A clearer explanation of custom data types is given at http://book.douban.com/annotation/17067489/.
Take WordCount as an example
Define your own data type: the Http class
Import Java.io.datainput;import Java.io.dataoutput;import Java.io.ioexception;import Org.apache.hadoop.io.writablecomparable;public class HTTP implements writablecomparable<http>{public http () {} private String value; Public Http (String value) {SetValue (value);} public String GetValue () {return value;} public void SetValue (string value) { This.value = value; ReadFields (Datainput in) throws IOException {value = In.readutf (),} public void write (DataOutput out) throws IOException {Out.writeutf (value);} public int CompareTo (http http) {return (Value.compareto (Http.value)); @Override public int hashcode () {final int prime =--result = 1, result = Prime * result + ((value = = null)? 0:value.hashcode ()); re Turn result; @Override public boolean equals (Object obj) {if (! obj instanceof Http) return false; HTTP other = (http) obj; Return This.value.equals (Other.value); @Override public String toString () {return value}}
Writing WordCount programs
Import Java.io.ioexception;import Java.util.stringtokenizer;import Org.apache.hadoop.conf.configuration;import Org.apache.hadoop.fs.path;import Org.apache.hadoop.io.intwritable;import org.apache.hadoop.io.LongWritable; Import Org.apache.hadoop.mapreduce.job;import Org.apache.hadoop.mapreduce.mapper;import Org.apache.hadoop.mapreduce.reducer;import Org.apache.hadoop.mapreduce.lib.input.fileinputformat;import Org.apache.hadoop.mapreduce.lib.output.fileoutputformat;import Org.apache.hadoop.util.GenericOptionsParser; public class wordcountentry{public static class Tokenizermapper extends Mapper<longwritable, HTTP, http, intwritable > {Private final static intwritable one = new intwritable (1); private Http word = new http (); public void Map (longwritable Key, Http value, Context context) throws IOException, interruptedexception {stringtokenizer ITR = new StringTokenizer ( Value.tostring ()); while (Itr.hasmoretokens ()) {Word.setvalue (Itr.nexttoken ()); Context.write (Word, one);} } newlineIC static class Intsumreducer extends Reducer<http, intwritable, Http, intwritable> {private intwritable result = new Intwritable (); public void reduce (Http key, iterable<intwritable> values, context context) throws IOException, interruptedexception {int sum = 0; for (intwritable val:values) {sum = Val.get ();} result.set (sum); Context.write (Key, result); } public static void main (string] args) throws IOException, Interruptedexception, classnotfoundexception { Revisit conf = new revisit (); String] Otherargs = new Genericoptionsparser (conf, args). 
Getremainingargs (); if (otherargs.length!= 2) {System.err.println ("Usage:wordcount <in> <out>"); System.exit (2); Path input = new path (args[0]); Path output = new path (args[1]); Job Job = new Job (conf, word count); Job.setjarbyclass (Wordcountentry.class); Job.setmapperclass (Tokenizermapper.class); Job.setcombinerclass (Intsumreducer.class); Job.setreducerclass (Intsumreducer.class); Job.setoutPutkeyclass (Http.class); Job.setoutputvalueclass (Intwritable.class); Fileinputformat.addinputpath (Job, input); Fileoutputformat.setoutputpath (job, output); System.exit (Job.waitforcompletion (true)? 0:1); }}
Write MRUnit test cases to test the MapReduce program
Import Java.util.arraylist;import Java.util.list;import Org.apache.hadoop.io.intwritable;import Org.apache.hadoop.io.longwritable;import Org.apache.hadoop.mrunit.mapreduce.mapdriver;import Org.apache.hadoop.mrunit.mapreduce.reducedriver;import Org.junit.before;import Org.junit.Test;import Com.geo.dmp.wordcountentry.intsumreducer;import Com.geo.dmp.wordcountentry.tokenizermapper;public Class wordcountentrytest{Private mapdriver<longwritable, HTTP, http, intwritable> mapdriver; private reducedriver< HTTP, intwritable, HTTP, intwritable> Reducedriver; @Before public void Setupbeforeclass () throws Exception {Tokenizermapper TM = new Tokenizermapper (); mapdriver = Mapdriver.newmapdriver (tm); Intsumreducer ISR = new Intsumreducer (); Reducedriver = Reducedriver.newreducedriver (ISR); @Test public void Tokenizermappertest () {mapdriver.withinput (New longwritable (), New Http ("01A55\TABLSD")); Mapdriver.withoutput (New Http ("01a55"), New Intwritable (1)); Mapdriver.withoutput (New HttP ("ABLSD"), New Intwritable (1)); Mapdriver.runtest (); @Test public void Intsumreducertest () {list<intwritable> values = new arraylist<intwritable> (); Values.add (New Intwritable (1)); Values.add (New intwritable (1)); Reducedriver.withinput (New Http ("01a55"), values); Reducedriver.withoutput (New Http ("01a55"), New Intwritable (2)); Reducedriver.runtest (); }}