Hadoop Reading Notes series articles: http://blog.csdn.net/caicongyang/article/category/2166855
1. Partition grouping
The partitioner specifies the grouping algorithm, i.e. which reduce task each map output record is sent to; the number of reduce tasks is set with setNumReduceTasks.
2. Code: KpiApp.java
Package Cmd;import Java.io.datainput;import java.io.dataoutput;import java.io.ioexception;import Java.net.URI; Import Org.apache.hadoop.conf.configuration;import Org.apache.hadoop.conf.configured;import Org.apache.hadoop.fs.filesystem;import Org.apache.hadoop.fs.path;import Org.apache.hadoop.io.longwritable;import Org.apache.hadoop.io.text;import Org.apache.hadoop.io.writable;import Org.apache.hadoop.mapreduce.job;import Org.apache.hadoop.mapreduce.mapper;import Org.apache.hadoop.mapreduce.reducer;import Org.apache.hadoop.mapreduce.lib.input.fileinputformat;import Org.apache.hadoop.mapreduce.lib.input.textinputformat;import Org.apache.hadoop.mapreduce.lib.output.fileoutputformat;import Org.apache.hadoop.mapreduce.lib.output.textoutputformat;import Org.apache.hadoop.mapreduce.lib.partition.hashpartitioner;import Org.apache.hadoop.util.tool;import org.apache.hadoop.util.toolrunner;/** * * <p> * Title:KpiApp.java * Package mapReduce * </p> * <p> * Description: This example demoIn the statistics given traffic file per mobile phone number of traffic, using Partitioner grouping, if Setnumreducetasks is greater than 1, the code must be called Jar * <p> * @author Tom.cai * @created 2014-11-25 pm 10:23:33 * @version V1.0 * */public class Kpiapp extends configured implements tool{public static void main (string[] args) throws Exception {Toolrunner.run (New Kpiapp (), args);} @Overridepublic int Run (string[] arg0) throws Exception {String Input_path = arg0[0]; String Out_path = arg0[1]; FileSystem FileSystem = filesystem.get (new URI (Input_path), New Configuration ()); Path Outpath = new Path (Out_path), if (Filesystem.exists (Outpath)) {Filesystem.delete (Outpath, True);} Job Job = new Job (new Configuration (), KpiApp.class.getSimpleName ()); Fileinputformat.setinputpaths (Job, Input_path); Job.setinputformatclass (Textinputformat.class); Job.setmapperclass (Kpimapper.class); Job.setmapoutputkeyclass (Text.class); Job.setmapoutputvalueclass ( Kpiwite.class); 
Job.setpartitionerclass (Kpipartitioner.class); Job.setnumreducetasks (2); Job.setReducerClass ( Kpireducer.class); JOB.SETOUTPUTKEYCLass (Text.class); Job.setoutputvalueclass (Kpiwite.class); Fileoutputformat.setoutputpath (Job, New Path (Out_path)); Job.setoutputformatclass (Textoutputformat.class); Job.waitforcompletion (true); return 0;} Static class Kpimapper extends Mapper<longwritable, text, text, kpiwite> {@Overrideprotected void map (longwritable Key, Text value, Context context) throws IOException, interruptedexception {string[] splited = value.tostring (). Split ("\ t "); String num = splited[1]; Kpiwite KPI = new Kpiwite (splited[6], splited[7], splited[8], splited[9]); Context.write (new Text (num), KPI);}} Static class Kpireducer extends Reducer<text, kpiwite, text, kpiwite> {@Overrideprotected void reduce (Text key, Iter Able<kpiwite> value, Context context) throws IOException, interruptedexception {long uppacknum = 0l;long DOWNPACKNU m = 0l;long Uppayload = 0l;long downpayload = 0l;for (Kpiwite kpi:value) {uppacknum + = Kpi.uppacknum;downpacknum + = KPI. Downpacknum;uppayload + = kpi.uppayload;downpayload + = KPI. Downpayload;} Context.write (Key, New Kpiwite (String.valueof (Uppacknum), string.valueof (Downpacknum), string.valueof (UpPayLoad), String.valueof (Downpayload)));}} Static class Kpipartitioner extends Hashpartitioner<text, kpiwite>{@Overridepublic int getpartition (Text key, Kpiwite value, int numreducetasks) {return (key.tostring (). Length () = = 11)? 
0:1;}} Class Kpiwite implements writable {long Uppacknum;long downpacknum;long uppayload;long downpayload;public KpiWite () {} Public Kpiwite (String uppacknum, String downpacknum, String uppayload, String downpayload) {This.uppacknum = Long.parselo Ng (uppacknum); this.downpacknum = Long.parselong (downpacknum); this.uppayload = Long.parselong (upPayLoad); This.downpayload = Long.parselong (downpayload);} @Overridepublic void ReadFields (Datainput in) throws IOException {this.uppacknum = In.readlong (); this.downpacknum = In.readlong (); this.uppayload = In.readlong (); this.downpayload = In.readlong ();} @Overridepublic void Write (DataOutput out) throws IOException {Out.writelong (uppacknum); Out.writelong (Downpacknum); Out.writelong (uppayload); Out.writeLong ( downpayload);}}
3. Remark: when grouping with a Partitioner, if setNumReduceTasks is greater than 1, the code must be packaged and run as a jar. Otherwise, the following error will be reported:
java.io.IOException: Illegal partition for 84138413 (1)
    at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:1073)
    at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:691)
    at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
    at mapreduce.KpiApp$KpiMapper.map(KpiApp.java:75)
    at mapreduce.KpiApp$KpiMapper.map(KpiApp.java:1)
    at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:764)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:214)
Welcome everybody to discuss the study together!
Useful Self-collection!
Record and share, so that you and I can grow together! You are welcome to read my other blog posts; my blog address: http://blog.csdn.net/caicongyang
Hadoop reading notes (11) partition grouping in MapReduce