Example: distribute records so that phone numbers in the same number segment (the same prefix) are sent to the same reduce task. If you do not specify a partitioner, the default partitioner is used and records are not grouped by number segment.
import java.util.hashmap;import org.apache.hadoop.io.text;import org.apache.hadoop.mapreduce.partitioner;import cn.com.bigdata.mr.flowcount.flowbean;/** * Define your own data (group) distribution rules from map to reduce distribute (group) according to the province to which the phone number belongs provincepartitioner * the default grouping component is Hashpartitioner * * @author * */public class provincepartitioner extends Partitioner<Text, FlowBean> {static HashMap<String, Integer> Provincemap = new hashmap<string, integer> (); Static {provincemap.put ("135", 0);p rovincemap.put ("136", 1);p rovincemap.put ("137", 2);p rovincemap.put ("138", 3); Provincemap.put ("139", 4);} @Overridepublic int getpartition (text key, flowbean value, int numpartitions) {integer code = provincemap.get (key.tostring (). substring (0, 3)); return code == null ? 5 : code;}}
import java.io.ioexception;import org.apache.commons.lang.stringutils;import org.apache.hadoop.conf.configuration;import org.apache.hadoop.fs.path;import org.apache.hadoop.io.longwritable;import org.apache.hadoop.io.text;import org.apache.hadoop.mapreduce.job;import org.apache.hadoop.mapreduce.mapper;import Org.apache.hadoop.mapreduce.reducer;import org.apache.hadoop.mapreduce.lib.input.fileinputformat;import org.apache.hadoop.mapreduce.lib.output.fileoutputformat;import cn.com.bigdata.mr.flowcount.flowbean ;p Ublic class flowcountprovince {static class flowcountprovincemapper extends mapper<longwritable, text, text, flowbean> {@Overrideprotected void Map (longwritable key, text value, context context) throws ioexception, InterruptedException {// convert this line of data into stringstring line = value.tostring ();/ Slice this line, get the fields String[] fields = stringutils.split (line, "\ T");// get the phone number string phone = fields[1];long upflow = long.parselong (fields[fields.length - &NBSP;3]); Long dflow = long.parselong (fields[fields.length - 2]); Flowbean bean = new flowbean (Upflow, dflow); Context.write (New Text (phone), Bean);}} Static class flowcountprovincereducer extends reducer<text, flowbean, text, flowbean> {@Overrideprotected void reduce (text key, iterable<flowbean> beans, context context) throws IOException, InterruptedException {// Define two counters long upamount = 0;long damount = 0;// traverse all of the user's traffic beans, accumulate sum for (Flowbean bean : beans) {upamount += bean.getupflow ();d amount += bean.getdflow ();} Constructs a final result for outputFlowbeanflowbean countbean = new flowbean (Upamount, damount);// Output Context.write (Key, countbean);}} Public static void main (String[] args) throws exception {configuration conf = new configuration (); Job job = job.getinstance (conf); Job.setjarbyclass (Flowcountprovince.class); Job.setMapperClass ( Flowcountprovincemapper.class); Job.setreducerclass 
(Flowcountprovincereducer.class);/** * If the output kv type of the map and reduce is consistent, then the output KV type */ /* * job.setmapoutputkeyclass (text.class) of the map is not specifically set; * job.setmapoutputvalueclass (Flowbean.class); */job.setoutputkeyclass (Text.class); Job.setoutputvalueclass (Flowbean.class), the default input and output components in/** * hadoop are Textinputformat and Textoutputformat, so These two lines of code can also be omitted */ /* * job.setinputformatclass (Textinputformat.class); * Job.setoutputformatclass (Textoutputformat.class); */fileinputformat.setinputpaths (Job, new path (Args[0])); Fileoutputformat.setoutputpath (Job, new path (args[1]));/** * tell the job, Use our custom data Grouping component Provincepartitioner */job.setpartitionerclass (Provincepartitioner.class);/** * In order to match the number of groupings in a custom grouping policy, you need to set the concurrency number of the corresponding reduce if "concurrency" > Group number  , an empty result file will appear * if "concurrency" < number of groupings, * will error: illegal partition number, but if the number of concurrent = 1, then the packet logic will not be called, all the data into a unique reducetask, will not be error */job.setnumreducetasks ( Integer.parseint (args[2]); Boolean res = job.waitforcompletion (true); System.exit (res ? 0 : 1);}}
11: Partitioner example implementation