11: Partitioner example implementation

Source: Internet
Author: User

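Example content: records with the same phone number are always processed by the same reduce task. This example additionally partitions by phone-number segment (the first three digits), so that all numbers in the same segment land in the same partition; if no custom partitioner is specified, records are distributed without regard to the number segment.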

import java.util.hashmap;import org.apache.hadoop.io.text;import  org.apache.hadoop.mapreduce.partitioner;import cn.com.bigdata.mr.flowcount.flowbean;/** *  Define your own data (group) distribution rules from map to reduce   distribute (group) according to the province to which the phone number belongs provincepartitioner *  the default grouping component is Hashpartitioner  *  *  @author  *  */public class provincepartitioner extends  Partitioner<Text, FlowBean> {static HashMap<String, Integer>  Provincemap = new hashmap<string, integer> (); Static {provincemap.put ("135",  0);p rovincemap.put ("136",  1);p rovincemap.put ("137",  2);p rovincemap.put ("138",  3); Provincemap.put ("139",  4);} @Overridepublic  int getpartition (text key, flowbean value, int  numpartitions)  {integer code = provincemap.get (key.tostring (). substring (0, 3)); return  code == null ? 5 :  code;}} 
import java.io.ioexception;import org.apache.commons.lang.stringutils;import  org.apache.hadoop.conf.configuration;import org.apache.hadoop.fs.path;import  org.apache.hadoop.io.longwritable;import org.apache.hadoop.io.text;import  org.apache.hadoop.mapreduce.job;import org.apache.hadoop.mapreduce.mapper;import  Org.apache.hadoop.mapreduce.reducer;import org.apache.hadoop.mapreduce.lib.input.fileinputformat;import  org.apache.hadoop.mapreduce.lib.output.fileoutputformat;import cn.com.bigdata.mr.flowcount.flowbean ;p Ublic class flowcountprovince {static class flowcountprovincemapper extends  mapper<longwritable, text, text, flowbean> {@Overrideprotected  void  Map (longwritable key, text value, context context)  throws ioexception,  InterruptedException {//  convert this line of data into stringstring line = value.tostring ();/  Slice this line, get the fields String[] fields = stringutils.split (line,  "\ T");//  get the phone number string  phone = fields[1];long upflow = long.parselong (fields[fields.length - &NBSP;3]); Long dflow = long.parselong (fields[fields.length - 2]); Flowbean bean = new flowbean (Upflow, dflow); Context.write (New Text (phone),  Bean);}} Static class flowcountprovincereducer extends reducer<text, flowbean, text,  flowbean> {@Overrideprotected  void reduce (text key, iterable<flowbean>  beans, context context)  throws IOException, InterruptedException {//  Define two counters long upamount = 0;long damount = 0;//  traverse all of the user's traffic beans, accumulate sum for   (Flowbean bean : beans)  {upamount += bean.getupflow ();d amount +=  bean.getdflow ();}   Constructs a final result for outputFlowbeanflowbean countbean = new flowbean (Upamount, damount);//  Output Context.write (Key, countbean);}} Public static void main (String[] args)  throws exception {configuration  conf = new configuration (); Job job = job.getinstance (conf); Job.setjarbyclass (Flowcountprovince.class); Job.setMapperClass ( Flowcountprovincemapper.class); 
Job.setreducerclass (Flowcountprovincereducer.class);/** *  If the output kv type of the map and reduce is consistent, then the output KV type  */ /* * job.setmapoutputkeyclass (text.class) of the map is not specifically set;  * job.setmapoutputvalueclass (Flowbean.class);  */job.setoutputkeyclass (Text.class); Job.setoutputvalueclass (Flowbean.class), the default input and output components in/** * hadoop are Textinputformat and Textoutputformat, so These two lines of code can also be omitted  */ /* * job.setinputformatclass (Textinputformat.class); *  Job.setoutputformatclass (Textoutputformat.class);  */fileinputformat.setinputpaths (Job, new path (Args[0])); Fileoutputformat.setoutputpath (Job, new path (args[1]));/** *  tell the job, Use our custom data Grouping component Provincepartitioner */job.setpartitionerclass (Provincepartitioner.class);/** *  In order to match the number of groupings in a custom grouping policy, you need to set the concurrency number of the corresponding reduce   if "concurrency" > Group number &nbsp, an empty result file will appear  *  if "concurrency" < number of groupings,  *   will error: illegal partition number, but if the number of concurrent = 1, then the packet logic will not be called, all the data into a unique reducetask, will not be error  */job.setnumreducetasks ( Integer.parseint (args[2]); Boolean res = job.waitforcompletion (true); System.exit (res ? 0 : 1);}}


11: Partitioner example implementation

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.