Hadoop Secondary Sort Example Two

Source: Internet
Author: User

1. Requirements
Find the highest temperature for each year.

2. Sample Data
1995	10
1996	11
1995	16
1995	22
1996	26
1995	3
1996	7
1996	10
1996	20
1996	33
1995	21
1996	9
1995	31
1995	-13
1995	22
1997	-2
1997	28
1997	15
1995	8


3. Ideas, Code
Records are partitioned and grouped by year and sorted by temperature in descending order before they reach the reducer. All records of the same year therefore arrive in a single reduce group, and the first record of each group holds the highest temperature of that year. The key points for implementing this approach are:
A. Define a composite key that combines the natural key (year) and the natural value (temperature).
B. Sort the records by the composite key (year ascending, temperature descending).
C. Use only the natural key (year) when partitioning and grouping the composite keys.
Import Org.apache.hadoop.io.intwritable;import Org.apache.hadoop.io.writablecomparable;import java.io.DataInput; Import java.io.dataoutput;import java.io.ioexception;/** * Key combination, this example is used for auxiliary sorting, including year and temperature.    */public class Intpair implements writablecomparable<intpair> {private intwritable first;    Private intwritable second;        Public Intpair () {This.first = new intwritable ();        This.second = new Intwritable ();        If the above two lines are commented out, an exception occurs when using Java.lang.NullPointerException at Intpair.readfields} public intpair (int first, int second) {    Set (new Intwritable (first), New Intwritable (second));    } public Intpair (intwritable first, intwritable second) {Set (first, second);        } public void set (intwritable first, intwritable second) {this.first = first;    This.second = second;    } public intwritable GetFirst () {return first;    } public intwritable Getsecond () {return second; ' public void write ' (DataOutput out) throwsIOException {first.write (out);    Second.write (out);        } public void ReadFields (Datainput in) throws IOException {First.readfields (in);    Second.readfields (in);    } @Override public int hashcode () {return First.hashcode () * 163 + second.hashcode (); } @Override public boolean equals (Object obj) {if (obj instanceof intpair) {intpair IP = (intpai            R) obj;        return First.get () = = Ip.first.get () && second.get () = = Ip.second.get ();    } return false;    } @Override Public String toString () {return first + "\ T" + second;        } public int compareTo (Intpair o) {int cmp = First.compareto (O.first);        if (cmp = = 0) {cmp = Second.compareto (O.second);    } return CMP; }}


Import Org.apache.hadoop.conf.configuration;import Org.apache.hadoop.conf.configured;import Org.apache.hadoop.fs.path;import Org.apache.hadoop.io.intwritable;import org.apache.hadoop.io.LongWritable; Import Org.apache.hadoop.io.nullwritable;import Org.apache.hadoop.io.text;import Org.apache.hadoop.io.writablecomparable;import Org.apache.hadoop.io.writablecomparator;import Org.apache.hadoop.io.writableutils;import Org.apache.hadoop.mapreduce.job;import Org.apache.hadoop.mapreduce.mapper;import Org.apache.hadoop.mapreduce.partitioner;import Org.apache.hadoop.mapreduce.reducer;import Org.apache.hadoop.mapreduce.lib.input.fileinputformat;import Org.apache.hadoop.mapreduce.lib.output.fileoutputformat;import Org.apache.hadoop.util.GenericOptionsParser; Import Org.apache.hadoop.util.tool;import Org.apache.hadoop.util.toolrunner;import Java.io.ioexception;public Class Maxtemperatureusingsecondarysort extends configured implements Tool {static class Maxtemperaturemapper extends M Apper<longWritable, text, Intpair, nullwritable> {@Override protected void map (longwritable key, text value, Conte            XT context) throws IOException, Interruptedexception {string[] val = value.tostring (). Split ("\\t"); if (val.length = = 2) {context.write (New Intpair (Integer.parseint (val[0]), Integer.parseint (val[1)), Nul            Lwritable.get ()); }}} static class Maxtemperaturereducer extends Reducer<intpair, nullwritable, Intpair, nullwritable> {@Override protected void reduce (Intpair key, iterable<nullwritable> values, context context) throws IOException, interruptedexception {context.write (key, Nullwritable.get ());//output only the first line}}//Only according to Fir St Partition public static class Firstpartitioner extends Partitioner<intpair, nullwritable> {@Override p ublic int getpartition (intpair key, nullwritable value, int numpartitions) {return (Key.getfirst (). 
hashcodE () & integer.max_value)% Numpartitions; }}//Only based on first group public static class Groupcomparator extends Writablecomparator {private static final I        Ntwritable.comparator int_comparator = new Intwritable.comparator ();        Protected Groupcomparator () {super (Intpair.class, true);                } @Override public int compare (byte[] b1, int s1, int L1, byte[] b2, int s2, int l2) {try {                int firstL1 = Writableutils.decodevintsize (b1[s1]) + Readvint (B1, S1);                int firstL2 = Writableutils.decodevintsize (B2[s2]) + readvint (B2, S2);            Return Int_comparator.compare (B1, S1, firstL1, B2, S2, firstL2);            } catch (IOException e) {throw new IllegalArgumentException (e); }} @Override public int compare (writablecomparable A, writablecomparable b) {if (a Insta nceof Intpair && b instanceof Intpair) {return (IntpaiR) a). GetFirst (). CompareTo (((Intpair) b). GetFirst ());        } return Super.compare (A, b);            }}//Sort public static class Keycomparator extends Writablecomparator {protected keycomparator () according to key combinations {        Super (Intpair.class, true); } @Override public int compare (writablecomparable A, writablecomparable b) {if (a instanceof int                Pair && b instanceof intpair) {Intpair ip1 = (Intpair) A;                Intpair ip2 = (Intpair) b; int cmp = Ip1.getfirst (). CompareTo (Ip2.getfirst ());                Ascending (year) if (cmp! = 0) {return CMP; } return-ip1.getsecond (). CompareTo (Ip2.getsecond ());        Descending (temperature)} return Super.compare (A, b);        }} public int run (string[] args) throws Exception {configuration conf = new Configuration (); string[] Otherargs = new Genericoptionsparser (conf, args). GetremaininGargs (); if (otherargs.length! 
= 2) {System.err.println ("Parameter" is wrong, please enter the PARAMETERS:&LT;INP            Ut> <output> ");        System.exit (-1);        } Path InputPath = new Path (otherargs[0]);        Path OutputPath = new Path (otherargs[1]);        Conf.set ("Fs.defaultfs", "hdfs://vmnode.zhch:9000");        Job Job = job.getinstance (conf, "Maxtemperatureusingsecondarysort");        Job.setjar ("F:/workspace/assistranking2/target/assistranking2-1.0-snapshot.jar");        Job.setmapperclass (Maxtemperaturemapper.class);        Job.setpartitionerclass (Firstpartitioner.class); Job.setsortcomparatorclass (Keycomparator.class);        By default Job.setgroupingcomparatorclass (Groupcomparator.class) is sorted according to the CompareTo function of Key;        Job.setreducerclass (Maxtemperaturereducer.class);        Job.setmapoutputkeyclass (Intpair.class);        Job.setoutputkeyclass (Intpair.class);        Job.setoutputvalueclass (Nullwritable.class); FileinpuTformat.addinputpath (Job, InputPath);        Fileoutputformat.setoutputpath (Job, OutputPath); Return Job.waitforcompletion (True)?    0:1; } public static void Main (string[] args) throws Exception {int exitCode = Toolrunner.run (New Maxtemperatureusin        Gsecondarysort (), args);    System.exit (ExitCode); }}


4. Running


Note: This example is adapted from section 8.2.4 of "Hadoop: The Definitive Guide", third edition.

Hadoop Auxiliary Sorting sample Two

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.