1. Requirements
Find the highest temperature recorded in each year.
2. Sample Data
Each record is one line of the form year<TAB>temperature:
1995	10
1996	11
1995	16
1995	22
1996	26
1995	3
1996	7
1996	10
1996	20
1996	33
1995	21
1996	9
1995	31
1995	-13
1995	22
1997	-2
1997	28
1997	15
1995	8
3. Approach and Code
The records are sorted by year and, within each year, by temperature in descending order. All records of the same year are then sent to the same reducer group, so the first record of each group holds the highest temperature of that year. The key points for implementing this scenario are:
A. Define a composite key that contains both the natural key (year) and the natural value (temperature).
B. Sort the records by the composite key.
C. Partition and group the composite keys by the natural key (year) only.
Import Org.apache.hadoop.io.intwritable;import Org.apache.hadoop.io.writablecomparable;import java.io.DataInput; Import java.io.dataoutput;import java.io.ioexception;/** * Key combination, this example is used for auxiliary sorting, including year and temperature. */public class Intpair implements writablecomparable<intpair> {private intwritable first; Private intwritable second; Public Intpair () {This.first = new intwritable (); This.second = new Intwritable (); If the above two lines are commented out, an exception occurs when using Java.lang.NullPointerException at Intpair.readfields} public intpair (int first, int second) { Set (new Intwritable (first), New Intwritable (second)); } public Intpair (intwritable first, intwritable second) {Set (first, second); } public void set (intwritable first, intwritable second) {this.first = first; This.second = second; } public intwritable GetFirst () {return first; } public intwritable Getsecond () {return second; ' public void write ' (DataOutput out) throwsIOException {first.write (out); Second.write (out); } public void ReadFields (Datainput in) throws IOException {First.readfields (in); Second.readfields (in); } @Override public int hashcode () {return First.hashcode () * 163 + second.hashcode (); } @Override public boolean equals (Object obj) {if (obj instanceof intpair) {intpair IP = (intpai R) obj; return First.get () = = Ip.first.get () && second.get () = = Ip.second.get (); } return false; } @Override Public String toString () {return first + "\ T" + second; } public int compareTo (Intpair o) {int cmp = First.compareto (O.first); if (cmp = = 0) {cmp = Second.compareto (O.second); } return CMP; }}
Import Org.apache.hadoop.conf.configuration;import Org.apache.hadoop.conf.configured;import Org.apache.hadoop.fs.path;import Org.apache.hadoop.io.intwritable;import org.apache.hadoop.io.LongWritable; Import Org.apache.hadoop.io.nullwritable;import Org.apache.hadoop.io.text;import Org.apache.hadoop.io.writablecomparable;import Org.apache.hadoop.io.writablecomparator;import Org.apache.hadoop.io.writableutils;import Org.apache.hadoop.mapreduce.job;import Org.apache.hadoop.mapreduce.mapper;import Org.apache.hadoop.mapreduce.partitioner;import Org.apache.hadoop.mapreduce.reducer;import Org.apache.hadoop.mapreduce.lib.input.fileinputformat;import Org.apache.hadoop.mapreduce.lib.output.fileoutputformat;import Org.apache.hadoop.util.GenericOptionsParser; Import Org.apache.hadoop.util.tool;import Org.apache.hadoop.util.toolrunner;import Java.io.ioexception;public Class Maxtemperatureusingsecondarysort extends configured implements Tool {static class Maxtemperaturemapper extends M Apper<longWritable, text, Intpair, nullwritable> {@Override protected void map (longwritable key, text value, Conte XT context) throws IOException, Interruptedexception {string[] val = value.tostring (). Split ("\\t"); if (val.length = = 2) {context.write (New Intpair (Integer.parseint (val[0]), Integer.parseint (val[1)), Nul Lwritable.get ()); }}} static class Maxtemperaturereducer extends Reducer<intpair, nullwritable, Intpair, nullwritable> {@Override protected void reduce (Intpair key, iterable<nullwritable> values, context context) throws IOException, interruptedexception {context.write (key, Nullwritable.get ());//output only the first line}}//Only according to Fir St Partition public static class Firstpartitioner extends Partitioner<intpair, nullwritable> {@Override p ublic int getpartition (intpair key, nullwritable value, int numpartitions) {return (Key.getfirst (). 
hashcodE () & integer.max_value)% Numpartitions; }}//Only based on first group public static class Groupcomparator extends Writablecomparator {private static final I Ntwritable.comparator int_comparator = new Intwritable.comparator (); Protected Groupcomparator () {super (Intpair.class, true); } @Override public int compare (byte[] b1, int s1, int L1, byte[] b2, int s2, int l2) {try { int firstL1 = Writableutils.decodevintsize (b1[s1]) + Readvint (B1, S1); int firstL2 = Writableutils.decodevintsize (B2[s2]) + readvint (B2, S2); Return Int_comparator.compare (B1, S1, firstL1, B2, S2, firstL2); } catch (IOException e) {throw new IllegalArgumentException (e); }} @Override public int compare (writablecomparable A, writablecomparable b) {if (a Insta nceof Intpair && b instanceof Intpair) {return (IntpaiR) a). GetFirst (). CompareTo (((Intpair) b). GetFirst ()); } return Super.compare (A, b); }}//Sort public static class Keycomparator extends Writablecomparator {protected keycomparator () according to key combinations { Super (Intpair.class, true); } @Override public int compare (writablecomparable A, writablecomparable b) {if (a instanceof int Pair && b instanceof intpair) {Intpair ip1 = (Intpair) A; Intpair ip2 = (Intpair) b; int cmp = Ip1.getfirst (). CompareTo (Ip2.getfirst ()); Ascending (year) if (cmp! = 0) {return CMP; } return-ip1.getsecond (). CompareTo (Ip2.getsecond ()); Descending (temperature)} return Super.compare (A, b); }} public int run (string[] args) throws Exception {configuration conf = new Configuration (); string[] Otherargs = new Genericoptionsparser (conf, args). GetremaininGargs (); if (otherargs.length! 
= 2) {System.err.println ("Parameter" is wrong, please enter the PARAMETERS:<INP Ut> <output> "); System.exit (-1); } Path InputPath = new Path (otherargs[0]); Path OutputPath = new Path (otherargs[1]); Conf.set ("Fs.defaultfs", "hdfs://vmnode.zhch:9000"); Job Job = job.getinstance (conf, "Maxtemperatureusingsecondarysort"); Job.setjar ("F:/workspace/assistranking2/target/assistranking2-1.0-snapshot.jar"); Job.setmapperclass (Maxtemperaturemapper.class); Job.setpartitionerclass (Firstpartitioner.class); Job.setsortcomparatorclass (Keycomparator.class); By default Job.setgroupingcomparatorclass (Groupcomparator.class) is sorted according to the CompareTo function of Key; Job.setreducerclass (Maxtemperaturereducer.class); Job.setmapoutputkeyclass (Intpair.class); Job.setoutputkeyclass (Intpair.class); Job.setoutputvalueclass (Nullwritable.class); FileinpuTformat.addinputpath (Job, InputPath); Fileoutputformat.setoutputpath (Job, OutputPath); Return Job.waitforcompletion (True)? 0:1; } public static void Main (string[] args) throws Exception {int exitCode = Toolrunner.run (New Maxtemperatureusin Gsecondarysort (), args); System.exit (ExitCode); }}
4. Running
Note: This example comes from section 8.2.4 of "Hadoop: The Definitive Guide", 3rd edition.
Hadoop Secondary Sort: Example Two