Prerequisite Preparation:
1. Hadoop is installed and running normally. For Hadoop installation and configuration, please refer to: Installing and Configuring Hadoop 1.2.1 on Ubuntu
2. The integrated development environment is working normally. For IDE configuration, please refer to: Building a Hadoop Source-Reading Environment on Ubuntu
MapReduce Programming Examples:
MapReduce Programming Example (i), a detailed walkthrough of running the first MapReduce program, WordCount, in an integrated environment, with code analysis
MapReduce Programming Example (ii), calculating average student scores
MapReduce Programming Example (iii), data deduplication
MapReduce Programming Example (iv), sorting
MapReduce Programming Example (v), MapReduce implements single-table association
MapReduce Programming Example (vi), MapReduce implements multi-table Association
Sorting is relatively simple, so here is the code directly. The code is commented; questions and discussion are welcome.
The overall idea is to rely on MapReduce's own sorting of keys and to assign records to different partitions according to the key value. By default, MapReduce sorts the keys seen by each reducer: Text keys are sorted alphabetically, and IntWritable keys are sorted by numeric value.
Package com.t.hadoop;
Import java.io.IOException;
Import org.apache.hadoop.conf.Configuration;
Import Org.apache.hadoop.fs.Path;
Import org.apache.hadoop.io.IntWritable;
Import Org.apache.hadoop.io.Text;
Import Org.apache.hadoop.mapreduce.Job;
Import Org.apache.hadoop.mapreduce.Mapper;
Import Org.apache.hadoop.mapreduce.Partitioner;
Import Org.apache.hadoop.mapreduce.Reducer;
Import Org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
Import Org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
Import Org.apache.hadoop.util.GenericOptionsParser;
/** * Sort * Use MapReduce to sort key by default * Inherit Partitioner class, override getpartition to make mapper result whole ordered to corresponding partition, input to reduce to sort separately. * Use global variables to position * @author DaT dev.tao@gmail.com * */public class Sort {public static class Sortmapper extends Mapper< Object, Text, intwritable, intwritable>{//Direct output Key,value,key is a value that needs to be sorted, value arbitrarily @Override protected void map (Obje
CT key, Text value, Context context) throws IOException, Interruptedexception { System.out.println ("Key:" +key+ "" + "Value:" +value);
Context.write (New Intwritable (Integer.valueof (value.tostring ())), New intwritable (1)); }} public static class Sortreducer extends Reducer<intwritable, intwritable, intwritable, intwritable>{publi
c static Intwritable linenum = new intwritable (1);//Record the location of the data//query the value of the number of how many of the output key value. @Override protected void reduce (intwritable key, iterable<intwritable> value, Context context) throws Ioexcept
Ion, Interruptedexception {System.out.println ("LineNum:" +linenum);
for (intwritable i:value) {context.write (linenum, key);
} linenum = new intwritable (Linenum.get () +1); }} public static class Sortpartitioner extends Partitioner<intwritable, intwritable>{//assigning data based on key @Ov Erride public int getpartition (intwritable key, intwritable value, int partitionnum) {System.out.println ("Partitionn
Um: "+partitionnum"); int maxnum = 23492;//The maximum value of the input, defined by itself. MapreduceThe implementation of the own sampling algorithm and partition can be used, this example is not used.
int bound = Maxnum/partitionnum;
int keynum = Key.get ();
for (int i=0;i<partitionnum;i++) {if (keynum>bound*i&&keynum<=bound* (i+1)) {return i;
}} return-1; }} public static void Main (string[] args) throws IOException, ClassNotFoundException, interruptedexception{Co
Nfiguration conf = new Configuration ();
string[] Otherargs = new Genericoptionsparser (conf, args). Getremainingargs ();
if (otherargs.length<2) {System.out.println ("Input parameters errors");
System.exit (2);
} Job job= New Job (conf);
Job.setjarbyclass (Sort.class);
Job.setmapperclass (Sortmapper.class); Job.setpartitionerclass (Sortpartitioner.class);//This example is not allowed to combiner, you need to set Partitioner Job.setreducerclass (
Sortreducer.class);
Job.setoutputkeyclass (Intwritable.class);
Job.setoutputvalueclass (Intwritable.class);
Fileinputformat.addinputpath (Job, New Path (Otherargs[0])); Fileoutputformat.setoutputpath (Job, nEW Path (otherargs[1]));
System.exit (Job.waitforcompletion (true)? 0:1);
}
}