The complete MapReduce program below builds an inverted index: the mapper emits one count per word per source file, a combiner aggregates the per-file counts, and the reducer concatenates each word's postings into a single line.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class InvertedIndex {

    public static class InvertedIndexMapper extends Mapper<Object, Text, Text, Text> {
        private final Text outKey = new Text();
        private final Text outVal = new Text();

        @Override
        public void map(Object key, Text value, Context context) {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            // The input split tells us which file this line came from.
            FileSplit split = (FileSplit) context.getInputSplit();
            while (tokens.hasMoreTokens()) {
                String token = tokens.nextToken();
                try {
                    // Emit "word:filepath" -> "1", one record per occurrence.
                    outKey.set(token + ":" + split.getPath());
                    outVal.set("1");
                    context.write(outKey, outVal);
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
            System.out.println("Map phase finished...");
        }
    }

    // Caveat: this combiner rewrites the key ("word:path" -> "word"). Hadoop does
    // not guarantee how many times (if at all) a combiner runs, so strictly this
    // transformation belongs in the mapper or reducer.
    public static class InvertedIndexCombiner extends Reducer<Text, Text, Text, Text> {
        private final Text outKey = new Text();
        private final Text outVal = new Text();

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) {
            String[] keys = key.toString().split(":");
            // Sum the occurrences of this word in this file.
            int sum = 0;
            for (Text val : values) {
                sum += Integer.parseInt(val.toString());
            }
            try {
                // Re-key by the word alone; the last ":"-separated piece holds the
                // path, from which only the file name is kept.
                outKey.set(keys[0]);
                int index = keys[keys.length - 1].lastIndexOf('/');
                outVal.set(keys[keys.length - 1].substring(index + 1) + ":" + sum);
                context.write(outKey, outVal);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            System.out.println("Combine phase finished...");
        }
    }

    public static class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) {
            // Concatenate the word's postings ("file:count") into one line.
            StringBuffer sb = new StringBuffer();
            for (Text text : values) {
                sb.append(text.toString() + ",");
            }
            try {
                context.write(key, new Text(sb.toString()));
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            System.out.println("Reduce phase finished...");
        }
    }

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        @SuppressWarnings("deprecation")
        Job job = new Job(conf, "index inverted");
        job.setJarByClass(InvertedIndex.class);
        job.setMapperClass(InvertedIndexMapper.class);
        job.setCombinerClass(InvertedIndexCombiner.class);
        job.setReducerClass(InvertedIndexReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // A single reducer produces one output file holding the whole index.
        job.setNumReduceTasks(1);
        FileInputFormat.addInputPath(job, new Path("input"));
        FileOutputFormat.setOutputPath(job, new Path("output"));
        if (job.waitForCompletion(true)) {
            System.out.println("All jobs finished...");
            System.exit(0);
        }
    }
}
Hadoop Inverted Index - Distributed Jobs II
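As a quick sanity check, here is roughly what a run looks like; the jar and file names are illustrative, not taken from the original. Assume the program is packaged as InvertedIndex.jar and the working directory (HDFS or local) contains input/a.txt with the text "hello world" and input/b.txt with "hello hadoop". Since the input and output paths are hardcoded in main(), no arguments are needed, but the output directory must not already exist or FileOutputFormat will refuse to start the job:

    hadoop jar InvertedIndex.jar InvertedIndex

The single reducer then writes output/part-r-00000, where each line is a word followed by a tab and its comma-terminated posting list (the order of postings within a line is not guaranteed):

    hadoop	b.txt:1,
    hello	a.txt:1,b.txt:1,
    world	a.txt:1,

The trailing comma on each line is an artifact of the reducer appending "," after every posting; trimming the last character of the StringBuffer before writing would remove it.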