MapReduce: an example of using JobControl to manage dependent jobs

The driver below wires two word-count jobs together with JobControl: job1 runs first, and job2, whose input path is job1's output path, starts only once job1 has completed.

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobCtrlTest {

    // the map function for the first job
    public static class Map_first extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // the reduce function of the first job
    public static class Reduce_first extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    // the map function for the second job
    public static class Map_second extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // the reduce function of the second job
    public static class Reduce_second extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    // entry point: build the two jobs, chain them with JobControl, and run
    public static void main(String[] args) throws IOException, InterruptedException {
        Configuration conf = new Configuration();

        // configuration of the first job
        Job job1 = Job.getInstance(conf, "Join1");
        job1.setJarByClass(JobCtrlTest.class);
        job1.setMapperClass(Map_first.class);
        job1.setReducerClass(Reduce_first.class);
        job1.setMapOutputKeyClass(Text.class);          // output key type of the map phase
        job1.setMapOutputValueClass(IntWritable.class); // output value type of the map phase
        job1.setOutputKeyClass(Text.class);             // output key type of the reduce phase
        job1.setOutputValueClass(IntWritable.class);    // output value type of the reduce phase

        // wrap job1 in a control container
        ControlledJob ctrlJob1 = new ControlledJob(conf);
        ctrlJob1.setJob(job1);

        // job1 input and output file paths
        FileInputFormat.addInputPath(job1, new Path(args[0]));
        FileOutputFormat.setOutputPath(job1, new Path(args[1]));

        // configuration of the second job
        Job job2 = Job.getInstance(conf, "Join2");
        job2.setJarByClass(JobCtrlTest.class);
        job2.setMapperClass(Map_second.class);
        job2.setReducerClass(Reduce_second.class);
        job2.setMapOutputKeyClass(Text.class);          // output key type of the map phase
        job2.setMapOutputValueClass(IntWritable.class); // output value type of the map phase
        job2.setOutputKeyClass(Text.class);             // output key type of the reduce phase
        job2.setOutputValueClass(IntWritable.class);    // output value type of the reduce phase

        // wrap job2 in a control container
        ControlledJob ctrlJob2 = new ControlledJob(conf);
        ctrlJob2.setJob(job2);

        // declare the dependency between the two jobs:
        // job2 starts only after job1 has completed successfully
        ctrlJob2.addDependingJob(ctrlJob1);

        // job2's input path is job1's output path, so it takes args[1];
        // its own output path must not exist yet, hence the separate args[2]
        FileInputFormat.addInputPath(job2, new Path(args[1]));
        FileOutputFormat.setOutputPath(job2, new Path(args[2]));

        // master control container that drives the two sub-jobs above
        JobControl jobCtrl = new JobControl("myctrl");
        jobCtrl.addJob(ctrlJob1);
        jobCtrl.addJob(ctrlJob2);

        // JobControl implements Runnable, so it must be started on its own thread
        Thread t = new Thread(jobCtrl);
        t.start();

        // poll until every job in the chain has finished
        while (!jobCtrl.allFinished()) {
            Thread.sleep(500); // avoid busy-waiting
        }

        // print the information of the jobs that completed successfully
        System.out.println(jobCtrl.getSuccessfulJobList());
        jobCtrl.stop();
    }
}
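The loop above only reports successes: if job1 fails, JobControl marks job2 as failed too, and the program ends without saying why. Below is a minimal sketch of a helper that also surfaces failures through JobControl's getFailedJobList(); the JobCtrlWait class and the waitAndReport name are illustrative, not part of the example above.

import java.util.List;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

// Hypothetical helper, not from the original example.
public class JobCtrlWait {

    // Runs the JobControl on a background thread, polls until all jobs have
    // finished, prints any failed jobs, and returns true only on full success.
    public static boolean waitAndReport(JobControl jobCtrl) throws InterruptedException {
        Thread t = new Thread(jobCtrl);
        t.setDaemon(true); // let the JVM exit once this method returns
        t.start();

        while (!jobCtrl.allFinished()) {
            Thread.sleep(500); // poll instead of spinning
        }

        // jobs that failed, plus jobs skipped because a dependency failed
        List<ControlledJob> failed = jobCtrl.getFailedJobList();
        for (ControlledJob cj : failed) {
            System.err.println("Failed: " + cj.getJobName() + " - " + cj.getMessage());
        }

        jobCtrl.stop();
        return failed.isEmpty();
    }
}

With this in place, the thread-and-loop block at the end of main could be replaced by System.exit(JobCtrlWait.waitAndReport(jobCtrl) ? 0 : 1); so that a scheduler invoking the driver sees a non-zero exit code whenever any stage of the chain fails.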