This example uses Hadoop version 1.1.2.
Meteorological (weather record) data is used as the input data.
1. MultipleOutputs example; the details are explained in the comments in the code.
package stationpatitioner;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MultipleOutputs example for Hadoop 1.1.2 (old {@code org.apache.hadoop.mapred} API).
 *
 * <p>Reads weather records, groups them by the station id extracted by
 * {@link NcdcRecordParser}, and writes each station's records through its own
 * MultipleOutputs collector under the "station" multi-named output.
 *
 * @author Chocolate Black
 */
public class PatitionByStationUsingMultipleOutputs extends Configured implements Tool {

  /** Name of the multi-named output; must be identical at registration and lookup. */
  private static final String NAMED_OUTPUT = "station";

  /** Input path used when no path is passed on the command line. */
  private static final String DEFAULT_INPUT_PATH = "hdfs://ubuntu:9000/sample1.txt";

  /** Output path used when no path is passed on the command line. */
  private static final String DEFAULT_OUTPUT_PATH = "hdfs://ubuntu:9000/temperature";

  /** System property naming the user the job is submitted as. */
  private static final String USER_NAME_PROPERTY = "HADOOP_USER_NAME";

  /** Job counters. */
  enum Counter {
    /** Number of input lines skipped because they could not be parsed. */
    LINESKIP
  }

  /** Emits (station id, original record) for every record that can be parsed. */
  static class StationMapper extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, Text> {

    private final NcdcRecordParser parser = new NcdcRecordParser();

    /**
     * Parses one input line and emits it keyed by its station id.
     *
     * @param key input key supplied by the input format (unused)
     * @param value the raw record line
     * @param output collector receiving (station id, record)
     * @param reporter used to count skipped lines
     * @throws IOException if the collector fails to write the pair
     */
    @Override
    public void map(LongWritable key, Text value,
        OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
      try {
        parser.parse(value);
      } catch (RuntimeException e) {
        // Malformed line (too short, non-numeric temperature, ...): count it and move on.
        reporter.getCounter(Counter.LINESKIP).increment(1);
        return;
      }
      // Kept outside the try so an I/O failure is reported instead of being
      // miscounted as a bad input line.
      output.collect(new Text(parser.getStationId()), value);
    }
  }

  /** Writes every record of a station through that station's MultipleOutputs collector. */
  static class MultipleOutputReducer extends MapReduceBase
      implements Reducer<Text, Text, NullWritable, Text> {

    private MultipleOutputs multipleOutputs;

    /** Creates the MultipleOutputs helper from the job configuration. */
    @Override
    public void configure(JobConf jobConf) {
      multipleOutputs = new MultipleOutputs(jobConf);
    }

    /**
     * Sends all values of one station to the collector obtained for that station.
     *
     * @param key the station id
     * @param values the records belonging to that station
     * @param output the job's regular collector (unused; all output goes via MultipleOutputs)
     * @param reporter progress reporter handed to MultipleOutputs
     * @throws IOException if obtaining the collector or writing a record fails
     */
    @Override
    @SuppressWarnings("unchecked") // MultipleOutputs.getCollector returns a raw OutputCollector
    public void reduce(Text key, Iterator<Text> values,
        OutputCollector<NullWritable, Text> output, Reporter reporter) throws IOException {
      // '-' is stripped from the key before it is used as the multi-name part.
      OutputCollector<NullWritable, Text> collector = multipleOutputs.getCollector(
          NAMED_OUTPUT, key.toString().replace("-", ""), reporter);
      while (values.hasNext()) {
        collector.collect(NullWritable.get(), values.next());
      }
    }

    /** Closes the MultipleOutputs helper. */
    @Override
    public void close() throws IOException {
      multipleOutputs.close();
    }
  }

  /**
   * Configures and runs the job.
   *
   * @param args optional overrides: {@code args[0]} is the input path and
   *     {@code args[1]} the output path; the built-in defaults are used for
   *     any that are missing
   * @return 0 once {@code JobClient.runJob} has returned
   * @throws Exception if job submission or execution fails
   */
  @Override
  public int run(String[] args) throws Exception {
    // When the submitting (e.g. Windows) user differs from the cluster's Linux user,
    // naming the user here is meant to avoid permission errors.
    // NOTE(review): confirm this property is honored by the Hadoop version in use.
    if (System.getProperty(USER_NAME_PROPERTY) == null) {
      System.setProperty(USER_NAME_PROPERTY, "root");
    }

    String inputPath = args != null && args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
    String outputPath = args != null && args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

    // Start from the configuration injected by ToolRunner so generic options
    // (-D, -conf, -fs, ...) are not lost.
    JobConf conf = new JobConf(getConf(), getClass());
    conf.setMapperClass(StationMapper.class);
    conf.setReducerClass(MultipleOutputReducer.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setOutputKeyClass(NullWritable.class);
    // The reducer never writes to the regular collector, so the job's own
    // output format is NullOutputFormat.
    conf.setOutputFormat(NullOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    MultipleOutputs.addMultiNamedOutput(
        conf, NAMED_OUTPUT, TextOutputFormat.class, NullWritable.class, Text.class);

    JobClient.runJob(conf);
    return 0;
  }

  /** Command-line entry point; exits with the code returned by {@link #run(String[])}. */
  public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new PatitionByStationUsingMultipleOutputs(), args);
    System.exit(exitCode);
  }
}
2. The class that parses the meteorological data records
package stationpatitioner;

import java.util.regex.Pattern;

import org.apache.hadoop.io.Text;

/**
 * Extracts the station id, year, air temperature and quality code from one
 * fixed-width weather record.
 *
 * <p>All fields are read from fixed character offsets of the record. A parser
 * instance keeps the fields of the most recently parsed record and can be
 * reused for successive records.
 */
public class NcdcRecordParser {

  /** Temperature value that marks a missing reading. */
  private static final int MISSING_TEMPERATURE = 9999;

  /** Quality codes accepted by {@link #isValidTemperature()}; compiled once. */
  private static final Pattern VALID_QUALITY = Pattern.compile("[01459]");

  private String year;
  private int airTemperature;
  private String quality;
  private String stationId;

  /**
   * Parses one record and stores its fields in this parser.
   *
   * @param record the raw record line; must be at least 93 characters long
   * @throws StringIndexOutOfBoundsException if the record is too short
   * @throws NumberFormatException if the temperature field is not an integer
   */
  public void parse(String record) {
    stationId = record.substring(0, 5);
    year = record.substring(15, 19);

    // Skip a leading plus sign: the original listing notes that parseInt rejects it.
    int temperatureStart = record.charAt(87) == '+' ? 88 : 87;
    airTemperature = Integer.parseInt(record.substring(temperatureStart, 92));

    quality = record.substring(92, 93);
  }

  /**
   * Parses a record held in a Hadoop {@link Text}.
   *
   * @param record the raw record line
   */
  public void parse(Text record) {
    parse(record.toString());
  }

  /** Returns the station id of the last parsed record. */
  public String getStationId() {
    return stationId;
  }

  /**
   * Tells whether the last parsed temperature is usable.
   *
   * @return {@code true} if the temperature is not the missing marker and the
   *     quality code is one of 0, 1, 4, 5 or 9; {@code false} otherwise,
   *     including when no record has been parsed yet
   */
  public boolean isValidTemperature() {
    return quality != null
        && airTemperature != MISSING_TEMPERATURE
        && VALID_QUALITY.matcher(quality).matches();
  }

  /** Returns the year field of the last parsed record. */
  public String getYear() {
    return year;
  }

  /** Returns the air temperature of the last parsed record, as read from the record. */
  public int getAirTemperature() {
    return airTemperature;
  }
}
MultipleOutputs practices in Hadoop