1. Sample Data
Station file (tab-separated: station id, station name):

011990-99999    SIHCCAJAVRI
012650-99999    TYNSET-HANSMOEN

Weather records (tab-separated: station id, observation time, temperature):

012650-99999    194903241200    111
012650-99999    194903241800    78
011990-99999    195005150700    0
011990-99999    195005151200    22
011990-99999    195005151800    -11
2. Requirements

For each weather record, look up its station id in the (much smaller) station file and attach the station name, i.e. join the weather data with the station metadata.
3. Ideas, Code
The small dataset (here, the weather-station metadata) is added to the distributed cache. In the setup phase of each Mapper, the full station table is read from the local cache copy into an in-memory map, and every incoming weather record is then joined against it by station id.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class MapJoin {

    static class RecordMapper extends Mapper<LongWritable, Text, Text, Text> {

        private Map<String, String> stationMap = new HashMap<String, String>();

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // Preprocessing: load the file to be joined into an in-memory map.
            // The newer API for retrieving cache files is context.getCacheFiles(), and
            // context.getLocalCacheFiles() is deprecated; however, getCacheFiles() returns
            // the HDFS paths, while getLocalCacheFiles() returns the local paths.
            Path[] paths = context.getLocalCacheFiles();
            // Only one file is cached here, so take the first one.
            BufferedReader reader = new BufferedReader(new FileReader(paths[0].toString()));
            String line = null;
            try {
                while ((line = reader.readLine()) != null) {
                    String[] vals = line.split("\\t");
                    if (vals.length == 2) {
                        stationMap.put(vals[0], vals[1]);
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                reader.close();
            }
            super.setup(context);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] vals = value.toString().split("\\t");
            if (vals.length == 3) {
                String stationName = stationMap.get(vals[0]); // the join
                stationName = stationName == null ? "" : stationName;
                context.write(new Text(vals[0]),
                        new Text(stationName + "\t" + vals[1] + "\t" + vals[2]));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 3) {
            System.err.println("Parameter number is wrong, please enter three parameters: <ncdc input> <station input> <output>");
            System.exit(-1);
        }
        Path inputPath = new Path(otherArgs[0]);
        Path stationPath = new Path(otherArgs[1]);
        Path outputPath = new Path(otherArgs[2]);

        Job job = Job.getInstance(conf, "MapJoin");
        job.setJarByClass(MapJoin.class);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        job.addCacheFile(stationPath.toUri()); // add a cache file; call again to add more
        job.setMapperClass(RecordMapper.class);
        job.setMapOutputKeyClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
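Since getLocalCacheFiles() is deprecated, setup() can instead be written against the non-deprecated context.getCacheFiles(), which returns the URIs the files were registered under, and read the station file through the Hadoop FileSystem. This is a minimal sketch of that variant, not part of the original code:

    // Alternative setup() using the non-deprecated context.getCacheFiles() API.
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        java.net.URI[] uris = context.getCacheFiles();
        // getCacheFiles() returns the registered (HDFS) URIs, so open the file
        // through the Hadoop FileSystem rather than a local FileReader.
        org.apache.hadoop.fs.FileSystem fs =
                org.apache.hadoop.fs.FileSystem.get(uris[0], context.getConfiguration());
        BufferedReader reader = new BufferedReader(
                new java.io.InputStreamReader(fs.open(new Path(uris[0]))));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] vals = line.split("\\t");
                if (vals.length == 2) {
                    stationMap.put(vals[0], vals[1]);
                }
            }
        } finally {
            reader.close();
        }
    }

Another common shortcut on YARN: files added to the distributed cache are symlinked into each task's working directory under their base name, so opening the cached file by that name with a plain FileReader also works.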
4. Running Results
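With the class above compiled into a jar, the job can be submitted along these lines (the jar name and paths are illustrative):

hadoop jar mapjoin.jar MapJoin /input/ncdc /input/station/stations.txt /output/mapjoin

On the sample data above, this should produce output like the following, derived by hand from the mapper logic rather than captured from an actual run (station id, station name, observation time, temperature):

011990-99999    SIHCCAJAVRI    195005150700    0
011990-99999    SIHCCAJAVRI    195005151200    22
011990-99999    SIHCCAJAVRI    195005151800    -11
012650-99999    TYNSET-HANSMOEN    194903241200    111
012650-99999    TYNSET-HANSMOEN    194903241800    78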