Task Requirements:
Input file format
18661629496 110
13107702446 110
1234567 120
2345678 120
987654 110
2897839274 18661629496
Output file format
18661629496|13107702446|987654|18661629496|13107702446|987654|
1234567|2345678|1234567|2345678|
186616294962897839274|2897839274|
The MapReduce program is written as follows:
import java.io.ioexception;import java.util.stringtokenizer;import org.apache.hadoop.conf.configuration;import org.apache.hadoop.fs.path;import org.apache.hadoop.io.intwritable;import org.apache.hadoop.io.longwritable;import org.apache.hadoop.io.text;import org.apache.hadoop.mapreduce.job;import org.apache.hadoop.mapreduce.mapper;import org.apache.hadoop.mapreduce.reducer;import org.apache.hadoop.mapreduce.lib.input.fileinputformat;import org.apache.hadoop.mapreduce.lib.output.fileoutputformat;public class test2 {enum counter{ lineskip,//record the line with the error} public static class map extends mapper< longwritable, text, text, text>{ public Void map (Longwritable key, text value, context context) throws IOException, InterruptedException { string line = value.tostring ();//Read source data try { //Data processing string [] linesplit = line.split (" ");//18661629496,110 String anum = lineSplit[0]; String bnum = lineSplit[1]; //output Format:110,18661629496 &nbsP; context.write (New Text (Bnum), new text (anum)); } catch (arrayindexoutofboundsexception e) { context.getcounter (Counter.lineskip). 
Increment (1);//Counter +1 On Error return; } } } public static class reduce extends reducer<text, Text, text, texT> { public void reduce (Text key, Iterable<text> values, context context) throws ioexception, interruptedexception { String valueString; string out= ""; for (text value:values) { Valuestring=value.tostring (); out+= valuestring+ "|"; } &nbsP; context.write (Key, new text (out)); } } public static void main (String[] args) throws exception { configuration conf = new configuration (); if (args.length &NBSP;!=&NBSP;2) { SYSTEM.ERR.PRINTLN ("Configure input and Output path "); System.exit (2); } //various configuration job job = new job (conf, " telephone ");//Job name Configuration //class configuration job.setjarbyclass (TesT2.class); job.setmapperclass (Map.class); job.setreducerclass (Reduce.class); //map output Format configuration job.setmapoutputkeyclass (Text.class); job.setmapoutputvalueclass (Text.class); //Job output format configuration job.setoutputkeyclass ( Text.class); job.setoutputvalueclass (Text.class); //add input/output path Fileinputformat.addinputpath (Job, new path (args[0)); fileoutputformat.setoutputpath (Job, new path (args[1)); //Exit when task is completed system.exit (Job.waitforcompletion (true) ? 0 : 1); }}
To package a mapreduce program as a jar file:
1. Right-click the project name -> Export -> Java -> JAR file
<img src="http://s3.51cto.com/wyfs02/M00/6C/DB/wKioL1VUYYzjA-ylAAF_0LIsLr0525.jpg" title="2.PNG" width="500" height="291" border="0" hspace="0" vspace="0" style="width:500px;height:291px;" alt="wKioL1VUYYzjA-ylAAF_0LIsLr0525.jpg"/>
2. Configure the jar file storage location
<img src="http://s3.51cto.com/wyfs02/M02/6C/DB/wKioL1VUYamQxFdoAAGM0hfINFo653.jpg" title="3.PNG" alt="wKioL1VUYamQxFdoAAGM0hfINFo653.jpg"/>
3. Select the main class
<img src="http://s3.51cto.com/wyfs02/M02/6C/E0/wKiom1VUYG6BLj34AAG8CzMeGWU381.jpg" title="4.PNG" alt="wKiom1VUYG6BLj34AAG8CzMeGWU381.jpg"/>
4. Run the jar file
[user@host hadoop-0.20.2]$ bin/hadoop jar /home/liuqingjie/test2.jar /user/liuqingjie/in /user/liuqingjie/out
15/05/14 01:46:47 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
15/05/14 01:46:47 INFO input.FileInputFormat: Total input paths to process : 2
15/05/14 01:46:48 INFO mapred.JobClient: Running job: job_201505132004_0005
15/05/14 01:46:49 INFO mapred.JobClient:  map 0% reduce 0%
15/05/14 01:46:57 INFO mapred.JobClient:  map 100% reduce 0%
15/05/14 01:47:09 INFO mapred.JobClient:  map 100% reduce 100%
................................................................................................
View Results
[user@host hadoop-0.20.2]$ bin/hadoop dfs -cat ./out/*
cat: Source must be a file.
18661629496|13107702446|987654|18661629496|13107702446|987654|
1234567|2345678|1234567|2345678|
186616294962897839274|2897839274|
MapReduce Programming in Practice 2 -- Inverted Index (JAR package)