Java MapReduce Programming: Implementing WordCount
1. Writing mapper
package net.toocruel.yarn.mapreduce.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * Mapper for WordCount: splits each input line into whitespace-separated
 * tokens and emits a (word, 1) pair for every token.
 *
 * @author Song Tong
 * @version 1.0
 * @createTime 2017/4/12 14:15
 */
public class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {

    // Every occurrence of a word counts as 1; words are emitted one at a
    // time, so the count for each emission is always 1.
    private final static IntWritable one = new IntWritable(1);

    // Reusable holder for the current word extracted from the line
    // (reused across calls to avoid allocating a Text per token).
    private Text word = new Text();

    /**
     * Tokenizes one input line and writes a (word, 1) pair per token.
     *
     * @param key     input key (byte offset of the line; unused)
     * @param value   one line of input text
     * @param context Hadoop context used to emit output pairs
     * @throws IOException          on write failure
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // StringTokenizer splits on whitespace by default.
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }
}
2. Writing Reducer
package net.toocruel.yarn.mapreduce.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer for WordCount: sums all partial counts for each word and emits
 * a single (word, total) pair.
 *
 * @author Song Tong
 * @version 1.0
 * @createTime 2017/4/12 14:16
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reusable holder for the total frequency of the current word.
    private IntWritable result = new IntWritable();

    /**
     * Sums the counts for one word and writes the total.
     *
     * @param key     the word
     * @param values  all partial counts emitted by the mappers for this word
     * @param context Hadoop context used to emit the output pair
     * @throws IOException          on write failure
     * @throws InterruptedException if the task is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate the word's total frequency.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        result.set(sum);
        // Write the (word, total) output pair.
        context.write(key, result);
    }
}
3. Writing the Job submitter
Package Net.toocruel.yarn.mapreduce.wordcount;import Org.apache.hadoop.conf.configuration;import Org.apache.hadoop.fs.filesystem;import Org.apache.hadoop.fs.path;import Org.apache.hadoop.io.intwritable;import Org.apache.hadoop.io.text;import Org.apache.hadoop.mapreduce.job;import Org.apache.hadoop.mapreduce.lib.input.fileinputformat;import org.apache.hadoop.mapreduce.lib.output.fileoutputformat;/** * WordCount Submitter Package executes Hadoop jar on any machine in Hadoop cluster Xxx.jar Net.toocruel.yarn.mapreduce.wordcount wordcount * @author: Song Tong * @version: 1.0 * @createTime: 2017/4/12 14:15 * @descri Ption: */public class WordCount {public static void main (string[] args) throws Exception {//Initialize configuration Confi Guration conf = new Configuration (); System.setproperty ("Hadoop_user_name", "HDFs"); Create a job Submitter object Job Job = job.getinstance (conf); Job.setjobname ("WordCount"); Job.setjarbyclass (Wordcount.class); Set Map,reduce processing Job.setmapperclass (wordcountmapper.cLASS); Job.setreducerclass (Wordcountreducer.class); Set output format processing class Job.setoutputkeyclass (Text.class); Job.setoutputvalueclass (Intwritable.class); Set the input and output path Filesystem.get (new Configuration ()). Delete (New path ("/sty/wordcount/output")); First empty the Output directory Fileinputformat.addinputpath (Job, New Path ("Hdfs://cloudera:8020/sty/wordcount/input")); Fileoutputformat.setoutputpath (Job, New Path ("Hdfs://cloudera:8020/sty/wordcount/output")); Boolean res = Job.waitforcompletion (true); SYSTEM.OUT.PRINTLN ("Task Name:" +job.getjobname ()); SYSTEM.OUT.PRINTLN ("Task succeeded:" + (res? ") Yes ":" No ")); System.exit (res?0:1); }}123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
4. Packaging
I package with Maven, but you can also use Eclipse's "Export JAR" or IntelliJ IDEA's "Build Artifacts".
Hadoopsimple-1.0.jar
5. Running
Run on Yarn's ResourceManager or NodeManager node machine
hadoop jar hadoopSimple-1.0.jar net.toocruel.yarn.mapreduce.wordcount.WordCount
6. Running Results
[[email protected] ~]# Hadoop jar Hadoopsimple-1.0.jar NET.TOOCRUEL.YARN.MAPREDUCE.WORDCOUNT.WORDCOUNT17/04/13 12:57:13 INFO Client. Rmproxy:connecting to ResourceManager at CLOUDERA/192.168.254.203:803217/04/13 12:57:14 WARN MapReduce. Jobresourceuploader:hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with Toolrunner to remedy THIS.17/04/13 12:57:18 INFO input. Fileinputformat:total input paths to PROCESS:117/04/13 12:57:18 INFO MapReduce. Jobsubmitter:number of SPLITS:117/04/13 12:57:18 INFO MapReduce. Jobsubmitter:submitting tokens for JOB:JOB_1491999347093_001217/04/13 12:57:19 INFO impl. yarnclientimpl:submitted application APPLICATION_1491999347093_001217/04/13 12:57:19 INFO mapreduce. Job:the URL to track the JOB:HTTP://CLOUDERA:8088/PROXY/APPLICATION_1491999347093_0012/17/04/13 12:57:19 INFO Mapreduce. Job:running JOB:JOB_1491999347093_001217/04/13 12:57:32 INFO mapreduce. Job:job job_1491999347093_0012 RunNing in Uber MODE:FALSE17/04/13 12:57:32 INFO MapReduce. Job:map 0% reduce 0%17/04/13 12:57:39 INFO mapreduce. Job:map 100% reduce 0%17/04/13 12:57:47 INFO mapreduce. Job:map 100% reduce 33%17/04/13 12:57:49 INFO mapreduce. Job:map 100% reduce 67%17/04/13 12:57:53 INFO mapreduce. Job:map 100% reduce 100%17/04/13 12:57:54 INFO mapreduce. Job:job job_1491999347093_0012 completed SUCCESSFULLY17/04/13 12:57:54 INFO mapreduce. 
Job:counters:49file System Countersfile:number of bytes Read=162file:number of bytes Written=497579file:number of Rea D Operations=0file:number of large read operations=0file:number of write operations=0hdfs:number of bytes Read=233hdfs: Number of bytes Written=62hdfs:number of read Operations=12hdfs:number of large read Operations=0hdfs:number of write Operations=6job counterslaunched map tasks=1launched reduce tasks=3data-local map tasks=1total time spent by all maps in O Ccupied Slots (ms) =5167total time spent by all reduces in occupied slots (ms) =18520Total time spent by all maps tasks (MS) =5167total time spent by all reduce tasks (ms) =18520total Vcore-seconds taken by All maps Tasks=5167total Vcore-seconds taken by all reduce tasks=18520total megabyte-seconds taken by all map tasks=5291008 Total Megabyte-seconds taken by all reduce tasks=18964480map-reduce frameworkmap input Records=19map output Records=18map Output Bytes=193map output materialized bytes=150input split bytes=111combine input records=0combine output records=0red UCE input Groups=7reduce Shuffle bytes=150reduce input records=18reduce output records=7spilled records=36shuffled Maps = 3Failed shuffles=0merged Map OUTPUTS=3GC time Elapsed (ms) =320CPU time spent (ms) =4280physical memory (bytes) snapshot=805 298176Virtual memory (bytes) Snapshot=11053834240total committed heap usage (bytes) =529731584shuffle errorsbad_id= 0connection=0io_error=0wrong_length=0wrong_map=0wrong_reduce=0file Input Format countersbytes Read=122File Output Format countersbytes written=62 Task Name: WordcoUNT Mission Success: Yes
Java MapReduce programming: implementing WordCount