Running WordCount on Hadoop 2.6.0 in Pseudo-Distributed Mode
1. Start Hadoop:
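A minimal sketch, assuming Hadoop is installed under /usr/local/hadoop (adjust the path to your installation):

    cd /usr/local/hadoop
    sbin/start-dfs.sh    # starts the NameNode, DataNode and SecondaryNameNode
    sbin/start-yarn.sh   # starts the ResourceManager and NodeManager
    jps                  # verify that the daemons are running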
2. Create a local working directory:
This directory is created on the local hard drive, not on HDFS.
Enter the directory, create two txt files, and then list the directory to verify that the files were created.
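A sketch of this step; the directory name ~/wordcount_input and the file contents below are placeholders, not taken from the original post:

    mkdir ~/wordcount_input                          # local directory, not on HDFS
    cd ~/wordcount_input
    echo "Hello World Hello Hadoop" > file1.txt      # sample contents; use any text
    echo "Hello Hadoop Bye Hadoop"  > file2.txt
    ls                                               # verify the two files exist
    cat file1.txt file2.txt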
3. Create an input directory on HDFS:
Transfer the files created on the local hard drive into that input directory, then list it to view the result.
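A sketch of the HDFS commands, run from the Hadoop install directory (the /input path and the local directory name are assumptions; adjust them to your layout):

    bin/hdfs dfs -mkdir /input                        # create the input directory on HDFS
    bin/hdfs dfs -put ~/wordcount_input/*.txt /input  # upload the local files
    bin/hdfs dfs -ls /input                           # view the result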
4. Locate the jar package that ships with Hadoop and contains the WordCount example:
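In the Hadoop 2.6.0 binary distribution the examples jar normally lives under share/hadoop/mapreduce; verify the exact file name on your system:

    ls share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar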
5. Run WordCount:
While the job runs, Hadoop prints the map and reduce progress to the console.
When the job finishes, view the results by reading the output files on HDFS.
The complete sequence of commands for this step is sketched below.
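A sketch of the run, reusing the /input path from above (the output directory must not exist before the job starts; Hadoop creates it):

    bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar \
        wordcount /input /output
    bin/hdfs dfs -cat /output/part-r-00000   # view the word counts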
Attached code:
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Description: WordCount explained by Felix
 * @author Hadoop Dev Group
 */
public class WordCount {

    /**
     * MapReduceBase: base class that implements the Mapper and Reducer
     * interfaces (its methods merely satisfy the interfaces and do nothing).
     * WritableComparable: classes implementing WritableComparable can be
     * compared to each other; every class used as a key should implement it.
     * Reporter can be used to report the progress of the whole application;
     * it is not used in this example.
     */
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        /**
         * LongWritable, IntWritable and Text are Hadoop classes that wrap
         * Java data types and implement the WritableComparable interface,
         * so they can be serialized for data exchange in a distributed
         * environment; treat them as replacements for long, int and String.
         */
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        /**
         * The map method of the Mapper interface:
         * void map(K1 key, V1 value, OutputCollector<K2,V2> output, Reporter reporter)
         * Maps a single input k/v pair to an intermediate k/v pair. The output
         * pair is not required to be of the same type as the input pair; an
         * input pair may map to zero or more output pairs.
         * OutputCollector: collects the <k,v> pairs output by mappers and
         * reducers; its collect(k, v) method adds one (k,v) pair to the output.
         */
        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> output,
                        Reporter reporter) throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> output,
                           Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        /**
         * JobConf: the map/reduce job configuration class; it describes to
         * the Hadoop framework the work a map-reduce job performs.
         * Constructors: JobConf(), JobConf(Class exampleClass),
         * JobConf(Configuration conf), etc.
         */
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("WordCount");                 // set a user-defined job name

        conf.setOutputKeyClass(Text.class);           // set the key class for the job's output data
        conf.setOutputValueClass(IntWritable.class);  // set the value class for the job's output data

        conf.setMapperClass(Map.class);               // set the Mapper class for the job
        conf.setCombinerClass(Reduce.class);          // set the Combiner class for the job
        conf.setReducerClass(Reduce.class);           // set the Reducer class for the job

        conf.setInputFormat(TextInputFormat.class);   // set the InputFormat implementation for the job
        conf.setOutputFormat(TextOutputFormat.class); // set the OutputFormat implementation for the job

        /**
         * InputFormat describes the input definition of a map-reduce job.
         * setInputPaths(): sets an array of Paths as the job's input list.
         * setOutputPath(): sets the Path used as the job's output directory.
         */
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);  // run the job
    }
}
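If you want to compile and run the attached code yourself rather than use the bundled examples jar, a minimal sketch (the file and jar names are assumptions; /output2 must not already exist):

    mkdir classes
    javac -cp $(bin/hadoop classpath) -d classes WordCount.java  # compile against the Hadoop classpath
    jar cf wordcount.jar -C classes .                            # package the classes into a jar
    bin/hadoop jar wordcount.jar WordCount /input /output2       # run the job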
This completes running WordCount from a jar.