Running WordCount on Hadoop 2.6.0 in Pseudo-Distributed Mode
1. Start Hadoop:
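A minimal sketch of the startup commands, assuming Hadoop 2.6.0 is installed under /usr/local/hadoop (the install path is an assumption; the later steps are also run from this directory):

cd /usr/local/hadoop   # assumed install directory
sbin/start-dfs.sh      # start the NameNode and DataNode daemons
sbin/start-yarn.sh     # start the ResourceManager and NodeManager daemons
jps                    # verify that the daemons are running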
2. Create a folder on the local hard disk, go into that directory, and create two txt files to serve as input; then view the files to confirm the result. A sketch of the commands follows.
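A minimal sketch, assuming the local directory ~/wordcount_input and the sample file contents (both the directory name and the contents are assumptions):

mkdir ~/wordcount_input           # create the folder on the local hard disk
cd ~/wordcount_input
echo "Hello Hadoop" > file1.txt   # create two txt files as sample input
echo "Hello World" > file2.txt
ls                                # view the created files
cat file1.txt file2.txt           # view their contents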
3. Create the input directory input on HDFS, upload the files created on the local hard disk into it, and view the results. A sketch of the commands follows.
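A minimal sketch of the HDFS commands, run from the Hadoop install directory (the /input path and the local directory name are assumptions):

bin/hdfs dfs -mkdir /input                        # create the input directory on HDFS
bin/hdfs dfs -put ~/wordcount_input/*.txt /input  # upload the local files
bin/hdfs dfs -ls /input                           # view the results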
4. Locate the jar package that contains the wordcount example shipped with Hadoop.
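In Hadoop 2.6.0 the bundled examples jar, which includes wordcount, sits under share/hadoop/mapreduce relative to the install directory:

ls share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar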
5. Run wordcount. While the job executes, the console prints its progress (the map and reduce completion percentages); once it finishes, view the running result on HDFS. A sketch of the complete run follows.
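A minimal sketch of the run, using the /input directory created above and an /output directory that must not yet exist (both paths are assumptions):

bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /input /output
bin/hdfs dfs -ls /output                 # a _SUCCESS marker plus the part file(s)
bin/hdfs dfs -cat /output/part-r-00000   # view the word counts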
Code:
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Description: WordCount explained by Felix
 * @author Hadoop Dev Group
 */
public class WordCount {

    /**
     * MapReduceBase: base class that implements the Mapper and Reducer
     * interfaces (its methods are empty implementations).
     * Mapper interface: maps input key/value pairs to intermediate pairs.
     * WritableComparable: classes implementing it can be compared with one
     * another; every class used as a key should implement this interface.
     * Reporter can be used to report the running progress of the whole
     * application; it is not used in this example.
     */
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {

        /**
         * LongWritable, IntWritable and Text are classes implemented in
         * Hadoop to wrap Java data types. They implement the
         * WritableComparable interface and can be serialized to ease data
         * exchange in a distributed environment; you can regard them as
         * substitutes for long, int and String respectively.
         */
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        /**
         * map method of the Mapper interface:
         * void map(K1 key, V1 value, OutputCollector<K2, V2> output, Reporter reporter)
         * Maps a single input k/v pair to intermediate k/v pairs. The output
         * pairs do not need to be of the same type as the input pair; an
         * input pair can be mapped to zero or more output pairs.
         * OutputCollector: collects the <k, v> pairs emitted by Mapper and
         * Reducer; its collect(k, v) method adds a (k, v) pair to the output.
         */
        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> output,
                        Reporter reporter) throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {

        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> output,
                           Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        /**
         * JobConf: the map/reduce job configuration class; it describes the
         * job being executed to the Hadoop framework. Constructors include
         * JobConf(), JobConf(Class exampleClass), JobConf(Configuration conf),
         * and so on.
         */
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");                  // set a user-defined job name

        conf.setOutputKeyClass(Text.class);            // set the key class for the job output
        conf.setOutputValueClass(IntWritable.class);   // set the value class for the job output

        conf.setMapperClass(Map.class);                // set the Mapper class for the job
        conf.setCombinerClass(Reduce.class);           // set the Combiner class for the job
        conf.setReducerClass(Reduce.class);            // set the Reducer class for the job

        conf.setInputFormat(TextInputFormat.class);    // set the InputFormat implementation class
        conf.setOutputFormat(TextOutputFormat.class);  // set the OutputFormat implementation class

        /**
         * InputFormat describes the input of a map-reduce job.
         * setInputPaths(): sets the path array used as the job's input list.
         * setOutputPath(): sets the path used as the job's output.
         */
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);                        // run the job
    }
}
Finally, you can package this code into a jar yourself and use that jar to run wordcount.
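A sketch of compiling, packaging, and submitting the code above, assuming the source is saved as WordCount.java in the Hadoop install directory (the file names and the /output2 path are assumptions):

mkdir classes
javac -classpath "$(bin/hadoop classpath)" -d classes WordCount.java
jar cvf wordcount.jar -C classes .
bin/hadoop jar wordcount.jar WordCount /input /output2   # the output directory must not exist yet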