For some reason I never really wanted to learn MapReduce, but now I think it is time to sit down and study it properly. Below I record the code of the classic first MapReduce example: WordCount.
1. pom.xml:
<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>RELEASE</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.8.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.8.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.8.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.8.3</version>
    </dependency>
</dependencies>
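Since log4j-core 2.x is declared above, local runs also want a logging configuration so Hadoop's console output is visible. This is a minimal sketch of my own (a log4j2.xml placed under src/main/resources), not part of the original project:

<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
    <Appenders>
        <!-- Send all log output to the console -->
        <Console name="Console" target="SYSTEM_OUT">
            <PatternLayout pattern="%d{HH:mm:ss} %-5level %logger{36} - %msg%n"/>
        </Console>
    </Appenders>
    <Loggers>
        <Root level="info">
            <AppenderRef ref="Console"/>
        </Root>
    </Loggers>
</Configuration>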
2. WordCountMapper:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    Text k = new Text();
    IntWritable v = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Obtain a line of text
        String line = value.toString();
        // Split the line on the delimiter (here a single space)
        String[] words = line.split(" ");
        // Output each word with a count of 1
        for (String word : words) {
            k.set(word);
            context.write(k, v);
        }
    }
}
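For example, given the input line "hello world hello", this mapper emits the pairs (hello, 1), (world, 1) and (hello, 1); the framework then sorts and groups them by key before the reduce phase.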
3. WordCountReducer:
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    int sum;
    IntWritable v = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum the counts for this word
        sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        // Output the total
        v.set(sum);
        context.write(key, v);
    }
}
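Continuing the example above, the reducer receives (hello, [1, 1]) and (world, [1]) and writes out (hello, 2) and (world, 1).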
4. WordCountDriver:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Obtain configuration information and create the job
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        // Set the jar loading path
        job.setJarByClass(WordCountDriver.class);

        // Set the map and reduce classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // Set the map output types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Set the final (reduce) output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
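To run the job on a cluster, package the project and submit it with the hadoop jar command. The jar name and HDFS paths below are placeholder examples, not from the original post:

mvn clean package
hadoop jar wordcount-1.0.jar WordCountDriver /user/hadoop/input /user/hadoop/output

Note that the output directory must not already exist; FileOutputFormat fails the job if it does.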
5. Parameter settings:
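The two program arguments map to args[0] (the input path) and args[1] (the output path) in WordCountDriver. When running from an IDE, the run configuration's program arguments could look like this (hypothetical local paths):

e:/input e:/output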
6. Query results:
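After the job completes, the counts are written to part files under the output directory. Assuming the hypothetical paths from the cluster run above, they can be inspected with:

hadoop fs -cat /user/hadoop/output/part-r-00000

For an input file containing the single line "hello world hello", the output would be:

hello	2
world	1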