MapReduce matrix multiplication: implementation code
I previously wrote an article on the algorithmic idea behind matrix multiplication in MapReduce. To give you a more intuitive sense of how the program executes, this post provides the full implementation code for reference.
Programming Environment:
- Java 1.7.0_40
- Eclipse Kepler
- Windows 7 x64
- Ubuntu 12.04 LTS
- Hadoop 2.2.0
- VMware 9.0.0 build-812388
Input data:
Matrix A storage path: hdfs://singlehadoop:8020/wordspace/dataguru/hadoopdev/week09/matrixmultiply/matrixA/matrixa
Matrix A contents:
3 4 6
4 0 8
Matrix B storage path: hdfs://singlehadoop:8020/wordspace/dataguru/hadoopdev/week09/matrixmultiply/matrixB/matrixb
Matrix B contents:
2 3
3 0
4 1
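For reference, the expected product C = A × B is a 2 × 2 matrix; a successful run should write the following key/value pairs (row,col followed by the element) to matrixC:
0,0    42
0,1    15
1,0    40
1,1    20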
Implementation Code:
There are three classes in total:
- Driver class: MMDriver
- Map class: MMMapper
- Reduce class: MMReducer
You can merge them into a single class if that suits your style.
MMDriver.java

package dataguru.matrixmultiply;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MMDriver {

    public static void main(String[] args) throws Exception {

        // set configuration
        Configuration conf = new Configuration();

        // create job
        Job job = new Job(conf, "MatrixMultiply");
        job.setJarByClass(dataguru.matrixmultiply.MMDriver.class);

        // specify Mapper & Reducer
        job.setMapperClass(dataguru.matrixmultiply.MMMapper.class);
        job.setReducerClass(dataguru.matrixmultiply.MMReducer.class);

        // specify output types of mapper and reducer
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // specify input and output directories
        Path inPathA = new Path("hdfs://singlehadoop:8020/wordspace/dataguru/hadoopdev/week09/matrixmultiply/matrixA");
        Path inPathB = new Path("hdfs://singlehadoop:8020/wordspace/dataguru/hadoopdev/week09/matrixmultiply/matrixB");
        Path outPath = new Path("hdfs://singlehadoop:8020/wordspace/dataguru/hadoopdev/week09/matrixmultiply/matrixC");
        FileInputFormat.addInputPath(job, inPathA);
        FileInputFormat.addInputPath(job, inPathB);
        FileOutputFormat.setOutputPath(job, outPath);

        // delete the output directory if it already exists
        try {
            FileSystem hdfs = outPath.getFileSystem(conf);
            if (hdfs.exists(outPath))
                hdfs.delete(outPath, true); // recursive delete
            // note: do not close the FileSystem here; the instance is cached
            // and the job still needs it to write its output
        } catch (Exception e) {
            e.printStackTrace();
            return;
        }

        // run the job
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
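With the three classes compiled and packaged into a jar, the job can be submitted with the standard hadoop jar command. A minimal sketch, assuming the jar is named matrixmultiply.jar (the jar name is hypothetical; the input and output paths are hard-coded in MMDriver above):

# hypothetical jar name; MMDriver hard-codes the HDFS input/output paths
hadoop jar matrixmultiply.jar dataguru.matrixmultiply.MMDriver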
MMMapper.java

package dataguru.matrixmultiply;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class MMMapper extends Mapper<Object, Text, Text, Text> {

    private String tag;          // which matrix the current split belongs to
    private int crow = 2;        // number of rows in matrix A
    private int ccol = 2;        // number of columns in matrix B
    // note: static row counters assume each matrix file is read as a single
    // split, with its lines processed in order by one mapper
    private static int arow = 0; // current row of matrix A
    private static int brow = 0; // current row of matrix B

    @Override
    protected void setup(Context context) throws IOException,
            InterruptedException {
        // get the input path of this split and use its parent
        // directory name (matrixA or matrixB) as the tag
        FileSplit fs = (FileSplit) context.getInputSplit();
        tag = fs.getPath().getParent().getName();
    }

    /**
     * Input data consist of two matrix files
     */
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer str = new StringTokenizer(value.toString());
        if ("matrixA".equals(tag)) {            // left matrix, output key: x,y
            int col = 0;
            while (str.hasMoreTokens()) {
                String item = str.nextToken();  // current element at (arow, col)
                for (int i = 0; i < ccol; i++) {
                    Text outkey = new Text(arow + "," + i);
                    Text outvalue = new Text("a," + col + "," + item);
                    context.write(outkey, outvalue);
                    System.out.println(outkey + "|" + outvalue);
                }
                col++;
            }
            arow++;
        } else if ("matrixB".equals(tag)) {
            int col = 0;
            while (str.hasMoreTokens()) {
                String item = str.nextToken();  // current element at (brow, col)
                for (int i = 0; i < crow; i++) {
                    Text outkey = new Text(i + "," + col);
                    Text outvalue = new Text("b," + brow + "," + item);
                    context.write(outkey, outvalue);
                    System.out.println(outkey + "|" + outvalue);
                }
                col++;
            }
            brow++;
        }
    }
}
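To make the mapper's behavior concrete, here is what it emits for the first line of matrixa (3 4 6), in the outkey|outvalue format printed above. Each element of row 0 of matrix A is replicated once for every column of matrix B (ccol = 2), tagged with its column index in A:

0,0|a,0,3
0,1|a,0,3
0,0|a,1,4
0,1|a,1,4
0,0|a,2,6
0,1|a,2,6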
MMReducer.java

package dataguru.matrixmultiply;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MMReducer extends Reducer<Text, Text, Text, Text> {

    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {

        Map<String, String> matrixa = new HashMap<String, String>();
        Map<String, String> matrixb = new HashMap<String, String>();

        for (Text val : values) {  // values example: "a,0,4" or "b,0,2"
            StringTokenizer str = new StringTokenizer(val.toString(), ",");
            String sourceMatrix = str.nextToken();
            if ("a".equals(sourceMatrix)) {
                matrixa.put(str.nextToken(), str.nextToken());  // (0, 4)
            }
            if ("b".equals(sourceMatrix)) {
                matrixb.put(str.nextToken(), str.nextToken());  // (0, 2)
            }
        }

        // pair up the two maps on the shared middle index and accumulate
        int result = 0;
        Iterator<String> iter = matrixa.keySet().iterator();
        while (iter.hasNext()) {
            String mapkey = iter.next();
            result += Integer.parseInt(matrixa.get(mapkey))
                    * Integer.parseInt(matrixb.get(mapkey));
        }

        context.write(key, new Text(String.valueOf(result)));
    }
}
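As a worked example, the reduce call for key 0,0 receives a,0,3, a,1,4 and a,2,6 from matrix A, and b,0,2, b,1,3 and b,2,4 from matrix B. Matching the two maps on the shared middle index gives 3*2 + 4*3 + 6*4 = 42, which is element (0,0) of the result matrix C.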