1 Importjava.io.IOException;2 Importorg.apache.hadoop.conf.Configuration;3 ImportOrg.apache.hadoop.io.*;4 ImportOrg.apache.hadoop.mapreduce.Job;5 ImportOrg.apache.hadoop.mapreduce.Mapper;6 ImportOrg.apache.hadoop.mapreduce.Reducer;7 ImportOrg.apache.hadoop.mapreduce.lib.input.FileInputFormat;8 ImportOrg.apache.hadoop.mapreduce.lib.input.FileSplit;9 ImportOrg.apache.hadoop.mapreduce.lib.output.FileOutputFormat;Ten ImportOrg.apache.hadoop.fs.Path; One Public classMatrix { A Public Static intRowm=0; - Public Static intColumnm=0; - Public Static intColumnn=0; the Public Static classMymapperextendsMapper<object, text, text, text>{ - PrivateText map_key=NewText (); - PrivateText map_value=NewText (); - Public voidSetup (Context context) { +Configuration conf=context.getconfiguration (); -Columnn=integer.parseint (Conf.get ("ColumnN")); +Rowm=integer.parseint (Conf.get ("ROWM")); A } at Public voidMap (Object key,text value,context Context)throwsIOException, interruptedexception{ -Filesplit filesplit=(Filesplit) context.getinputsplit (); -String filename=Filesplit.getpath (). GetName (); -SYSTEM.OUT.PRINTLN ("Map's data shard length is:" +filesplit.getlength ()); -System.out.println ("The starting position of the data shard is:" +Filesplit.getstart ()); -String[] Templocation=filesplit.getlocations (); in for(String string:templocation) { -System.out.println ("The host on which the data shard resides is:" +string); to } + if(Filename.contains ("M")){ -String[] Tuple=value.tostring (). Split (","); the intI=integer.parseint (tuple[0]); *String[] Tuples=tuple[1].split ("\ t"); $ intJ=integer.parseint (tuples[0]);Panax Notoginseng intMij=integer.parseint (tuples[1]); - for(intK = 1; K <columnn+1; k++) { theMap_key.set (i+ "," +k); +Map_value.set ("M" + "," +j+ "," +mij); A Context.write (Map_key, Map_value); the } + } - Else if(Filename.contains ("N")){ $String[] Tuple=value.tostring (). 
Split (","); $ intJ=integer.parseint (tuple[0]); -String[] Tuples=tuple[1].split ("\ t"); - intK=integer.parseint (tuples[0]); the intNjk=integer.parseint (tuples[1]); - for(inti = 1; I <rowm+1; i++) {WuyiMap_key.set (i+ "," +k); theMap_value.set ("N" + "," +j+ "," +NJK); - Context.write (Map_key, Map_value); Wu } - } About } $ } - Public Static classMyreducerextendsReducer<text, text, text, text>{ - Private intSum=0; - Public voidSetup (Context context)throwsioexception{ AConfiguration conf=context.getconfiguration (); +Columnm=integer.parseint (Conf.get ("COLUMNM")); the } - Public voidReduce (Text key,iterable<text> value,context Context)throwsioexception,interruptedexception{ $ int[] m=New int[Columnm+1]; the int[] n=New int[Columnm+1]; theSystem.out.println (key.tostring () + "corresponding value list all values are:"); the for(Text val:value) { the System.out.println (val.tostring ()); -String[] Tuple=val.tostring (). Split (","); in if(Tuple[0].equals ("M")){ theM[integer.parseint (Tuple[1])]=integer.parseint (tuple[2]); the}Else { AboutN[integer.parseint (Tuple[1])]=integer.parseint (tuple[2]); the } the } the for(intj=1;j<columnm+1;++j) { +sum+=m[j]*N[j]; - } theContext.write (Key,NewText (integer.tostring (sum)));BayiSum=0; the } the } - Public Static voidMain (string[] args)throwsException { - if(args.length!=3){ theSystem.err.println ("usage:matrixmultiply <inputPathM> <inputPathN> <outputPath>"); theSystem.exit (2); the } the Else{ -System.out.println ("M File path:" +args[0]); theString[] Infotuplem=args[0].split ("_"); theRowm=integer.parseint (infotuplem[1]); theColumnm=integer.parseint (infotuplem[2]);94String[] Infotuplen=args[1].split ("_"); theColumnn=integer.parseint (infotuplen[2]); the } theConfiguration conf=NewConfiguration ();98Conf.set ("COLUMNM", Integer.tostring (COLUMNM)); AboutConf.set ("Rowm", Integer.tostring (ROWM)); -Conf.set ("ColumnN", Integer.tostring (COLUMNN));101Job job=NewJob (conf, "Matrix");102Job.setjarbyclass 
(Matrix.class);103Job.setmapperclass (Mymapper.class);104Job.setreducerclass (Myreducer.class); theJob.setoutputkeyclass (Text.class);106Job.setoutputvalueclass (Text.class);107Fileinputformat.setinputpaths (Job,NewPath (Args[0]),NewPath (args[1]));108Fileoutputformat.setoutputpath (Job,NewPath (args[2]));109System.exit (Job.waitforcompletion (true)? 0:1); the }111}
The above is the source code for computing the matrix product M*N. It takes three arguments: the path of matrix M, the path of matrix N, and the output path for the result. Matrix M is stored in a file whose name has the form "M_rows_columns"; the file used in the experiment is M_300_500, and the file for N is N_500_700. The M and N files use the same line format, "i,j\tmij", where i is the row index of the element, j is its column index, and mij is the value of the element. As shown below (N_500_700 has the same form):
The matrix elements are random numbers, and the matrix file is generated by the following shell script:
#!/bin/bash
# Generates a random matrix file for the MapReduce matrix-multiplication job.
#
# Usage: <script> ROWS COLUMNS [PREFIX]
#   ROWS     number of rows
#   COLUMNS  number of columns
#   PREFIX   file-name prefix, "M" by default (pass "N" for the right operand)
#
# Appends one line per element, formatted as "i,j<TAB>value" with no extra
# spaces, to the file PREFIX_ROWS_COLUMNS in the current directory.

if [ "$#" -lt 2 ]; then
    echo "usage: $0 ROWS COLUMNS [PREFIX]" >&2
    exit 1
fi

rows=$1
cols=$2
prefix=${3:-M}

for i in $(seq 1 "$rows"); do
    for j in $(seq 1 "$cols"); do
        # NOTE(review): the modulus was lost in the garbled original; 100 is assumed.
        s=$((RANDOM % 100))
        printf '%d,%d\t%d\n' "$i" "$j" "$s"
    done
done >> "${prefix}_${rows}_${cols}"
2) The output format of the map phase:
Suppose M is an i*j matrix and N is a j*k matrix.
For matrix M, the map output has the form (<i,k>, <"M",j,mij>), where <i,k> is the key and <"M",j,mij> is the value.
"M" indicates that this key-value pair carries an element of matrix M.
mij is an element of matrix M.
i and j are the row and column indices of this element in the matrix.
k is a column index of matrix N; it runs from 1 to the number of columns of N.
MapReduce implementation: matrix multiplication