Import Java. io. ioexception; import Org. apache. hadoop. conf. configuration; import Org. apache. hadoop. FS. path; import Org. apache. hadoop. io. text; import Org. apache. hadoop. mapreduce. job; import Org. apache. hadoop. mapreduce. mapper; import Org. apache. hadoop. mapreduce. reducer; import Org. apache. hadoop. mapreduce. lib. input. fileinputformat; import Org. apache. hadoop. mapreduce. lib. input. filesplit; import Org. apache. hadoop. Mapreduce. lib. output. fileoutputformat; public class matrixmultiply {/** three necessary variables required by mapper and reducer are defined by Conf. get the get () method **/public static int rowm = 0; public static int columnm = 0; public static int columnn = 0; public static class matrixmapper extends mapper <object, text, text, text> {private text map_key = new text (); private text map_value = new text ();/*** run the map () function first by Conf. get () to get the necessary variables provided in the main function, This is also a way to share variables in mapreduce */Public void setup (context) throws ioexception {configuration conf = context. getconfiguration (); columnn = integer. parseint (Conf. get ("columnn"); rowm = integer. parseint (Conf. get ("rowm");} public void map (Object key, text value, context) throws ioexception, interruptedexception {/** get the input file name, to distinguish the input matrix m from N **/filesplit = (filesplit) context. geti Nputsplit (); string filename = filesplit. getpath (). getname (); If (filename. contains ("M") {string [] tuple = value. tostring (). split (","); int I = integer. parseint (tuple [0]); string [] tuples = tuple [1]. split ("\ t"); Int J = integer. parseint (tuples [0]); int mij = integer. parseint (tuples [1]); For (int K = 1; k <columnn + 1; k ++) {map_key.set (I + "," + k ); map_value.set ("M" + "," + J + "," + mij); Co Ntext. write (map_key, map_value) ;}} else if (filename. contains ("N") {string [] tuple = value. tostring (). 
split (","); Int J = integer. parseint (tuple [0]); string [] tuples = tuple [1]. split ("\ t"); int K = integer. parseint (tuples [0]); int njk = integer. parseint (tuples [1]); For (INT I = 1; I <rowm + 1; I ++) {map_key.set (I + "," + k ); map_value.set ("N" + "," + J + "," + njk); context. write (map_key, Ma P_value) ;}}} public static class matrixreducer extends CER <text, text> {private int sum = 0; Public void setup (context) throws ioexception {configuration conf = context. getconfiguration (); columnm = integer. parseint (Conf. get ("columnm");} public void reduce (Text key, iterable <text> values, context) throws ioexception, interruptedexception {int [] M = new int [Columnm + 1]; int [] n = new int [columnm + 1]; for (Text VAL: values) {string [] tuple = Val. tostring (). split (","); If (tuple [0]. equals ("M") {M [integer. parseint (tuple [1])] = integer. parseint (tuple [2]);} else N [integer. parseint (tuple [1])] = integer. parseint (tuple [2]);}/** Based on the J value, multiply M [J] and N [J] to accumulate the data in the product matrix. **/For (Int J = 1; j <columnm + 1; j ++) {sum + = m [J] * n [J];} context. write (Key, new te XT (integer. tostring (SUM); sum = 0 ;}}/*** main function * <p> * usage: ** <p> * <code> matrixmultiply inputpathm inputpathn outputpath </code> ** <p> * obtain the number of rows and columns of matrix m from the input file name, and the number of columns in matrix N, passed as an important parameter to Mapper and reducer ** @ Param ARGs input file directory address M and N and output directory address ** @ throws exception */public static void main (string [] ARGs) throws exception {If (ARGs. length! = 3) {system. err. println ("Usage: matrixmultiply <inputpathm> <inputpathn> <outputpath>"); system. exit (2);} else {string [] infotuplem = ARGs [0]. split ("_"); rowm = integer. parseint (infotuplem [1]); columnm = integer. parseint (infotuplem [2]); string [] infotuplen = ARGs [1]. split ("_"); columnn = integer. 
parseint (infotuplen [2]);} configuration conf = new configuration ();/** set three global shared variables **/CONF. setint ("Rowm", rowm); Conf. setint ("columnm", columnm); Conf. setint ("columnn", columnn); job = new job (Conf, "matrixmultiply"); job. setjarbyclass (matrixmultiply. class); job. setmapperclass (matrixmapper. class); job. setreducerclass (matrixreducer. class); job. setoutputkeyclass (text. class); job. setoutputvalueclass (text. class); fileinputformat. setinputpaths (job, new path (ARGs [0]), new path (ARGs [1]); fileou Tputformat. setoutputpath (job, new path (ARGs [2]); system. Exit (job. waitforcompletion (true )? 0: 1 );}}
The code above implements matrix multiplication using Hadoop MapReduce.