Due to project requirements, we need to submit YARN MapReduce jobs from a Java program. Unlike the usual approach of submitting a MapReduce job through a jar package, submitting it programmatically requires a few small changes, detailed in the code below.
The following is the MapReduce main program. A few points are worth mentioning:
1. The input format is set to WholeFileInputFormat, so each file is read whole and never split.
2. To control what reaches each reduce call, the map output key is a composite key. Instead of the conventional <key, value>, the map output becomes <TextPair, value>, where a TextPair has the form <key1, key2>.
3. To match the composite key, the grouping comparator (GroupComparator) is redefined. The grouping rule is: records are assigned to the same reduce group as long as key1 of the TextPair is equal, regardless of key2. When all records sharing the same key1 enter one reduce call, key2 serves to identify each individual record. (A sketch of the mapper and reducer that produce and consume these keys follows the main program below.)
package web.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
public class GemiMain {

    public Job job;

    public GemiMain() {
        job = null;
    }

    public static class NamePartitioner extends
            Partitioner<TextPair, BytesWritable> {
        @Override
        public int getPartition(TextPair key, BytesWritable value,
                int numPartitions) {
            return Math.abs(key.getFirst().hashCode() * 127) % numPartitions;
        }
    }
    /**
     * Grouping comparator: two TextPair keys belong to the same group as long
     * as their first fields are equal (the second field is ignored). All
     * values of one group are placed in a single value iterator and passed to
     * one call of the reducer's reduce() method.
     *
     * @author HDUser
     */
    public static class GroupComparator extends WritableComparator {
        public GroupComparator() {
            super(TextPair.class, true);
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            TextPair t1 = (TextPair) a;
            TextPair t2 = (TextPair) b;
            // Compare only the first field: if the first fields are equal,
            // the two keys fall into the same group.
            return t1.getFirst().compareTo(t2.getFirst());
        }
    }
    public boolean runJob(String[] args) throws IOException,
            ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Store the output path in conf so that the reduce function can read it.
        conf.set("OutputPath", args[args.length - 1]);
        // Set the HDFS folder that holds the quality files generated by each
        // task; it is the second-to-last element of the args array.
        conf.set("QualityFolder", args[args.length - 2]);

        // When running inside the web server, obtain the root path of the web
        // project; when debugging as a plain Java application, read the
        // configuration files from the /opt/hadoop-2.5.0/etc/hadoop/ directory.
        // MapReduceProgress mProgress = new MapReduceProgress();
        // String rootPath = mProgress.rootPath;
        String rootPath = "/opt/hadoop-2.5.0/etc/hadoop/";
        conf.addResource(new Path(rootPath + "yarn-site.xml"));
        conf.addResource(new Path(rootPath + "core-site.xml"));
        conf.addResource(new Path(rootPath + "hdfs-site.xml"));
        conf.addResource(new Path(rootPath + "mapred-site.xml"));

        this.job = new Job(conf);
        job.setJobName("Job name:" + args[0]);
        job.setJarByClass(GemiMain.class);

        job.setMapperClass(GemiMapper.class);
        job.setMapOutputKeyClass(TextPair.class);
        job.setMapOutputValueClass(BytesWritable.class);
        // Set the partitioner.
        job.setPartitionerClass(NamePartitioner.class);
        // After partitioning, group keys according to GroupComparator.
        job.setGroupingComparatorClass(GroupComparator.class);

        job.setReducerClass(GemiReducer.class);
        job.setInputFormatClass(WholeFileInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(8);

        // Add the input data paths (args[1] through args[args.length - 3]).
        for (int i = 1; i < args.length - 2; i++) {
            FileInputFormat.addInputPath(job, new Path(args[i]));
        }
        // The last element of the args array is the output path.
        FileOutputFormat.setOutputPath(job, new Path(args[args.length - 1]));

        boolean flag = job.waitForCompletion(true);
        return flag;
    }
@SuppressWarnings ("Static-access")
public static void Main (string] args) throws ClassNotFoundException,
IOException, Interruptedexception {
String] inputpaths = new string] {"Normalizejob",
"Hdfs://192.168.168.101:9000/user/hduser/red1/",
"hdfs://192.168.168.101:9000/user/hduser/nir1/", "quality11111",
"Hdfs://192.168.168.101:9000/user/hduser/test"};
Gemimain test = new Gemimain ();
Boolean result = Test.runjob (inputpaths);
}
}
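The main program references GemiMapper and GemiReducer, which this post does not include. The sketch below is only my minimal illustration of what such a pair could look like, assuming (hypothetically) that the group key is the part of the file name before the first underscore; it is not the original implementation.

package web.hadoop;

import java.io.IOException;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical mapper: receives <file name, file bytes> from
// WholeFileInputFormat and emits a composite key, where key1 groups related
// files and key2 (the full file name) identifies a file within its group.
class GemiMapper extends Mapper<Text, BytesWritable, TextPair, BytesWritable> {
    @Override
    protected void map(Text key, BytesWritable value, Context context)
            throws IOException, InterruptedException {
        String fileName = key.toString();
        // Assumption for illustration: the part of the file name before the
        // first underscore is the group key.
        String groupKey = fileName.split("_")[0];
        context.write(new TextPair(groupKey, fileName), value);
    }
}

// Hypothetical reducer: thanks to GroupComparator, all files sharing key1
// arrive in one reduce() call; as the value iterator advances, Hadoop updates
// the key object, so key.getSecond() always names the current file.
class GemiReducer extends Reducer<TextPair, BytesWritable, NullWritable, Text> {
    @Override
    protected void reduce(TextPair key, Iterable<BytesWritable> values,
            Context context) throws IOException, InterruptedException {
        String outputPath = context.getConfiguration().get("OutputPath");
        for (BytesWritable value : values) {
            // Process the bytes of the file named key.getSecond() here and
            // write any results under outputPath; the job uses
            // NullOutputFormat, so nothing needs to be emitted via context.
        }
    }
}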
The following is the TextPair class:
package web.hadoop;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

public class TextPair implements WritableComparable<TextPair> {

    private Text first;
    private Text second;

    public TextPair() {
        set(new Text(), new Text());
    }

    public TextPair(String first, String second) {
        set(new Text(first), new Text(second));
    }

    public TextPair(Text first, Text second) {
        set(first, second);
    }

    public void set(Text first, Text second) {
        this.first = first;
        this.second = second;
    }

    public Text getFirst() {
        return first;
    }

    public Text getSecond() {
        return second;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        first.write(out);
        second.write(out);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        first.readFields(in);
        second.readFields(in);
    }

    @Override
    public int hashCode() {
        return first.hashCode() * 163 + second.hashCode();
    }

    @Override
    public boolean equals(Object o) {
        if (o instanceof TextPair) {
            TextPair tp = (TextPair) o;
            return first.equals(tp.first) && second.equals(tp.second);
        }
        return false;
    }

    @Override
    public String toString() {
        return first + "\t" + second;
    }

    /**
     * a.compareTo(b) returns 0 if the two pairs are equal, a positive value
     * if a is greater than b, and a negative value if a is less than b.
     */
    @Override
    public int compareTo(TextPair tp) {
        int cmp = first.compareTo(tp.first);
        if (cmp != 0) {
            return cmp;
        }
        // Secondary sort on the second field, in ascending order.
        return second.compareTo(tp.second);
    }
}
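To make the sort-versus-group distinction concrete, here is a small hypothetical check (not from the original post; ComparatorDemo is an invented name) that pits TextPair.compareTo() against GroupComparator.compare():

package web.hadoop;

import org.apache.hadoop.io.WritableComparator;

// Hypothetical sanity check of the comparator semantics.
public class ComparatorDemo {
    public static void main(String[] args) {
        TextPair a = new TextPair("file1", "red");
        TextPair b = new TextPair("file1", "nir");
        TextPair c = new TextPair("file2", "red");

        // The full key comparison also sorts on the second field...
        System.out.println(a.compareTo(b) != 0); // true: "red" vs. "nir"

        // ...while the grouping comparator inspects only the first field:
        WritableComparator grouper = new GemiMain.GroupComparator();
        System.out.println(grouper.compare(a, b) == 0); // true: same group
        System.out.println(grouper.compare(a, c) == 0); // false: different groups
    }
}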
The following is WholeFileInputFormat, which ensures that input files are not split during the MapReduce job:
package web.hadoop;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class WholeFileInputFormat extends FileInputFormat<Text, BytesWritable> {

    @Override
    public RecordReader<Text, BytesWritable> createRecordReader(
            InputSplit split, TaskAttemptContext context) throws IOException,
            InterruptedException {
        return new WholeFileRecordReader();
    }

    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        // Never split a file: each file is handed to a mapper as one record.
        return false;
    }
}
The following is the WholeFileRecordReader class:
package web.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class WholeFileRecordReader extends RecordReader<Text, BytesWritable> {

    private FileSplit fileSplit;
    private FSDataInputStream fis;

    private Text key = null;
    private BytesWritable value = null;

    private boolean processed = false;

    @Override
    public void close() throws IOException {
        // The stream is already closed in nextKeyValue() once the file has
        // been read; closing it again here is harmless.
        IOUtils.closeStream(fis);
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return this.key;
    }

    @Override
    public BytesWritable getCurrentValue() throws IOException,
            InterruptedException {
        return this.value;
    }

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taContext)
            throws IOException, InterruptedException {
        fileSplit = (FileSplit) inputSplit;
        Configuration job = taContext.getConfiguration();
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(job);
        fis = fs.open(file);
    }

    @Override
    public boolean nextKeyValue() {
        if (key == null) {
            key = new Text();
        }
        if (value == null) {
            value = new BytesWritable();
        }
        if (!processed) {
            // Read the whole file into memory as a single record; the file
            // name becomes the key, the file bytes become the value.
            byte[] content = new byte[(int) fileSplit.getLength()];
            Path file = fileSplit.getPath();
            System.out.println(file.getName());
            key.set(file.getName());
            try {
                IOUtils.readFully(fis, content, 0, content.length);
                value.set(content, 0, content.length);
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                IOUtils.closeStream(fis);
            }
            processed = true;
            return true;
        }
        return false;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        // Each file yields exactly one record, so progress is all or nothing.
        return processed ? 1.0f : 0.0f;
    }
}
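A final usage note for whichever reducer consumes these whole-file records (value below stands for the BytesWritable received in the reduce call): BytesWritable may hand back a reused, padded buffer, so getBytes() alone can return trailing garbage.

// Copy only the valid bytes of the record: getBytes() may return a padded,
// reused buffer, so always pair it with getLength().
byte[] fileBytes = java.util.Arrays.copyOf(value.getBytes(), value.getLength());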