Data sets
User (id name) users table
1 user1
2 user2
3 user3
4 user4
5 user5
6 user6
Post (userid postid postname) post table
1 1 post1
1 2 post2
2 3 post3
4 4 post4
5 5 post5
8 6 post6
8 7 post7
8 8 post8
package com.test;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
Import Org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; * * MapReduce implement Left, right, full connection, anti-join/public class Userandpostjoinjob {static class Userandpostwritable implements writ
able{/* * Type U denotes user, p = post/private String type; prIvate String data;
Public userandpostwritable () {} public userandpostwritable (string type, string data) {super ();
This.type = type;
This.data = data;
Public String GetType () {return type;
public void SetType (String type) {this.type = type;
Public String GetData () {return data;
public void SetData (String data) {this.data = data; @Override public void ReadFields (Datainput input) throws IOException {//TODO auto-generated method stub Typ
E = Input.readutf ();
data = Input.readutf (); @Override public void Write (DataOutput output) throws IOException {//TODO auto-generated method stub output
. writeUTF (type);
OUTPUT.WRITEUTF (data); } static class Usermapper extends Mapper<longwritable, text, text, userandpostwritable> {protected void Map (longwritable key, Text value, Context context) throws IOException, interruptedexception {string[] arr = Value.tos
Tring (). Split ("T"); TexT userId = new Text (arr[0]);
Context.write (UserId, New userandpostwritable ("U", Value.tostring ())); } static class Postmapper extends Mapper<longwritable, text, text, userandpostwritable> {protected void map (longwritable key, Text value, Context context) throws IOException, interruptedexception{string[] arr = value.tostring
(). Split ("T");
Text userId = new text (arr[0]);
Context.write (UserId, New userandpostwritable ("P", Value.tostring ())); } static class Postreducer extends Reducer<text, userandpostwritable, Text, text> {private list<text& Gt
Users = new arraylist<text> ();
Private list<text> posts = new arraylist<text> ();
Private String JoinType;
protected void Setup (context context) throws Ioexception,interruptedexception {Super.setup (context);
JoinType = Context.getconfiguration (). Get ("JoinType"); } protected void reduce (Text key, iterable<userandpostwritable> iterable, Context context) Throws IOException, interruptedexception{users.clear ();
Posts.clear ();
for (userandpostwritable data:iterable) {if (Data.gettype (). Equals ("U")) {Users.add (New Text (Data.getdata ()));
else {posts.add (new Text (Data.getdata ())); } if ("Innerjoin". Equals (JoinType)) {if (users.size () > 0 && posts.size () > 0) {for-Text use
R:users) {for (Text post:posts) {context.write (new text (user), new text (POST));
}}}else if ("Leftouterjoin". Equals (JoinType)) {for (Text user:users) {if (posts.size () > 0) {
for (Text post:posts) {context.write (new text (user), new text (POST));
} else {context.write (new text (user), new text ("t \ T"));
}}else if ("Rightouterjoin". Equals (JoinType)) {for (Text post:posts) {if (users.size () > 0) {
for (Text user:users) {context.write (new text (user), new text (POST)); }} ELSE {Context.write (new text ("T"), new text (POST));
}}else if ("Fullouterjoin". Equals (JoinType)) {if (users.size () > 0) {for (Text user:users) {
if (posts.size () > 0) {for (Text post:posts) {context.write (new text (user), new text (POST));
} else {context.write (new text (user), new text ("t \ T"));
}} else {for (text post:posts) {if (users.size () > 0) {for (text user:users) {
Context.write (new text (user), new text (POST));
} else {context.write (new text ("T"), new text (POST)); }}}else if ("Anti". Equals (JoinType)) {if (users.size () = 0 ^ posts.size () = 0) {for (Text User:
Users) {context.write (new text (user), new text ("t \ T"));
for (Text post:posts) {context.write (new text ("T"), new text (POST)); "}}}} public static void Main (string[] args) throws Exception {Configuration conf = new Configuration ();
String queue = "HQL";
if (Args.length > 4) {queue = Args[4].matches ("Hql|dstream|mapred|udw|user|common")? Args[1]: "HQL";
} String JoinType = Args[0];
String Userinputpath = args[1];
String Postinputpath = args[2];
String OutputPath = args[3];
Conf.set ("Mapreduce.job.queuename", queue);
Job Job = job.getinstance (conf, "jointest");
Job.getconfiguration (). Set ("JoinType", JoinType);
Job.getconfiguration (). Set ("Mapred.textoutputformat.separator", "T");
Job.setjarbyclass (Userandpostjoinjob.class);
Multipleinputs.addinputpath (Job, New Path (Userinputpath), Textinputformat.class, Usermapper.class);
Multipleinputs.addinputpath (Job, New Path (Postinputpath), Textinputformat.class, Postmapper.class);
Job.setreducerclass (Postreducer.class);
Job.setmapoutputkeyclass (Text.class);
Job.setmapoutputvalueclass (Userandpostwritable.class);
Job.setoutputkeyclass (Text.class); Job.setoutpuTvalueclass (Text.class);
Job.setoutputformatclass (Textoutputformat.class);
FileSystem fs = Filesystem.get (conf);
Path Outpath = new Path (OutputPath);
if (fs.exists (Outpath)) {//Fs.delete (Outpath);//} Fileoutputformat.setoutputpath (Job, Outpath);
Fileoutputformat.setcompressoutput (Job, true);
Fileoutputformat.setoutputcompressorclass (Job, Gzipcodec.class);
Job.waitforcompletion (TRUE);
}
}