person table: 分別是name class age
huangyiyang 1 24
lixun 1 25
lihao 1 27
liguangxu 2 26
zhangzenglei 2 25
wuzhipeng 2 25
hujintao 3 65
xijinping 3 55
class table:第一個列是class,也就是join條件
1 cs xjtu
2 phy jilin
3 nathion china
map中要判斷輸入是person表還是class表,將class作為key,剩下的資訊作為value輸出,並加上相應標記(區分是哪個表)
reduce中分別提取value中不同表的內容,放到2個list中,再串連即可
package com.huang.join;import java.io.IOException;import java.util.ArrayList;import java.util.Iterator;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class Join extends Configured implements Tool{/** * @param args * @throws Exception */public static void main(String[] args) throws Exception {// TODO Auto-generated method stubint ret = ToolRunner.run(new Join(), args);System.exit(ret);}@Overridepublic int run(String[] args) throws Exception {// TODO Auto-generated method stubConfiguration conf = new Configuration();Job job = new Job(conf, "join");job.setInputFormatClass(TextInputFormat.class);job.setOutputFormatClass(TextOutputFormat.class);job.setMapOutputKeyClass(IntWritable.class);job.setMapOutputValueClass(Text.class);job.setJarByClass(Join.class);job.setMapperClass(Map.class);job.setReducerClass(Reduce.class);FileInputFormat.setInputPaths(job, new Path(args[0]));FileOutputFormat.setOutputPath(job, new Path(args[1]));boolean success = job.waitForCompletion(true);return success ? 
0 : 1;}}class Map extends Mapper<LongWritable, Text, IntWritable, Text>{public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{//decide the input (key, value) belongs to which table//the first table's(class.txt) first column is number,the second table's first column is String//the join condition is class(first column in table one)String line = value.toString();String[] words = line.split(" ");IntWritable k = new IntWritable();Text v = new Text();if( ((int)words[0].charAt(0)) >= (int)'A' ){//table personk.set(Integer.parseInt(words[1]));v.set(words[0] + " " + words[2] + " " +"person");} else {//table classk.set(Integer.parseInt(words[0]));v.set(words[1] + " " + words[2] + " " + "class");}context.write(k, v);}}class Reduce extends Reducer<IntWritable, Text, Text, Text>{public void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException{Iterator<Text> it = values.iterator();ArrayList<String> list = new ArrayList<String>();while(it.hasNext()){String s = it.next().toString();list.add(s);}ArrayList<String> personList = new ArrayList<String>();ArrayList<String> classList = new ArrayList<String>();for(int i = 0; i < list.size(); i++){String temp = list.get(i);String[] words = temp.split(" ");if(words[2].equals("class")){classList.add(words[0] + " " + words[1]);} else {personList.add(words[0] + " " + words[1]);}}for(int i = 0; i < personList.size(); i++){for(int j = 0; j < classList.size(); j++){context.write(new Text(personList.get(i)), new Text(classList.get(j)));}}}}
在 Eclipse 的 Run Configurations 中配置好輸入和輸出路徑,然後運行
輸出結果是: