Last Update:2014-12-22
Source: Internet
Author: User
Keywords: value, reason, name
Type one: one-to-one correspondence
file1:
a 1
b 2
c 3
file2:
1 !
2 @
3 #
Joining file1 with file2 should produce the following result:
a !
b @
c #
Idea:
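1. Tag each record with the name of the input file it came from.
2. Swap the key and value of every file1 record so that file1 and file2 share the same join key; then, for each key, output file1's value as the result key and file2's value as the result value.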
program:
package smiple; import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop .mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public org.apache.hadoop.util.GenericOptionsParser; public class FileJoin {public static class MyMap extends Mapper <LongWritable, Text, Text, Text> {public void map (LongWritable key, Text value, Context context) throws IOException, InterruptedException {/ / String String = new String (value.getBytes (), 0, value.getLength (), "GBK"); StringTokenizer tokenizer = new StringTokenizer (line); String keyst StringNametr = tokenizer.nextToken (); // Get the filename InputSplit inputSplit = context.getInputSplit (); String fileName = ((FileSplit) inputSplit) .getPath (). getName (); if ("file1" .equals (fileName)) {// tag context.write (new Text (valuestr), new Text ("file1 _" + keystr));} else if ("file2" .equals (fileName)) { public static class MyReduce extends Reducer {public void reduce (Text key, Iterable <Text (TextText, Text, Text) > values, Context context) throws IOException, InterruptedException {Text resultKey = new Text ("key0"); Text resultValue = new Text ("value0"); for (Text val: values) substring (). 
substring () substring () substring () substring () substring (ResultKey.toString () + "" + resultValue.toString ()); context.write (resultValue = new Text (val.toString () substring (6));}} System.out.println (resultKey, resultValue) public static void main (String [] args) throws Exception {Configuration conf = new Configuration (); String [] ioArgs = new String [] {"hdfs: // ip: port / mr / join / in String (] otherArgs = new GenericOptionsParser (conf, ioArgs) .getRemainingArgs (); if (otherArgs.length! = 2) {System.err., "hdfs: // ip: port / mr / join / System.exit (2);} Job job = new Job (conf, "file join"); job.setJarByClass (Sort.class); // Set Map and Reduce processing class job.setMapperClass (MyMap.class); job.setReducerClass (MyReduce.class); / / set the output type job.setOutputKeyClass (Text.class); job.setOutputValueClass (Text.class); / / set the input And output directory FileInputFormat.addInputPath (job, new Path (otherArgs [0])); FileOutputFormat.setOutputPath (job, new Path (otherArgs [1])); System.exit (job.waitForCompletion (true) ;}}
result: