Customizing the output data format, output path, and output file name
Output formats (OutputFormat)
1. OutputFormat: the abstract base class
2. FileOutputFormat: file-based output format
3. TextOutputFormat: plain-text file output format
4. SequenceFileOutputFormat: standard sequence-file output format
5. SequenceFileAsBinaryOutputFormat: binary sequence-file output format
6. FilterOutputFormat: filter output format
7. DBOutputFormat: database output format
8. MultipleOutputs: writes to multiple outputs
Custom output format
1. Define a class that extends FileOutputFormat and overrides the getRecordWriter() method.
2. Define a class that extends RecordWriter and implements the write() and close() methods.
Code
Here we use WordCount as an example:
Data preparation
1.txt
Hadoop MapReduce
Hive Hadoop
Oracle
Java Hadoop hbase
2.txt
Spark
Hadoop
Spark Hive Mangodb Nginx
Tomcat JBoss Apache
WebLogic Oracle
Java C C++
Custom output format code
Import Java.io.ioexception;import Java.io.printwriter;import Org.apache.hadoop.conf.configuration;import Org.apache.hadoop.fs.fsdataoutputstream;import Org.apache.hadoop.fs.path;import org.apache.hadoop.io.IntWritable ; Import Org.apache.hadoop.io.text;import Org.apache.hadoop.mapreduce.recordwriter;import Org.apache.hadoop.mapreduce.taskattemptcontext;import Org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class Customfileoutputformat extends Fileoutputformat<text, intwritable> {@Overridepublic recordwriter <text, intwritable> getrecordwriter (Taskattemptcontext job) throws IOException, interruptedexception {//TODO Auto-generated method stub//Get the file output directory path Filedir = Fileoutputformat.getoutputpath (Job);//Specify the file name of the output, here we name the file 1.txt/ /If there is a parent directory to handle the path FileName = new Path (filedir.tostring () + "/1.txt"); System.out.println (Filename.getname ()); Configuration conf = job.getconfiguration (); Fsdataoutputstream file = Filename.getfilesystem (conf). Create (fileName); return new CustOmrecordwrite (file);}} Class Customrecordwrite extends Recordwriter<text, intwritable> {private PrintWriter write = Null;public Customrecordwrite (fsdataoutputstream file) {this.write = new PrintWriter (file);} @Overridepublic void Write (Text key, intwritable value) throws Ioexception,interruptedexception {//TODO auto-generated m Ethod stubwrite.println ("Word:" + key.tostring () + "\ T" + "Counts:" + value);} @Overridepublic void Close (Taskattemptcontext context) throws Ioexception,interruptedexception {//TODO auto-generated Method Stubwrite.close ();}}
WordCount code
Import Java.io.ioexception;import Java.util.stringtokenizer;import Org.apache.hadoop.conf.configuration;import Org.apache.hadoop.conf.configured;import Org.apache.hadoop.fs.path;import org.apache.hadoop.io.IntWritable; Import Org.apache.hadoop.io.text;import Org.apache.hadoop.mapreduce.job;import Org.apache.hadoop.mapreduce.Mapper ; Import Org.apache.hadoop.mapreduce.reducer;import Org.apache.hadoop.mapreduce.lib.input.fileinputformat;import Org.apache.hadoop.mapreduce.lib.input.textinputformat;import Org.apache.hadoop.mapreduce.lib.output.fileoutputformat;import Org.apache.hadoop.mapreduce.lib.output.textoutputformat;import Org.apache.hadoop.util.tool;import Org.apache.hadoop.util.toolrunner;public class WordCount extends configured implements Tool {@Overridepublic int run ( String[] arg0) throws Exception {//TODO auto-generated method Stubconfiguration conf = getconf (); Job Job = new Job (conf, "Worldcount"); Job.setjarbyclass (Wordcount.class); Fileinputformat.addinputpath (Job, New Path ("/vaLue/*.txt ")); Fileoutputformat.setoutputpath (Job, New Path ("/wordcount/out")); Job.setmapperclass (Wordcountmap.class); Job.setreducerclass (Wordcountreduce.class); Job.setmapoutputkeyclass (Text.class); Job.setmapoutputvalueclass ( Intwritable.class); Job.setoutputkeyclass (Text.class); Job.setoutputvalueclass (Intwritable.class); Job.setinputformatclass (Textinputformat.class);//default is textoutputformat,//here we set the custom output format Job.setoutputformatclass ( Customfileoutputformat.class); Job.submit (); return job.issuccessful ()? 0:1;} public static void Main (string[] args) throws Exception {Toolrunner.run (New Configuration (), New WordCount (), null);}} Class Wordcountmap extends Mapper<object, text, text, intwritable> {Private final static intwritable one = new INTWR Itable (1);p rivate Text word = new text ();p ublic void map (Object key, text value,mapper<object, text, text, intwritable& gt;. 
Context context) throws IOException, interruptedexception {stringtokenizer ITR = new StringTokenizer (value.tostring ()); while (iTr.hasmoretokens ()) {Word.set (Itr.nexttoken ()); System.out.println (Word.tostring ()); Context.write (Word, one);}}} Class Wordcountreduce extends Reducer<text, intwritable, Text, intwritable> {private intwritable result = new Intwri Table ();p ublic void reduce (Text key, iterable<intwritable> values, context context) throws IOException, interruptedexception {int sum = 0;for (intwritable val:values) {sum + = Val.get ();} Result.set (sum); Context.write (key, result);}}
Run results
Filename
File data
Word:c	counts:1
Word:c++	counts:1
Word:apache	counts:1
Word:hadoop	counts:4
Word:hbase	counts:1
Word:hive	counts:2
Word:java	counts:2
Word:jboss	counts:1
Word:mangodb	counts:1
Word:mapreduce	counts:1
Word:nginx	counts:1
Word:oracle	counts:2
Word:spakr	counts:1
Word:spark	counts:1
Word:tomcat	counts:1
Word:weblogic	counts:1
Custom output formats