New API multi-file input

Source: Internet
Author: User

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
* Newmultifilewordcount is an example to demonstrate the usage
* Multipleinputs. This examples counts the occurrences of words in
* Text files under the given input directory.
*/
Public class newmultifilewordcount extends configured implements tool {
/**
* This mapper is similar to the one in {@ link hadooptest. mapclass }.
*/
Public static class mapclass extends mapper <longwritable, text, text, longwritable> {

Private Final Static longwritable one = new longwritable (1 );
Private text word = new text ();

Public void map (longwritable key, text value, context) throws ioexception, interruptedexception {
String line = value. tostring ();
Stringtokenizer itr = new stringtokenizer (line );
While (itr. hasmoretokens ()){
Word. Set (itr. nexttoken ());
Context. Write (word, one );
}
}
}

Private void printusage (){
System. Out. println ("Usage: multifilewc <input_dir> <output> ");
}

Public int run (string [] ARGs) throws exception {

If (ARGs. Length <2 ){
Printusage ();
Return 1;
}

Job job = new job (getconf ());
Job. setjobname ("multifilewordcount ");
Job. setjarbyclass (newmultifilewordcount. Class );
// The keys are words (strings)
Job. setoutputkeyclass (text. Class );
// The values are counts (ints)
Job. setoutputvalueclass (longwritable. Class );

// Use the wordcount CER
Job. setcombinerclass (longsumreducer. Class );
Job. setreducerclass (longsumreducer. Class );

Multipleinputs. addinputpath (job, new path (ARGs [0]), textinputformat. Class, mapclass. Class );

Multipleinputs. addinputpath (job, new path (ARGs [0]), textinputformat. Class, mapclass. Class );
Fileoutputformat. setoutputpath (job, new path (ARGs [1]);

System. Exit (job. waitforcompletion (true )? 0: 1 );

Return 0;
}

Public static void main (string [] ARGs) throws exception {
Int ret = toolrunner. Run (New newmultifilewordcount (), argS );
System. Exit (RET );
}

}

If you find any errors, please let me know!

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.