Hadoop: a second program that operates on HDFS -> [get datanode names] [write a file] [word count]

Source: Internet
Author: User
Function of this code: get the datanode names and write them to a file in the HDFS file system, then run a word count over the file hdfs://copyOfTest.c (results go to hdfs://wordcount). Unlike Hadoop's bundled WordCount example, which reads its input from the local file system, this program reads its input from HDFS.
 

package com.fora;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class FileOperate {

    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {

        init(); /* copy the local file to HDFS and record the datanode names */

        Configuration conf = new Configuration();

        Job job = new Job(conf, "word count");
        job.setJarByClass(FileOperate.class);

        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        /* set the input and output paths */
        FileInputFormat.addInputPath(job, new Path("hdfs://copyOfTest.c"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://wordcount"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void init() throws IOException {

        /* copy a local file to HDFS */
        Configuration config = new Configuration();
        String srcFile = "/test.c";
        String dstFile = "hdfs://copyOfTest.c";
        FileSystem hdfs = FileSystem.get(config);
        Path srcPath = new Path(srcFile);
        Path dstPath = new Path(dstFile);
        hdfs.copyFromLocalFile(srcPath, dstPath);
        System.out.print("copy success!\n");

        /* print the block size of the copied file */
        String fileName = "hdfs://copyOfTest.c";
        Path path = new Path(fileName);
        FileStatus fileStatus = hdfs.getFileStatus(path);
        System.out.println(fileStatus.getBlockSize());

        /* ask the namenode for the datanodes of the cluster */
        FileSystem fs = FileSystem.get(config);
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();

        /* create a file on HDFS */
        Path outputPath = new Path("hdfs://output/listOfDatanode");
        FSDataOutputStream outputStream = hdfs.create(outputPath);

        String[] names = new String[dataNodeStats.length];
        for (int i = 0; i < dataNodeStats.length; i++) {
            names[i] = dataNodeStats[i].getHostName(); /* get the name of each datanode */
            System.out.println(names[i]);
            /* write the list of datanodes to the file on HDFS */
            outputStream.write(names[i].getBytes(), 0, names[i].length());
        }
        outputStream.close(); /* close the stream so the data is flushed to HDFS */
    }

}
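
One caveat if you re-run the job: FileOutputFormat refuses to start when the output directory already exists, so a second submission against hdfs://wordcount fails until that directory is removed. Below is a minimal sketch of a guard that could be called at the top of main(), before FileOutputFormat.setOutputPath(); the helper name deleteIfExists is my own, not part of the original program.

/* Hypothetical helper: remove a stale output directory before submitting the job. */
public static void deleteIfExists(Configuration conf, String dir) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path outDir = new Path(dir);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true); /* 'true' means delete recursively */
    }
}

For example, calling deleteIfExists(conf, "hdfs://wordcount") just before the setOutputPath() call would make repeated runs possible.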
 

 
Running result:
[root@master bin]# hadoop jar hdfs.jar com.fora.FileOperate
copy success!
67108864
master
slave1
11/07/21 15:45:23 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
11/07/21 15:45:23 INFO input.FileInputFormat: Total input paths to process : 1
11/07/21 15:45:23 INFO mapred.JobClient: Running job: job_20110721_17_0003
11/07/21 15:45:24 INFO mapred.JobClient:  map 0% reduce 0%
11/07/21 15:45:31 INFO mapred.JobClient:  map 100% reduce 0%
11/07/21 15:45:43 INFO mapred.JobClient:  map 100% reduce 100%
11/07/21 15:45:45 INFO mapred.JobClient: Job complete: job_20110721_17_0003
11/07/21 15:45:45 INFO mapred.JobClient: Counters: 17
11/07/21 15:45:45 INFO mapred.JobClient:   Job Counters
11/07/21 15:45:45 INFO mapred.JobClient:     Launched reduce tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:     Rack-local map tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:     Launched map tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:   FileSystemCounters
11/07/21 15:45:45 INFO mapred.JobClient:     FILE_BYTES_READ=228
11/07/21 15:45:45 INFO mapred.JobClient:     HDFS_BYTES_READ=126
11/07/21 15:45:45 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=488
11/07/21 15:45:45 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=146
11/07/21 15:45:45 INFO mapred.JobClient:   Map-Reduce Framework
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce input groups=19
11/07/21 15:45:45 INFO mapred.JobClient:     Combine output records=19
11/07/21 15:45:45 INFO mapred.JobClient:     Map input records=8
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce shuffle bytes=228
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce output records=19
11/07/21 15:45:45 INFO mapred.JobClient:     Spilled Records=38
11/07/21 15:45:45 INFO mapred.JobClient:     Map output bytes=211
11/07/21 15:45:45 INFO mapred.JobClient:     Combine input records=22
11/07/21 15:45:45 INFO mapred.JobClient:     Map output records=22
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce input records=19
[root@master bin]# hadoop dfs -ls /
Found 6 items
-rw-r--r--   1 root supergroup        126  /copyOfTest.c
-rw-r--r--   1 root supergroup         26  /listOfDatanode
drwxr-xr-x   - root supergroup          0  /output
-rw-r--r--   1 root supergroup      10400  /test.txt
drwxr-xr-x   - root supergroup          0  /tmp
drwxr-xr-x   - root supergroup          0  /wordcount
[root@master bin]# hadoop dfs -ls /wordcount
Found 2 items
drwxr-xr-x   - root supergroup          0  /wordcount/_logs
-rw-r--r--   1 root supergroup        146  /wordcount/part-r-00000
[root@master bin]# hadoop dfs -cat /wordcount/part-r-00000
2011-07-21      1
file    1
hadoop  1
system! 1
A       1
aimed   1
at      1
coping  1
file    3
from    1
from:fora       1
is      1
Local   1
System  1
Thank   1
The     1
This    2
to      1
you!    1
[root@master bin]#
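
If you would rather read the result back from Java than run hadoop dfs -cat, something along these lines should work; the class name ReadWordCountResult and the hard-coded part-r-00000 path are only illustrative, not part of the original post.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/* Illustrative reader for the reducer output; not part of the original program. */
public class ReadWordCountResult {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        /* the job uses a single reducer, so all results end up in part-r-00000 */
        Path result = new Path("hdfs://wordcount/part-r-00000");
        FSDataInputStream in = fs.open(result);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line); /* each line is "word<TAB>count" */
        }
        reader.close();
    }
}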

 
 
