Function of this code: get the datanode names and write them to a file on HDFS, then count the words in hdfs://copyOfTest.c and write the counts to hdfs://wordcount. Unlike Hadoop's bundled WordCount example, which reads its input from the local file system, this program reads its input directly from HDFS (it first copies the local file /test.c into HDFS itself).
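The listing below picks up the NameNode address from the Hadoop configuration files on the classpath. If you run it on a machine without those files, the default file system has to be set explicitly; a minimal sketch, assuming a NameNode at hdfs://master:9000 (that address is an assumption, substitute your own):

Configuration conf = new Configuration();
// Hypothetical NameNode address; replace with your cluster's value.
// fs.default.name is the configuration key used by Hadoop of this era.
conf.set("fs.default.name", "hdfs://master:9000");
FileSystem fs = FileSystem.get(conf);

The full listing follows.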
package com.fora;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FileOperate {

    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        init(); /* copy the input file to HDFS and list the datanodes */

        Configuration conf = new Configuration();
        Job job = new Job(conf, "word count");
        job.setJarByClass(FileOperate.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        /* set the input and output paths */
        FileInputFormat.addInputPath(job, new Path("hdfs://copyOfTest.c"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://wordcount"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            /* emit (word, 1) for every token in the line */
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            /* sum the counts for each word */
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void init() throws IOException {
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);

        /* copy the local file to HDFS */
        String srcFile = "/test.c";
        String dstFile = "hdfs://copyOfTest.c";
        hdfs.copyFromLocalFile(new Path(srcFile), new Path(dstFile));
        System.out.print("Copy success!\n");

        /* print the block size of the copied file */
        Path path = new Path("hdfs://copyOfTest.c");
        FileStatus fileStatus = hdfs.getFileStatus(path);
        System.out.println(fileStatus.getBlockSize());

        /* get the list of datanodes */
        FileSystem fs = FileSystem.get(config);
        DistributedFileSystem hdfs1 = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = hdfs1.getDataNodeStats();

        /* create a file on HDFS and write the datanode names into it */
        Path outputPath = new Path("hdfs://output/listOfDatanode");
        FSDataOutputStream outputStream = hdfs.create(outputPath);
        String[] names = new String[dataNodeStats.length];
        for (int i = 0; i < dataNodeStats.length; i++) {
            names[i] = dataNodeStats[i].getHostName(); /* get the hostname of each datanode */
            System.out.println(names[i]);
            /* write the datanode name to the file on HDFS */
            outputStream.write(names[i].getBytes(), 0, names[i].length());
        }
        outputStream.close(); /* close the stream so the data is actually flushed */
    }
}
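To check the datanode list that init() writes without going through the shell, a short reader sketch (not part of the original program; it reuses the path from the listing above):

package com.fora;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadDatanodeList {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        /* the same path that FileOperate.init() writes to */
        FSDataInputStream in = hdfs.open(new Path("hdfs://output/listOfDatanode"));
        /* copy the file contents to stdout; the hostnames appear back to back
           because init() writes no separator between them */
        IOUtils.copyBytes(in, System.out, 4096, true);
    }
}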
Running result:
[root@master bin]# hadoop jar HDFS.jar com.fora.FileOperate
Copy success!
67108864
master
slave1
11/07/21 15:45:23 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
11/07/21 15:45:23 INFO input.FileInputFormat: Total input paths to process : 1
11/07/21 15:45:23 INFO mapred.JobClient: Running job: job_20110721_17_0003
11/07/21 15:45:24 INFO mapred.JobClient:  map 0% reduce 0%
11/07/21 15:45:31 INFO mapred.JobClient:  map 100% reduce 0%
11/07/21 15:45:43 INFO mapred.JobClient:  map 100% reduce 100%
11/07/21 15:45:45 INFO mapred.JobClient: Job complete: job_20110721_17_0003
11/07/21 15:45:45 INFO mapred.JobClient: Counters: 17
11/07/21 15:45:45 INFO mapred.JobClient:   Job Counters
11/07/21 15:45:45 INFO mapred.JobClient:     Launched reduce tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:     Rack-local map tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:     Launched map tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:   FileSystemCounters
11/07/21 15:45:45 INFO mapred.JobClient:     FILE_BYTES_READ=228
11/07/21 15:45:45 INFO mapred.JobClient:     HDFS_BYTES_READ=126
11/07/21 15:45:45 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=488
11/07/21 15:45:45 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=146
11/07/21 15:45:45 INFO mapred.JobClient:   Map-Reduce Framework
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce input groups=19
11/07/21 15:45:45 INFO mapred.JobClient:     Combine output records=19
11/07/21 15:45:45 INFO mapred.JobClient:     Map input records=8
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce shuffle bytes=228
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce output records=19
11/07/21 15:45:45 INFO mapred.JobClient:     Spilled Records=38
11/07/21 15:45:45 INFO mapred.JobClient:     Map output bytes=211
11/07/21 15:45:45 INFO mapred.JobClient:     Combine input records=22
11/07/21 15:45:45 INFO mapred.JobClient:     Map output records=22
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce input records=19
[root@master bin]# hadoop dfs -ls /
Found 6 items
-rw-r--r--   1 root supergroup        126 /copyOfTest.c
-rw-r--r--   1 root supergroup         26 /listOfDatanode
drwxr-xr-x   - root supergroup          0 /output
-rw-r--r--   1 root supergroup      10400 /test.txt
drwxr-xr-x   - root supergroup          0 /tmp
drwxr-xr-x   - root supergroup          0 /wordcount
[root@master bin]# hadoop dfs -ls /wordcount
Found 2 items
drwxr-xr-x   - root supergroup          0 /wordcount/_logs
-rw-r--r--   1 root supergroup        146 /wordcount/part-r-00000
[root@master bin]# hadoop dfs -cat /wordcount/part-r-00000
2011-07-21    1
file    1
hadoop    1
system!    1
A    1
aimed    1
at    1
coping    1
file    3
from    1
from:fora    1
is    1
Local    1
System    1
Thank    1
The    1
This    2
to    1
you!    1
[root@master bin]#
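The counters show the combiner at work: the mappers emit 22 records (Map output records = 22), the combiner collapses the duplicated words into 19 (Combine input records = 22, Combine output records = 19), so the reducer receives 19 records and emits 19 counted words (Reduce input records = Reduce output records = 19).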