Function of this code: get the datanode names and write them to a file on HDFS, then count the words in hdfs://copyOfTest.c and write the counts to hdfs://wordcount. Unlike Hadoop's bundled WordCount example, which reads its input from the local file system, this program reads its input directly from HDFS (it first copies the local file /test.c into HDFS itself).
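The listing below picks up the NameNode address from the Hadoop configuration files on the classpath. If you run it on a machine without those files, the default file system has to be set explicitly; a minimal sketch, assuming a NameNode at hdfs://master:9000 (that address is an assumption, substitute your own):

Configuration conf = new Configuration();
// Hypothetical NameNode address; replace with your cluster's value.
// fs.default.name is the configuration key used by Hadoop of this era.
conf.set("fs.default.name", "hdfs://master:9000");
FileSystem fs = FileSystem.get(conf);

The full listing follows.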
package com.fora;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FileOperate {

    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        init(); /* copy the input file to HDFS and list the datanodes */

        Configuration conf = new Configuration();
        Job job = new Job(conf, "word count");
        job.setJarByClass(FileOperate.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        /* set the input and output paths */
        FileInputFormat.addInputPath(job, new Path("hdfs://copyOfTest.c"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://wordcount"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            /* emit (word, 1) for every token in the line */
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            /* sum the counts for each word */
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void init() throws IOException {
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);

        /* copy the local file to HDFS */
        String srcFile = "/test.c";
        String dstFile = "hdfs://copyOfTest.c";
        hdfs.copyFromLocalFile(new Path(srcFile), new Path(dstFile));
        System.out.print("Copy success!\n");

        /* print the block size of the copied file */
        Path path = new Path("hdfs://copyOfTest.c");
        FileStatus fileStatus = hdfs.getFileStatus(path);
        System.out.println(fileStatus.getBlockSize());

        /* get the list of datanodes */
        FileSystem fs = FileSystem.get(config);
        DistributedFileSystem hdfs1 = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = hdfs1.getDataNodeStats();

        /* create a file on HDFS and write the datanode names into it */
        Path outputPath = new Path("hdfs://output/listOfDatanode");
        FSDataOutputStream outputStream = hdfs.create(outputPath);
        String[] names = new String[dataNodeStats.length];
        for (int i = 0; i < dataNodeStats.length; i++) {
            names[i] = dataNodeStats[i].getHostName(); /* get the hostname of each datanode */
            System.out.println(names[i]);
            /* write the datanode name to the file on HDFS */
            outputStream.write(names[i].getBytes(), 0, names[i].length());
        }
        outputStream.close(); /* close the stream so the data is actually flushed */
    }
}
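To check the datanode list that init() writes without going through the shell, a short reader sketch (not part of the original program; it reuses the path from the listing above):

package com.fora;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadDatanodeList {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        /* the same path that FileOperate.init() writes to */
        FSDataInputStream in = hdfs.open(new Path("hdfs://output/listOfDatanode"));
        /* copy the file contents to stdout; the hostnames appear back to back
           because init() writes no separator between them */
        IOUtils.copyBytes(in, System.out, 4096, true);
    }
}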
Running result:
[root@master bin]# hadoop jar HDFS.jar com.fora.FileOperate
Copy success!
67108864
master
slave1
11/07/21 15:45:23 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
11/07/21 15:45:23 INFO input.FileInputFormat: Total input paths to process : 1
11/07/21 15:45:23 INFO mapred.JobClient: Running job: job_20110721_17_0003
11/07/21 15:45:24 INFO mapred.JobClient:  map 0% reduce 0%
11/07/21 15:45:31 INFO mapred.JobClient:  map 100% reduce 0%
11/07/21 15:45:43 INFO mapred.JobClient:  map 100% reduce 100%
11/07/21 15:45:45 INFO mapred.JobClient: Job complete: job_20110721_17_0003
11/07/21 15:45:45 INFO mapred.JobClient: Counters: 17
11/07/21 15:45:45 INFO mapred.JobClient:   Job Counters
11/07/21 15:45:45 INFO mapred.JobClient:     Launched reduce tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:     Rack-local map tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:     Launched map tasks=1
11/07/21 15:45:45 INFO mapred.JobClient:   FileSystemCounters
11/07/21 15:45:45 INFO mapred.JobClient:     FILE_BYTES_READ=228
11/07/21 15:45:45 INFO mapred.JobClient:     HDFS_BYTES_READ=126
11/07/21 15:45:45 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=488
11/07/21 15:45:45 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=146
11/07/21 15:45:45 INFO mapred.JobClient:   Map-Reduce Framework
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce input groups=19
11/07/21 15:45:45 INFO mapred.JobClient:     Combine output records=19
11/07/21 15:45:45 INFO mapred.JobClient:     Map input records=8
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce shuffle bytes=228
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce output records=19
11/07/21 15:45:45 INFO mapred.JobClient:     Spilled Records=38
11/07/21 15:45:45 INFO mapred.JobClient:     Map output bytes=211
11/07/21 15:45:45 INFO mapred.JobClient:     Combine input records=22
11/07/21 15:45:45 INFO mapred.JobClient:     Map output records=22
11/07/21 15:45:45 INFO mapred.JobClient:     Reduce input records=19
[root@master bin]# hadoop dfs -ls /
Found 6 items
-rw-r--r--   1 root supergroup        126 /copyOfTest.c
-rw-r--r--   1 root supergroup         26 /listOfDatanode
drwxr-xr-x   - root supergroup          0 /output
-rw-r--r--   1 root supergroup      10400 /test.txt
drwxr-xr-x   - root supergroup          0 /tmp
drwxr-xr-x   - root supergroup          0 /wordcount
[root@master bin]# hadoop dfs -ls /wordcount
Found 2 items
drwxr-xr-x   - root supergroup          0 /wordcount/_logs
-rw-r--r--   1 root supergroup        146 /wordcount/part-r-00000
[root@master bin]# hadoop dfs -cat /wordcount/part-r-00000
2011-07-21    1
file    1
hadoop    1
system!    1
A    1
aimed    1
at    1
coping    1
file    3
from    1
from:fora    1
is    1
Local    1
System    1
Thank    1
The    1
This    2
to    1
you!    1
[root@master bin]#
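The counters show the combiner at work: the mappers emit 22 records (Map output records = 22), the combiner collapses the duplicated words into 19 (Combine input records = 22, Combine output records = 19), so the reducer receives 19 records and emits 19 counted words (Reduce input records = Reduce output records = 19).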