Configuration file
In the configuration files below, replace m103 with the address of your HDFS service.
To access files on HDFS from the Java client, the crucial piece is the configuration file hadoop-0.20.2/conf/core-site.xml. I originally lost a lot of time here: I could not even connect to HDFS, and files could not be created or read.
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- global properties -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/zhangzk/hadoop</value>
    <description>A base for other temporary directories.</description>
  </property>
  <!-- file system properties -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://linux-zzk-113:9000</value>
  </property>
</configuration>
Configuration item hadoop.tmp.dir: on the name node this is the directory where the metadata lives; on a data node it is the directory where the file data is stored on that node.
Configuration item fs.default.name: the IP address (or host name) and port of the name node; the default is file:///. For the Java API, a client must use the URL configured here to connect to HDFS; data nodes also use this URL to reach the name node.
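As a quick illustration, a client only needs this fs.default.name URI to obtain a FileSystem handle. The following is a minimal sketch, not part of the original example: the host name linux-zzk-113 and port 9000 come from the core-site.xml above, and the path /user/zhangzk is just a placeholder.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
// Minimal sketch: obtain a FileSystem handle from the fs.default.name URI.
public class HdfsConnectSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The URI must match fs.default.name; otherwise the client falls back
        // to the local file system (file:///) and HDFS paths cannot be resolved.
        FileSystem fs = FileSystem.get(URI.create("hdfs://linux-zzk-113:9000"), conf);
        System.out.println(fs.exists(new Path("/user/zhangzk")));  // placeholder path
        fs.close();
    }
}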
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Autogenerated by Cloudera Manager -->
<configuration>
  <property><name>dfs.namenode.name.dir</name><value>file:///mnt/sdc1/dfs/nn</value></property>
  <property><name>dfs.namenode.servicerpc-address</name><value>m103:8022</value></property>
  <property><name>dfs.https.address</name><value>m103:50470</value></property>
  <property><name>dfs.https.port</name><value>50470</value></property>
  <property><name>dfs.namenode.http-address</name><value>m103:50070</value></property>
  <property><name>dfs.replication</name><value>3</value></property>
  <property><name>dfs.blocksize</name><value>134217728</value></property>
  <property><name>dfs.client.use.datanode.hostname</name><value>false</value></property>
  <property><name>fs.permissions.umask-mode</name><value>022</value></property>
  <property><name>dfs.namenode.acls.enabled</name><value>false</value></property>
  <property><name>dfs.block.local-path-access.user</name><value>cloudera-scm</value></property>
  <property><name>dfs.client.read.shortcircuit</name><value>false</value></property>
  <property><name>dfs.domain.socket.path</name><value>/var/run/hdfs-sockets/dn</value></property>
  <property><name>dfs.client.read.shortcircuit.skip.checksum</name><value>false</value></property>
  <property><name>dfs.client.domain.socket.data.traffic</name><value>false</value></property>
  <property><name>dfs.datanode.hdfs-blocks-metadata.enabled</name><value>true</value></property>
  <property><name>fs.http.impl</name><value>com.scistor.datavision.fs.HttpFileSystem</value></property>
</configuration>
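Of these hdfs-site.xml settings, the ones a plain Java client usually cares about are dfs.replication and dfs.blocksize. Below is a small sketch, not from the original post, of how a client can load the file explicitly and inspect those values; the path /etc/hadoop/conf/hdfs-site.xml is an assumption and should be adjusted to wherever the file lives on your machine.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
// Sketch: load the cluster's hdfs-site.xml and print the replication factor
// and block size the client will use.
public class HdfsSiteSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));  // assumed location
        System.out.println("dfs.replication = " + conf.get("dfs.replication", "3"));
        System.out.println("dfs.blocksize   = " + conf.get("dfs.blocksize", "134217728"));
    }
}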
mapred-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Autogenerated by Cloudera Manager -->
<configuration>
  <property><name>mapreduce.job.split.metainfo.maxsize</name><value>10000000</value></property>
  <property><name>mapreduce.job.counters.max</name><value>120</value></property>
  <property><name>mapreduce.output.fileoutputformat.compress</name><value>true</value></property>
  <property><name>mapreduce.output.fileoutputformat.compress.type</name><value>BLOCK</value></property>
  <property><name>mapreduce.output.fileoutputformat.compress.codec</name><value>org.apache.hadoop.io.compress.SnappyCodec</value></property>
  <property><name>mapreduce.map.output.compress.codec</name><value>org.apache.hadoop.io.compress.SnappyCodec</value></property>
  <property><name>mapreduce.map.output.compress</name><value>true</value></property>
  <property><name>zlib.compress.level</name><value>DEFAULT_COMPRESSION</value></property>
  <property><name>mapreduce.task.io.sort.factor</name><value>64</value></property>
  <property><name>mapreduce.map.sort.spill.percent</name><value>0.8</value></property>
  <property><name>mapreduce.reduce.shuffle.parallelcopies</name><value>10</value></property>
  <property><name>mapreduce.task.timeout</name><value>600000</value></property>
  <property><name>mapreduce.client.submit.file.replication</name><value>1</value></property>
  <property><name>mapreduce.job.reduces</name><value>24</value></property>
  <property><name>mapreduce.task.io.sort.mb</name><value>256</value></property>
  <property><name>mapreduce.map.speculative</name><value>false</value></property>
  <property><name>mapreduce.reduce.speculative</name><value>false</value></property>
  <property><name>mapreduce.job.reduce.slowstart.completedmaps</name><value>0.8</value></property>
  <property><name>mapreduce.jobhistory.address</name><value>m103:10020</value></property>
  <property><name>mapreduce.jobhistory.webapp.address</name><value>m103:19888</value></property>
  <property><name>mapreduce.jobhistory.webapp.https.address</name><value>m103:19890</value></property>
  <property><name>mapreduce.jobhistory.admin.address</name><value>m103:10033</value></property>
  <property><name>mapreduce.framework.name</name><value>yarn</value></property>
  <property><name>yarn.app.mapreduce.am.staging-dir</name><value>/user</value></property>
  <property><name>mapreduce.am.max-attempts</name><value>2</value></property>
  <property><name>yarn.app.mapreduce.am.resource.mb</name><value>2048</value></property>
  <property><name>yarn.app.mapreduce.am.resource.cpu-vcores</name><value>1</value></property>
  <property><name>mapreduce.job.ubertask.enable</name><value>false</value></property>
  <property><name>yarn.app.mapreduce.am.command-opts</name><value>-Djava.net.preferIPv4Stack=true -Xmx1717986918</value></property>
  <property><name>mapreduce.map.java.opts</name><value>-Djava.net.preferIPv4Stack=true -Xmx1717986918</value></property>
  <property><name>mapreduce.reduce.java.opts</name><value>-Djava.net.preferIPv4Stack=true -Xmx2576980378</value></property>
  <property><name>yarn.app.mapreduce.am.admin.user.env</name><value>LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native:$JAVA_LIBRARY_PATH</value></property>
  <property><name>mapreduce.map.memory.mb</name><value>2048</value></property>
  <property><name>mapreduce.map.cpu.vcores</name><value>1</value></property>
  <property><name>mapreduce.reduce.memory.mb</name><value>3072</value></property>
  <property><name>mapreduce.reduce.cpu.vcores</name><value>1</value></property>
  <property><name>mapreduce.application.classpath</name><value>$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,$MR2_CLASSPATH,$CDH_HCAT_HOME/share/hcatalog/*,$CDH_HIVE_HOME/lib/*,/etc/hive/conf,/opt/cloudera/parcels/CDH/lib/udps/*</value></property>
  <property><name>mapreduce.admin.user.env</name><value>LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native:$JAVA_LIBRARY_PATH</value></property>
  <property><name>mapreduce.shuffle.max.connections</name><value>80</value></property>
</configuration>
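Most of these mapred-site.xml values can also be overridden per job through a Configuration object before the job is submitted. The sketch below is only an illustration of that, not code from the original post; it assumes the Hadoop 2.x mapreduce API (Job.getInstance), the job name and overridden values are placeholders, and the mapper/reducer setup is omitted.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
// Sketch: override a few mapred-site.xml settings for a single job.
public class JobConfigSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("mapreduce.framework.name", "yarn");     // same as the site file
        conf.setInt("mapreduce.job.reduces", 8);          // override the site default of 24
        conf.setBoolean("mapreduce.map.output.compress", true);
        Job job = Job.getInstance(conf, "config-override-demo");  // placeholder job name
        // setJarByClass, mapper/reducer classes and input/output paths would follow here
        System.out.println("reduces = " + job.getConfiguration().get("mapreduce.job.reduces"));
    }
}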
Using the Java API to access HDFS files and directories
package com.demo.hdfs;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

/**
 * @author zhangzk
 */
public class FileCopyToHdfs {

    public static void main(String[] args) throws Exception {
        try {
            // uploadToHdfs();
            deleteFromHdfs();
            getDirectoryFromHdfs();
            appendToHdfs();
            readFromHdfs();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            System.out.println("SUCCESS");
        }
    }

    /** Upload a local file to HDFS. */
    private static void uploadToHdfs() throws FileNotFoundException, IOException {
        String localSrc = "D://qq.txt";
        String dst = "hdfs://192.168.0.113:9000/user/zhangzk/qq.txt";
        InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        // Print a dot each time the upload makes progress.
        OutputStream out = fs.create(new Path(dst), new Progressable() {
            public void progress() {
                System.out.print(".");
            }
        });
        IOUtils.copyBytes(in, out, 4096, true);
    }

    /** Read a file from HDFS and copy it to the local file system. */
    private static void readFromHdfs() throws FileNotFoundException, IOException {
        String dst = "hdfs://192.168.0.113:9000/user/zhangzk/qq.txt";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        FSDataInputStream hdfsInStream = fs.open(new Path(dst));

        OutputStream out = new FileOutputStream("D:/qq-hdfs.txt");
        byte[] ioBuffer = new byte[1024];
        int readLen = hdfsInStream.read(ioBuffer);
        while (-1 != readLen) {
            out.write(ioBuffer, 0, readLen);
            readLen = hdfsInStream.read(ioBuffer);
        }
        out.close();
        hdfsInStream.close();
        fs.close();
    }

    /**
     * Append content to the end of a file on HDFS.
     * Note: to update files you need to add
     * <property><name>dfs.support.append</name><value>true</value></property>
     * to hdfs-site.xml.
     */
    private static void appendToHdfs() throws FileNotFoundException, IOException {
        String dst = "hdfs://192.168.0.113:9000/user/zhangzk/qq.txt";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        FSDataOutputStream out = fs.append(new Path(dst));
        byte[] bytes = "zhangzk add by hdfs java api".getBytes();
        out.write(bytes, 0, bytes.length);
        out.close();
        fs.close();
    }

    /** Delete a file from HDFS. */
    private static void deleteFromHdfs() throws FileNotFoundException, IOException {
        String dst = "hdfs://192.168.0.113:9000/user/zhangzk/qq-bak.txt";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        fs.deleteOnExit(new Path(dst));
        fs.close();
    }

    /** List the files and directories under an HDFS path. */
    private static void getDirectoryFromHdfs() throws FileNotFoundException, IOException {
        String dst = "hdfs://192.168.0.113:9000/user/zhangzk";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        FileStatus[] fileList = fs.listStatus(new Path(dst));
        int size = fileList.length;
        for (int i = 0; i < size; i++) {
            System.out.println("name:" + fileList[i].getPath().getName()
                    + "\t\tsize:" + fileList[i].getLen());
        }
        fs.close();
    }
}
Note: the append operation is not supported as of the hadoop-0.21 release; for details on append you can refer to the relevant document on JavaEye.
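If you still want to experiment with append on a release where it is optional, the dfs.support.append flag mentioned in the code comment above can also be set programmatically before opening the stream. The sketch below is an illustration under that assumption, not part of the original example; whether fs.append() succeeds depends on the Hadoop version, as noted.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
// Sketch: enable append support on the client side before calling fs.append().
// On versions where append is disabled or unsupported this still throws an
// IOException, so treat it as experimental.
public class AppendSketch {
    public static void main(String[] args) throws Exception {
        String dst = "hdfs://192.168.0.113:9000/user/zhangzk/qq.txt";
        Configuration conf = new Configuration();
        conf.setBoolean("dfs.support.append", true);
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        FSDataOutputStream out = fs.append(new Path(dst));
        out.write("appended line\n".getBytes());
        out.close();
        fs.close();
    }
}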