Using the Java API to Read Data from HDFS
This article shows how to call the Java API to read data from HDFS. The example program streams a text file from HDFS line by line, converts each line into a JSON string, and writes the result into MongoDB.
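Before the full program, here is a minimal, self-contained sketch of the core read path. The class name, NameNode URI, and file path are placeholders of mine, and the try-with-resources form assumes Java 7 or later:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsReadSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Connect to the NameNode and open the file for streaming reads.
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        try (FSDataInputStream in = fs.open(new Path("/tmp/example.txt"));
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}

The full program below follows exactly this pattern, adding argument handling, optional decompression, and a MongoDB write per line.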
package mongodb;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.Arrays;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.util.ReflectionUtils;

// Holds one "id:weight" pair; sorts by weight in descending order.
class Item implements Comparable<Item> {
    String value;
    double weight;

    public Item(String v) {
        value = v;
        weight = Double.parseDouble(value.split(":")[1]);
    }

    public int compareTo(Item o) {
        return this.weight == o.weight ? 0 : (this.weight > o.weight ? -1 : 1);
    }
}

public class BatchUpdateSim {

    // Turns one tab-separated line "key<TAB>id1:w1,id2:w2,..." into a JSON string
    // of the form {"key":key,"values":[{id1:w1},...]}, values sorted by weight.
    public static String parse(String str) {
        // String str = "9000320718001:1.0,2077635:1.0,2053809:1.0";
        String[] fields = str.split("\t");
        String[] valueArray = fields[1].split(",");
        Item[] items = new Item[valueArray.length];
        for (int i = 0; i < items.length; i++) {
            items[i] = new Item(valueArray[i]);
        }
        Arrays.sort(items);
        for (int i = 0; i < valueArray.length; i++) {
            valueArray[i] = "{" + items[i].value + "}";
        }
        String valueStr = StringUtils.join(valueArray, ",");
        return "{\"key\":" + fields[0] + ",\"values\":[" + valueStr + "]}";
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException {
        Configuration conf = new Configuration();
        conf.addResource("/usr/local/hadoop/conf/core-site.xml");
        conf.addResource("/usr/local/hadoop/conf/hdfs-site.xml");
        conf.addResource("/usr/local/hadoop/conf/mapred-site.xml");

        // String hdfsUri = "hdfs://webdm-cluster";
        // String hdfsUri = "hdfs://localhost:9000";
        String hdfsUri = args[0];
        FileSystem hdfs = FileSystem.get(URI.create(hdfsUri), conf);

        String filePath = args[1];
        FSDataInputStream fin = hdfs.open(new Path(filePath));

        // Alternative 1: pick a codec automatically from the HDFS file's suffix.
        // CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        // CompressionCodec codec = factory.getCodec(new Path(filePath));
        // Alternative 2: force a specific codec.
        // Class<?> codecClass = Class.forName("com.hadoop.compression.lzo.LzoCodec");
        // CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);

        BufferedReader reader = null;
        String line;
        int count = 0;
        RecsysDB db = RecsysDB.getInstance(); // the author's MongoDB helper (sketched below)
        String itemSimName = args[2];
        try {
            reader = new BufferedReader(new InputStreamReader(fin, "UTF-8"));
            /* Used when the file is compressed:
            if (codec != null) {
                System.out.println("identified the compression type");
                CompressionInputStream comInputStream = codec.createInputStream(fin);
                reader = new BufferedReader(new InputStreamReader(comInputStream));
            } */
            while ((line = reader.readLine()) != null) {
                // if (count == 0) System.out.println(line);
                String strJson = parse(line);
                db.updateItemSim(itemSimName, strJson);
                count++;
                if (count % 1000 == 0) {
                    System.out.println("count:" + count);
                }
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
            System.out.println(count);
        }
    }
}
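RecsysDB is the author's MongoDB helper class and its source is not part of this post. Below is a hypothetical reconstruction of what a minimal version might look like; it assumes the mongo-java-driver 2.12.4 that appears in the script further down, and the host, port, and database name are invented for illustration:

package mongodb;

import java.net.UnknownHostException;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.util.JSON;

// Hypothetical sketch: a singleton wrapping one MongoClient connection.
public class RecsysDB {
    private static RecsysDB instance;
    private final DB db;

    private RecsysDB() throws UnknownHostException {
        // Host, port, and database name are assumptions, not from the original post.
        MongoClient mongo = new MongoClient("localhost", 27017);
        db = mongo.getDB("recsys");
    }

    public static synchronized RecsysDB getInstance() {
        if (instance == null) {
            try {
                instance = new RecsysDB();
            } catch (UnknownHostException e) {
                throw new RuntimeException(e);
            }
        }
        return instance;
    }

    // Upserts one similarity record, keyed by its "key" field.
    public void updateItemSim(String collName, String strJson) {
        DBCollection coll = db.getCollection(collName);
        DBObject doc = (DBObject) JSON.parse(strJson);
        coll.update(new BasicDBObject("key", doc.get("key")), doc, true, false);
    }
}

Using an upsert (third argument true) means re-running the batch overwrites existing records for the same key instead of inserting duplicates.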
The following is a shell script that runs the program above with the plain java command; it has to put a pile of Hadoop-related jars on the classpath by hand. (PS: I later found there is a better way, using the hadoop command, because when hadoop launches a program it loads those jars itself, so there is no need to add them manually; see the sketch after the script.)
HD_CORE="/usr/local/hadoop/hadoop-core-1.1.2.jar"
S4J="/usr/local/hadoop/lib/slf4j-log4j12-1.6.1.jar"
S4JAPI="/usr/local/hadoop/lib/slf4j-api-1.6.1.jar"
LOG4J="/usr/local/hadoop/lib/log4j-1.2.17.jar"
GUVA="/usr/local/hadoop/lib/guava-11.0.2.jar"
CLOG="/usr/local/hadoop/lib/commons-logging-1.1.1.jar"
CCONF="/usr/local/hadoop/lib/commons-configuration-1.6.jar"
CL="/usr/local/hadoop/lib/commons-lang-2.5.jar"
CCLI="/usr/local/hadoop/lib/commons-cli-1.2.jar"
PROTOBUF="/usr/local/hadoop/lib/protobuf-java-2.4.0a.jar"
HDFS="/usr/local/hadoop/lib/hadoop-hdfs-1.1.2.jar"
MONGODB="/data/home/liulian/linger/jars/mongo-java-driver-2.12.4.jar"

libs=($HD_CORE $S4J $S4JAPI $LOG4J $GUVA $CLOG $CCONF $CL $CCLI $PROTOBUF $HDFS $MONGODB)

# Join all jar paths into one colon-separated classpath string.
libstr=""
for jarlib in ${libs[@]}; do
    libstr=${jarlib}":"${libstr}
done
echo $libstr

java -Xbootclasspath/a:${libstr} -jar ../jars/updateSim.jar hdfs://10.200.91.164:9000 tv_sim/result/000000_0 tvItemSimColl
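Here is a minimal sketch of that simpler hadoop-based invocation. It assumes updateSim.jar declares BatchUpdateSim as its Main-Class; the MongoDB driver is not part of the Hadoop distribution, so it still has to be supplied, for example through the HADOOP_CLASSPATH environment variable:

# Assumption: updateSim.jar names BatchUpdateSim in its manifest's Main-Class.
# Hadoop's own jars are added by the hadoop launcher; only the MongoDB driver is extra.
export HADOOP_CLASSPATH=/data/home/liulian/linger/jars/mongo-java-driver-2.12.4.jar
hadoop jar ../jars/updateSim.jar hdfs://10.200.91.164:9000 tv_sim/result/000000_0 tvItemSimColl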
Original article: http://blog.csdn.net/lingerlanlan/article/details/42178675 (author: linger)