The Hadoop FileSystem API provides methods for listing files, with which a directory tree can be traversed:
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BatchSubmitMain {

    public static void main(String[] args) throws Exception {
        String mrTableName = args[0];
        String fglibTableName = args[1];

        Configuration conf = new Configuration();
        /*
         * <property>
         *   <name>fs.defaultFS</name>
         *   <value>hdfs://hcluster</value>
         * </property>
         */
        conf.set("fs.defaultFS", "hdfs://hcluster");
        FileSystem fileSystem = FileSystem.get(conf);

        String mrFilePath = "/myuser/hivedb/" + mrTableName;
        String fglibFilePath = "/myuser/hivedb/" + fglibTableName;

        System.out.println(mrFilePath);
        List<String> mrObjectIdItems = getObjectIdItems(fileSystem, mrFilePath);

        System.out.println(fglibFilePath);
        List<String> fglibObjectIdItems = getObjectIdItems(fileSystem, fglibFilePath);

        // Keep only the objectIds that appear in both tables
        List<String> objectIdItems = new ArrayList<>();
        for (String mrObjectId : mrObjectIdItems) {
            for (String fglibObjectId : fglibObjectIdItems) {
                if (mrObjectId.equals(fglibObjectId)) {
                    objectIdItems.add(mrObjectId);
                }
            }
        }

        String submitShPath = "/app/myaccount/service/submitsparkjob.sh";

        // One thread per objectId; the latch lets main() wait until all submissions finish
        CountDownLatch threadSignal = new CountDownLatch(objectIdItems.size());
        for (int i = 0; i < objectIdItems.size(); i++) {
            String objectId = objectIdItems.get(i);
            Thread thread = new ImportThread(objectId, submitShPath, threadSignal);
            thread.start();
        }
        threadSignal.await();

        System.out.println(Thread.currentThread().getName() + " complete");
    }

    private static List<String> getObjectIdItems(FileSystem fileSystem, String filePath)
            throws FileNotFoundException, IOException {
        List<String> objectItems = new ArrayList<>();
        Path path = new Path(filePath);
        // Get the list of entries under the table directory
        FileStatus[] files = fileSystem.listStatus(path);
        // Extract the objectid value from each partition directory name
        for (int i = 0; i < files.length; i++) {
            try {
                if (files[i].isDirectory()) {
                    String[] fileItems = files[i].getPath().getName().split("/");
                    String objectId = fileItems[fileItems.length - 1].replace("objectid=", "");
                    objectItems.add(objectId);
                    System.out.println(objectId);
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return objectItems;
    }

    /**
     * @param hdfs FileSystem object
     * @param path file path
     */
    public static void iteratorShowFiles(FileSystem hdfs, Path path) {
        try {
            if (hdfs == null || path == null) {
                return;
            }
            // Get the list of entries under the current path
            FileStatus[] files = hdfs.listStatus(path);
            // Display file information
            for (int i = 0; i < files.length; i++) {
                try {
                    if (files[i].isDirectory()) {
                        System.out.print(">>>" + files[i].getPath() + ", dir owner: " + files[i].getOwner());
                        // Recurse into the sub-directory
                        iteratorShowFiles(hdfs, files[i].getPath());
                    } else if (files[i].isFile()) {
                        System.out.print("   " + files[i].getPath() + ", length: " + files[i].getLen()
                                + ", owner: " + files[i].getOwner());
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
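For reference, the traversal helper above can be exercised on its own. The sketch below is a minimal, hypothetical driver: it reuses the same fs.defaultFS setting and lists a Hive table directory whose partition sub-directories are assumed to follow the objectid=<value> naming that getObjectIdItems parses; the path /myuser/hivedb/mytable is only an illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ShowPartitionsMain {
    public static void main(String[] args) throws Exception {
        // Same cluster setting as BatchSubmitMain
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hcluster");
        FileSystem fileSystem = FileSystem.get(conf);

        // "/myuser/hivedb/mytable" is a hypothetical table directory;
        // its sub-directories are expected to look like .../objectid=12345
        BatchSubmitMain.iteratorShowFiles(fileSystem, new Path("/myuser/hivedb/mytable"));
    }
}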
The thread class that executes the shell script in parallel:
import java.util.concurrent.CountDownLatch;

public class ImportThread extends Thread {
    private final JavaShellInvoker javaShellInvoker = new JavaShellInvoker();
    private CountDownLatch countDownLatch;
    private String objectId;
    private String submitShPath;

    public ImportThread(String objectId, String submitShPath, CountDownLatch countDownLatch) {
        this.objectId = objectId;
        this.submitShPath = submitShPath;
        this.countDownLatch = countDownLatch;
    }

    @Override
    public void run() {
        // Print start tag
        System.out.println(Thread.currentThread().getName() + " start... "
                + this.submitShPath + " " + this.objectId);
        try {
            int result = this.javaShellInvoker.executeShell("mrraster", this.submitShPath, this.objectId);
            if (result != 0) {
                System.out.println(Thread.currentThread().getName() + " result type is error");
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println(Thread.currentThread().getName() + " - error: " + e.getMessage());
        }
        // Count the latch down by 1
        this.countDownLatch.countDown();
        // Print end tag
        System.out.println(Thread.currentThread().getName() + " complete, last "
                + this.countDownLatch.getCount() + " threads");
    }
}
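The same fan-out/fan-in pattern can also be written with an ExecutorService instead of one raw Thread per objectId, which caps how many spark-submit processes run at once. This is only a sketch of that alternative, not the article's code; the pool size of 4 and the class name are assumptions.

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class PooledSubmit {
    // Sketch: submit one shell invocation per objectId on a fixed-size pool
    public static void submitAll(List<String> objectIdItems, String submitShPath) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(4); // pool size is an assumption
        for (String objectId : objectIdItems) {
            pool.submit(() -> {
                try {
                    new JavaShellInvoker().executeShell("mrraster", submitShPath, objectId);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            });
        }
        pool.shutdown();                          // stop accepting new tasks
        pool.awaitTermination(1, TimeUnit.DAYS);  // wait for all submissions, like the CountDownLatch
    }
}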
Java code that executes the shell script:
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;

public class JavaShellInvoker {
    private static final String EXECUTE_SHELL_LOG_FILE = "./executeShell_%s_%s.log";

    public int executeShell(String shellCommandType, String shellCommand, String args) throws Exception {
        int success = 0;
        args = (args == null) ? "" : args;
        String now = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
        File logFile = new File(String.format(EXECUTE_SHELL_LOG_FILE, shellCommandType, now));

        // Run the script with sh and append both stdout and stderr to the log file
        ProcessBuilder pb = new ProcessBuilder("sh", shellCommand, args);
        pb.redirectOutput(ProcessBuilder.Redirect.appendTo(logFile));
        pb.redirectError(ProcessBuilder.Redirect.appendTo(logFile));

        Process pid = null;
        try {
            pid = pb.start();
            success = pid.waitFor();
        } catch (Exception ex) {
            success = 2;
            System.out.println("executeShell - error: " + ex.getMessage());
            throw ex;
        } finally {
            if (pid != null && pid.isAlive()) {
                success = pid.exitValue();
                pid.destroy();
            }
        }
        return success;
    }
}
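As a quick sanity check, the invoker can be called directly outside of the threads. The snippet below is a minimal sketch; the "demo" log label and the "12345" objectId are hypothetical, while the script path is the one used elsewhere in this article.

public class InvokerSmokeTest {
    public static void main(String[] args) throws Exception {
        JavaShellInvoker invoker = new JavaShellInvoker();
        // "demo" only selects the log file name; the objectId argument is a placeholder
        int exitCode = invoker.executeShell("demo", "/app/myaccount/service/submitsparkjob.sh", "12345");
        System.out.println("exit code: " + exitCode);
    }
}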
submitsparkjob.sh:
#!/bin/sh
source ./login.sh
spark-submit --master yarn-cluster \
  --class MySparkJobMainClass \
  --driver-class-path /app/myaccount/service/jars/ojdbc7.jar \
  --jars /app/myaccount/service/jars/ojdbc7.jar \
  --num-executors 20 \
  --driver-memory 6g \
  --executor-cores 1 \
  --executor-memory 8g \
  MySparkJobJar.jar $1
Command to run BatchSubmit.jar:
hadoop jar BatchSubmit.jar <mrTableName> <fglibTableName>
Hadoop API: Traverse the file partition directories and submit Spark tasks in parallel according to the data in each directory