package main.java;

import java.io.*;
import java.io.IOException;
import java.net.URI;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.*;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* Unzip the tar.gz zip GZ package where the data source and output directory are HDFs
*
*/
public class Gziphdfs {
Private Bufferedoutputstream Bufferedoutputstream;
String zipfilename = null;
Public Gziphdfs (String fileName) {
This.zipfilename = FileName;
}
/*
* Execute the Portal, Rarfilename for the file path to be extracted (specific to the file), Destdir to extract the target path path to HDFs
*/
Public list<string> Untargzfile (string rarfilename, String destDir) throws IOException {
Gziphdfs Gziphdfs = new Gziphdfs (rarfilename);
Configuration conf = new configuration ();
FileSystem fs = Filesystem.get (Uri.create (DestDir), conf);
Boolean result = Fs.isdirectory (new Path (DestDir));
if (!result) {
Fs.mkdirs (New Path (DestDir));
}
String outputdirectory = DestDir;
list<string> r = gziphdfs.defuntargzfile (OutputDirectory, FS);
Fs.close ();
return R;
}
Public list<string> defuntargzfile (String outputdirectory, FileSystem FS) {
FileInputStream FIS = null;
Archiveinputstream in = null;
Bufferedinputstream bufferedinputstream = null;
list<string> tarlist = new linkedlist<string> ();
try {
Fsdatainputstream Hdfsinputstream = Fs.open (new Path (Zipfilename));
Gzipinputstream is = new Gzipinputstream (New Bufferedinputstream (
Hdfsinputstream));
in = new Archivestreamfactory (). Createarchiveinputstream ("Tar", is);
Bufferedinputstream = new Bufferedinputstream (in);
Tararchiveentry entry = (tararchiveentry) in.getnextentry ();
while (entry! = NULL) {
String name = Entry.getname ();
string[] names = Name.split ("/");
String fileName = outputdirectory;
for (int i = 0; i < names.length; i++) {
String str = names[i];
filename = filename + "/" + str;
}
Fsdataoutputstream Hdfsoutstream = fs.create (new Path (FileName));
Bufferedoutputstream = new Bufferedoutputstream (
Hdfsoutstream);
int b;
while ((b = Bufferedinputstream.read ())! =-1) {
Bufferedoutputstream.write (b);
}
Bufferedoutputstream.flush ();
Bufferedoutputstream.close ();
Entry = (tararchiveentry) in.getnextentry ();
Tarlist.add (name);
}
} catch (FileNotFoundException e) {
E.printstacktrace ();
} catch (IOException e) {
E.printstacktrace ();
} catch (Archiveexception e) {
E.printstacktrace ();
} finally {
try {
if (bufferedinputstream! = null) {
Bufferedinputstream.close ();
}
} catch (IOException e) {
E.printstacktrace ();
}
}
return tarlist;
}
/*
* Execute the Portal, Rarfilename for the file path to be extracted (specific to the file), Destdir to extract the target path path to HDFs
*/
Public list<string> Unzipfile (string rarfilename, String destDir) throws IOException {
Gziphdfs Gziphdfs = new Gziphdfs (rarfilename);
Configuration conf = new configuration ();
FileSystem fs = Filesystem.get (Uri.create (DestDir), conf);
Boolean result = Fs.isdirectory (new Path (DestDir));
if (!result) {
Fs.mkdirs (New Path (DestDir));
}
String outputdirectory = DestDir;
list<string> r = gziphdfs.defunzipfile (OutputDirectory, FS);
Fs.close ();
return R;
}
Public list<string> defunzipfile (String outputdirectory, FileSystem FS) {
FileInputStream FIS = null;
Archiveinputstream in = null;
Bufferedinputstream bufferedinputstream = null;
list<string> ziplist = new linkedlist<string> ();
try {
Fsdatainputstream Hdfsinputstream = Fs.open (new Path (Zipfilename));
Zipinputstream is = new Zipinputstream (New Bufferedinputstream (
Hdfsinputstream));
Bufferedinputstream = new Bufferedinputstream (IS);
ZipEntry entry =is.getnextentry ();
while (entry! = NULL) {
String name = Entry.getname ();
string[] names = Name.split ("/");
String fileName = outputdirectory;
for (int i = 0; i < names.length; i++) {
String str = names[i];
filename = filename + "/" + str;
}
Fsdataoutputstream Hdfsoutstream = fs.create (new Path (FileName));
Bufferedoutputstream = new Bufferedoutputstream (
Hdfsoutstream);
int b;
while ((b = Bufferedinputstream.read ())! =-1) {
Bufferedoutputstream.write (b);
}
Bufferedoutputstream.flush ();
Bufferedoutputstream.close ();
Entry = (zipentry) is.getnextentry ();
Ziplist.add (name);
}
} catch (FileNotFoundException e) {
E.printstacktrace ();
} catch (IOException e) {
E.printstacktrace ();
} finally {
try {
if (bufferedinputstream! = null) {
Bufferedinputstream.close ();
}
} catch (IOException e) {
E.printstacktrace ();
}
}
return ziplist;
}
/*
* Execute the Portal, Rarfilename for the file path to be extracted (specific to the file), Destdir to extract the target path path to HDFs
*/
Public list<string> Ungzipfile (string rarfilename, String destDir) throws IOException {
Gziphdfs Gziphdfs = new Gziphdfs (rarfilename);
Configuration conf = new configuration ();
FileSystem fs = Filesystem.get (Uri.create (DestDir), conf);
Boolean result = Fs.isdirectory (new Path (DestDir));
if (!result) {
Fs.mkdirs (New Path (DestDir));
}
String outputdirectory = DestDir;
list<string> r = gziphdfs.defungzipfile (OutputDirectory, FS);
Fs.close ();
return R;
}
Public list<string> defungzipfile (String outputdirectory, FileSystem FS) {
FileInputStream FIS = null;
Archiveinputstream in = null;
Bufferedinputstream bufferedinputstream = null;
list<string> tarlist = new linkedlist<string> ();
try {
Fsdatainputstream Hdfsinputstream = Fs.open (new Path (Zipfilename));
Gzipcompressorinputstream is = new Gzipcompressorinputstream (New Bufferedinputstream (
Hdfsinputstream));
Bufferedinputstream = new Bufferedinputstream (IS);
string[] NameList = Zipfilename.split ("/");
String name=namelist[namelist.length-1].replace (". Gz", "");
String fileName = outputdirectory+ "/" +name;
Fsdataoutputstream Hdfsoutstream = fs.create (new Path (FileName));
Bufferedoutputstream = new Bufferedoutputstream (
Hdfsoutstream);
int b;
while ((b = Bufferedinputstream.read ())! =-1) {
Bufferedoutputstream.write (b);
}
Bufferedoutputstream.flush ();
Bufferedoutputstream.close ();
Tarlist.add (name);
} catch (FileNotFoundException e) {
E.printstacktrace ();
} catch (IOException e) {
E.printstacktrace ();
} finally {
try {
if (bufferedinputstream! = null) {
Bufferedinputstream.close ();
}
} catch (IOException e) {
E.printstacktrace ();
}
}
return tarlist;
}
}
// Java example: read .gz, .zip and .tar.gz archives from HDFS, decompress them, and save the results back to HDFS.