Java: determining the file extension of a file downloaded from a URL
When downloading resources from websites, the resource URLs come in many forms, and the file extension cannot always be read directly from the link. This article combines several ways of determining the extension, for reference: (1) directly from the suffix of the URL; (2) from the Content-Disposition response header (matching the filename); (3) from the Content-Type response header, matched against a table of known content types; (4) by matching the leading bytes (file header) of the content; (5) via MimeType in the org.apache.tika package (matching according to file content).
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypes;
/** * @author GONGML * */public class Linkhelper {private static map<string, String> Types;//contenttype Collection private static list<string> extensions = new arraylist<string> ()//suffix collection public final static MAP<STR ing, string> file_type_map = new hashmap<string, string> ();//file header set static {Getallfiletype ();//initialization File type information initcontenttype (); Initialization of ContentType initextentsion ();
Initialization file suffix}/** * @param URL * @param disposition * @param contentType * @param bytearray * @return * @author GONGML * @DateTime September 7, 2017 morning 10:05:00 * @Desc get file suffix */public staTic string getfiletype (string URL, string disposition, string contenttype,byte[] bytearray) {string ext = null;
ext = gettypebyextenssion (URL);
if (ext!= null) return ext;
ext = gettypebydisposition (disposition);
if (ext!= null) return ext;
ext = Gettypebycontenttype (contentType);
if (ext!= null) return ext;
ext = Gettypebymimetype (contentType);
if (ext!= null) return ext;
ext = Getfiletypebystream (ByteArray);
if (ext!= null) return ext;
Return ". html"; /** * @param b * @return * @author GONGML * @DateTime September 7, 2017 morning 10:05:22 * @Desc via file header
Get file suffix * * private static String Getfiletypebystream (byte[] b) {if (Arrayutils.isnotempty (b)) {
B=arrayutils.subarray (b, 0, 50);
String Filetypehex = string.valueof (getfilehexstring (b)); Iterator<entry<string, string>> entryiterator = File_type_map.entryset (). iterator ();
while (Entryiterator.hasnext ()) {entry<string, string> Entry = Entryiterator.next ();
String Filetypehexvalue = Entry.getvalue ();
if (Filetypehex.touppercase (). StartsWith (Filetypehexvalue)) {return Entry.getkey ();
}} return null; /** * @param b * @return * @author GONGML * @DateTime September 7, 2017 morning 10:06:10 * @Desc byte[] Convert to 16 binary string */private static string getfilehexstring (byte[] b) {StringBuilder StringBuilder = new STRINGB
Uilder ();
if (b = = NULL | | b.length <= 0) {return null;
for (int i = 0; i < b.length i++) {int v = b[i] & 0xFF;
String HV = integer.tohexstring (v);
if (Hv.length () < 2) {stringbuilder.append (0); } stringbuilder.append (HV);
return stringbuilder.tostring (); /** * @param linkurl * @return * @author GONGML * @DateTime September 7, 2017 morning 10:06:42 * @Desc Tong
The suffix name of the link to determine the type of file * * public static string gettypebyextenssion (String linkurl) {if (Linkurl = null)
return null;
Linkurl=linkurl.tolowercase ();
for (String ext:extensions) {if (Linkurl.endswith (EXT)) {return ext;
} return null; /** * @param disposition * @return * @author GONGML * @DateTime September 7, 2017 a.m. 10:06:48 * @De SC obtains filename and file suffix through disposition in request Header/private static string Gettypebydisposition (string disposition) {string
Ext=null; if (!
Stringutils.isempty (disposition)) {disposition = Stringutils.replace (disposition, "\", "");
string[] STRs = Disposition.split (";"); For (String string:strs) {if (String.tolowercase (). IndexOf ("filename=") >= 0) {E
XT = Stringutils.substring (String, String.LastIndexOf ("."));
Break
}} return ext; /** * @param contentType * @return * @author GONGML * @DateTime September 7, 2017 a.m. 10:07:28 * @De SC gets the file suffix according to ContentType * * private static string Gettypebycontenttype (String contentType) {if (Types.conta
Inskey (ContentType)) return Types.get (ContentType);
return null; /** * @param contentType * @return * @author GONGML * @DateTime September 7, 2017 a.m. 10:07:51 * @De SC uses mimetype to judge contentType corresponding file suffix * * private static String Gettypebymimetype (String contentType) {string
Ext=null;
try {mimetypes mimetypes = Mimetypes.getdefaultmimetypes (); MimeType mimetype = Mimetypes.forname (contenTtype);
ext = mimetype.getextension ();
if (stringutils.isempty (EXT)) {ext=null;
The catch (Exception e) {} return ext; ///The file header is used to determine the occurrence of duplicates private static void Getallfiletype () {file_type_map.put (". pdf", "255044462d312e"); Adobe Acrobat (pdf) File_type_map.put (". Doc", "d0cf11e0"); MS Word file_type_map.put (". xls", "d0cf11e0"); MS Excel Note: Word is as file_type_map.put as the file header of Excel (". jpg", "ffd8ff"); JPEG (jpg) file_type_map.put (". png", "89504E47"); PNG (PNG) File_type_map.put (". gif", "47494638"); GIF (GIF) file_type_map.put (". tif", "49492a00"); TIFF (TIF) File_type_map.put (". bmp", "424D"); Windows Bitmap (BMP) File_type_map.put (". dwg", "41433130"); CAD (DWG) file_type_map.put (". html", "68746d6c3e"); HTML (HTML) file_type_map.put (". rtf", "7b5c727466"); Rich Text Format (RTF) FIle_type_map.put (". xml", "3c3f786d6c"); File_type_map.put (". zip", "504b0304");
The docx file header is the same as the zip File_type_map.put (". rar", "52617221"); File_type_map.put (". PSD", "38425053"); Photoshop (PSD) file_type_map.put (". eml", "44656c69766572792d646174653a"); Email file_type_map.put (". dbx", "cfad12fec5fd746f"); Outlook Express (DBX) file_type_map.put (". pst", "2142444E"); Outlook (PST) file_type_map.put (". mdb", "5374616e64617264204a"); MS Access (MDB) file_type_map.put (". WPD", "FF575043");
WordPerfect (WPD) File_type_map.put (". EPs", "252150532d41646f6265");
File_type_map.put ("PS", "252150532d41646f6265"); File_type_map.put (". Qdf", "ac9ebd8f"); Quicken (QDF) File_type_map.put (". PWL", "E3828596"); Windows Password (PWL) file_type_map.put (". wav", "57415645");
Wave (WAV) file_type_map.put (". avi", "41564920"); File_type_map.put (". Ram", "2E7261FD"); ReaL Audio (RAM) File_type_map.put (". RM", "2e524d46"); Real Media (RM) File_type_map.put (". mpg", "000001BA"); File_type_map.put (". mov", "6d6f6f76"); Quicktime (MOV) file_type_map.put (". asf", "3026b2758e66cf11"); Windows Media (ASF) File_type_map.put (". Mid", "4d546864"); MIDI (Mid)}//corresponding HTTP contenttype private static void Initcontenttype () {types = new hashmap< ;
String, string> ();
Types.put ("Application/pdf", ". pdf");
Types.put ("Application/msword", ". Doc");
Types.put ("Text/plain", ". txt");
Types.put ("Application/x-xls", ". xls");
Types.put ("Application/-excel", ". xls");
Types.put ("text/html", ". html");
Types.put ("Application/x-rtf", ". rtf");
Types.put ("message/rfc822", ". mht");
Types.put ("Application/x-ppt", ". ppt");
Types.put ("Image/jpeg", ". jpg"); Types.put ("application/vnd.openxmlformats-officedocument.wordprocessingMl.template ",". docx ");
Types.put ("Application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ". xlsx");
Types.put ("Application/vnd.openxmlformats-officedocument.presentationml.presentation", ". pptx");
Types.put ("message/rfc822", ". eml");
Types.put ("Application/xml", ". xml");
}//Custom needs to match the file suffix of the link, the unsatisfied can add private static void Initextentsion () {Extensions.add (". pdf");
Extensions.add (". Doc");
Extensions.add (". txt");
Extensions.add (". xls");
Extensions.add (". html");
Extensions.add (". rtf");
Extensions.add (". mht");
Extensions.add (". rar");
Extensions.add (". ppt");
Extensions.add (". jpg");
Extensions.add (". docx");
Extensions.add (". xlsx");
Extensions.add (". pptx");
Extensions.add (". eml");
Extensions.add (". zip");
Extensions.add (". docm");
Extensions.add (". xlsm");
Extensions.add (". xlsb"); Extensions. Add (". Dotx");
Extensions.add (". csv"); }
}