Package Org.shefron.fc.utfwithbom;import Java.io.fileinputstream;import Java.io.ioexception;import Java.io.inputstream;import Java.io.pushbackinputstream;public class Utffilehandler {/** * * @param file The FilePath * @re Turn the FileInputStream * @throws Exception */public static InputStream getinputstream (String file) throws Exception{file InputStream FIS = Null;try{fis = new FileInputStream (file);} catch (Exception e) {System.out.println (E.getmessage ()); throw new Exception ("IO Stream error!");} return FIS;} /** * * @param file The FilePath * @param enc the default encoding * @return The Utffilehandler.unicodeinputstream * @thr oWS Exception */public static InputStream Getinputstreamwithoutbom (String file,string enc) throws exception{ Unicodeinputstream stream = Null;try{fileinputstream fis = new FileInputStream (file); stream = new Unicodeinputstream (FIS , null); System.out.println ("Encoding:" +stream.getencoding ());} catch (Exception e) {System.out.println (E.getmessage ()); throw new Exception ("IOStream error! ");} return stream;} /** * This inputstream would recognize Unicode BOM marks and would skip bytes if * getencoding () method is called before any Of the Read (...) methods. 
* Usage pattern:string enc = "iso-8859-1"; or NULL to use Systemdefault * fileinputstream fis = new FileInputStream (file); Unicodeinputstream UIn = new * Unicodeinputstream (FIS, ENC); ENC = uin.getencoding (); Check and Skip * possible BOM bytes InputStreamReader in; if (enc = = null) in = new * InputStreamReader (UIn); else in = new InputStreamReader (UIn, ENC); */public Static class Unicodeinputstream extends InputStream {pushbackinputstream Internalin; Boolean isinited = false; String Defaultenc; String encoding; private static final int bom_size = 4; Public Unicodeinputstream (InputStream in, String defaultenc) {Internalin = new Pushbackinputstream (in, bom_size); This.defaultenc = Defaultenc; } public String getdefaultencoding () {return defaultenc; } Public String getencoding () {if (!isinited) {try {init (); } catch (IOException ex) {illegalstateexception ise = new IllegalStateException ("I Nit method failed. "); Ise.initcause (ISE); Throw Ise; }} return encoding; }/** * Read-ahead four bytes and check for BOM marks. Extra Bytes is unread * The stream, only BOM bytes is skipped. 
*/protected void init () throws IOException {if (isinited) return; byte bom[] = new Byte[bom_size]; int n, unread; n = internalin.read (BOM, 0, bom.length); if ((bom[0] = = (byte) 0x00) && (bom[1] = = (byte) 0x00) && (bom[2] = = (byte) 0xFE) && ; (Bom[3] = = (byte) 0xFF)) {encoding = "utf-32be"; unread = n-4; } else if ((bom[0] = = (byte) 0xFF) && (bom[1] = = (byte) 0xFE) && (bom[2] = = (byte) 0x00) && (bom[3] = = (byte) 0x00) {encoding = "utf-32le"; unread = n-4; } else if ((bom[0] = = (byte) 0xEF) && (bom[1] = = (byte) 0xBB) && (bom[2] = = (byte) 0xBF)) { encoding = "UTF-8"; unread = n-3; } else if ((bom[0] = = (byte) 0xFE) && (bom[1] = = (byte) 0xFF)) {encoding = "utf-16be"; unread = n-2; } else if ((bom[0] = = (byte) 0xFF) && (bom[1] = = (byte) 0xFE)) {encoding = "utf-16le"; unread = n-2; } else {//Unicode BOM mark not found, unread all bytes encoding = Defaultenc; unread = n; }//System.out.println ("read=" + N + ", unread=" + unread); if (Unread > 0) internalin.unread (BOM, (N-unread), unread); Isinited = true; } public void Close () throws IOException {//init ();Isinited = true; Internalin.close (); } public int read () throws IOException {//init ();//isinited = true; return Internalin.read (); }}}
// Java utility classes for reading UTF-8 files with or without a BOM.