Ref: 79529756
41485403
1. Create a new class called "Unicodereader", then copy and paste the following content into it:
/*
 version: 1.1 / 2007-01-25
 - changed BOM recognition ordering (longer BOMs first)

 Original pseudocode   : Thomas Weidenfeller
 Implementation tweaked: Aki Nieminen

 http://www.unicode.org/unicode/faq/utf_bom.html
 BOMs:
   00 00 FE FF    = UTF-32, big-endian
   FF FE 00 00    = UTF-32, little-endian
   EF BB BF       = UTF-8
   FE FF          = UTF-16, big-endian
   FF FE          = UTF-16, little-endian

 Win2k Notepad:
   Unicode format = UTF-16LE
*/

import java.io.*;

/**
 * Generic Unicode text reader that uses the byte-order mark (BOM) at the start
 * of a stream to identify the encoding to be used. If no BOM is found, the
 * given default encoding is used, or the system default when none was given.
 *
 * <p>The BOM itself is consumed and never returned to the caller, which avoids
 * the stray leading character seen when a BOM-prefixed UTF-8 file is read
 * through a plain {@link InputStreamReader}.
 */
public class Unicodereader extends Reader {

    /** Length in bytes of the longest BOM that is recognized (UTF-32). */
    private static final int BOM_SIZE = 4;

    /** Raw byte stream; pushback lets non-BOM bytes be returned after sniffing. */
    private final PushbackInputStream internalIn;

    /** Encoding used when no BOM is present; {@code null} means system default. */
    private final String defaultEnc;

    /** Decoding reader; stays {@code null} until {@link #init()} has run. */
    private InputStreamReader internalIn2 = null;

    /**
     * Creates a reader over the given byte stream.
     *
     * @param in         input stream to be read
     * @param defaultEnc default encoding if the stream does not start with a
     *                   BOM marker; give {@code null} to use the system-level
     *                   default
     */
    public Unicodereader(InputStream in, String defaultEnc) {
        internalIn = new PushbackInputStream(in, BOM_SIZE);
        this.defaultEnc = defaultEnc;
    }

    /**
     * Returns the default encoding passed to the constructor.
     *
     * @return the default encoding, possibly {@code null}
     */
    public String getDefaultEncoding() {
        return defaultEnc;
    }

    /**
     * Returns the stream encoding, or {@code null} if the stream is
     * uninitialized. Call {@link #init()} or {@code read()} to initialize it.
     *
     * @return the encoding name reported by the underlying
     *         {@link InputStreamReader}, or {@code null} before initialization
     */
    public String getEncoding() {
        if (internalIn2 == null) {
            return null;
        }
        return internalIn2.getEncoding();
    }

    /**
     * Reads ahead up to four bytes and checks them for a BOM. Bytes that are
     * not part of a BOM are pushed back onto the stream; only the BOM bytes
     * are skipped. Does nothing if the reader is already initialized.
     *
     * @throws IOException if reading fails or the selected encoding is not
     *                     supported
     */
    protected void init() throws IOException {
        if (internalIn2 != null) {
            return;
        }

        // A single read() may legally return fewer bytes than requested, so
        // keep reading until the look-ahead buffer is full or the stream ends.
        byte[] bom = new byte[BOM_SIZE];
        int n = 0;
        while (n < bom.length) {
            int got = internalIn.read(bom, n, bom.length - n);
            if (got <= 0) {
                break;
            }
            n += got;
        }

        // Longer BOMs are tested first because FF FE is a prefix of FF FE 00 00.
        // Every test also checks n, so zero-filled slots past the end of a
        // short stream can never be mistaken for BOM bytes.
        String encoding;
        int bomLength;
        if (n >= 4 && bom[0] == (byte) 0x00 && bom[1] == (byte) 0x00
                && bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (n >= 4 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE
                && bom[2] == (byte) 0x00 && bom[3] == (byte) 0x00) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (n >= 3 && bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB
                && bom[2] == (byte) 0xBF) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (n >= 2 && bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (n >= 2 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE) {
            encoding = "UTF-16LE";
            bomLength = 2;
        } else {
            // Unicode BOM mark not found, unread all bytes.
            encoding = defaultEnc;
            bomLength = 0;
        }

        int unread = n - bomLength;
        if (unread > 0) {
            internalIn.unread(bom, bomLength, unread);
        }

        // Use the detected/given encoding, or the platform default when none.
        if (encoding == null) {
            internalIn2 = new InputStreamReader(internalIn);
        } else {
            internalIn2 = new InputStreamReader(internalIn, encoding);
        }
    }

    /**
     * Closes the reader and the underlying stream.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        if (internalIn2 != null) {
            internalIn2.close();
        } else {
            // Never initialized: close the raw stream directly instead of
            // sniffing the BOM first, which could throw and leak the stream.
            internalIn.close();
        }
    }

    /**
     * Reads decoded characters into a portion of an array, initializing the
     * reader (BOM detection) on first use.
     *
     * @param cbuf destination buffer
     * @param off  offset at which to start storing characters
     * @param len  maximum number of characters to read
     * @return the number of characters read, or -1 at end of stream
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        init();
        return internalIn2.read(cbuf, off, len);
    }
}
When writing to a file:
// Writes the output file as UTF-8. The writer is opened with try-with-resources so it
// is flushed and closed automatically, and so a failure while opening the file is
// reported by the catch block instead of triggering a NullPointerException on a
// still-null writer during cleanup.
try {
    File filename = new File(savefile);
    filename.createNewFile();
    try (PrintWriter out = new PrintWriter(new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(filename), "UTF-8")))) {
        System.out.println("打开fwriter");
        // NOTE(review): `ss` is not used and `s` is not declared anywhere in the visible
        // snippet; a loop header such as `for (String s : ss)` appears to be missing —
        // confirm against the original code before relying on this.
        String[] ss = content.split(",");
        out.write("[" + "\r\n");
        out.write(s + "," + "\r\n");
    }
} catch (IOException ex) {
    ex.printStackTrace();
} finally {
    // Runs after the writer has been closed by try-with-resources.
    System.out.println("关闭fwriter");
}
} // presumably closes the enclosing method, whose header is outside this snippet
Problem: when Java reads a UTF-8 text file, the first character comes out empty or as a question mark.