Method 1: Use the byte-order mark (BOM) that Windows writes at the start of text files.
Under Windows, TXT files saved as Unicode, Unicode big endian, or UTF-8 begin with a few extra bytes (a byte-order mark): FF FE (Unicode, i.e. UTF-16LE), FE FF (Unicode big endian, i.e. UTF-16BE), and EF BB BF (UTF-8).
public static string Getcharset (file file) {string charset = "GBK"; byte[] first3bytes = new Byte[3]; try {Boolean checked = false; Bufferedinputstream bis = new Bufferedinputstream (new FileInputStream (file)); Bis.mark (0); int read = Bis.read (first3bytes, 0, 3); if (read = =-1) return charset; if (first3bytes[0] = = (byte) 0xFF && first3bytes[1] = = (byte) 0xFE) {charset = "utf-16le"; Checked = true; } else if (first3bytes[0] = = (byte) 0xFE && first3bytes[1] = = (byte) 0xFF) {CharSet = "Utf-16be"; Checked = true; } else if (first3bytes[0] = = (byte) 0xEF && first3bytes[1] = = (byte) 0xBB &A mp;& first3bytes[2] = = (byte) 0xBF) {charset = "UTF-8"; Checked = true; } Bis.reset (); if (!checked) {int loc = 0; while (read = Bis.read ())! =-1) {loc++; if (read >= 0xF0) break; The single appearance of the Bf below, is also considered GBK if (0x80 <= read && read <= 0xBF) break; if (0xC0 <= read && read <= 0xDF) {read = Bis.read (); if (0x80 <= read && read <= 0xBF)//Double Byte (0XC0-0XDF)//(0x80- 0xBF), may also be continue within GB code; else break; There may also be errors, but the odds are lower} else if (0xE0 <= read && read <= 0xEF) {read = Bis.read (); if (0x80 <= read && read <= 0xBF) {Read = Bis.read (); if (0x80 <= read && read <= 0xBF) {charset = "UTF-8"; Break } else break; } else break; }} System.out.println (loc + "" + integer.tohexstring (read)); } bis.close (); } catch (Exception e) {e.printstacktrace (); } return charset; }
Cons: This approach is unreliable for files created under Linux, which usually do not start with a byte-order mark.
Method 2: The open-source project jchardet
http://www.iteye.com/topic/266501
package org.mozilla.intl.chardet;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Detects the character set of a file with jchardet.
 *
 * <p>jchardet is the Java port of Mozilla's automatic character set detection
 * algorithm; its home page is http://jchardet.sourceforge.net/
 *
 * <p>An instance keeps the result of the last detection in its fields, so a
 * single instance must not be used from several threads at once.
 *
 * @author Icer
 * @date 2008/11/13
 */
public class FileCharsetDetector {

    /** Set to true by the detection observer once a charset has been matched. */
    private boolean found = false;

    /**
     * Name of the charset reported by the detector when it found a full match;
     * stays {@code null} otherwise (for example for a binary file).
     */
    private String encoding = null;

    /**
     * Command-line entry point: prints the detected encoding of a file.
     *
     * @param argv {@code <path> [<languageHint>]}
     * @throws Exception if the file cannot be read or the hint is not a number
     */
    public static void main(String[] argv) throws Exception {
        if (argv.length != 1 && argv.length != 2) {
            System.out.println("Usage: FileCharsetDetector <path> [<languageHint>]");
            System.out.println("");
            System.out.println("Where <path> is D:/demo.txt");
            System.out.println("For optional <languageHint>. Use following ...");
            System.out.println("1 = Japanese");
            System.out.println("2 = Chinese");
            System.out.println("3 = Simplified Chinese");
            System.out.println("4 = Traditional Chinese");
            System.out.println("5 = Korean");
            System.out.println("6 = Dont Know (default)");
            return;
        }

        String encoding;
        if (argv.length == 2) {
            encoding = new FileCharsetDetector()
                    .guestFileEncoding(argv[0], Integer.parseInt(argv[1]));
        } else {
            encoding = new FileCharsetDetector().guestFileEncoding(argv[0]);
        }
        System.out.println("File code:" + encoding);
    }

    /**
     * Detects the encoding of a file.
     *
     * @param file the file to inspect
     * @return the encoding name, or {@code null} if none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading the file fails
     */
    public String guestFileEncoding(File file) throws FileNotFoundException, IOException {
        return geestFileEncoding(file, new nsDetector());
    }

    /**
     * Detects the encoding of a file, using a language hint.
     *
     * @param file the file to inspect
     * @param languageHint language hint code: 1 Japanese; 2 Chinese;
     *        3 Simplified Chinese; 4 Traditional Chinese; 5 Korean;
     *        6 Dont Know (default)
     * @return the encoding name (e.g. UTF-8, GBK, GB2312), or {@code null} if none
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading the file fails
     */
    public String guestFileEncoding(File file, int languageHint)
            throws FileNotFoundException, IOException {
        return geestFileEncoding(file, new nsDetector(languageHint));
    }

    /**
     * Detects the encoding of the file at the given path.
     *
     * @param path the file path
     * @return the encoding name (e.g. UTF-8, GBK, GB2312), or {@code null} if none
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading the file fails
     */
    public String guestFileEncoding(String path) throws FileNotFoundException, IOException {
        return guestFileEncoding(new File(path));
    }

    /**
     * Detects the encoding of the file at the given path, using a language hint.
     *
     * @param path the file path
     * @param languageHint language hint code: 1 Japanese; 2 Chinese;
     *        3 Simplified Chinese; 4 Traditional Chinese; 5 Korean;
     *        6 Dont Know (default)
     * @return the encoding name, or {@code null} if none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading the file fails
     */
    public String guestFileEncoding(String path, int languageHint)
            throws FileNotFoundException, IOException {
        return guestFileEncoding(new File(path), languageHint);
    }

    /**
     * Feeds the file to the given detector and returns the resulting encoding.
     *
     * @param file the file to inspect
     * @param det the jchardet detector to use
     * @return "ASCII" for a pure-ASCII file, the matched charset if the detector
     *         reported one, otherwise the first probable charset, or {@code null}
     *         if the detector offers no candidates
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading the file fails
     */
    private String geestFileEncoding(File file, nsDetector det)
            throws FileNotFoundException, IOException {
        // Clear the result of any previous call on this instance.
        found = false;
        encoding = null;

        // Set an observer: Notify() is called when a matching charset is found.
        det.Init(new nsICharsetDetectionObserver() {
            public void Notify(String charset) {
                found = true;
                encoding = charset;
            }
        });

        boolean isAscii = true;
        BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file));
        try {
            byte[] buf = new byte[1024];
            int len;
            boolean done = false;
            while (!done && (len = imp.read(buf, 0, buf.length)) != -1) {
                // Check whether the stream is still ASCII-only.
                if (isAscii) {
                    isAscii = det.isAscii(buf, len);
                }
                // Feed the detector once non-ASCII data has been seen; when it
                // reports done there is nothing left to learn, so stop reading.
                if (!isAscii) {
                    done = det.DoIt(buf, len, false);
                }
            }
        } finally {
            imp.close();
        }
        det.DataEnd();

        if (isAscii) {
            encoding = "ASCII";
            found = true;
        }

        if (!found) {
            String[] prob = det.getProbableCharsets();
            if (prob.length > 0) {
                // Nothing matched for certain: take the first probable charset.
                encoding = prob[0];
            } else {
                return null;
            }
        }
        return encoding;
    }
}
JAR download: http://download.csdn.net/detail/u012587637/8041169
Method 3: The open-source project juniversalchardet
http://code.google.com/p/juniversalchardet/
public static String getfileincode (file file) {if (!file.exists ()) {System.err.println ("Getfileincode:file not exists!") ; return null;} byte[] buf = new byte[4096]; FileInputStream fis = null;try {fis = new FileInputStream (file);//(1) universaldetector detector = new Universaldetector (n ULL);//(2) int Nread;while ((nread = Fis.read (buf)) > 0 &&!detector.isdone ()) {detector.handledata (buf, 0, NRE AD);} (3) Detector.dataend ();//(4) String encoding = Detector.getdetectedcharset (); if (encoding! = NULL) { SYSTEM.OUT.PRINTLN ("detected encoding =" + encoding);} else {System.out.println ("No encoding detected.");} (5) Detector.reset (); Fis.close (); return encoding;} catch (Exception e) {e.printstacktrace ();} return null;}
How to add the JAR to the project:
Put the JAR into the libs folder,
select the JAR, then right-click -> Build Path -> Add to Build Path.
Jar Package Download: http://download.csdn.net/detail/u012587637/8041181
Note: The third method is faster and newer than the second one, so the third one is recommended.
Android: Detecting the encoding of a text file