The code is as follows:
public static String getcharset (file file) {
String charset = "GBK";
byte[] first3bytes = new Byte[3];
try {
Boolean checked = false;
Bufferedinputstream bis = new Bufferedinputstream (
New FileInputStream (file));
Bis.mark (0);
int read = Bis.read (first3bytes, 0, 3);
if (read = = 1)
return charset;
if (first3bytes[0] = = (byte) 0xFF && first3bytes[1] = = (byte) 0xFE) {
CharSet = "Utf-16le";
Checked = true;
else if (first3bytes[0] = = (byte) 0xFE && first3bytes[1]
= = (byte) 0xFF) {
CharSet = "Utf-16be";
Checked = true;
else if (first3bytes[0] = = (byte) 0xEF && first3bytes[1]
= = (byte) 0xBB
&& first3bytes[2] = = (byte) 0xBF) {
CharSet = "UTF-8";
Checked = true;
}
Bis.reset ();
if (!checked) {
int loc = 0;
while (read = Bis.read ())!=-1) {
loc++;
if (read >= 0xF0)
Break
Under the BF alone, is also considered GBK
if (0x80 <= read && read <= 0xBF)
Break
if (0xc0 <= read && read <= 0xDF) {
Read = Bis.read ();
if (0x80 <= read && read <= 0xBF)//Double byte (0XC0-0XDF)
(0x80-
0xBF), or it may be within GB code
Continue
Else
Break
It may be wrong, but it's less likely.
else if (0xe0 <= read && read <= 0xEF) {
Read = Bis.read ();
if (0x80 <= read && read <= 0xBF) {
Read = Bis.read ();
if (0x80 <= read && read <= 0xBF) {
CharSet = "UTF-8";
Break
} else
Break
} else
Break
}
}
System.out.println (loc + "" + integer.tohexstring (read));
}
Bis.close ();
catch (Exception e) {
E.printstacktrace ();
}
return charset;
}