C#: Getting a file's encoding format
/// <summary>
/// Guesses the text encoding of a file from its raw bytes: a byte order mark
/// (BOM) when present, otherwise a structural check for BOM-less UTF-8.
/// </summary>
class Encodingtype
{
    /// <summary>
    /// Opens the file at the given path, reads its binary content and
    /// determines the file's encoding type.
    /// </summary>
    /// <param name="file_name">File path.</param>
    /// <returns>The detected encoding of the file.</returns>
    public static System.Text.Encoding GetType(string file_name)
    {
        // 'using' guarantees the stream is released even when detection throws.
        using (FileStream fs = new FileStream(file_name, FileMode.Open, FileAccess.Read))
        {
            return GetType(fs);
        }
    }

    /// <summary>
    /// Determines the encoding type of a file from an open file stream.
    /// The stream is read from its current position and is closed before
    /// this method returns.
    /// </summary>
    /// <param name="FS">File stream to inspect.</param>
    /// <returns>
    /// <c>Encoding.UTF8</c> for a UTF-8 BOM or for content that is structurally
    /// valid UTF-8 (this includes empty and pure-ASCII content),
    /// <c>Encoding.BigEndianUnicode</c> / <c>Encoding.Unicode</c> for a UTF-16
    /// BOM, and <c>Encoding.Default</c> otherwise.
    /// </returns>
    public static System.Text.Encoding GetType(FileStream FS)
    {
        // Disposing the reader also closes the underlying stream.
        using (BinaryReader r = new BinaryReader(FS, System.Text.Encoding.Default))
        {
            // ReadBytes takes an int count. A file too large to buffer is
            // only inspected for a BOM instead of being scanned completely.
            long length = FS.Length;
            bool canBuffer = length <= int.MaxValue;
            byte[] ss = r.ReadBytes(canBuffer ? (int)length : 3);

            // BOM checks are length-guarded, so files shorter than the BOM
            // can no longer cause an IndexOutOfRangeException.
            if (StartsWith(ss, 0xEF, 0xBB, 0xBF))
            {
                return Encoding.UTF8;               // UTF-8 with BOM
            }
            if (StartsWith(ss, 0xFE, 0xFF))
            {
                return Encoding.BigEndianUnicode;   // UTF-16 big endian BOM
            }
            if (StartsWith(ss, 0xFF, 0xFE))
            {
                return Encoding.Unicode;            // UTF-16 little endian BOM
            }

            if (canBuffer && Isutf8bytes(ss))
            {
                return Encoding.UTF8;               // UTF-8 without BOM
            }
            return Encoding.Default;
        }
    }

    /// <summary>
    /// Tells whether <paramref name="data"/> begins with the given bytes.
    /// </summary>
    private static bool StartsWith(byte[] data, params byte[] prefix)
    {
        if (data.Length < prefix.Length)
        {
            return false;
        }
        for (int i = 0; i < prefix.Length; i++)
        {
            if (data[i] != prefix[i])
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Determines whether the data is structurally valid UTF-8 (no BOM required).
    /// </summary>
    /// <param name="data">Bytes to examine.</param>
    /// <returns>
    /// <c>true</c> when every byte is either below 0x80 or part of a complete
    /// lead-byte/continuation-byte sequence; <c>false</c> otherwise, including
    /// when the data ends in the middle of a multi-byte sequence.
    /// </returns>
    private static bool Isutf8bytes(byte[] data)
    {
        // Number of bytes of the character currently being parsed that are
        // still outstanding, counting the current one (1 = at a boundary).
        int charByteCounter = 1;
        byte curByte; // The byte currently being parsed.

        for (int i = 0; i < data.Length; i++)
        {
            curByte = data[i];
            if (charByteCounter == 1)
            {
                if (curByte >= 0x80)
                {
                    // Count the leading 1 bits of the lead byte; each one
                    // beyond the first announces a continuation byte.
                    while (((curByte <<= 1) & 0x80) != 0)
                    {
                        charByteCounter++;
                    }
                    // A lead byte starts with at least two 1 bits
                    // (110xxxxx ... 1111110x); 10xxxxxx or more than six
                    // leading 1 bits is not a valid lead byte.
                    if (charByteCounter == 1 || charByteCounter > 6)
                    {
                        return false;
                    }
                }
            }
            else
            {
                // Continuation bytes must look like 10xxxxxx.
                if ((curByte & 0xC0) != 0x80)
                {
                    return false;
                }
                charByteCounter--;
            }
        }

        // Data that stops inside a multi-byte sequence is not valid UTF-8;
        // report that instead of throwing from a detection helper.
        return charByteCounter == 1;
    }
}
|
Usage:
// Detect the document's encoding first, then open the reader with it.
Encoding txtencoding = Encodingtype.GetType(Txtpath);
StreamReader sr = new StreamReader(Txtpath, txtencoding);
C#: Getting a file's encoding format (reposted)