Package com.test.bs;
/**
 * Demonstrates what happens when bytes are decoded with a charset other than the one used to
 * encode them, and that the damage can be undone while the underlying bytes are intact.
 *
 * <p>Background notes:
 * <ol>
 *   <li>Unicode is a character-coding standard designed to give every character in the world
 *       one universal code; UTF-8 and UTF-16 are concrete encodings that implement it.</li>
 *   <li>Java uses Unicode internally (specifically UTF-16), which is what lets it support
 *       multiple languages.</li>
 *   <li>Whatever external format a program deals with (GBK, ISO-8859-1, ...), the text is held
 *       as Unicode while it is stored and passed around inside the program. To receive a value
 *       without garbling, the receiver must decode with the same charset the sender encoded
 *       with.</li>
 *   <li>If both sides run with the same {@code file.encoding}, the defaults just work. Different
 *       projects often disagree, however, and then an explicit conversion is required.</li>
 *   <li>Whatever the conversion, Java first turns the data into Unicode, which is why it can
 *       handle multilingual character sets. The in-memory form is always the same, so as long
 *       as a value is written and read with the same charset it will not be garbled.</li>
 * </ol>
 */
public class UnicodeTest1 {

    /** Charset the sample text is encoded with. Charset names are matched case-insensitively. */
    private static final String UTF8 = "UTF-8";

    /** Charset deliberately (mis)used to decode the UTF-8 bytes. */
    private static final String GBK = "GBK";

    /**
     * Prints the sample text, the garbled text produced by decoding its UTF-8 bytes as GBK, and
     * the text recovered by reversing that step.
     *
     * @param args unused
     * @throws Exception if a named charset is not supported by the running JVM
     *     ({@link java.io.UnsupportedEncodingException})
     */
    public static void main(String args[]) throws Exception {
        // NOTE(review): the literal in the original listing was not valid Java (single-quoted,
        // multi-character text). A two-character CJK sample (U+6C49 U+5B57) is used instead,
        // written with Unicode escapes so it does not depend on the source file's encoding.
        // The demo is only meaningful with non-ASCII text.
        String str = "\u6c49\u5b57";

        // The original string prints normally.
        System.out.println("Original String---" + str);

        // str.getBytes() with no argument encodes with the JVM default charset, which is tied
        // to the "file.encoding" system property. The original author observed that this
        // matched the encoding of the source file rather than the operating-system default
        // -- NOTE(review): that is probably an effect of how the program was launched; confirm.
        //
        // str.getBytes(charsetName) encodes the internal Unicode text into a byte array in the
        // named charset. new String(bytes, charsetName) decodes a byte array, interpreting it
        // as the named charset, back into internal Unicode text.
        byte[] utf8Bytes = str.getBytes(UTF8);

        // Decode the UTF-8 bytes as if they were GBK. Printing the result shows garbled text
        // because the bytes are being read with the wrong charset.
        String newStr = new String(utf8Bytes, GBK);
        System.out.println("new string----" + newStr);

        // The garbling comes only from the charset mismatch; the byte data is still intact.
        // Encoding the garbled text back to GBK yields the original UTF-8 bytes, and decoding
        // those as UTF-8 restores the text. This is lossless for the sample above because each
        // byte pair happens to be a mapped GBK sequence; it is not guaranteed for arbitrary
        // text.
        String finalStr = new String(newStr.getBytes(GBK), UTF8);
        System.out.println("final String---" + finalStr);
    }
}
Package com.test.bs;
Import java.io.UnsupportedEncodingException;
/**
 * Prints the byte values of a string encoded as GB2312 and as UTF-8, then decodes those bytes
 * with the default charset and with UTF-8 so the results can be compared.
 */
public class UnicodeTest2 {

    /**
     * Runs the comparison and writes every result to standard output.
     *
     * <p>If a named charset is not supported, the stack trace is printed and the method returns.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // ASCII-only text encodes to the same bytes in GB2312 and UTF-8; substitute non-ASCII
        // text here to see the two encodings diverge.
        String a = "haha";
        try {
            byte[] gb2312 = a.getBytes("GB2312");
            byte[] utf = a.getBytes("UTF-8");

            printBytes(gb2312);
            printBytes(utf);

            // Decode with the JVM default charset (no charset argument).
            System.out.println(new String(gb2312));
            System.out.println(new String(utf));

            // The "file.encoding" system property of the running JVM.
            System.out.println(System.getProperty("file.encoding"));

            // Decode both arrays explicitly as UTF-8. The charset name must not carry
            // surrounding whitespace, otherwise the lookup fails.
            System.out.println(new String(utf, "UTF-8"));
            System.out.println(new String(gb2312, "UTF-8"));
        } catch (java.io.UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }

    /** Prints each byte as a signed decimal with no separator, followed by a line break. */
    private static void printBytes(byte[] bytes) {
        for (byte b : bytes) {
            System.out.print(b);
        }
        System.out.println();
    }
}