The relationship between Unicode and UTF is simple to remember: Unicode is a character-coding standard (in Java, "Unicode" refers to UTF-16), while UTF (Unicode Transformation Format) is a format for converting Unicode-encoded text into bytes so that it can be transmitted reliably over a network and saved on storage media. UTF therefore exists in several forms, such as 8, 16 and 32, each with little-endian (LE) and big-endian (BE) variants, so there are the following 10 Unicode encoding formats, as the program below shows.
/**
 * Demo entry point: prints the hex dump of a sample string in every Unicode
 * charset, then decodes a sample hex string with every Unicode charset.
 *
 * NOTE(review): the calls are unqualified on the assumption that this method
 * lives in the same class as Str2all/Str4all; the original qualified them with
 * a class name whose declaration is not visible in this part of the file.
 *
 * @param args command-line arguments (unused)
 * @throws UnsupportedEncodingException if a charset name used by the helpers
 *         is not available in the running JVM
 */
public static void main(String[] args) throws UnsupportedEncodingException {
    Str2all("0 product model description");
    // These 16 bytes, read as UTF-16LE, are '0', TAB and six CJK characters
    // (U+4EA7 U+54C1 U+578B U+53F7 U+63CF U+8FF0); the other charsets are
    // expected to print garbage for this input.
    Str4all("30000900a74ec1548b57f753cf63f08f");
}
/**
* Attempt to encode hexadecimal numeric strings in all encoded formats
*
* @param hexstr
* @throws unsupportedencodingexception
*/
public static void Str4all (String uStr) throws unsupportedencodingexception{
System.out.println ("+++++++++++++++++++++++++++++++++++++++++++++++++++");
Byte[] bs = new Byte[ustr.length ()/2];
for (int i = 0; i < bs.length; i++) {
Bs[i] = (byte) integer.parseint (Ustr.substring (i*2, i*2+2), 16);
}
System.out.println (New String (BS, "Utf-8"));
16
System.out.println (New String (BS, "utf-16")); With Unicode
System.out.println (New String (BS, "Utf-16le"));
System.out.println (New String (BS, "X-utf-16le-bom"));
System.out.println (New String (BS, "Utf-16be"));
System.out.println (New String (BS, "X-utf-16be-bom")); Unsupportedencodingexception
32
System.out.println (New String (BS, "utf-32"));
System.out.println (New String (BS, "Utf-32le"));
System.out.println (New String (BS, "X-utf-32le-bom"));
System.out.println (New String (BS, "Utf-32be"));
System.out.println (New String (BS, "X-utf-32le-bom"));
}
/**
* Lists all encoded hexadecimal numeric strings corresponding to the decoded string
*
* @param uStr
* @throws unsupportedencodingexception
*/
public static void Str2all (String uStr) throws unsupportedencodingexception{
System.out.println ("+++++++++++++++++++++++++++++++++++++++++++++++++++");
Byte[] bs = new byte[]{};
BS = ustr.getbytes ("Utf-8");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
16
BS = ustr.getbytes ("utf-16");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
BS = ustr.getbytes ("Utf-16le");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
BS = ustr.getbytes ("X-utf-16le-bom");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
BS = ustr.getbytes ("Utf-16be");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
BS = ustr.getbytes ("X-utf-16be-bom"); Unsupportedencodingexception
32
BS = ustr.getbytes ("utf-32");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
BS = ustr.getbytes ("Utf-32le");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
BS = ustr.getbytes ("X-utf-32le-bom");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
BS = ustr.getbytes ("Utf-32be");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
BS = ustr.getbytes ("X-utf-32le-bom");
for (byte b:bs) {
System.out.print (integer.tohexstring (b & 0xff));
}
System.out.println ();
}
CharSet Us-ascii%s
Historicalname ASCII
# IANA Aliases
Alias Iso-ir-6
Alias ansi_x3.4-1986
Alias iso_646.irv:1991
Alias ASCII
Alias Iso646-us
Alias us
Alias IBM367
Alias cp367
Alias Csascii
Alias default
# Other aliases
Alias 646 # Solaris POSIX locale
Alias iso_646.irv:1983
Alias ansi_x3.4-1968 # Linux POSIX locale (RedHat)
Alias Ascii7
CharSet UTF-8 Utf_8
Historicalname UTF8
Alias UTF8
Alias Unicode-1-1-utf-8
CharSet UTF-16 Utf_16
Historicalname UTF-16
Alias Utf_16
Alias Utf16
Alias Unicode
Alias Unicodebig
CharSet Utf-16be Utf_16be
Historicalname unicodebigunmarked
Alias Utf_16be
Alias Iso-10646-ucs-2
Alias X-utf-16be
Alias unicodebigunmarked
CharSet Utf-16le Utf_16le
Historicalname unicodelittleunmarked
Alias Utf_16le
Alias X-utf-16le
Alias unicodelittleunmarked
CharSet X-utf-16le-bom Utf_16le_bom
Historicalname Unicodelittle
Alias Unicodelittle
CharSet UTF-32 utf_32
Alias Utf_32
Alias UTF32
CharSet Utf-32le Utf_32le
Alias Utf_32le
Alias X-utf-32le
CharSet Utf-32be Utf_32be
Alias Utf_32be
Alias X-utf-32be
CharSet X-utf-32le-bom Utf_32le_bom
Alias Utf_32le_bom
Alias Utf-32le-bom
CharSet X-utf-32be-bom Utf_32be_bom
Alias Utf_32be_bom
Alias Utf-32be-bom
CharSet Iso-8859-1%s
Historicalname Iso8859_1
# IANA Aliases
Alias iso-ir-100
Alias Iso_8859-1
Alias Latin1
Alias L1
Alias IBM819
Alias cp819
Alias CsISOLatin1
# Other aliases
Alias 819
Alias IBM-819
Alias Iso8859_1
Alias iso_8859-1:1987
Alias Iso_8859_1
Alias 8859_1
Alias Iso8859-1
CharSet iso-8859-2%s
historicalname iso8859_2
alias Iso8859_2
alias 8859_2
alias iso-ir-101
alias Iso_8859-2
alias iso_8859-2:1987
alias Iso8859-2
alias Latin2
alias L2
alias ibm912
alias ibm-912
alias cp912
alias 912
alias csISOLatin2
CharSet iso-8859-4%s
historicalname iso8859_4
alias Iso8859_4
alias Iso8859-4
alias 8859_4
alias iso-ir-110
alias Iso_8859-4
alias iso_8859-4:1988
alias Latin4
alias L4
alias ibm914
alias ibm-914
alias cp914
alias 914
alias CsISOLatin4
CharSet iso-8859-5%s
historicalname iso8859_5
alias Iso8859_5
alias 8859_5
alias iso-ir-144
alias Iso_8859-5
alias iso_8859-5:1988
alias Iso8859-5
alias Cyrillic
alias ibm915
alias ibm-915
alias cp915
alias 915
alias Csisolatincyrillic
CharSet iso-8859-7%s
historicalname iso8859_7
alias Iso8859_7
alias 8859_7
alias iso-ir-126
alias Iso_8859-7
alias iso_8859-7:1987
alias elot_928
alias ECMA-118
alias Greek
alias Greek8
alias Csisolatingreek
alias Sun_eu_greek # Solaris 7/8 compatibility
alias ibm813
alias ibm-813
alias 813
alias cp813
Alias Iso8859-7 # Solaris 9 compatibility
CharSet iso-8859-9%s
Historicalname Iso8859_9
Alias Iso8859_9
Alias 8859_9
Alias iso-ir-148
Alias Iso_8859-9
Alias iso_8859-9:1989
Alias Iso8859-9
Alias Latin5
Alias L5
Alias ibm920
Alias ibm-920
Alias 920
Alias cp920
Alias CsISOLatin5
CharSet iso-8859-13%s
Historicalname iso8859_13
Alias Iso8859_13
Alias 8859_13
Alias Iso_8859-13
Alias Iso8859-13
CharSet iso-8859-15%s
Historicalname iso8859_15
# IANA Alias
Alias Iso_8859-15
# Other aliases
Alias 8859_15
Alias Iso8859_15
Alias Iso8859-15
Alias IBM923
Alias IBM-923
Alias cp923
Alias 923
Alias LATIN0
Alias LATIN9
Alias L9
Alias CsISOlatin0
Alias CsISOlatin9
Alias Iso8859_15_fdis
CharSet Koi8-r%s
Historicalname Koi8_r
Alias Koi8_r
Alias Koi8
Alias cskoi8r
CharSet Koi8-u%s
Alias Koi8_u
CharSet windows-1250%s
Historicalname Cp1250
Alias cp1250
Alias cp5346 # Euro IBM CCSID
CharSet windows-1251%s
Historicalname Cp1251
Alias cp1251
Alias cp5347 # Euro IBM CCSID
Alias ansi-1251 # Solaris compatibility
CharSet windows-1252%s
Historicalname Cp1252
Alias cp1252
Alias cp5348 # Euro IBM CCSID
CharSet windows-1253%s
Historicalname Cp1253
Alias cp1253
Alias cp5349 # Euro IBM CCSID
CharSet windows-1254%s
Historicalname Cp1254
Alias cp1254
Alias cp5350 # Euro IBM CCSID
CharSet windows-1257%s
Historicalname Cp1257
Alias cp1257
Alias cp5353 # Euro IBM CCSID
CharSet IBM437%s
Historicalname Cp437
Alias cp437
Alias ibm-437
Alias 437
Alias cspc8codepage437
Alias windows-437
CharSet x-ibm737%s
Historicalname Cp737
Alias cp737
Alias ibm737
Alias ibm-737
Alias 737
CharSet IBM775%s
Historicalname Cp775
Alias cp775
Alias ibm-775
Alias 775
CharSet IBM850%s
Historicalname Cp850
Alias cp850
Alias ibm-850
Alias 850
Alias Cspc850multilingual
CharSet IBM852%s
Historicalname Cp852
Alias cp852
Alias ibm-852
Alias 852
Alias csPCp852
CharSet IBM855%s
Historicalname Cp855
Alias cp855
Alias ibm-855
Alias 855
Alias cspcp855
CharSet IBM857%s
Historicalname Cp857
Alias cp857
Alias ibm-857
Alias 857
Alias csIBM857
CharSet IBM00858%s
Historicalname Cp858
Alias cp858
Alias ccsid00858
Alias cp00858
Alias 858
CharSet IBM862%s
Historicalname Cp862
Alias cp862
Alias ibm-862
Alias 862
Alias csIBM862
Alias Cspc862latinhebrew
CharSet IBM866%s
Historicalname Cp866
Alias cp866
Alias ibm-866
Alias 866
Alias csIBM866
CharSet x-ibm874%s
Historicalname Cp874
Alias cp874
Alias ibm874
Alias ibm-874
Alias 874