Original: http://tb.blog.csdn.net/TrackBack.aspx?Postid=800901
This article introduces functions for character encoding and decoding, as well as a function for determining whether text is encoded in UTF-8.
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
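/**
 * Title: Character encoding tool.
 *
 * Description: Utilities for converting text between ISO-8859-1 and GB2312,
 * for percent-encoding and decoding UTF-8 text in URLs, and for checking
 * whether data is UTF-8 encoded.
 *
 * Copyright: flashman.com.cn copyright (c) 2005
 *
 * Company: flashman.com.cn
 *
 * @author jeffzhu
 * @version 1.0
 */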
public class chartools {

    /** Upper-case hex digits used when emitting percent-escapes. */
    private static final String HEX_DIGITS = "0123456789ABCDEF";

    /** Length of one escaped three-byte UTF-8 sequence, e.g. "%E4%B8%AD". */
    private static final int ESCAPE_LENGTH = 9;

    /**
     * Re-interprets text that was wrongly decoded as ISO-8859-1 as GB2312.
     *
     * @param text text whose chars each hold one raw GB2312 byte; must not be null
     * @return the GB2312-decoded text; if a charset is unavailable the
     *         exception's {@code toString()} is returned instead (historical
     *         behaviour of this method, kept for existing callers)
     */
    public String iso2gb(String text) {
        String result = "";
        try {
            result = new String(text.getBytes("ISO-8859-1"), "gb2312");
        } catch (UnsupportedEncodingException ex) {
            result = ex.toString();
        }
        return result;
    }

    /**
     * Encodes text as GB2312 and exposes each resulting byte as one
     * ISO-8859-1 char (the inverse of {@link #iso2gb(String)}).
     *
     * @param text text to convert; must not be null
     * @return the converted text, or an empty string if a charset is
     *         unavailable (the stack trace is printed in that case)
     */
    public String gb2iso(String text) {
        String result = "";
        try {
            result = new String(text.getBytes("gb2312"), "ISO-8859-1");
        } catch (UnsupportedEncodingException ex) {
            ex.printStackTrace();
        }
        return result;
    }

    /**
     * Percent-encodes every character above U+00FF as its UTF-8 bytes.
     *
     * Characters in the range U+0000..U+00FF are copied through unchanged,
     * so this is not a general URL encoder: reserved ASCII characters such
     * as '%' or '&' are not escaped. The text is walked by code point so
     * that supplementary characters (surrogate pairs) are encoded as one
     * four-byte sequence instead of two '?' bytes.
     *
     * @param text text to encode; must not be null
     * @return the text with all characters above U+00FF replaced by
     *         upper-case "%XX" escapes
     */
    public String utf8urlencode(String text) {
        StringBuilder result = new StringBuilder(text.length());
        int index = 0;
        while (index < text.length()) {
            int codePoint = text.codePointAt(index);
            index += Character.charCount(codePoint);
            if (codePoint <= 255) {
                result.append((char) codePoint);
                continue;
            }
            byte[] bytes;
            try {
                bytes = new String(Character.toChars(codePoint)).getBytes("UTF-8");
            } catch (UnsupportedEncodingException ex) {
                // UTF-8 support is mandatory on every Java platform, so this
                // cannot happen; fail loudly rather than emit nothing.
                throw new IllegalStateException("UTF-8 charset unavailable", ex);
            }
            for (int j = 0; j < bytes.length; j++) {
                int value = bytes[j] & 0xFF;
                result.append('%')
                      .append(HEX_DIGITS.charAt(value >> 4))
                      .append(HEX_DIGITS.charAt(value & 0x0F));
            }
        }
        return result.toString();
    }

    /**
     * Decodes every well-formed three-byte UTF-8 escape ("%Ex%xx%xx") in the
     * text and leaves everything else untouched.
     *
     * Only three-byte sequences are decoded; other escapes such as "%20"
     * are copied through as written. The case of the surrounding text is
     * preserved, and an incomplete or malformed escape is kept literally
     * rather than dropped.
     *
     * @param text text to decode; may be null
     * @return the decoded text, or an empty string when {@code text} is
     *         null or empty
     */
    public String utf8urldecode(String text) {
        if (text == null || text.length() == 0) {
            return "";
        }
        StringBuilder result = new StringBuilder(text.length());
        int pos = 0;
        while (true) {
            int start = text.indexOf('%', pos);
            if (start == -1 || text.length() - start < ESCAPE_LENGTH) {
                // No further escape can fit; the remainder is copied below.
                break;
            }
            result.append(text, pos, start);
            String escape = text.substring(start, start + ESCAPE_LENGTH);
            if (utf8codecheck(escape)) {
                result.append(codetoword(escape));
                pos = start + ESCAPE_LENGTH;
            } else {
                // Not a three-byte escape: keep the '%' and rescan from the
                // next char so an escape starting inside this window is found.
                result.append('%');
                pos = start + 1;
            }
        }
        result.append(text, pos, text.length());
        return result.toString();
    }

    /**
     * Converts one escaped three-byte UTF-8 sequence to its character.
     *
     * @param text a nine-character escape such as "%E4%B8%AD"
     * @return the decoded single-character string, or {@code text}
     *         unchanged when it is not a well-formed three-byte escape
     */
    private String codetoword(String text) {
        if (!utf8codecheck(text)) {
            return text;
        }
        int lead = escapedByte(text, 0);
        int second = escapedByte(text, 3);
        int third = escapedByte(text, 6);
        // 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyyyyzzzzzz
        int codePoint = ((lead & 0x0F) << 12) | ((second & 0x3F) << 6) | (third & 0x3F);
        return String.valueOf((char) codePoint);
    }

    /**
     * Checks whether the leading characters of a byte array form
     * well-formed UTF-8 (RFC 3629 byte ranges).
     *
     * @param b bytes to inspect; null is reported as invalid
     * @param amaxcount maximum number of characters (not bytes) to check;
     *        checking stops successfully once this many have been validated
     * @return true if every inspected character is well-formed UTF-8;
     *         false on an illegal lead byte, an illegal continuation byte,
     *         or a sequence truncated by the end of the array
     */
    public static boolean isvalidutf8(byte[] b, int amaxcount) {
        if (b == null) {
            return false;
        }
        int length = b.length;
        int i = 0;
        for (int chars = 0; i < length && chars < amaxcount; ++chars) {
            int lead = b[i++] & 0xFF;
            if (lead < 0x80) {
                continue; // plain ASCII
            }
            int continuationCount;
            if (lead >= 0xC2 && lead <= 0xDF) {
                continuationCount = 1;
            } else if (lead >= 0xE0 && lead <= 0xEF) {
                continuationCount = 2;
            } else if (lead >= 0xF0 && lead <= 0xF4) {
                continuationCount = 3;
            } else {
                // Stray continuation byte, overlong lead (C0/C1) or a lead
                // beyond U+10FFFF (F5..FF).
                return false;
            }
            if (i + continuationCount > length) {
                return false;
            }
            // The first continuation byte has a narrower range for some
            // leads, to exclude overlong forms, surrogates and > U+10FFFF.
            int low = 0x80;
            int high = 0xBF;
            if (lead == 0xE0) {
                low = 0xA0;
            } else if (lead == 0xED) {
                high = 0x9F;
            } else if (lead == 0xF0) {
                low = 0x90;
            } else if (lead == 0xF4) {
                high = 0x8F;
            }
            for (int j = 0; j < continuationCount; ++j) {
                int next = b[i++] & 0xFF;
                if (next < low || next > high) {
                    return false;
                }
                low = 0x80;
                high = 0xBF;
            }
        }
        return true;
    }

    /**
     * Tests whether the text is exactly one escaped, well-formed three-byte
     * UTF-8 sequence ("%Ex%xx%xx", hex digits in either case).
     *
     * @param text candidate escape; may be null
     * @return true only for a nine-character escape whose three bytes form
     *         a valid UTF-8 encoding of a character in U+0800..U+FFFF
     *         (surrogates excluded)
     */
    private boolean utf8codecheck(String text) {
        if (text == null || text.length() != ESCAPE_LENGTH) {
            return false;
        }
        int lead = escapedByte(text, 0);
        int second = escapedByte(text, 3);
        int third = escapedByte(text, 6);
        if (lead < 0xE0 || lead > 0xEF) {
            return false; // also rejects -1 (malformed "%XX")
        }
        if (second < 0x80 || second > 0xBF || third < 0x80 || third > 0xBF) {
            return false;
        }
        if (lead == 0xE0 && second < 0xA0) {
            return false; // overlong encoding
        }
        if (lead == 0xED && second > 0x9F) {
            return false; // UTF-16 surrogate range
        }
        return true;
    }

    /**
     * Guesses whether a URL's escapes are UTF-8 by inspecting the first
     * '%' escape only.
     *
     * @param text URL to inspect; may be null
     * @return true if the first '%' in the text starts a well-formed
     *         three-byte UTF-8 escape; false otherwise (including when
     *         there is no '%' or fewer than nine characters follow it)
     */
    public boolean isutf8url(String text) {
        if (text == null) {
            return false;
        }
        int p = text.indexOf('%');
        if (p == -1 || text.length() - p < ESCAPE_LENGTH) {
            return false;
        }
        return utf8codecheck(text.substring(p, p + ESCAPE_LENGTH));
    }

    /**
     * Reads the byte written as "%XX" at the given position.
     *
     * @param text text containing the escape; must have at least three
     *        characters from {@code percentIndex}
     * @param percentIndex index at which the '%' is expected
     * @return the byte value 0..255, or -1 if the three characters are not
     *         '%' followed by two ASCII hex digits
     */
    private static int escapedByte(String text, int percentIndex) {
        if (text.charAt(percentIndex) != '%') {
            return -1;
        }
        int high = hexValue(text.charAt(percentIndex + 1));
        int low = hexValue(text.charAt(percentIndex + 2));
        if (high < 0 || low < 0) {
            return -1;
        }
        return (high << 4) | low;
    }

    /**
     * Returns the value of an ASCII hex digit, or -1 for any other char.
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Demo: decodes one UTF-8-escaped and one GB2312-escaped search URL.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the GB2312 charset is not
     *         available on this platform
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        chartools tools = new chartools();
        String[] urls = {
            // Query escaped as UTF-8.
            "http://www.google.com/search?hl=zh-CN&newwindow=1&q="
                + "%E4%B8%AD%E5%9B%BD%E5%A4%A7%E7%99%BE%E7%A7%91%E5%9C%A8%E7%BA%BF"
                + "%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2&btnG=%E6%90%9C%E7%B4%A2&lr=",
            // Same query escaped as GB2312.
            "http://www.baidu.com/baidu?word="
                + "%D6%D0%B9%FA%B4%F3%B0%D9%BF%C6%D4%DA%CF%DF%C8%AB%CE%C4%BC%EC%CB%F7"
                + "&tn=myie2dg"
        };
        for (String url : urls) {
            if (tools.isutf8url(url)) {
                System.out.println(tools.utf8urldecode(url));
            } else {
                // Name the charset explicitly instead of relying on the
                // platform default used by the deprecated decode(String).
                System.out.println(URLDecoder.decode(url, "GB2312"));
            }
        }
    }
}