The Java code is as follows:
Copy code — the code is as follows:
package com.gjob.common;
/**
 * Helpers for converting between text and its percent-encoded UTF-8 form
 * (for example U+4F60 &lt;-&gt; {@code %E4%BD%A0}).
 *
 * <p>Both methods are stateless and operate only on their argument.
 */
public class URLtoUTF8 {

    /** Upper-case hexadecimal digits used when emitting {@code %XX} escapes. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /** Emitted by {@link #unescape(String)} when a decoded value is not a Unicode code point. */
    private static final char REPLACEMENT_CHAR = 0xFFFD;

    /**
     * Converts every non-ASCII character of {@code s} to its percent-encoded
     * UTF-8 form, e.g. U+4F60 becomes {@code %E4%BD%A0}.
     *
     * <p>ASCII characters (U+0000..U+007F) are copied unchanged; this includes
     * {@code '%'}, {@code '+'} and the space character, so the method is not a
     * complete URL encoder. The string is walked by code point, so a
     * supplementary character (a surrogate pair) is encoded as one 4-byte UTF-8
     * sequence. An unpaired surrogate has no UTF-8 form and is encoded as
     * {@code %3F} ('?'), the UTF-8 encoder's replacement byte.
     *
     * @param s the text to encode
     * @return {@code s} with every non-ASCII character replaced by upper-case
     *         {@code %XX} escapes
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String toUtf8String(String s) {
        StringBuilder encoded = new StringBuilder(s.length());
        int index = 0;
        while (index < s.length()) {
            int codePoint = s.codePointAt(index);
            index += Character.charCount(codePoint);
            if (codePoint < 0x80) {
                encoded.append((char) codePoint);
                continue;
            }
            byte[] utf8 = new String(Character.toChars(codePoint))
                    .getBytes(java.nio.charset.StandardCharsets.UTF_8);
            for (byte raw : utf8) {
                int value = raw & 0xFF; // bytes are signed in Java; map to 0..255
                encoded.append('%')
                        .append(HEX_DIGITS[value >>> 4])
                        .append(HEX_DIGITS[value & 0x0F]);
            }
        }
        return encoded.toString();
    }

    /**
     * Decodes percent-encoded UTF-8 back to text, e.g. {@code %E4%BD%A0}
     * becomes U+4F60.
     *
     * <ul>
     *   <li>{@code %XX} (two hex digits, either case) yields the byte {@code 0xXX}.</li>
     *   <li>{@code '+'} yields a space.</li>
     *   <li>A {@code '%'} that is not followed by two hex digits is kept
     *       literally instead of raising an index error.</li>
     *   <li>A raw character in U+0080..U+00FF is treated as a byte with that
     *       value; a raw character above U+00FF cannot be a byte and is copied
     *       unchanged.</li>
     * </ul>
     *
     * <p>The resulting bytes are decoded as UTF-8. Well-formedness is not
     * checked: stray continuation bytes and unfinished sequences are dropped
     * silently, and the obsolete 5- and 6-byte lead bytes are still accepted.
     * A sequence that decodes to a value outside the Unicode range yields
     * U+FFFD.
     *
     * @param s the percent-encoded text
     * @return the decoded text
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String unescape(String s) {
        int length = s.length();
        StringBuilder decoded = new StringBuilder(length);
        int pending = 0;   // bits collected so far for the current multi-byte sequence
        int more = -1;     // continuation bytes still expected; -1 means "not inside a sequence"

        for (int i = 0; i < length; i++) {
            char ch = s.charAt(i);

            // Get the next byte b from the input.
            int b;
            if (ch == '%') {
                boolean hasTwoMore = i + 2 < length;
                int high = hasTwoMore ? hexValue(s.charAt(i + 1)) : -1;
                int low = hasTwoMore ? hexValue(s.charAt(i + 2)) : -1;
                if (high >= 0 && low >= 0) {
                    b = (high << 4) | low;
                    i += 2;
                } else {
                    b = '%'; // malformed escape: keep the percent sign as-is
                }
            } else if (ch == '+') {
                b = ' ';
            } else {
                b = ch;
            }

            // A char above 0xFF is not a byte value; copy it through untouched.
            if (b > 0xFF) {
                decoded.append((char) b);
                continue;
            }

            // Decode byte b as UTF-8; pending collects incomplete characters.
            if ((b & 0xC0) == 0x80) {            // 10xxxxxx (continuation byte)
                pending = (pending << 6) | (b & 0x3F);
                if (--more == 0) {
                    appendDecoded(decoded, pending);
                }
            } else if ((b & 0x80) == 0x00) {     // 0xxxxxxx (7 bits, plain ASCII)
                decoded.append((char) b);
            } else if ((b & 0xE0) == 0xC0) {     // 110xxxxx (5 bits, 1 more byte)
                pending = b & 0x1F;
                more = 1;
            } else if ((b & 0xF0) == 0xE0) {     // 1110xxxx (4 bits, 2 more bytes)
                pending = b & 0x0F;
                more = 2;
            } else if ((b & 0xF8) == 0xF0) {     // 11110xxx (3 bits, 3 more bytes)
                pending = b & 0x07;
                more = 3;
            } else if ((b & 0xFC) == 0xF8) {     // 111110xx (2 bits, 4 more bytes)
                pending = b & 0x03;
                more = 4;
            } else {                             // 1111110x (1 bit, 5 more bytes)
                pending = b & 0x01;
                more = 5;
            }
        }
        return decoded.toString();
    }

    /** Returns the value 0..15 of an ASCII hex digit, or -1 if {@code c} is not one. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Appends a decoded value as a code point, so values above U+FFFF become a
     * surrogate pair instead of being truncated to 16 bits; values that are not
     * Unicode code points are replaced by U+FFFD.
     */
    private static void appendDecoded(StringBuilder out, int codePoint) {
        if (Character.isValidCodePoint(codePoint)) {
            out.appendCodePoint(codePoint);
        } else {
            out.append(REPLACEMENT_CHAR);
        }
    }

    /**
     * Small demonstration: prints {@code %E4%BD%A0}, then the two characters
     * U+4F60 and U+597D separated by a space.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // U+4F60 is written as an escape so the source does not depend on file encoding.
        System.out.println(URLtoUTF8.toUtf8String("\u4F60"));
        System.out.println(URLtoUTF8.unescape("%E4%BD%A0%20%E5%A5%BD"));
    }
}
############
Run Result:
%E4%BD%A0
你 好