// 原文: http://tb.blog.csdn.net/TrackBack.aspx?PostId=800901
// 主要介紹字元編碼及其解碼的函數,還有判斷是否是 UTF-8 編碼字元的函數。
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.net.URLDecoder;
/**
 * Character-encoding utility class.
 *
 * <p>Provides byte-level re-interpretation between ISO-8859-1 and GB2312, percent-encoding and
 * percent-decoding of non-ASCII characters as UTF-8, and heuristics for recognising UTF-8 data.
 *
 * <p>Copyright: flashman.com.cn Copyright (c) 2005
 *
 * <p>Company: flashman.com.cn
 *
 * @author jeffzhu
 * @version 1.0
 */
public class CharTools {

    private static final String ISO_8859_1 = "ISO-8859-1";
    private static final String GB2312 = "GB2312";
    private static final String UTF_8 = "UTF-8";
    private static final String HEX_DIGITS = "0123456789ABCDEF";

    /**
     * Re-interprets text from ISO-8859-1 to GB2312: the text is turned into bytes using
     * ISO-8859-1 and those bytes are decoded again as GB2312.
     *
     * @param text the text to convert; {@code null} is treated as empty
     * @return the re-decoded text, or an empty string if {@code text} is {@code null} or one of
     *         the charsets is not available in this JVM
     */
    public String ISO2GB(String text) {
        return transcode(text, ISO_8859_1, GB2312);
    }

    /**
     * Re-interprets text from GB2312 to ISO-8859-1: the text is turned into bytes using GB2312
     * and those bytes are decoded again as ISO-8859-1.
     *
     * @param text the text to convert; {@code null} is treated as empty
     * @return the re-decoded text, or an empty string if {@code text} is {@code null} or one of
     *         the charsets is not available in this JVM
     */
    public String GB2ISO(String text) {
        return transcode(text, GB2312, ISO_8859_1);
    }

    /**
     * Percent-encodes every non-ASCII character of {@code text} as its UTF-8 byte sequence
     * (upper-case hex, e.g. {@code %E4%B8%AD}). ASCII characters, including {@code %} and URL
     * delimiters, are copied unchanged so a complete URL can be passed in.
     *
     * <p>The text is processed by code point, so a surrogate pair is encoded as one 4-byte
     * sequence rather than as two unencodable halves.
     *
     * @param text the text to encode; {@code null} is treated as empty
     * @return the encoded text, never {@code null}
     */
    public String Utf8URLencode(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder encoded = new StringBuilder(text.length());
        int index = 0;
        while (index < text.length()) {
            int codePoint = text.codePointAt(index);
            int width = Character.charCount(codePoint);
            if (codePoint < 0x80) {
                encoded.append((char) codePoint);
            } else {
                byte[] utf8 = utf8Bytes(text.substring(index, index + width));
                for (int j = 0; j < utf8.length; j++) {
                    int value = utf8[j] & 0xFF;
                    encoded.append('%')
                            .append(HEX_DIGITS.charAt(value >> 4))
                            .append(HEX_DIGITS.charAt(value & 0x0F));
                }
            }
            index += width;
        }
        return encoded.toString();
    }

    /**
     * Decodes percent-escaped multi-byte UTF-8 sequences (2 to 4 bytes) in {@code text}.
     *
     * <p>Everything else is copied through unchanged and in its original case: plain characters,
     * ASCII escapes such as {@code %20}, malformed or truncated escapes, and escaped bytes that
     * do not form a well-formed UTF-8 sequence.
     *
     * @param text the text to decode; {@code null} is treated as empty
     * @return the decoded text, never {@code null}
     */
    public String Utf8URLdecode(String text) {
        if (text == null || text.length() == 0) {
            return "";
        }
        if (text.indexOf('%') == -1) {
            return text;
        }
        int length = text.length();
        StringBuilder decoded = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            char current = text.charAt(pos);
            int codePoint = current == '%' ? escapedCodePointAt(text, pos) : -1;
            if (codePoint < 0) {
                decoded.append(current);
                pos++;
            } else {
                decoded.appendCodePoint(codePoint);
                // Each UTF-8 byte occupies three characters ("%XX") in the escaped form.
                pos += 3 * utf8Length(codePoint);
            }
        }
        return decoded.toString();
    }

    /**
     * Structurally checks that {@code b} is made of UTF-8 shaped sequences: an ASCII byte, or a
     * lead byte in 0xC0..0xFD followed by the number of 0x80..0xBF continuation bytes that the
     * lead byte announces (1 to 5, i.e. the historical forms of up to 6 bytes are accepted).
     * Overlong forms and encoded surrogates are not rejected.
     *
     * @param b         the bytes to inspect; must not be {@code null}
     * @param aMaxCount the maximum number of characters (sequences) to inspect; checking stops
     *                  successfully once this many have been verified
     * @return {@code true} if no malformed or truncated sequence was found within the inspected
     *         range
     */
    public static boolean isValidUtf8(byte[] b, int aMaxCount) {
        int length = b.length;
        int index = 0;
        for (int checked = 0; index < length && checked < aMaxCount; checked++) {
            int lead = b[index++] & 0xFF;
            if (lead < 0x80) {
                continue; // plain ASCII
            }
            if (lead < 0xC0 || lead > 0xFD) {
                return false; // stray continuation byte, or 0xFE/0xFF
            }
            int trailing;
            if (lead < 0xE0) {
                trailing = 1;
            } else if (lead < 0xF0) {
                trailing = 2;
            } else if (lead < 0xF8) {
                trailing = 3;
            } else if (lead < 0xFC) {
                trailing = 4;
            } else {
                trailing = 5;
            }
            if (index + trailing > length) {
                return false; // sequence is cut off by the end of the array
            }
            for (int end = index + trailing; index < end; index++) {
                if ((b[index] & 0xC0) != 0x80) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Heuristically tells whether {@code text} carries percent-escaped UTF-8 data by looking at
     * the first escaped non-ASCII byte: the result is {@code true} when that byte starts a
     * well-formed multi-byte UTF-8 sequence. Escaped ASCII bytes (such as {@code %20}) and
     * malformed escapes are skipped because they are the same in every ASCII-compatible charset.
     *
     * @param text the text to inspect; may be {@code null}
     * @return {@code true} if the first escaped non-ASCII byte begins a well-formed UTF-8
     *         sequence; {@code false} otherwise, including for {@code null} or text without
     *         escaped non-ASCII bytes
     */
    public boolean isUtf8Url(String text) {
        if (text == null) {
            return false;
        }
        int pos = text.indexOf('%');
        while (pos != -1) {
            int value = escapedByteAt(text, pos);
            if (value >= 0x80) {
                return escapedCodePointAt(text, pos) >= 0;
            }
            pos = text.indexOf('%', pos + (value < 0 ? 1 : 3));
        }
        return false;
    }

    /**
     * Demo: decodes one UTF-8 encoded and one GB2312 encoded search URL and prints the results.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException if this JVM does not provide the GB2312 charset
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        CharTools charTools = new CharTools();
        String[] urls = {
            "http://www.google.com/search?hl=zh-CN&newwindow=1&q=%E4%B8%AD%E5%9B%BD%E5%A4%A7%E7%99%BE%E7%A7%91%E5%9C%A8%E7%BA%BF%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2&btnG=%E6%90%9C%E7%B4%A2&lr=",
            "http://www.baidu.com/baidu?word=%D6%D0%B9%FA%B4%F3%B0%D9%BF%C6%D4%DA%CF%DF%C8%AB%CE%C4%BC%EC%CB%F7&tn=myie2dg"
        };
        for (String url : urls) {
            if (charTools.isUtf8Url(url)) {
                System.out.println(charTools.Utf8URLdecode(url));
            } else {
                // The non-UTF-8 sample is GB2312 encoded; name the charset explicitly instead
                // of relying on the platform default used by the deprecated one-argument decode.
                System.out.println(URLDecoder.decode(url, GB2312));
            }
        }
    }

    /** Encodes {@code text} with one charset and decodes the bytes with another; "" on failure. */
    private static String transcode(String text, String sourceCharset, String targetCharset) {
        if (text == null) {
            return "";
        }
        try {
            return new String(text.getBytes(sourceCharset), targetCharset);
        } catch (UnsupportedEncodingException ex) {
            // Best effort, as before: report the problem and hand back an empty result rather
            // than leaking the exception text into the converted value.
            ex.printStackTrace();
            return "";
        }
    }

    /** Returns the UTF-8 bytes of {@code text}. */
    private static byte[] utf8Bytes(String text) {
        try {
            return text.getBytes(UTF_8);
        } catch (UnsupportedEncodingException ex) {
            // UTF-8 is one of the charsets every Java platform must support.
            throw new IllegalStateException("UTF-8 charset is not available", ex);
        }
    }

    /** Returns the value of an ASCII hex digit, or -1 if {@code c} is not one. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** Returns the byte value of the "%XX" escape starting at {@code pos}, or -1 if none. */
    private static int escapedByteAt(String text, int pos) {
        if (pos < 0 || pos + 3 > text.length() || text.charAt(pos) != '%') {
            return -1;
        }
        int high = hexValue(text.charAt(pos + 1));
        int low = hexValue(text.charAt(pos + 2));
        if (high < 0 || low < 0) {
            return -1;
        }
        return (high << 4) | low;
    }

    /**
     * Returns the code point of the well-formed escaped multi-byte UTF-8 sequence starting at
     * {@code pos}, or -1 if there is none (ASCII, malformed, truncated, overlong, surrogate or
     * out-of-range input).
     */
    private static int escapedCodePointAt(String text, int pos) {
        int lead = escapedByteAt(text, pos);
        int trailing;
        int codePoint;
        int minimum;
        if (lead >= 0xC2 && lead <= 0xDF) {
            trailing = 1;
            codePoint = lead & 0x1F;
            minimum = 0x80;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trailing = 2;
            codePoint = lead & 0x0F;
            minimum = 0x800;
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            codePoint = lead & 0x07;
            minimum = 0x10000;
        } else {
            return -1;
        }
        for (int k = 1; k <= trailing; k++) {
            int next = escapedByteAt(text, pos + 3 * k);
            if (next < 0x80 || next > 0xBF) {
                return -1; // missing escape (-1) or not a continuation byte
            }
            codePoint = (codePoint << 6) | (next & 0x3F);
        }
        boolean surrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
        if (codePoint < minimum || codePoint > Character.MAX_CODE_POINT || surrogate) {
            return -1;
        }
        return codePoint;
    }

    /** Returns the UTF-8 length in bytes of a non-ASCII code point. */
    private static int utf8Length(int codePoint) {
        if (codePoint < 0x800) {
            return 2;
        }
        return codePoint < 0x10000 ? 3 : 4;
    }
}