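Therefore, you have to work from the encoded keyword. For example, searching Google for "解析关键字编码" ("parse keyword encoding") puts the encoded keyword "%e8%a7%a3%e6%9e%90%e5%85%b3%e9%94%ae%e5%ad%97%e7%bc%96%e7%a0%81" into the result URL. The keyword is recovered in three steps: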
1. Extract the keyword portion from the address (URL) above.
2. Determine the name of the character encoding (e.g. GBK, UTF-8) from the encoded keyword.
3. Call URLDecoder.decode(keyword, encodingName) to decode the keyword with that encoding.
The following is the Java implementation.
The code is as follows:
package test;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the search keyword from a search-engine result URL and decodes it.
 *
 * <p>The keyword is located with a regular expression that knows the query
 * parameter used by a fixed list of search engines. Its percent-encoding is
 * then decoded as UTF-8 when the escaped bytes form well-formed UTF-8
 * sequences, and as GBK otherwise.
 */
public class Parseurlkeyword {

    /**
     * Matches "&lt;engine&gt;...&lt;param&gt;=" followed by the raw keyword,
     * which is captured in group 1 (everything up to the next '&amp;').
     */
    private static final Pattern KEYWORD_PATTERN = Pattern.compile(
            "(?:yahoo.+?[\\?|&]p=|openfind.+?query=|google.+?q=|lycos.+?query="
            + "|onseek.+?keyword=|search\\.tom.+?word=|search\\.qq\\.com.+?word="
            + "|zhongsou\\.com.+?word=|search\\.msn\\.com.+?q=|yisou\\.com.+?p="
            + "|sina.+?word=|sina.+?query=|sina.+?_searchkey=|sohu.+?word="
            + "|sohu.+?key_word=|sohu.+?query=|163.+?q=|baidu.+?wd=|soso.+?w="
            + "|3721\\.com.+?p=|alltheweb.+?q=)([^&]*)");

    /**
     * Matches a string whose chars (each holding one byte value, 0-255) form
     * a sequence of UTF-8 encoded characters, 1 to 6 bytes each.
     */
    private static final Pattern UTF8_BYTES_PATTERN = Pattern.compile(
            "^(?:[\\x00-\\x7f]"
            + "|[\\xfc-\\xff][\\x80-\\xbf]{5}"
            + "|[\\xf8-\\xfb][\\x80-\\xbf]{4}"
            + "|[\\xf0-\\xf7][\\x80-\\xbf]{3}"
            + "|[\\xe0-\\xef][\\x80-\\xbf]{2}"
            + "|[\\xc0-\\xdf][\\x80-\\xbf])+$");

    /** Runs the extractor on a few sample Google and Baidu URLs and prints the results. */
    public static void main(String[] args) {
        String url = "http://www.google.co.kr/search?hl=en&q=%ed%95%9c%ea%b5%ad%ec%96%b4+&btng=google+search&aq=f&oq=";
        System.out.println(Parseurlkeyword.getKeyword(url));
        System.out.println("");
        url = "http://www.google.cn/search?q=%E6%8F%90%E5%8F%96+%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E+%E5%85%B3%E9%94%AE%E5%AD%97&hl=zh-cn&newwindow=1&sa=2";
        System.out.println(Parseurlkeyword.getKeyword(url));
        System.out.println("");
        url = "http://www.google.com.tw/search?hl=zh-CN&q=%E6%B9%98%E9%8B%BC%E4%B8%AD%E5%9C%8B%E9%A6%99%E7%85%99&btng=google+%e6%90%9c%e7%b4%a2&aq=f&oq=";
        System.out.println(Parseurlkeyword.getKeyword(url));
        System.out.println("");
        url = "http://www.baidu.com/s?wd=%D6%D0%87%F8%D3%D0%BE%80%D8%9F%C8%CE%B9%AB%CB%BE";
        System.out.println(Parseurlkeyword.getKeyword(url));
        System.out.println("");
        url = "http://www.baidu.com/s?wd=%C6%F3%D2%B5%CD%C6%B9%E3";
        System.out.println(Parseurlkeyword.getKeyword(url));
        System.out.println("");
    }

    /**
     * Returns the decoded search keyword contained in a search-engine URL.
     *
     * @param url the result-page URL; may be {@code null}
     * @return the decoded keyword, or an empty string when the URL is
     *         {@code null}, matches no known search engine, contains malformed
     *         percent-escapes, or the chosen charset is not supported
     */
    public static String getKeyword(String url) {
        if (url == null) {
            return "";
        }
        Matcher keywordMatcher = KEYWORD_PATTERN.matcher(url);
        // Only the first match is the real query; later parameters can contain
        // an engine name themselves (e.g. "btng=google+search&aq=f") and would
        // otherwise be picked up as additional "keywords".
        if (!keywordMatcher.find()) {
            return "";
        }
        String encodedKeyword = keywordMatcher.group(1);
        if (encodedKeyword.length() == 0) {
            return "";
        }

        // Turn every %XX into a char carrying that byte value, then test
        // whether the byte sequence is structurally valid UTF-8.
        String rawBytes = unescape(encodedKeyword);
        String charsetName = UTF8_BYTES_PATTERN.matcher(rawBytes).matches() ? "UTF-8" : "GBK";

        try {
            return URLDecoder.decode(encodedKeyword, charsetName);
        } catch (UnsupportedEncodingException e) {
            return "";
        } catch (IllegalArgumentException e) {
            // URLDecoder rejects truncated or non-hex escapes; treat as "no keyword".
            return "";
        }
    }

    /**
     * Decodes JavaScript-style escapes: {@code %XX} becomes the char with code
     * {@code 0xXX} and {@code %uXXXX} becomes the char with code {@code 0xXXXX}.
     * All other text, including '+', is copied unchanged. A '%' that is not
     * followed by a complete hexadecimal escape is copied literally.
     *
     * @param src the escaped text; must not be {@code null}
     * @return the unescaped text
     */
    public static String unescape(String src) {
        int length = src.length();
        StringBuilder out = new StringBuilder(length);
        int cursor = 0;
        while (cursor < length) {
            int percent = src.indexOf('%', cursor);
            if (percent < 0) {
                // No more escapes: copy the remaining literal tail.
                out.append(src, cursor, length);
                break;
            }
            out.append(src, cursor, percent);

            int value;
            int next;
            boolean unicodeEscape = percent + 1 < length
                    && (src.charAt(percent + 1) == 'u' || src.charAt(percent + 1) == 'U');
            if (unicodeEscape) {
                value = hexValue(src, percent + 2, percent + 6);
                next = percent + 6;
            } else {
                value = hexValue(src, percent + 1, percent + 3);
                next = percent + 3;
            }

            if (value < 0) {
                // Malformed escape: keep the '%' and continue right after it.
                out.append('%');
                cursor = percent + 1;
            } else {
                out.append((char) value);
                cursor = next;
            }
        }
        return out.toString();
    }

    /**
     * Parses {@code s[from, to)} as ASCII hexadecimal digits.
     *
     * @return the parsed value, or -1 when the range runs past the end of
     *         {@code s} or contains a non-hex character
     */
    private static int hexValue(String s, int from, int to) {
        if (to > s.length()) {
            return -1;
        }
        int value = 0;
        for (int i = from; i < to; i++) {
            char c = s.charAt(i);
            int digit = c < 128 ? Character.digit(c, 16) : -1;
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }
}
The following is the ASP (VBScript) implementation.
The code is as follows:
' Decodes a percent-encoded string whose bytes are either UTF-8 or GBK.
'
' s       - the percent-encoded text (for example a search keyword taken
'           from a URL). The argument is not modified.
' Returns - the decoded text. UTF-8 is used when the escaped bytes form
'           well-formed UTF-8 sequences; otherwise GBK is used.
Function decodeURI(s)
    Dim raw, reg, cs, sm

    ' Unescape turns every %XX into a character with code XX, so each
    ' character of raw stands for one byte. A local is used because VBScript
    ' passes arguments ByRef by default and assigning to s would overwrite
    ' the caller's variable.
    raw = Unescape(s)

    cs = "GBK"
    Set reg = New RegExp
    ' Structural test for a sequence of UTF-8 encoded characters (1-6 bytes each).
    reg.Pattern = "^(?:[\x00-\x7f]|[\xfc-\xff][\x80-\xbf]{5}|[\xf8-\xfb][\x80-\xbf]{4}|[\xf0-\xf7][\x80-\xbf]{3}|[\xe0-\xef][\x80-\xbf]{2}|[\xc0-\xdf][\x80-\xbf])+$"
    If reg.Test(raw) Then cs = "UTF-8"
    Set reg = Nothing

    Set sm = CreateObject("ADODB.Stream")
    With sm
        .Type = 2                  ' 2 = text stream
        .Mode = 3                  ' 3 = read/write
        .Open
        ' Write the characters out as single bytes ...
        .CharSet = "iso-8859-1"
        .WriteText raw
        ' ... then rewind and reinterpret those bytes in the detected charset.
        .Position = 0
        .CharSet = cs
        decodeURI = .ReadText(-1)  ' -1 = read everything
        .Close
    End With
    Set sm = Nothing
End Function
' Demo: two percent-encoded keywords, written to the response after decoding.
' The second literal is the UTF-8 encoding of U+629A U+987A U+9752 U+677E
' U+836F U+4E1A; the first is presumably the same text in GBK (verify).
' The literals must not contain blanks: a space inside an escape sequence
' splits a multi-byte character and corrupts the decoded text.
Response.Write decodeURI("%b8%a7%cb%b3%c7%e0%cb%c9%d2%a9%d2%b5")
Response.Write decodeURI("%e6%8a%9a%e9%a1%ba%e9%9d%92%e6%9d%be%e8%8d%af%e4%b8%9a")