Therefore, when you enter a search keyword in Google, the keyword appears in the resulting URL in percent-encoded form. For example, the keyword "解析关键字编码" ("parse keyword encoding") is encoded as "%E8%A7%A3%E6%9E%90%E5%85%B3%E9%94%AE%E5%AD%97%E7%BC%96%E7%A0%81". Extracting the keyword takes three steps:
1. Parse the encoded keyword out of the search URL.
2. Determine the name of the character encoding (such as GBK or UTF-8) from the encoded keyword.
3. Use URLDecoder.decode(keyword, encodingName) to decode the keyword.
The following is the Java implementation:

package test;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the search keyword from a search-engine result URL and decodes it.
 *
 * <p>The keyword is located with a regular expression that knows the query
 * parameter used by a number of search engines (Google {@code q=}, Baidu
 * {@code wd=}, Yahoo {@code p=}, ...). The percent-encoded value is then
 * inspected to decide whether its bytes form a UTF-8 sequence; if they do it
 * is decoded as UTF-8, otherwise as GBK.
 */
public class ParseURLKeyword {

    /**
     * Matches "&lt;search engine host&gt; ... &lt;keyword parameter&gt;=" and captures the
     * raw (still percent-encoded) parameter value in group 1. Matching is
     * case-insensitive because host names are case-insensitive.
     */
    private static final Pattern KEYWORD_PATTERN = Pattern.compile(
            "(?:yahoo.+?[?&]p=|openfind.+?query=|google.+?q=|lycos.+?query="
            + "|onseek.+?keyword=|search\\.tom.+?word=|search\\.qq\\.com.+?word="
            + "|zhongsou\\.com.+?word=|search\\.msn\\.com.+?q=|yisou\\.com.+?p="
            + "|sina.+?word=|sina.+?query=|sina.+?_searchkey=|sohu.+?word="
            + "|sohu.+?key_word=|sohu.+?query=|163.+?q=|baidu.+?wd=|soso.+?w="
            + "|3721\\.com.+?p=|alltheweb.+?q=)([^&]*)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Matches a string whose chars, read as byte values 0x00-0xFF, are laid
     * out like a UTF-8 byte sequence (lead byte followed by the right number
     * of 0x80-0xBF continuation bytes).
     */
    private static final Pattern UTF8_BYTES_PATTERN = Pattern.compile(
            "^(?:[\\x00-\\x7f]"
            + "|[\\xfc-\\xff][\\x80-\\xbf]{5}"
            + "|[\\xf8-\\xfb][\\x80-\\xbf]{4}"
            + "|[\\xf0-\\xf7][\\x80-\\xbf]{3}"
            + "|[\\xe0-\\xef][\\x80-\\xbf]{2}"
            + "|[\\xc0-\\xdf][\\x80-\\xbf])+$");

    /** Demo: prints the decoded keyword of several sample search URLs. */
    public static void main(String[] args) {
        String[] urls = {
            // Google Korea, UTF-8 keyword
            "http://www.google.co.kr/search?hl=en&q=%ED%95%9C%EA%B5%AD%EC%96%B4+&btnG=Google+Search&aq=f&oq=",
            // Google China, UTF-8 keyword with a lower-case hex escape
            "http://www.google.cn/search?q=%E6%8F%90%E5%8F%96+%E6%90%9C%E7%B4%A2%E5%BC%95%E6%93%8E+%E5%85%b3%E9%94%AE%E5%AD%97&hl=zh-CN&newwindow=1&sa=2",
            // Google Taiwan, UTF-8 keyword
            "http://www.google.com.tw/search?hl=zh-CN&q=%E6%B9%98%E9%8B%BC%E4%B8%AD%E5%9C%8B%E9%A6%99%E7%85%99&btnG=Google+%E6%90%9C%E7%B4%A2&aq=f&oq=",
            // Baidu, GBK keywords
            "http://www.baidu.com/s?wd=%D6%D0%87%F8%D3%D0%BE%80%D8%9F%C8%CE%B9%AB%CB%BE",
            "http://www.baidu.com/s?wd=%C6%F3%D2%B5%CD%C6%B9%E3"
        };
        for (int i = 0; i < urls.length; i++) {
            System.out.println(ParseURLKeyword.getKeyword(urls[i]));
            System.out.println("");
        }
    }

    /**
     * Returns the decoded search keyword contained in {@code url}.
     *
     * @param url a search-engine result URL; may be {@code null}
     * @return the decoded keyword, or an empty string when the URL is
     *         {@code null}, no known keyword parameter is found, the value is
     *         empty, the value contains a malformed percent escape, or the
     *         chosen charset is not supported by the running JVM
     */
    public static String getKeyword(String url) {
        if (url == null) {
            return "";
        }
        Matcher keywordMat = KEYWORD_PATTERN.matcher(url);
        if (!keywordMat.find()) {
            return "";
        }
        // Read the captured value directly. Building the result with
        // appendReplacement would also copy the URL text preceding the match
        // (scheme, "www." prefix, ...) into the keyword.
        String rawKeyword = keywordMat.group(1);
        if (rawKeyword.length() == 0) {
            return "";
        }

        // Turn every %XX escape into a char holding that byte value so the
        // byte layout can be tested against the UTF-8 pattern.
        String byteString = ParseURLKeyword.unescape(rawKeyword);
        String encodeString = "gbk";
        if (UTF8_BYTES_PATTERN.matcher(byteString).matches()) {
            encodeString = "UTF-8";
        }

        try {
            return URLDecoder.decode(rawKeyword, encodeString);
        } catch (UnsupportedEncodingException e) {
            return "";
        } catch (IllegalArgumentException e) {
            // URLDecoder rejects truncated or non-hex escapes.
            return "";
        }
    }

    /**
     * Decodes JavaScript {@code escape()}-style sequences: {@code %XX} becomes
     * the char with code 0xXX and {@code %uXXXX} becomes the char with code
     * 0xXXXX. All other characters are copied unchanged. A {@code %} that is
     * not followed by a complete, valid hex escape is copied literally rather
     * than raising an exception.
     *
     * @param src the escaped text; may be {@code null}
     * @return the unescaped text, or an empty string when {@code src} is
     *         {@code null}
     */
    public static String unescape(String src) {
        if (src == null) {
            return "";
        }
        int length = src.length();
        StringBuilder out = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            char current = src.charAt(pos);
            if (current == '%') {
                if (pos + 1 < length
                        && (src.charAt(pos + 1) == 'u' || src.charAt(pos + 1) == 'U')) {
                    int wide = hexValue(src, pos + 2, pos + 6);
                    if (wide >= 0) {
                        out.append((char) wide);
                        pos += 6;
                        continue;
                    }
                } else {
                    int narrow = hexValue(src, pos + 1, pos + 3);
                    if (narrow >= 0) {
                        out.append((char) narrow);
                        pos += 3;
                        continue;
                    }
                }
            }
            // Ordinary character, or a '%' that does not start a valid escape.
            out.append(current);
            pos++;
        }
        return out.toString();
    }

    /**
     * Parses {@code s[from, to)} as an unsigned hexadecimal number.
     *
     * @return the value, or -1 if the range runs past the end of {@code s} or
     *         contains a character that is not an ASCII hex digit
     */
    private static int hexValue(String s, int from, int to) {
        if (to > s.length()) {
            return -1;
        }
        int value = 0;
        for (int i = from; i < to; i++) {
            char c = s.charAt(i);
            int digit;
            if (c >= '0' && c <= '9') {
                digit = c - '0';
            } else if (c >= 'a' && c <= 'f') {
                digit = c - 'a' + 10;
            } else if (c >= 'A' && c <= 'F') {
                digit = c - 'A' + 10;
            } else {
                return -1;
            }
            value = value * 16 + digit;
        }
        return value;
    }
}
The following is the ASP (VBScript) implementation:

' Decodes a percent-encoded string whose bytes are either UTF-8 or GBK.
'
' s       - the percent-encoded text (for example "%E4%B8%AD").
' Returns - the decoded text. UTF-8 is used when the unescaped byte values
'           are laid out like a UTF-8 sequence; otherwise GBK is used.
Function DecodeURI(s)
    Dim raw, reg, cs, sm

    ' Work on a local copy: VBScript passes arguments ByRef by default, so
    ' assigning to s would overwrite the caller's variable.
    raw = Unescape(s)

    ' Default to GBK; switch to UTF-8 when the byte layout matches.
    cs = "GBK"
    Set reg = New RegExp
    reg.Pattern = "^(?:[\x00-\x7f]|[\xfc-\xff][\x80-\xbf]{5}|[\xf8-\xfb][\x80-\xbf]{4}|[\xf0-\xf7][\x80-\xbf]{3}|[\xe0-\xef][\x80-\xbf]{2}|[\xc0-\xdf][\x80-\xbf])+$"
    If reg.Test(raw) Then cs = "UTF-8"
    Set reg = Nothing

    ' Write the text as iso-8859-1, then re-read the same stream with the
    ' detected charset.
    ' NOTE(review): this relies on iso-8859-1 mapping chars 0x00-0xFF one-to-one
    ' to bytes - confirm on the target server.
    Set sm = CreateObject("ADODB.Stream")
    With sm
        .Type = 2                ' 2 = text stream
        .Mode = 3                ' 3 = read/write
        .Open
        .CharSet = "iso-8859-1"
        .WriteText raw
        .Position = 0
        .CharSet = cs
        DecodeURI = .ReadText(-1) ' -1 = read to end of stream
        .Close
    End With
    Set sm = Nothing
End Function
' Demo: the same keyword encoded first as GBK, then as UTF-8.
Response.Write DecodeURI("%B8%A7%CB%B3%C7%E0%CB%C9%D2%A9%D2%B5")
Response.Write DecodeURI("%E6%8A%9A%E9%A1%BA%E9%9D%92%E6%9D%BE%E8%8D%AF%E4%B8%9A")