I do not know why the major search engine code is not the same. Of course not gb2312 is utf-8. Coding problem is the problem of more headaches ... The headache is not fatal ...
We get keywords that are typically analyzed by the URL of the visiting page.
Http://www.google.com/search?hl=zh-CN&q=%E5%AD%A4%E7%8B%AC&lr=
You must know this is encoded by UrlEncode.
We get the information and we need to take 2 steps. The first step is to carry out the urldecode, while our normal parameters are alive, this is done by the ASP itself, but now we have to hand-decode.
There are many online functions, but they are all aimed at gb2312 page solution Gb2312.utf-8. For this, we can easily decode the advanced line, and then according to the search engine to judge its encoding, if it is utf-8 then convert to gb2312.
But because my site is utf-8 page. and Utf-8 page I found only utf-8 characters urldecode encoded. I paused here for a long time, and finally I can only use the worst way to submit the split keyword to a XMLHTTP ASP page. , and then live garbled (gb2312) after the gb2312 to Utf-8 conversion.
The following main implementation code.
Public Function Getsearchkeyword (refererurl) ' Search keywords
If Refererurl= "" or Len (refererurl) <1 then Exit function
On Error Resume Next
Dim RE
Set re = New RegExp
Re. IgnoreCase = True
Re. Global = True
Dim A,b,j
' Fuzzy Lookup keyword, this method is faster, the scope is also larger
Re. Pattern = "(word= ([^&]*) |q= ([^&]*) |p= ([^&]*) |query= ([^&]*) |name= ([^&]*) |_searchkey= [^&] *) |baidu.*?w= ([^&]*)] "
Set A = Re. Execute (Refererurl)
If A.count>0 Then
Set B = A (a.count-1). Submatches
For J=1 to B.count
If Len (b (j)) >0 Then
If InStr (1,refererurl, "Google", 1) Then
Getsearchkeyword=trim (U8decode (b (j)))
ElseIf InStr (1,refererurl, "Yahoo", 1) Then
Getsearchkeyword=trim (U8decode (b (j)))
ElseIf InStr (1,refererurl, "Yisou", 1) Then
Getsearchkeyword=trim (Getkey (b (j)))
ElseIf InStr (1,refererurl, "3721", 1) Then
Getsearchkeyword=trim (Getkey (b (j)))
Else
Getsearchkeyword=trim (Getkey (b (j)))
End If
Exit Function
End If
Next
End If
If Err Then
Err.Clear
Getsearchkeyword = Refererurl
Else
Getsearchkeyword = ""
End If
End Function
Function urlencoding (Vstrin)
Dim strreturn,i,thischr
Strreturn = ""
For i = 1 to Len (Vstrin)
THISCHR = Mid (vstrin,i,1)
If Abs (ASC (THISCHR)) < &hff Then
Strreturn = Strreturn & THISCHR
Else
Innercode = ASC (THISCHR)
If Innercode < 0 Then
Innercode = Innercode + &h10000
End If
Hight8 = (Innercode and &hff00) \ &hff
Low8 = Innercode and &hff
Strreturn = strreturn & "%" & Hex (HIGHT8) & "%" & Hex (LOW8)
End If
Next
urlencoding = Strreturn
End Function
function Getkey (key)
Dim oreq
Set oreq = CreateObject ("MSXML2. XMLHTTP ")
Oreq.open "POST", "http://" &WebUrl& "/system/showgb2312xml.asp?a=" &key,false
Oreq.send
GETKEY=UTF2GB (Oreq.responsetext)
End Function
function Chinese2unicode (STR)
Dim i
Dim Str_one
Dim Str_unicode
For I=1 to Len (STR)
Str_one=mid (str,i,1)
STR_UNICODE=STR_UNICODE&CHR (38)
STR_UNICODE=STR_UNICODE&CHR (35)
STR_UNICODE=STR_UNICODE&CHR (120)
str_unicode=str_unicode& Hex (AscW (Str_one))
STR_UNICODE=STR_UNICODE&CHR (59)
Next
Response.Write Str_unicode
End Function
function UTF2GB (UTFSTR)
Dim Dig,gbstr
For Dig=1 to Len (UTFSTR)
If mid (utfstr,dig,1) = "%" Then
If Len (utfstr) >= dig+8 Then
Gbstr=gbstr & Convchinese (Mid (utfstr,dig,9))
Dig=dig+8
Else
Gbstr=gbstr & Mid (utfstr,dig,1)
End If
Else
Gbstr=gbstr & Mid (utfstr,dig,1)
End If
Next
Utf2gb=gbstr
End Function
function Convchinese (x)
Dim A,i,j,digs,unicode
A=split (Mid (x,2), "%")
I=0
J=0
For i=0 to UBound (A)
A (i) =c16to2 (A (i))
Next
For i=0 to UBound (A)-1
Digs=instr (A (i), "0")
Unicode= ""
For J=1 to DigS-1
If J=1 Then
A (i) =right (A (i), Len (A (i))-digs)
Unicode=unicode & A (i)
Else
I=i+1
A (i) =right (A (i), Len (A (i))-2)
Unicode=unicode & A (i)
End If
Next
If Len (c2to16 (Unicode)) =4 Then
Convchinese=convchinese & ChrW (int ("&h" & C2to16 (Unicode))
Else
Convchinese=convchinese & chr (int ("&h" & C2to16 (Unicode))
End If
Next
End Function
function U8decode (ENSTR)
' Enter a bunch of%-delimited strings, divide them into arrays, and judge the completion rules according to the UTF8 rules.
' Input: Guan E5 B3 key E9 AE word E5 AD 97
' Output: Off B9d8 key BCFC word d7d6
Dim C,i,i2,v,destr,weis
For I=1 to Len (ENSTR)
C=mid (enstr,i,1)
If c= "%" then
V=c16to2 (Mid (enstr,i+1,2))
' Judging the first occurrence of the 0 position,
' may be 1 (single-byte), 3 (3-1 bytes), 4,5,6,7 cannot be 2 and greater than 7
' Theoretically to 7, it doesn't actually exceed 3.
Weis=instr (V, "0")
V=right (V,len (v)-weis) ' The first one to remove the leftmost WeiS
I=i+3
For i2=2 to WeiS-1
C=c16to2 (Mid (enstr,i+1,2))
C=right (C,len (c)-2) ' The rest remove the two left
V=v & C
I=i+3
Next
If Len (c2to16 (v)) =4 Then
Destr=destr & ChrW (C2to10 (v))
Else
Destr=destr & Chr (C2to10 (v))
End If
I=i-1
Else
If c= "+" then
destr=destr& ""
Else
Destr=destr&c
End If
End If
Next
U8decode = Destr
End Function
function C16to2 (x)
' This function is used to convert 16 to 2, can be any length, the general conversion UTF-8 time is two lengths, such as A9
For example: Input "C2", converted to "11000010", where 1100 is "C" is 10 in 12 (1100), then 2 (10) less than 4 digits to complement into equal (0010).
Dim tempstr
Dim i:i=0 ' Temporary Pointer
For I=1 to Len (Trim (x))
Tempstr= C10to2 (CInt (int ("&h" & Mid (x,i,1)))
Do While Len (TEMPSTR) <4
Tempstr= "0" & tempstr ' 4 digits if less than 4 digits
Loop
C16to2=c16to2 & TempStr
Next
End Function
function c2to16 (x)
' 2 to 16 conversion, every 4 0 or 1 converted into a 16-letter, the input length of course is not a multiple of 4
Dim I:i=1 ' Temporary Pointer
For I=1 to Len (x) Step 4
C2to16=c2to16 & Hex (C2to10 (Mid (x,i,4)))
Next
End Function
function C2to10 (x)
' Pure 2 to 10 conversion, regardless of the transfer of 16 to the required 4-digit first 0 is made up.
' Because this function is very useful! It will be used later, people who have done communications and hardware should know.
' Here is a string representing the binary
C2to10=0
If x= "0" Then Exit Function ' If it's 0, direct 0 is done.
Dim i:i=0 ' Temporary Pointer
For i= 0 to Len (x)-1 ' Otherwise use 8421 yards, this from the time I first learn the computer will be, I miss the original teach our Shi Daojian old man Ah!
If Mid (X,len (x)-i,1) = "1" then c2to10=c2to10+2^ (i)
Next
End Function
function C10to2 (x)
' 10 binary to 2 ' conversion
Dim sign, Result
result = ""
' Symbol
Sign = SGN (x)
x = ABS (x)
If x = 0 Then
C10to2 = 0
Exit function
End If
Do Until x = "0"
Result = result & (x mod 2)
x = x \2
Loop
result = StrReverse (Result)
If sign =-1 Then
C10to2 = "-" & Result
Else
C10to2 = result
End If
End Function
function UrlDecode (ENSTR)
Dim destr,strspecial
Dim c,i,v
Destr= ""
Strspecial= "!" " #$%& ' () *+,/:;<=>?@[\]^ ' {|} ~%"
For I=1 to Len (ENSTR)
C=mid (enstr,i,1)
If c= "%" then
V=eval ("&h" +mid (enstr,i+1,2))
If InStr (STRSPECIAL,CHR (v)) >0 Then
DESTR=DESTR&CHR (v)
I=i+2
Else
V=eval ("&h" +mid (enstr,i+1,2) +mid (enstr,i+4,2))
DESTR=DESTR&CHR (v)
I=i+5
End If
Else
If c= "+" then
destr=destr& ""
Else
Destr=destr&c
End If
End If
Next
Urldecode=destr
End Function