<% @ LANGUAGE = "VBSCRIPT" CODEPAGE = "936" %>
<! -- # Include file = "conn. ASP" -->
<! -- # Include file = "inc/function. asp" -->
<! Doctype html public "-// W3C // dtd html 4.01 Transitional // EN" "http://www.w3.org/TR/html4/loose.dtd">
<Html>
<Head>
<Title> Untitled Document </title>
<Meta http-equiv = "Content-Type" content = "text/html; charset = gb2312">
<Meta http-equiv = "refresh" content = "300; URL = steal_house.asp">
</Head>
<Body>
<%
On error resume next
'
Server. ScriptTimeout = 999999
'================================================ ========================
'Character encoding Functions
'================================================ ==================
Function BytesToBstr (body, code)
Dim objstream
Set objstream = Server. createObject ("adodb. stream ")
Objstream. Type = 1
Objstream. Mode = 3
Objstream. Open
Objstream. Write body
Objstream. Position = 0
Objstream. Type = 2
Objstream. Charset = code
BytesToBstr = objstream. ReadText
Objstream. Close
Set objstream = nothing
End Function
'Position where the row string appears in another string
Function Newstring (wstr, strng)
Newstring = Instr (lcase (wstr), lcase (strng ))
If Newstring <= 0 then Newstring = Len (wstr)
End Function
'Replacement string Function
Function ReplaceStr (ori, str1, str2)
ReplaceStr = replace (ori, str1, str2)
End function
'================================================ ==================
Function ReadXML (url, code, start, ends)
Set oSend = createobject ("Microsoft. XMLHTTP ")
SourceCode = oSend. open ("GET", url, false)
OSend. send ()
ReadXml = BytesToBstr (oSend. responseBody, code)
Start = Instr (ReadXml, start)
ReadXml = mid (ReadXml, start)
Ends = Instr (ReadXml, ends)
ReadXml = left (ReadXml, ends-1)
End function
Function SubStr (body, start, ends)
Start = Instr (body, start)
SubStr = mid (body, start + len (start) + 1)
Ends = Instr (SubStr, ends)
SubStr = left (SubStr, ends-1)
End function
Dim getcont, NewsContent
Dim url, title
Url = "http: // www. ***. com" 'news url knowsky.com
Getcont = ReadXml (url, "gb2312", "<table class = k2 border =" 0 "," </table> ")
Getcont = RegexHtml (getcont)
Dim KeyId, NewsClass, City, Position, HouseType, Level, Area, Price, Demostra
Dim ContactMan, Contact
For I = 2 to ubound (getcont)
Response. Write (getcont (I) & "__< br> ")
TempLink = mid (getcont (I), instr (getcont (I), "href =" ") + 6, instr (getcont (I)," onClick ") -10)
TempLink = replace (tempLink ,"../","")
Response. Write (I & ":" & tempLink & "<br> ")
NewsContent = ReadXml (tempLink, "gb2312", "<td valign =" "bottom" "width =" 400 "> ", "NewsContent = RemoveHtml (NewsContent)
NewsContent = replace (NewsContent, VbCrLf ,"")
NewsContent = replace (NewsContent, vbNewLine ,"")
NewsContent = replace (NewsContent ,"","")
NewsContent = replace (NewsContent ,"","")
NewsContent = replace (NewsContent ,"","")
NewsContent = replace (NewsContent, "\ n ","")
NewsContent = replace (NewsContent, chr (10 ),"")
NewsContent = replace (NewsContent, chr (13 ),"")
'= Get Content = =
Response. Write (NewsContent)
KeyId = SubStr (NewsContent, "column number:", "Information category :")
NewsClass = SubStr (NewsContent, "category:", "City :")
City = SubStr (NewsContent, "City:", "Location :")
Position = SubStr (NewsContent, "Location:", "housing type :")
HouseType = SubStr (NewsContent, "type:", "floor :")
Level = SubStr (NewsContent, "floor:", "Use Area :")
Area = SubStr (NewsContent, "Area:", "Price :")
Price = SubStr (NewsContent, "Price:", "other instructions :")
Demostra = SubStr (NewsContent, "NOTE:", "Contact :")
ContactMan = SubStr (NewsContent, "Contact:", "contact info :")
Contact = SubStr (NewsContent, "Contact information:", "Information Source :")
Response. Write ("total serial number:" & KeyId & "<br> ")
Response. Write ("Information category:" & NewsClass & "<br> ")
Response. Write ("City:" & City & "<br> ")
Response. Write ("Location:" & Position & "<br> ")
Response. Write ("house type:" & HouseType & "<br> ")
Response. Write ("floor:" & Level & "<br> ")
Response. Write ("Area used:" & Area & "<br> ")
Response. Write ("Price:" & Price & "<br> ")
Response. Write ("other instructions:" & Demostra & "<br> ")
Response. Write ("Contact:" & ContactMan & "<br> ")
Response. Write ("Contact info:" & Contact & "<br> ")
'Title = RemoveHTML (aa (I ))
'Response. Write ("title:" & title)
For n = 0 to application. Contents. count
If (application. Contents (n) = KeyId) then
Ifexit = true
End if
Next
If not ifexit then
Application (time & I) = KeyId
'Add to database
'================================================ ==================
Set rs = server. createObject ("adodb. recordset ")
Rs. open "select top 1 * from news order by id desc", conn, 3,3
Rs. addnew
Rs ("NewsClass") = NewsClass
Rs ("City") = City
Rs ("Position") = Position
Rs ("HouseType") = HouseType
Rs ("Level") = Level
Rs ("Area") = Area
Rs ("Price") = Price
Rs ("Demostra") = Demostra
Rs ("ContactMan") = ContactMan
Rs ("Contact") = Contact
Rs. update
Rs. close
Set rs = nothing
End if
'================================================ ==============
Next
Function RemoveTag (body)
Set regEx = New RegExp
RegEx. Pattern = "<[a]. *? <\/[A]>"
RegEx. IgnoreCase = True
RegEx. Global = True
Set Matches = regEx. Execute (body)
Dim I, arr (15), ifexit
I = 0
J = 0
For Each Match in Matches
TempStr = Match. Value
TempStr = replace (TempStr, "<td> ","")
TempStr = replace (TempStr, "</td> ","")
TempStr = replace (TempStr, "<tr> ","")
TempStr = replace (TempStr, "</tr> ","")
Arr (I) = TempStr
I = I + 1
If (I> = 15) then
Exit
End if
Next
Set regEx = nothing
Set Matches = nothing
RemoveTag = arr
End function
Function RegexHtml (body)
Dim r_arr (47), r_temp
Set regEx2 = New RegExp
RegEx2.Pattern = "<.*? <\/A>"
RegEx2.IgnoreCase = True
RegEx2.Global = True
Set Matches2 = regEx2.Execute (body)
Iii = 0
For Each Match in Matches2
R_arr (iii) = Match. Value
Iii = iii + 1
Next
RegexHtml = r_arr
Set regEx2 = nothing
Set Matches2 = nothing
End function
'================================================ ====================
Conn. close
Set conn = nothing
%>
</Body>
</Html>
Function. asp
<%
'*************************************** ***********
'Function name: gotTopic
'Usage: String truncation. One Chinese character is counted as two characters, and one English character is counted as one character.
'Parameter: str ---- original string
'Strlen ---- truncation Length
'Return value: the intercepted string
'*************************************** ***********
Function gotTopic (str, strlen)
If str = "" then
GotTopic = ""
Exit function
End if
Dim l, t, c, I
Str = replace (str, "", ""), ", chr (34),"> ","> "), "<", "<")
Str = replace (str ,"? ","")
L = len (str)
T = 0
For I = 1 to l
C = Abs (Asc (Mid (str, I, 1 )))
If c & gt; 255 then
T = t + 2
Else
T = t + 1
End if
If t> = strlen then
GotTopic = left (str, I )&"... "
Exit
Else
GotTopic = str
End if
Next
GotTopic = replace (gotTopic, "", ""), chr (34), "),"> ","> "), "<", "<")
End function
'================================================ ============================
'Function: RemoveHTML (strHTML)
'Function: Remove HTML tags
'Parameter: strHTML -- string to remove HTML tags
'================================================ ============================
Function RemoveHTML (strHTML)
Dim objRegExp, Match, Matches
Set objRegExp = New Regexp
ObjRegExp. IgnoreCase = True
ObjRegExp. Global = True
'Take the closed <>
ObjRegExp. Pattern = "<. +?> "
'For matching
Set Matches = objRegExp. Execute (strHTML)
'Traverse the matching set and replace the matched items.
For Each Match in Matches
StrHtml = Replace (strHTML, Match. Value ,"")
Next
RemoveHTML = strHTML
Set objRegExp = Nothing
Set Matches = nothing
End Function
%>
Conn. asp
<%
'On error resume next
Set conn = server. createObject ("adodb. connection ")
Con = "driver = {Microsoft Access Driver (*. mdb)}; dbq =" & Server. MapPath ("stest. mdb ")
Conn. open con
Sub connclose
Conn. close
Set conn = nothing
End sub
%>