<% @ LANGUAGE = "VBSCRIPT" CODEPAGE = "936" %>
<! -- # Include file = "conn. asp" -->
<! -- # Include file = "inc/function. asp" -->
<! Doctype html public "-// W3C // dtd html 4.01 Transitional // EN" "http://www.w3.org/TR/html4/loose.dtd">
<Html>
<Head>
<Title> Untitled Document </title>
<Meta http-equiv = "Content-Type" content = "text/html; charset = gb2312">
<Meta http-equiv = "refresh" content = "300; URL = steal_house.asp">
</Head>
<Body>
<%
On error resume next
'
Server. ScriptTimeout = 999999
'================================================ ========================
'Character encoding Functions
'================================================ ==================
Function BytesToBstr (body, code)
Dim objstream
Set objstream = Server. CreateObject ("adodb. stream ")
Objstream. Type = 1
Objstream. Mode = 3
Objstream. Open
Objstream. Write body
Objstream. Position = 0
Objstream. Type = 2
Objstream. Charset = code
BytesToBstr = objstream. ReadText
Objstream. Close
Set objstream = nothing
End Function
'Position where the row string appears in another string
Function Newstring (wstr, strng)
Newstring = Instr (lcase (wstr), lcase (strng ))
If Newstring <= 0 then Newstring = Len (wstr)
End Function
'Replacement string Function
Function ReplaceStr (ori, str1, str2)
ReplaceStr = replace (ori, str1, str2)
End function
'================================================ ==================
Function ReadXml (url, code, start, ends)
Set oSend = createobject ("Microsoft. XMLHTTP ")
SourceCode = oSend. open ("GET", url, false)
OSend. send ()
ReadXml = BytesToBstr (oSend. responseBody, code)
Start = Instr (ReadXml, start)
ReadXml = mid (ReadXml, start)
Ends = Instr (ReadXml, ends)
ReadXml = left (ReadXml, ends-1)
End function
Function SubStr (body, start, ends)
Start = Instr (body, start)
SubStr = mid (body, start + len (start) + 1)
Ends = Instr (SubStr, ends)
SubStr = left (SubStr, ends-1)
End function
Dim getcont, NewsContent
Dim url, title
Url = "http: // www. ***. com" 'news url knowsky.com
Getcont = ReadXml (url, "gb2312", "<table class = k2 border =" 0 "," </table> ")
Getcont = RegexHtml (getcont)
Dim KeyId, NewsClass, City, Position, HouseType, Level, Area, Price, Demostra
Dim ContactMan, Contact
For I = 2 to ubound (getcont)
Response. Write (getcont (I) & "__< br> ")
TempLink = mid (getcont (I), instr (getcont (I), "href =" ") + 6, instr (getcont (I)," onClick ") -10)
TempLink = replace (tempLink ,"../","")
Response. Write (I & ":" & tempLink & "<br> ")
NewsContent = ReadXml (tempLink, "gb2312", "<td valign =" "bottom" "width =" 400 "> ", "NewsContent = RemoveHtml (NewsContent)
NewsContent = replace (NewsContent, VbCrLf ,"")
NewsContent = replace (NewsContent, vbNewLine ,"")
NewsContent = replace (NewsContent ,"","")
NewsContent = replace (NewsContent ,"","")
NewsContent = replace (NewsContent ,"","")
NewsContent = replace (NewsContent, "\ n ","")
NewsContent = replace (NewsContent, chr (10 ),"")
NewsContent = replace (NewsContent, chr (13 ),"")
'= Get Content = =
Response. Write (NewsContent)
KeyId = SubStr (NewsContent, "column number:", "Information category :")
NewsClass = SubStr (NewsContent, "category:", "City :")
City = SubStr (NewsContent, "City:", "Location :")
Position = SubStr (NewsContent, "Location:", "housing type :")
HouseType = SubStr (NewsContent, "type:", "floor :")
Level = SubStr (NewsContent, "floor:", "Use Area :")
Area = SubStr (NewsContent, "Area:", "Price :")
Price = SubStr (NewsContent, "Price:", "other instructions :")
Demostra = SubStr (NewsContent, "NOTE:", "Contact :")
ContactMan = SubStr (NewsContent, "Contact:", "contact info :")
Contact = SubStr (NewsContent, "Contact information:", "Information Source :")
Response. Write ("total serial number:" & KeyId & "<br> ")
Response. Write ("Information category:" & NewsClass & "<br> ")
Response. Write ("City:" & City & "<br> ")
Response. Write ("Location:" & Position & "<br> ")
Response. Write ("house type:" & HouseType & "<br> ")
Response. Write ("floor:" & Level & "<br> ")
Response. Write ("Area used:" & Area & "<br> ")
Response. Write ("Price:" & Price & "<br> ")
Response. Write ("other instructions:" & Demostra & "<br> ")
Response. Write ("Contact:" & ContactMan & "<br> ")
Response. Write ("Contact info:" & Contact & "<br> ")
'Title = RemoveHTML (aa (I ))
'Response. Write ("title:" & title)
For n = 0 to application. Contents. count
If (application. Contents (n) = KeyId) then
Ifexit = true
End if
Next
If not ifexit then
Application (time & I) = KeyId
'Add to database
'================================================ ==================
Set rs = server. CreateObject ("adodb. recordset ")
Rs. open "select top 1 * from news order by id desc", conn, 3,3
Rs. addnew
Rs ("NewsClass") = NewsClass
Rs ("City") = City
Rs ("Position") = Position
Rs ("HouseType") = HouseType
Rs ("Level") = Level
Rs ("Area") = Area
Rs ("Price") = Price
Rs ("Demostra") = Demostra
Rs ("ContactMan") = ContactMan
Rs ("Contact") = Contact
Rs. update
Rs. close
Set rs = nothing
End if
'================================================ ==============
Next
Function RemoveTag (body)
Set regEx = New RegExp
RegEx. Pattern = "<[a]. *? <\/[A]>"
RegEx. IgnoreCase = True
RegEx. Global = True
Set Matches = regEx. Execute (body)
Dim I, arr (15), ifexit
I = 0
J = 0
For Each Match in Matches
TempStr = Match. Value
TempStr = replace (TempStr, "<td> ","")
TempStr = replace (TempStr, "</td> ","")
TempStr = replace (TempStr, "<tr> ","")
TempStr = replace (TempStr, "</tr> ","")
Arr (I) = TempStr
I = I + 1
If (I> = 15) then
Exit
End if
Next
Set regEx = nothing
Set Matches = nothing
RemoveTag = arr
End function
Function RegexHtml (body)
Dim r_arr (47), r_temp
Set regEx2 = New RegExp
RegEx2.Pattern = "<.*? <\/A>"
RegEx2.IgnoreCase = True
RegEx2.Global = True
Set Matches2 = regEx2.Execute (body)
Iii = 0
For Each Match in Matches2
R_arr (iii) = Match. Value
Iii = iii + 1
Next
RegexHtml = r_arr
Set regEx2 = nothing
Set Matches2 = nothing
End function
'================================================ ====================
Conn. close
Set conn = nothing
%>
</Body>
</Html>
Function. asp
<%
'*************************************** ***********
'Function name: gotTopic
'Usage: String truncation. One Chinese character is counted as two characters, and one English character is counted as one character.
'Parameter: str ---- original string
'Strlen ---- truncation Length
'Return value: the intercepted string
'*************************************** ***********
Function gotTopic (str, strlen)
If str = "" then
GotTopic = ""
Exit function
End if
Dim l, t, c, I
Str = replace (str, "", ""), ", chr (34),"> ","> "), "<", "<")
Str = replace (str ,"? ","")
L = len (str)
T = 0
For I = 1 to l
C = Abs (Asc (Mid (str, I, 1 )))
If c & gt; 255 then
T = t + 2
Else
T = t + 1
End if
If t> = strlen then
GotTopic = left (str, I )&"... "
Exit
Else
GotTopic = str
End if
Next
GotTopic = replace (gotTopic, "", ""), chr (34), "),"> ","> "), "<", "<")
End function
'================================================ ============================
'Function: RemoveHTML (strHTML)
'Function: Remove HTML tags
'Parameter: strHTML -- string to remove HTML tags
'================================================ ============================
Function RemoveHTML (strHTML)
Dim objRegExp, Match, Matches
Set objRegExp = New Regexp
ObjRegExp. IgnoreCase = True
ObjRegExp. Global = True
'Take the closed <>
ObjRegExp. Pattern = "<. +?> "
'For matching
Set Matches = objRegExp. Execute (strHTML)
'Traverse the matching set and replace the matched items.
For Each Match in Matches
StrHtml = Replace (strHTML, Match. Value ,"")
Next
RemoveHTML = strHTML
Set objRegExp = Nothing
Set Matches = nothing
End Function
%>
Conn. asp
<%
'On error resume next
Set conn = server. CreateObject ("adodb. connection ")
Con = "driver = {Microsoft Access Driver (*. mdb)}; dbq =" & Server. MapPath ("stest. mdb ")
Conn. open con
Sub connclose
Conn. close
Set conn = nothing
End sub
%>
Appendix: detailed page examples for capturing information
Total serial number: |
479280 |
Information type: |
Rent |
City: |
Jinan |
Location: |
Junction of Huaxin Road, hualong Road |
House type: |
Others |
Floor: |
Layer 6 |
Use Area: |
24 ~ Between 240 m² |
Price: |
0 [Lease: RMB/month, sales: RMB/set] |
Other Instructions: |
Small spaces on the 3 to 6 floors of Huaxin business building are rented out (0.5 RMB/level). This building is for pure commercial office investment and can be used in office rooms, complete surrounding facilities, convenient transportation (37, 80, and K95 pass in front of the building), full property rights, city permit. Facilities in the building include water, electricity, heating, and elevator facilities. If you are interested, you can call us! |
Contact: |
Lu, Wang |
Contact info: |
88017966, 86812217 |
Information Source: |
8:28:55 from: 218.98.86.175 |
Clicks: |
19 |