The ASP collects the code that crawls the online real Estate Information _ thief/Collect

Source: Internet
Author: User
Copy Code code as follows:

<% @LANGUAGE = "VBSCRIPT" codepage= "936"%>
<!--#include file= "conn.asp"-->
<!--#include file= "inc/function.asp"-->
<! DOCTYPE HTML PUBLIC "-//w3c//dtd HTML 4.01 transitional//en"
"Http://www.w3.org/TR/html4/loose.dtd" >
<title>untitled document</title>
<meta http-equiv= "Content-type" content= "text/html; charset=gb2312 ">
<meta http-equiv= "Refresh" content= "300; Url=steal_house.asp ">
<body>
<%
On Error Resume Next
'
Server.ScriptTimeout = 999999
'========================================================
' Character encoding function
'====================================================
Function Bytestobstr (Body,code)
Dim objstream
Set objstream = Server.CreateObject ("ADODB.stream")
Objstream. Type = 1
Objstream. Mode =3
Objstream. Open
Objstream. Write body
Objstream. Position = 0
Objstream. Type = 2
Objstream. Charset =code
Bytestobstr = objstream. ReadText
Objstream. Close
Set objstream = Nothing
End Function
The position in another string where the ' fetch ' string appears
Function newstring (WSTR,STRNG)
Newstring=instr (LCase (WSTR), LCase (STRNG))
If Newstring<=0 then Newstring=len (WSTR)
End Function
' Replace string function
function Replacestr (ORI,STR1,STR2)
Replacestr=replace (ORI,STR1,STR2)
End Function
'====================================================
function ReadXml (url,code,start,ends)
Set Osend=createobject ("Microsoft.XMLHTTP")
SourceCode = Osend.open ("Get", Url,false)
Osend.send ()
Readxml=bytestobstr (Osend.responsebody,code)
Start=instr (Readxml,start)
Readxml=mid (Readxml,start)
Ends=instr (Readxml,ends)
Readxml=left (readxml,ends-1)
End Function
function SubStr (body,start,ends)
Start=instr (Body,start)
Substr=mid (Body,start+len (start) +1)
Ends=instr (Substr,ends)
Substr=left (substr,ends-1)
End Function
Dim getcont,newscontent
Dim url,title
Url= "http://www.***.com" ' news website
Getcont=readxml (URL, "gb2312", "<table class=k2 border=" "0" "", "</table>")
Getcont=regexhtml (Getcont)
Dim Keyid,newsclass,city,position,housetype,level,area,price,demostra
Dim contactman,contact
For i=2 to UBound (Getcont)
Response. Write (Getcont (i) & "__<br>")

Templink=mid (Getcont (i), InStr (Getcont (i), "href=" "") +6,instr (Getcont (i), "" "
OnClick ")-10)
Templink=replace (Templink, ". /","")

Response. Write (i& ":" &tempLink& "<br>")
Newscontent=readxml (Templink, "gb2312", "<td valign=" "Bottom" "
Width= "" > "," NoShade size= "1" "color=" "#808080" ">
")
Newscontent=removehtml (newscontent)
Newscontent=replace (Newscontent,vbcrlf, "")
Newscontent=replace (Newscontent,vbnewline, "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (newscontent, "\ n", "")
Newscontent=replace (NEWSCONTENT,CHR (10), "")
Newscontent=replace (NEWSCONTENT,CHR (13), "")
' ===============get content=======================
Response. Write (newscontent)
Keyid=substr (newscontent, "column number:", "Information Category:")
Newsclass=substr (Newscontent, "Category:", "City:")
City=substr (Newscontent, "City:", "House Specific location:")
Position=substr (Newscontent, "Location:", "House type:")
Housetype=substr (Newscontent, "type:", "Floor:")
Level=substr (Newscontent, "Floor:", "usable area:")
Area=substr (Newscontent, "area:", "Price:")
Price=substr (Newscontent, "Price:", "Other Description:")
Demostra=substr (newscontent, "description:", "Contact:")
Contactman=substr (Newscontent, "Contact:", "Contact:")
Contact=substr (Newscontent, "Contact:", "Source of information:")
Response. Write ("Total serial number:" &KeyId& "<br>")
Response. Write ("Information Category:" &NewsClass& "<br>")
Response. Write ("City:" &City& "<br>")
Response. Write ("Home Specific location:" &Position& "<br>")
Response. Write ("House type:" &HouseType& "<br>")
Response. Write ("Floor:" &Level& "<br>")
Response. Write ("Usable area:" &Area& "<br>")
Response. Write ("Room Rate:" &Price& "<br>")
Response. Write ("Other Description:" &Demostra& "<br>")
Response. Write ("Contact:" &ContactMan& "<br>")
Response. Write ("Contact:" &Contact& "<br>")
' Title=removehtml (AA (i))
' Response. Write ("title:" &title)
For n=0 to application. Contents.count
if (application. Contents (n) =keyid) then
Ifexit=true
End If
Next
If not Ifexit then
Application (time&i) =keyid
' Add to Database
'====================================================
Set Rs=server. CreateObject ("Adodb.recordset")
Rs.Open "SELECT top 1 * FROM news ORDER BY id Desc", conn,3,3
Rs.addnew
RS ("Newsclass") =newsclass
RS ("city") =city
RS ("Position") =position
RS ("Housetype") =housetype
RS ("level") =level
RS ("area") =area
RS ("price") =price
RS ("Demostra") =demostra
RS ("Contactman") =contactman
RS ("contact") =contact
Rs.update
Rs.close
Set rs=nothing
End If
'==================================================

Next
function Removetag (body)
Set regEx = New RegExp
Regex.pattern = "<[a].*?<\/[a]>"
Regex.ignorecase = True
Regex.global = True
Set matches = Regex.execute (body)
Dim I,arr (In), Ifexit
I=0
J=0
For the Match in matches
TempStr = Match.value
Tempstr=replace (TempStr, "<td>", "")
Tempstr=replace (TempStr, "</td>", "")
Tempstr=replace (TempStr, "<tr>", "")
Tempstr=replace (TempStr, "</tr>", "")
Arr (i) =tempstr
I=i+1
if (i>=15) then
Exit For
End If
Next
Set regex=nothing
Set matches =nothing
Removetag=arr

End Function
function regexhtml (body)
Dim R_arr (In), r_temp
Set regEx2 = New RegExp
Regex2.pattern = "<a.*?<\/a>"
Regex2.ignorecase = True
Regex2.global = True
Set Matches2 = Regex2.execute (body)
Iii=0
For the Match in Matches2

R_arr (iii) =match.value

Iii=iii+1
Next
Regexhtml=r_arr
Set regex2=nothing
Set matches2=nothing
End Function
'======================================================
Conn.close
Set conn=nothing
%>
</body>

Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.