Example to explain ASP crawl online real Estate information

Source: Internet
Author: User
Tags exit chr count html tags include strlen

Attached: Detailed page case of crawling information

<% @LANGUAGE = "VBSCRIPT"Codepage=" 936 "%>
<!--#include file= "conn.asp"-->
<!--#include file= "inc/function.asp"-->
<! DOCTYPE HTML PUBLIC "-//w3c//dtd HTML 4.01 transitional//en"
"HTTP://WWW.W3.ORG/TR/HTML4/LOOSE.DTD">
<title>untitled document</title>
<meta http-equiv= "Content-type" content= "text/html; charset=gb2312 ">
<meta http-equiv= "Refresh" content= "300; Url=steal_house.asp ">
<body>
<%
On Error Resume Next
'
Server.ScriptTimeout = 999999
'========================================================
' Character encoding function
'====================================================
Function Bytestobstr (Body,code)
Dim objstream
Set objstream = Server.CreateObject ("ADODB.stream")
Objstream. Type = 1
Objstream. Mode =3
Objstream. Open
Objstream. Write body
Objstream. Position = 0
Objstream. Type = 2
Objstream. Charset =code
Bytestobstr = objstream. ReadText
Objstream. Close
Set objstream = Nothing
End Function
The position in another string where the ' fetch ' string appears
Function newstring (WSTR,STRNG)
Newstring=instr (LCase (WSTR), LCase (STRNG))
If Newstring<=0 then Newstring=len (WSTR)
End Function
' Replace string function
function Replacestr (ORI,STR1,STR2)
Replacestr=replace (ORI,STR1,STR2)
End Function
'====================================================
function ReadXml (url,code,start,ends)
Set Osend=createobject ("Microsoft.XMLHTTP")
SourceCode = Osend.open ("Get", Url,false)
Osend.send ()
Readxml=bytestobstr (Osend.responsebody,code)
Start=instr (Readxml,start)
Readxml=mid (Readxml,start)
Ends=instr (Readxml,ends)
Readxml=left (readxml,ends-1)
End Function
function SubStr (body,start,ends)
Start=instr (Body,start)
Substr=mid (Body,start+len (start) +1)
Ends=instr (Substr,ends)
Substr=left (substr,ends-1)
End Function
Dim getcont,newscontent
Dim url,title
Url= "http://www.***.com "'News website
Getcont=readxml (URL, "gb2312", "<table class=k2 border=" "0" "", "</table>")
Getcont=regexhtml (Getcont)
Dim Keyid,newsclass,city,position,housetype,level,area,price,demostra
Dim contactman,contact
For i=2 to UBound (Getcont)
Response. Write (Getcont (i) & "__<br>")

Templink=mid (Getcont (i), InStr (Getcont (i), "href=" "") +6,instr (Getcont (i), "" "
OnClick ")-10)
Templink=replace (Templink, ". /","")

Response. Write (i& ":" &tempLink& "<br>")
Newscontent=readxml (Templink, "gb2312", "<td valign=" "Bottom" "
Width= "" > "," NoShade size= "1" "color=" "#808080" ">
")
Newscontent=removehtml (newscontent)
Newscontent=replace (Newscontent,vbcrlf, "")
Newscontent=replace (Newscontent,vbnewline, "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (newscontent, "\ n", "")
Newscontent=replace (NEWSCONTENT,CHR (10), "")
Newscontent=replace (NEWSCONTENT,CHR (13), "")
' ===============get content=======================
Response. Write (newscontent)
Keyid=substr (newscontent, "column number:", "Information Category:")
Newsclass=substr (Newscontent, "Category:", "City:")
City=substr (Newscontent, "City:", "House Specific location:")
Position=substr (Newscontent, "Location:", "House type:")
Housetype=substr (Newscontent, "type:", "Floor:")
Level=substr (Newscontent, "Floor:", "usable area:")
Area=substr (Newscontent, "area:", "Price:")
Price=substr (Newscontent, "Price:", "Other Description:")
Demostra=substr (newscontent, "description:", "Contact:")
Contactman=substr (Newscontent, "Contact:", "Contact:")
Contact=substr (Newscontent, "Contact:", "Source of information:")
Response. Write ("Total serial number:" &KeyId& "<br>")
Response. Write ("Information Category:" &NewsClass& "<br>")
Response. Write ("City:" &City& "<br>")
Response. Write ("Home Specific location:" &Position& "<br>")
Response. Write ("House type:" &HouseType& "<br>")
Response. Write ("Floor:" &Level& "<br>")
Response. Write ("Usable area:" &Area& "<br>")
Response. Write ("Room Rate:" &Price& "<br>")
Response. Write ("Other Description:" &Demostra& "<br>")
Response. Write ("Contact:" &ContactMan& "<br>")
Response. Write ("Contact:" &Contact& "<br>")
' Title=removehtml (AA (i))
' Response. Write ("title:" &title)
For n=0 to application. Contents.count
if (application. Contents (n) =keyid) then
Ifexit=true
End If
Next
If not Ifexit then
Application (time&i) =keyid
' Add to Database
'====================================================
Set Rs=server. CreateObject ("Adodb.recordset")
Rs.Open "SELECT top 1 * FROM news ORDER BY id Desc", conn,3,3
Rs.addnew
RS ("Newsclass") =newsclass
RS ("city") =city
RS ("Position") =position
RS ("Housetype") =housetype
RS ("level") =level
RS ("area") =area
RS ("price") =price
RS ("Demostra") =demostra
RS ("Contactman") =contactman
RS ("contact") =contact
Rs.update
Rs.close
Set rs=nothing
End If
'==================================================

Next
function Removetag (body)
Set regEx = New RegExp
Regex.pattern = "<[a].*?<\/[a]>"
Regex.ignorecase = True
Regex.global = True
Set matches = Regex.execute (body)
Dim I,arr (In), Ifexit
I=0
J=0
For the Match in matches
TempStr = Match.value
Tempstr=replace (TempStr, "<td>", "")
Tempstr=replace (TempStr, "</td>", "")
Tempstr=replace (TempStr, "<tr>", "")
Tempstr=replace (TempStr, "</tr>", "")
Arr (i) =tempstr
I=i+1
if (i>=15) then
Exit For
End If
Next
Set regex=nothing
Set matches =nothing
Removetag=arr

End Function
function regexhtml (body)
Dim R_arr (In), r_temp
Set regEx2 = New RegExp
Regex2.pattern = "<a.*?<\/a>"
Regex2.ignorecase = True
Regex2.global = True
Set Matches2 = Regex2.execute (body)
Iii=0
For the Match in Matches2

R_arr (iii) =match.value

Iii=iii+1
Next
Regexhtml=r_arr
Set regex2=nothing
Set matches2=nothing
End Function
'======================================================
Conn.close
Set conn=nothing
%>
</body>


Function.asp

<%
'**************************************************
' Function name: gottopic
' function: truncated string, Chinese character one count two characters, English count a character
' Argument: str----Original string
' strlen----intercept length
' Return value: After the intercepted string
'**************************************************
function Gottopic (Str,strlen)
If str= "" Then
Gottopic= ""
Exit function
End If
Dim l,t,c, I
Str=replace (replace (replace (str, "", ""), "", CHR), ">", ">"), "<", "<")
Str=replace (str, "?", "")
L=len (str)
T=0
For I=1 to L
C=abs (ASC (str,i,1))
If c>255 Then
T=t+2
Else
T=t+1
End If
If T>=strlen Then
Gottopic=left (str,i) & "..."
Exit For
Else
Gottopic=str
End If
Next
Gottopic=replace (replace (replace (Gottopic, "", ""), Chr (+), "" ")," > "," > ")," < "," < ")
End Function
'=========================================================
' Function: removehtml (strhtml)
' Features: Removing HTML tags
' Parameters: strHTML--A string to remove HTML tags
'=========================================================
Function removehtml (strhtml)
Dim objRegExp, Match, Matches
Set objRegExp = New Regexp
Objregexp.ignorecase = True
Objregexp.global = True
' Take the closed <>
Objregexp.pattern = "<.+?>"
' to match
Set matches = Objregexp.execute (strhtml)
' Iterate through the matching set and replace the matching item
For the Match in matches
Strhtml=replace (Strhtml,match.value, "")
Next
Removehtml=strhtml
Set objRegExp = Nothing
Set matches=nothing
End Function
%>


Conn.asp
<%
' On Error Resume Next
Set Conn=server. CreateObject ("Adodb.connection")
con= "Driver={microsoft Access driver (*.mdb)};d bq=" & Server.MapPath ("Stest.mdb")

Conn.Open Con
Sub Connclose
Conn.close
Set conn=nothing
End Sub
%>



Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.