Attached: Detailed page case of crawling information
<% @LANGUAGE = "VBSCRIPT"Codepage=" 936 "%>
<!--#include file= "conn.asp"-->
<!--#include file= "inc/function.asp"-->
<! DOCTYPE HTML PUBLIC "-//w3c//dtd HTML 4.01 transitional//en"
"HTTP://WWW.W3.ORG/TR/HTML4/LOOSE.DTD">
<title>untitled document</title>
<meta http-equiv= "Content-type" content= "text/html; charset=gb2312 ">
<meta http-equiv= "Refresh" content= "300; Url=steal_house.asp ">
<body>
<%
On Error Resume Next
'
Server.ScriptTimeout = 999999
'========================================================
' Character encoding function
'====================================================
Function Bytestobstr (Body,code)
Dim objstream
Set objstream = Server.CreateObject ("ADODB.stream")
Objstream. Type = 1
Objstream. Mode =3
Objstream. Open
Objstream. Write body
Objstream. Position = 0
Objstream. Type = 2
Objstream. Charset =code
Bytestobstr = objstream. ReadText
Objstream. Close
Set objstream = Nothing
End Function
The position in another string where the ' fetch ' string appears
Function newstring (WSTR,STRNG)
Newstring=instr (LCase (WSTR), LCase (STRNG))
If Newstring<=0 then Newstring=len (WSTR)
End Function
' Replace string function
function Replacestr (ORI,STR1,STR2)
Replacestr=replace (ORI,STR1,STR2)
End Function
'====================================================
function ReadXml (url,code,start,ends)
Set Osend=createobject ("Microsoft.XMLHTTP")
SourceCode = Osend.open ("Get", Url,false)
Osend.send ()
Readxml=bytestobstr (Osend.responsebody,code)
Start=instr (Readxml,start)
Readxml=mid (Readxml,start)
Ends=instr (Readxml,ends)
Readxml=left (readxml,ends-1)
End Function
function SubStr (body,start,ends)
Start=instr (Body,start)
Substr=mid (Body,start+len (start) +1)
Ends=instr (Substr,ends)
Substr=left (substr,ends-1)
End Function
Dim getcont,newscontent
Dim url,title
Url= "http://www.***.com "'News website
Getcont=readxml (URL, "gb2312", "<table class=k2 border=" "0" "", "</table>")
Getcont=regexhtml (Getcont)
Dim Keyid,newsclass,city,position,housetype,level,area,price,demostra
Dim contactman,contact
For i=2 to UBound (Getcont)
Response. Write (Getcont (i) & "__<br>")
Templink=mid (Getcont (i), InStr (Getcont (i), "href=" "") +6,instr (Getcont (i), "" "
OnClick ")-10)
Templink=replace (Templink, ". /","")
Response. Write (i& ":" &tempLink& "<br>")
Newscontent=readxml (Templink, "gb2312", "<td valign=" "Bottom" "
Width= "" > "," NoShade size= "1" "color=" "#808080" ">
")
Newscontent=removehtml (newscontent)
Newscontent=replace (Newscontent,vbcrlf, "")
Newscontent=replace (Newscontent,vbnewline, "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (newscontent, "\ n", "")
Newscontent=replace (NEWSCONTENT,CHR (10), "")
Newscontent=replace (NEWSCONTENT,CHR (13), "")
' ===============get content=======================
Response. Write (newscontent)
Keyid=substr (newscontent, "column number:", "Information Category:")
Newsclass=substr (Newscontent, "Category:", "City:")
City=substr (Newscontent, "City:", "House Specific location:")
Position=substr (Newscontent, "Location:", "House type:")
Housetype=substr (Newscontent, "type:", "Floor:")
Level=substr (Newscontent, "Floor:", "usable area:")
Area=substr (Newscontent, "area:", "Price:")
Price=substr (Newscontent, "Price:", "Other Description:")
Demostra=substr (newscontent, "description:", "Contact:")
Contactman=substr (Newscontent, "Contact:", "Contact:")
Contact=substr (Newscontent, "Contact:", "Source of information:")
Response. Write ("Total serial number:" &KeyId& "<br>")
Response. Write ("Information Category:" &NewsClass& "<br>")
Response. Write ("City:" &City& "<br>")
Response. Write ("Home Specific location:" &Position& "<br>")
Response. Write ("House type:" &HouseType& "<br>")
Response. Write ("Floor:" &Level& "<br>")
Response. Write ("Usable area:" &Area& "<br>")
Response. Write ("Room Rate:" &Price& "<br>")
Response. Write ("Other Description:" &Demostra& "<br>")
Response. Write ("Contact:" &ContactMan& "<br>")
Response. Write ("Contact:" &Contact& "<br>")
' Title=removehtml (AA (i))
' Response. Write ("title:" &title)
For n=0 to application. Contents.count
if (application. Contents (n) =keyid) then
Ifexit=true
End If
Next
If not Ifexit then
Application (time&i) =keyid
' Add to Database
'====================================================
Set Rs=server. CreateObject ("Adodb.recordset")
Rs.Open "SELECT top 1 * FROM news ORDER BY id Desc", conn,3,3
Rs.addnew
RS ("Newsclass") =newsclass
RS ("city") =city
RS ("Position") =position
RS ("Housetype") =housetype
RS ("level") =level
RS ("area") =area
RS ("price") =price
RS ("Demostra") =demostra
RS ("Contactman") =contactman
RS ("contact") =contact
Rs.update
Rs.close
Set rs=nothing
End If
'==================================================
Next
function Removetag (body)
Set regEx = New RegExp
Regex.pattern = "<[a].*?<\/[a]>"
Regex.ignorecase = True
Regex.global = True
Set matches = Regex.execute (body)
Dim I,arr (In), Ifexit
I=0
J=0
For the Match in matches
TempStr = Match.value
Tempstr=replace (TempStr, "<td>", "")
Tempstr=replace (TempStr, "</td>", "")
Tempstr=replace (TempStr, "<tr>", "")
Tempstr=replace (TempStr, "</tr>", "")
Arr (i) =tempstr
I=i+1
if (i>=15) then
Exit For
End If
Next
Set regex=nothing
Set matches =nothing
Removetag=arr
End Function
function regexhtml (body)
Dim R_arr (In), r_temp
Set regEx2 = New RegExp
Regex2.pattern = "<a.*?<\/a>"
Regex2.ignorecase = True
Regex2.global = True
Set Matches2 = Regex2.execute (body)
Iii=0
For the Match in Matches2
R_arr (iii) =match.value
Iii=iii+1
Next
Regexhtml=r_arr
Set regex2=nothing
Set matches2=nothing
End Function
'======================================================
Conn.close
Set conn=nothing
%>
</body>
Function.asp
<%
'**************************************************
' Function name: gottopic
' function: truncated string, Chinese character one count two characters, English count a character
' Argument: str----Original string
' strlen----intercept length
' Return value: After the intercepted string
'**************************************************
function Gottopic (Str,strlen)
If str= "" Then
Gottopic= ""
Exit function
End If
Dim l,t,c, I
Str=replace (replace (replace (str, "", ""), "", CHR), ">", ">"), "<", "<")
Str=replace (str, "?", "")
L=len (str)
T=0
For I=1 to L
C=abs (ASC (str,i,1))
If c>255 Then
T=t+2
Else
T=t+1
End If
If T>=strlen Then
Gottopic=left (str,i) & "..."
Exit For
Else
Gottopic=str
End If
Next
Gottopic=replace (replace (replace (Gottopic, "", ""), Chr (+), "" ")," > "," > ")," < "," < ")
End Function
'=========================================================
' Function: removehtml (strhtml)
' Features: Removing HTML tags
' Parameters: strHTML--A string to remove HTML tags
'=========================================================
Function removehtml (strhtml)
Dim objRegExp, Match, Matches
Set objRegExp = New Regexp
Objregexp.ignorecase = True
Objregexp.global = True
' Take the closed <>
Objregexp.pattern = "<.+?>"
' to match
Set matches = Objregexp.execute (strhtml)
' Iterate through the matching set and replace the matching item
For the Match in matches
Strhtml=replace (Strhtml,match.value, "")
Next
Removehtml=strhtml
Set objRegExp = Nothing
Set matches=nothing
End Function
%>
Conn.asp
<%
' On Error Resume Next
Set Conn=server. CreateObject ("Adodb.connection")
con= "Driver={microsoft Access driver (*.mdb)};d bq=" & Server.MapPath ("Stest.mdb")
Conn.Open Con
Sub Connclose
Conn.close
Set conn=nothing
End Sub
%>