<% @LANGUAGE = "VBSCRIPT" codepage= "936"%>
<!--#include file= "conn.asp"-->
<!--#include file= "inc/function.asp"-->
<! DOCTYPE HTML PUBLIC "-//w3c//dtd HTML 4.01 transitional//en" "HTTP://WWW.W3.ORG/TR/HTML4/LOOSE.DTD" >
<title>untitled document</title>
<meta http-equiv= "Content-type" content= "text/html; charset=gb2312 ">
<meta http-equiv= "Refresh" content= "300; Url=steal_house.asp ">
<body>
<%
On Error Resume Next
'
Server.ScriptTimeout = 999999
'========================================================
' Character encoding function
'====================================================
Function Bytestobstr (Body,code)
Dim objstream
Set objstream = Server.CreateObject ("ADODB.stream")
Objstream. Type = 1
Objstream. Mode =3
Objstream. Open
Objstream. Write body
Objstream. Position = 0
Objstream. Type = 2
Objstream. Charset =code
Bytestobstr = objstream. ReadText
Objstream. Close
Set objstream = Nothing
End Function
The position in another string where the ' fetch ' string appears
Function newstring (WSTR,STRNG)
Newstring=instr (LCase (WSTR), LCase (STRNG))
If Newstring<=0 then Newstring=len (WSTR)
End Function
' Replace string function
function Replacestr (ORI,STR1,STR2)
Replacestr=replace (ORI,STR1,STR2)
End Function
'====================================================
function ReadXml (url,code,start,ends)
Set Osend=createobject ("Microsoft.XMLHTTP")
SourceCode = Osend.open ("Get", Url,false)
Osend.send ()
Readxml=bytestobstr (Osend.responsebody,code)
Start=instr (Readxml,start)
Readxml=mid (Readxml,start)
Ends=instr (Readxml,ends)
Readxml=left (readxml,ends-1)
End Function
function SubStr (body,start,ends)
Start=instr (Body,start)
Substr=mid (Body,start+len (start) +1)
Ends=instr (Substr,ends)
Substr=left (substr,ends-1)
End Function
Dim getcont,newscontent
Dim url,title
Url= "http://www.***.com" ' News URL knowsky.com
Getcont=readxml (URL, "gb2312", "<table class=k2 border=" "0" "", "</table>")
Getcont=regexhtml (Getcont)
Dim Keyid,newsclass,city,position,housetype,level,area,price,demostra
Dim contactman,contact
For i=2 to UBound (Getcont)
Response. Write (Getcont (i) & "__<br>")
Templink=mid (Getcont (i), InStr (Getcont (i), "href=" "") +6,instr (Getcont (i), "" "" OnClick ")-10)
Templink=replace (Templink, ". /","")
Response. Write (i& ":" &tempLink& "<br>")
Newscontent=readxml (Templink, "gb2312", "<td valign=" "Bottom" "width=" "," ">", "Newscontent=removehtml (newscontent)
Newscontent=replace (Newscontent,vbcrlf, "")
Newscontent=replace (Newscontent,vbnewline, "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (Newscontent, "", "")
Newscontent=replace (newscontent, "\ n", "")
Newscontent=replace (NEWSCONTENT,CHR (10), "")
Newscontent=replace (NEWSCONTENT,CHR (13), "")
' ===============get content=======================
Response. Write (newscontent)
Keyid=substr (newscontent, "column number:", "Information Category:")
Newsclass=substr (Newscontent, "Category:", "City:")
City=substr (Newscontent, "City:", "House Specific location:")
Position=substr (Newscontent, "Location:", "House type:")
Housetype=substr (Newscontent, "type:", "Floor:")
Level=substr (Newscontent, "Floor:", "usable area:")
Area=substr (Newscontent, "area:", "Price:")
Price=substr (Newscontent, "Price:", "Other Description:")
Demostra=substr (newscontent, "description:", "Contact:")
Contactman=substr (Newscontent, "Contact:", "Contact:")
Contact=substr (Newscontent, "Contact:", "Source of information:")
Response. Write ("Total serial number:" &KeyId& "<br>")
Response. Write ("Information Category:" &NewsClass& "<br>")
Response. Write ("City:" &City& "<br>")
Response. Write ("Home Specific location:" &Position& "<br>")
Response. Write ("House type:" &HouseType& "<br>")
Response. Write ("Floor:" &Level& "<br>")
Response. Write ("Usable area:" &Area& "<br>")
Response. Write ("Room Rate:" &Price& "<br>")
Response. Write ("Other Description:" &Demostra& "<br>")
Response. Write ("Contact:" &ContactMan& "<br>")
Response. Write ("Contact:" &Contact& "<br>")
' Title=removehtml (AA (i))
' Response. Write ("title:" &title)
For n=0 to application. Contents.count
if (application. Contents (n) =keyid) then
Ifexit=true
End If
Next
If not Ifexit then
Application (time&i) =keyid
' Add to Database
'====================================================
Set Rs=server. CreateObject ("Adodb.recordset")
Rs.Open "SELECT top 1 * FROM news ORDER BY id Desc", conn,3,3
Rs.addnew
RS ("Newsclass") =newsclass
RS ("city") =city
RS ("Position") =position
RS ("Housetype") =housetype
RS ("level") =level
RS ("area") =area
RS ("price") =price
RS ("Demostra") =demostra
RS ("Contactman") =contactman
RS ("contact") =contact
Rs.update
Rs.close
Set rs=nothing
End If
'==================================================
Next
function Removetag (body)
Set regEx = New RegExp
Regex.pattern = "<[a].*?<\/[a]>"
Regex.ignorecase = True
Regex.global = True
Set matches = Regex.execute (body)
Dim I,arr (In), Ifexit
I=0
J=0
For the Match in matches
TempStr = Match.value
Tempstr=replace (TempStr, "<td>", "")
Tempstr=replace (TempStr, "</td>", "")
Tempstr=replace (TempStr, "<tr>", "")
Tempstr=replace (TempStr, "</tr>", "")
Arr (i) =tempstr
I=i+1
if (i>=15) then
Exit For
End If
Next
Set regex=nothing
Set matches =nothing
Removetag=arr
End Function
function regexhtml (body)
Dim R_arr (In), r_temp
Set regEx2 = New RegExp
Regex2.pattern = "<a.*?<\/a>"
Regex2.ignorecase = True
Regex2.global = True
Set Matches2 = Regex2.execute (body)
Iii=0
For the Match in Matches2
R_arr (iii) =match.value
Iii=iii+1
Next
Regexhtml=r_arr
Set regex2=nothing
Set matches2=nothing
End Function
'======================================================
Conn.close
Set conn=nothing
%>
</body>
Function.asp
<%
'**************************************************
' Function name: gottopic
' function: truncated string, Chinese character one count two characters, English count a character
' Argument: str----Original string
' strlen----intercept length
' Return value: After the intercepted string
'**************************************************
function Gottopic (Str,strlen)
If str= "" Then
Gottopic= ""
Exit function
End If
Dim l,t,c, I
Str=replace (replace (replace (str, "", ""), "", CHR), ">", ">"), "<", "<")
Str=replace (str, "?", "")
L=len (str)
T=0
For I=1 to L
C=abs (ASC (str,i,1))
If c>255 Then
T=t+2
Else
T=t+1
End If
If T>=strlen Then
Gottopic=left (str,i) & "..."
Exit For
Else
Gottopic=str
End If
Next
Gottopic=replace (replace (replace (Gottopic, "", ""), Chr (+), "" ")," > "," > ")," < "," < ")
End Function
'=========================================================
' Function: removehtml (strhtml)
' Features: Removing HTML tags
' Parameters: strHTML--A string to remove HTML tags
'=========================================================
Function removehtml (strhtml)
Dim objRegExp, Match, Matches
Set objRegExp = New Regexp
Objregexp.ignorecase = True
Objregexp.global = True
' Take the closed <>
Objregexp.pattern = "<.+?>"
' to match
Set matches = Objregexp.execute (strhtml)
' Iterate through the matching set and replace the matching item
For the Match in matches
Strhtml=replace (Strhtml,match.value, "")
Next
Removehtml=strhtml
Set objRegExp = Nothing
Set matches=nothing
End Function
%>
Conn.asp
<%
' On Error Resume Next
Set Conn=server. CreateObject ("Adodb.connection")
con= "Driver={microsoft Access driver (*.mdb)};d bq=" & Server.MapPath ("Stest.mdb")
Conn.Open Con
Sub Connclose
Conn.close
Set conn=nothing
End Sub
%>
Attached: Detailed page case of crawling information
Total Serial Number: |
479280 |
Information Category:
|
Rental |
City Location:
|
Jinan |
Specific location of the house:
|
Hua Long Lu Hua Xin Lu Junction Kou |
Type of House:
|
Other |
Floor:
|
Six floors |
Area
|
Between 24~240 square meters |
Prices:
|
0 [Leasing: Yuan/month, trading: million yuan/set] |
Other Notes:
|
Huaxin Business Building 3 to 6-storey small space for rental (0.5 yuan/flat), the building is a pure business office investment, can be used in office writing room, surrounding facilities, convenient transportation (37, 80, K95 in front of the building), all property rights, city certificate, building facilities including water, electricity, warm, elevator facilities, interested parties can be telecommunications! |
Contact:
|
Lu, Wang |
Contact Method:
|
88017966, 86812217 |
Source of information:
|
2005-8-4 8:28:55 from: 218.98.86.175 |
Number of clicks:
|
19 |