Xml
Building on this, the approach can be combined with regular expressions to get better results. I hope someone can also share techniques for sharing a session over XMLHTTP.
AUTOGET
<%
'=================================================
' FileName: Getit.Asp
' Intro: automatically fetch data from a remote web site
' Author: babyt (Ron Artest)
' Url: http://blog.csdn.net/babyt
' Created: 2002-02  Last update: 2004-09
' DB Table: data
' Table Fields:
'   UID      -> Long -> ID of the fetched page
'   Ucontent -> Text -> content of the fetched page (HTML)
'=================================================
' The crawl is long-running, so raise the script timeout (seconds).
Server.ScriptTimeout = 5000
' On Error Resume Next

' Script-level state shared with Getpart.
Dim Comefrom, Myerr1, Myerr2, mycount

' Open the Access database that sits next to this page.
' ProgIDs must not contain spaces, otherwise CreateObject fails.
Set conn = Server.CreateObject("ADODB.Connection")
conn.Open "Provider=Microsoft.Jet.OLEDB.4.0;Data source=" & Server.MapPath("Getit.mdb")

' Open the target table with a keyset cursor (1) and optimistic locking (3)
' so that Getpart can append rows through AddNew/Update.
Set rs = Server.CreateObject("ADODB.Recordset")
sql = "SELECT * from Data"
rs.Open sql, conn, 1, 3

'========================================================
' Base URL of the remote pages; the numeric ID is appended to it.
Comefrom = "http://www.xrss.cn/U.asp?ID="
' Marker texts that identify a page that must not be stored.
Myerr1 = "This information does not exist"
Myerr2 = "This information is hidden"
'========================================================
'***************************************************************
' Only change the start (intmin) and end (intmax) of the ID range here,
' and the batch step size (intstep).
' Keeping each range to around 50,000 IDs is suggested; that is estimated
' to take about two hours and needs no manual intervention meanwhile.
'****************************************************************
intmin = 0
intmax = 10000
' Set step size
intstep = 100
'==========================================================
' Do not change the following code
'==========================================================
Call Getpart(intmin)
Response.Write "has been converted to complete" & intmin & "~ ~" & intmax & "Data between"

' Release the database resources.
rs.Close
Set rs = Nothing
conn.Close
Set conn = Nothing
%>
<%
' Fetch a remote page with XMLHTTP and return its body as text.
'
' Parameters:
'   URL - absolute address of the page to download.
' Returns:
'   The response body decoded as GB2312 text, or an empty string ("")
'   when the request fails, the server answers with a non-2xx status,
'   or the body is empty / cannot be decoded.
Function GetBody (URL)
    Dim objXml, rawBytes, statusCode

    ' Default result: callers can test Len(result) = 0 for "nothing fetched".
    GetBody = ""

    ' Network and COM failures must not abort the whole crawl; every step
    ' below is checked explicitly through Err.Number instead.
    On Error Resume Next

    Set objXml = CreateObject("Microsoft.XMLHTTP")
    If Err.Number = 0 Then
        ' Synchronous request (third argument False), no credentials.
        objXml.Open "GET", URL, False, "", ""
        objXml.Send

        If Err.Number = 0 Then
            ' Read the status into a local first: an error raised while
            ' evaluating an If condition would otherwise fall into the
            ' Then branch under On Error Resume Next.
            statusCode = 0
            statusCode = objXml.Status

            If Err.Number = 0 And statusCode >= 200 And statusCode < 300 Then
                ' Keep the raw bytes in a local instead of reusing the
                ' function's return slot for two different types.
                rawBytes = objXml.responseBody
                If Err.Number = 0 And LenB(rawBytes) > 0 Then
                    GetBody = Bytestobstr(rawBytes, "GB2312")
                    ' A decoding failure leaves no usable text.
                    If Err.Number <> 0 Then GetBody = ""
                End If
            End If
        End If
    End If

    ' Do not leak a stale error state into the caller.
    Err.Clear
    Set objXml = Nothing
End Function
' Convert binary data into a string by round-tripping it through an
' ADODB.Stream: the bytes are written in binary mode and read back as
' text in the requested character set.
'
' Parameters:
'   strbody  - binary data to decode (written to the stream as-is).
'   codebase - character set name assigned to the stream's Charset.
' Returns:
'   The text read back from the stream.
Function Bytestobstr (strbody,codebase)
    Dim binStream
    Set binStream = Server.CreateObject("ADODB.stream")
    With binStream
        ' Binary type (1), read/write mode (3).
        .Type = 1
        .Mode = 3
        .Open
        .Write strbody
        ' Rewind, then reinterpret the same bytes as text (2).
        .Position = 0
        .Type = 2
        .Charset = codebase
        Bytestobstr = .ReadText
        .Close
    End With
    Set binStream = Nothing
End Function
' Main function: crawl the pages Comefrom & ID for every ID from IStart up
' to intmax and store each usable page in the open recordset rs.
'
' The range is processed in batches of intstep + 1 consecutive IDs. After
' each batch a summary (number of stored records and elapsed seconds) is
' written to the response. Batches are contiguous: the next batch starts
' at the ID right after the last one processed, so no ID is skipped.
'
' Parameters:
'   IStart - first ID to fetch.
' Uses script-level names: rs, Comefrom, Myerr1, Myerr2, intmax, intstep,
' Mycount (per-batch counter of stored records) and GetBody.
' Returns nothing; call it with Call Getpart(...).
Function Getpart (IStart)
    Dim batchStart, batchEnd, pageId, pageHtml, startedAt, elapsed

    batchStart = IStart
    ' Iterate over batches instead of recursing once per batch, so the
    ' call depth does not grow with the size of the ID range.
    Do While batchStart <= intmax
        batchEnd = batchStart + intstep
        ' Guard against a negative step, which would never advance.
        If batchEnd < batchStart Then batchEnd = batchStart
        If batchEnd > intmax Then batchEnd = intmax

        startedAt = Timer()
        Mycount = 0

        For pageId = batchStart To batchEnd
            pageHtml = GetBody(Comefrom & pageId)

            If Len(pageHtml) = 0 Then
                ' Nothing was fetched (request failed or empty page):
                ' do not store an empty record.
            ElseIf InStr(pageHtml, Myerr1) > 0 Or InStr(pageHtml, Myerr2) > 0 Then
                ' Skip pages that only carry one of the error messages.
            Else
                ' Write to the database. The HTML is stored unchanged:
                ' values assigned to recordset fields need no quote escaping.
                rs.AddNew
                rs("UID") = pageId
                rs("Ucontent") = pageHtml
                rs.Update
                Mycount = Mycount + 1
                ' Progress output, flushed so it shows up while crawling.
                Response.Write pageId & "<br>"
                Response.Flush
            End If
        Next

        elapsed = Timer() - startedAt
        ' Timer() restarts at midnight; compensate for the wrap-around.
        If elapsed < 0 Then elapsed = elapsed + 86400

        Response.Write " successfully crawled" & Mycount & "record,"
        Response.Write "Time Consuming:" & FormatNumber(elapsed, 3) & "SEC<br>"
        Response.Flush

        ' Continue right after the last ID of this batch.
        batchStart = batchEnd + 1
    Loop
End Function%>