Using Microsoft.XMLHTTP in classic ASP to fetch a web page (without garbled characters) and extract only the content that is needed
Sample source code:
The code is as follows:
<%
' Page entry point: downloads the page named by the "U" query-string parameter,
' extracts the main news <div>, strips site boilerplate and hyperlinks, and
' writes the remaining HTML to the response.
Dim xmlUrl, http, strHtml, strBody

xmlUrl = Trim(Request.QueryString("U"))

' SECURITY: the URL is supplied by the client, so only plain http/https targets
' are accepted. NOTE(review): this does not prevent requests to internal hosts;
' add a host allow-list before exposing this page publicly.
If LCase(Left(xmlUrl, 7)) <> "http://" And LCase(Left(xmlUrl, 8)) <> "https://" Then
    Response.Status = "400 Bad Request"
    Response.Write "Invalid or missing URL parameter ""U""."
    Response.End
End If

' Fetch the remote page synchronously (third Open argument False = blocking call).
' NOTE(review): MSXML2.ServerXMLHTTP is the component intended for server-side
' use; consider switching after testing against the target site.
Set http = Server.CreateObject("Microsoft.XMLHTTP")
http.Open "POST", xmlUrl, False
http.setRequestHeader "User-agent", "mozilla/4.0"
http.setRequestHeader "Connection", "keep-alive"
http.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
http.Send
strHtml = Bytestobstr(http.responseBody)
Set http = Nothing

' Extract the main content block (start and end markers excluded).
strBody = GetBody(strHtml, "<div id=""Div_newscontentc"" class=""cnt"">", "</div>", 0, 0)

If strBody = "$False $" Then
    ' GetBody signals "not found / bad input" with this sentinel; do not echo it to the client.
    Response.Write "Content block not found."
Else
    ' Remove site-specific boilerplate phrases.
    ' NOTE(review): these literals look machine-translated; verify them against
    ' the text that actually appears on the target pages.
    strBody = Replace(strBody, "(This article starts with", "")
    strBody = Replace(strBody, "wealth Power network </a> reprint, please specify the source.)", "")
    strBody = Replace(strBody, "This article starts in, reprint please indicate the source.)", "")
    strBody = Replace(strBody, "Wealth Power network </a>:http://www.927953.com", "")
    strBody = Replace(strBody, "This article starts with", "")
    Response.Write Regremovehref(strBody)
End If
' Decodes the raw bytes of an HTTP response body into a VBScript string.
'
' The bytes are written to a binary ADODB.Stream, which is then switched to
' text mode and read back using the UTF-8 charset. Reading the bytes with an
' explicit charset is what avoids garbled non-ASCII characters.
'
' body    - byte array (e.g. XMLHTTP.responseBody).
' Returns - the decoded text, or "" when body is Empty, Null or zero-length.
Function Bytestobstr(body)
    Dim objStream

    ' An absent or zero-length body cannot be written to the stream; return "" instead of raising.
    Bytestobstr = ""
    If IsEmpty(body) Or IsNull(body) Then Exit Function
    If LenB(body) = 0 Then Exit Function

    Set objStream = Server.CreateObject("ADODB.Stream")
    objStream.Type = 1              ' 1 = adTypeBinary: accept the raw bytes
    objStream.Mode = 3              ' 3 = adModeReadWrite
    objStream.Open
    objStream.Write body
    objStream.Position = 0          ' rewind before switching the stream to text mode
    objStream.Type = 2              ' 2 = adTypeText
    ' Charset used to decode the bytes. NOTE(review): change this (e.g. to
    ' "GB2312") if the target pages are not served as UTF-8.
    objStream.Charset = "UTF-8"
    Bytestobstr = objStream.ReadText
    objStream.Close
    Set objStream = Nothing
End Function
' Returns the part of Constr that lies between the first occurrence of
' startstr and the next occurrence of overstr. Matching is case-insensitive
' (both sides are lower-cased); the returned text keeps its original case.
' No regular expressions are involved - this is plain substring search.
'
' Constr   - text to search.
' startstr - start marker.
' overstr  - end marker, searched for from the start position onward.
' inclul   - False/0 to exclude the start marker from the result; otherwise it is kept.
' inclur   - True to include the end marker in the result; otherwise it is dropped.
' Returns  - the extracted text, or the sentinel "$False $" when any argument
'            is empty/Null, Constr is itself the sentinel, a marker is not
'            found, or nothing lies between the markers.
'            NOTE(review): the sentinel is kept exactly as it appears in this
'            file; it may have been "$False$" before the file was reformatted.
Function GetBody(Constr, startstr, overstr, inclul, inclur)
    Dim haystack, startToken, endToken
    Dim startPos, endPos

    ' Assume failure until a slice has actually been extracted.
    GetBody = "$False $"

    If IsNull(Constr) Or IsNull(startstr) Or IsNull(overstr) Then Exit Function
    If Constr = "$False $" Or Constr = "" Or startstr = "" Or overstr = "" Then Exit Function

    ' Work on local lower-cased copies: VBScript passes arguments ByRef by
    ' default, so assigning to the parameters would alter the caller's variables.
    haystack = LCase(Constr)
    startToken = LCase(startstr)
    endToken = LCase(overstr)

    ' Character-based InStr/Len/Mid keep positions aligned to whole characters;
    ' byte-based variants can match in the middle of a 2-byte character.
    startPos = InStr(1, haystack, startToken, vbBinaryCompare)
    If startPos <= 0 Then Exit Function
    If inclul = False Then startPos = startPos + Len(startToken)

    endPos = InStr(startPos, haystack, endToken, vbBinaryCompare)
    If endPos <= 0 Or endPos <= startPos Then Exit Function
    If inclur = True Then endPos = endPos + Len(endToken)

    GetBody = Mid(Constr, startPos, endPos - startPos)
End Function
' Strips hyperlinks from an HTML fragment while keeping the link text.
'
' Every <a ...>inner</a> element is replaced by its inner content (matching is
' case-insensitive and global), then any leftover
' href="http://www.927953.com" attribute text is removed.
'
' HTMLSTR - HTML fragment to clean.
' Returns - the fragment with anchor tags removed.
Function Regremovehref(HTMLSTR)
    Dim anchorRe, stripped   ' declared locally so no implicit global is created

    Set anchorRe = New RegExp
    anchorRe.IgnoreCase = True
    anchorRe.Global = True
    ' \b stops the pattern from matching other tags that start with "a" (e.g. <abbr>);
    ' [\s\S]*? matches lazily across line breaks and also allows empty anchors.
    anchorRe.Pattern = "<a\b[^>]*>([\s\S]*?)<\/a>"
    stripped = anchorRe.Replace(HTMLSTR, "$1")
    Set anchorRe = Nothing

    Regremovehref = Replace(stripped, "href=""http://www.927953.com""", "")
End Function
%>
The result looks like this: