輸入一個地址,就可以把那個網頁中的連結提取出來。下面這段代碼可以輕鬆實現這個功能,主要用到了 Regex(正則表達式)。
GetUrl.aspx代碼如下:
<meta http-equiv="content-type" content="text/html; charset=gb2312">
後台代碼 GetUrl.aspx.vb 如下:
Imports System.IO
Imports System.Net
Imports System.Text
Imports System.Text.RegularExpressions
Imports System

' Code-behind for GetUrl.aspx.
' Downloads the page at the address typed into urlTextBox, finds anchor tags
' whose href is a relative link, strips a leading "/" from each such link,
' and shows both a report of the links found and the rewritten page source.
Public Class GetUrl
    Inherits System.Web.UI.Page

    Protected WithEvents Label1 As System.Web.UI.WebControls.Label
    Protected WithEvents urlTextBox As System.Web.UI.WebControls.TextBox
    Protected WithEvents scrapeButton As System.Web.UI.WebControls.Button
    Protected WithEvents TipResult As System.Web.UI.WebControls.Label
    Protected WithEvents resultLabel As System.Web.UI.WebControls.TextBox

#Region " Web Form設計器產生的程式碼 "

    ' This call is required by the Web Form designer.
    <System.Diagnostics.DebuggerStepThrough()> Private Sub InitializeComponent()
    End Sub

    Private Sub Page_Init(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Init
        ' CODEGEN: this method call is required by the Web Form designer.
        ' Do not modify it using the code editor.
        InitializeComponent()
    End Sub

#End Region

    ' Sets the static captions of the prompt label and the button.
    Private Sub Page_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        Label1.Text = "請輸入一個URL地址:"
        scrapeButton.Text = "分離Href連結"
    End Sub

    ' One report line is appended per matched anchor (see MatchHandler).
    Private report As New StringBuilder()
    ' Source of the downloaded page; MatchHandler reads it to compute row/column.
    Private webPage As String
    ' Number of anchors matched during the current click.
    Private countOfMatches As Int32

    ' Button handler: downloads the page, rewrites the matching anchors and
    ' renders the report into TipResult and the rewritten source into resultLabel.
    ' There is no Handles clause, so this is presumably wired up from the
    ' .aspx markup - verify against GetUrl.aspx.
    Public Sub scrapeButton_Click(ByVal sender As System.Object, ByVal e As System.EventArgs)
        webPage = GrabUrl()

        Dim myDelegate As New MatchEvaluator(AddressOf MatchHandler)

        ' Matches an opening <a ...> tag whose href value does not start with
        ' "http://" or "mailto:", capturing the href value as "foundAnchor".
        ' NOTE(review): the published listing lost the beginning of this pattern
        ' (everything before "foundAnchor>"), leaving an unbalanced ")" that makes
        ' the Regex constructor throw. The head of the pattern was reconstructed -
        ' confirm it against the original article.
        Dim linksExpression As New Regex( _
            "\<a.+?href=['""](?!http\:\/\/)(?!mailto\:)(?<foundAnchor>[^'"">]+?)[^>]*?\>", _
            RegexOptions.Multiline Or RegexOptions.IgnoreCase Or RegexOptions.IgnorePatternWhitespace)

        Dim newWebPage As String = linksExpression.Replace(webPage, myDelegate)

        ' The address comes straight from the user, so it is HTML-encoded before
        ' being written into the label markup.
        TipResult.Text = "</p>從 " & Server.HtmlEncode(urlTextBox.Text) & "分離出的Href連結<p>" & _
            "<strong>找到並整理" & countOfMatches.ToString() & " 個連結</strong></p><p>" & _
            report.ToString().Replace(Environment.NewLine, "<br />")
        TipResult.Text &= "</p>整理過的頁面<p><br />"
        resultLabel.Text = newWebPage
    End Sub

    ' MatchEvaluator callback invoked once per matched anchor tag.
    ' Appends a report line with the tag text and its row/column in webPage,
    ' increments countOfMatches, and returns the tag with a leading "/" removed
    ' from its href value (or the tag unchanged when there is no leading "/").
    Public Function MatchHandler(ByVal m As Match) As String
        Dim anchor As Group = m.Groups("foundAnchor")
        Dim link As String = anchor.Value

        ' Scanning right-to-left from the match position, every "^" hit is a line
        ' start at or before the match: the first hit is the start of the current
        ' line, and the number of hits is the 1-based row.
        Dim rToL As New Regex("^", RegexOptions.Multiline Or RegexOptions.RightToLeft)
        Dim col, row As Int32
        Dim lineBegin As Int32 = rToL.Match(webPage, m.Index).Index
        row = rToL.Matches(webPage, m.Index).Count
        col = m.Index - lineBegin

        report.AppendFormat( _
            "Link <strong>{0}</strong>, fixed at row: {1}, col: {2}{3}", _
            Server.HtmlEncode(m.Groups(0).Value), _
            row, _
            col, _
            Environment.NewLine _
        )

        countOfMatches += 1

        If link.StartsWith("/") Then
            ' Remove only the slash at the captured position. A plain
            ' String.Replace(link, newLink) would also alter any other occurrence
            ' of the link text inside the tag (for href="/" it would strip every
            ' slash in the tag).
            Return m.Value.Remove(anchor.Index - m.Index, 1)
        End If

        Return m.Value
    End Function

    ' Downloads the page at the address entered in urlTextBox and returns its
    ' content decoded with the system default encoding.
    ' Throws UriFormatException when the text is not an absolute URI and
    ' ArgumentException when its scheme is not http or https.
    Private Function GrabUrl() As String
        ' WebClient also resolves file:// URIs and local paths, so the
        ' user-supplied address is limited to HTTP(S) before anything is fetched.
        Dim target As New Uri(urlTextBox.Text.Trim())
        If Not (target.Scheme = Uri.UriSchemeHttp OrElse target.Scheme = Uri.UriSchemeHttps) Then
            Throw New ArgumentException("Only http:// and https:// addresses are supported.")
        End If

        Dim wc As New WebClient()
        Try
            Dim s As Stream = wc.OpenRead(target.AbsoluteUri)
            Try
                Dim sr As New StreamReader(s, System.Text.Encoding.Default)
                Return sr.ReadToEnd()
            Finally
                ' Runs even when reading fails, so the response stream is not leaked.
                s.Close()
            End Try
        Finally
            wc.Dispose()
        End Try
    End Function
End Class