This example uses the WebRequest class to download a web page and extract its text; the page crawled here is http://novel.hongxiu.com/a/1036665/10425842.html.
The code is as follows:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

// NOTE(review): the published listing shows the namespace as "Web Crawl", which is not a
// valid identifier; it must match the namespace used by the designer half of this partial
// class — confirm.
namespace WebCrawl
{
    /// <summary>
    /// Form that downloads the page whose address is in <c>label1</c>, strips the HTML tags,
    /// cuts out the text between two marker strings, writes it to a file and shows it in
    /// <c>richTextBox1</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        // NOTE(review): both marker literals are reproduced verbatim from the published
        // listing; they look machine-translated and probably do not match the real page
        // text — confirm against the page being crawled.
        private const string StartMarker = "Red | sleeves | words | love | small |";
        private const string EndMarker = "but what? ";

        // Number of characters kept counting from the position where the end marker starts.
        // Presumably the length of the end marker in the original page language — TODO confirm.
        private const int EndMarkerTail = 6;

        private const string OutputPath = "E:\\x1.txt";

        // Lazily matches one tag: '<', one or more characters (including newlines), then the
        // first '>'. The listing printed this pattern with stray spaces and an invalid "\ n"
        // escape; this is the reconstructed form. Built once instead of on every call.
        private static readonly Regex TagPattern = new Regex("<(.|\n)+?>");

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Fetches the page, extracts the text between the start and end markers, saves it to
        /// <c>E:\x1.txt</c> and displays it in <c>richTextBox1</c>.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// A marker was not found in the downloaded text.
        /// </exception>
        public void Zhuaqu()
        {
            // NOTE(review): the listing shows "Label1"; "label1" is assumed here because it is
            // the designer default and matches the casing of richTextBox1 — confirm.
            WebRequest request = WebRequest.Create(label1.Text);

            string html;
            // Dispose the response and reader even when reading fails.
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                html = reader.ReadToEnd();
            }

            string text = Thtxt(html);

            int startIndex = text.IndexOf(StartMarker, StringComparison.Ordinal);
            if (startIndex < 0)
            {
                throw new InvalidOperationException("Start marker was not found in the downloaded page.");
            }

            // Search from the start marker onward so an earlier occurrence of the end marker
            // cannot produce a negative length.
            int endIndex = text.IndexOf(EndMarker, startIndex, StringComparison.Ordinal);
            if (endIndex < 0)
            {
                throw new InvalidOperationException("End marker was not found after the start marker.");
            }

            // Clamp so the extra tail never runs past the end of the text.
            int length = Math.Min(endIndex - startIndex + EndMarkerTail, text.Length - startIndex);
            string subtxt = text.Substring(startIndex, length);

            using (StreamWriter writer = new StreamWriter(OutputPath))
            {
                writer.WriteLine(subtxt);
            }

            richTextBox1.Text = subtxt;
        }

        /// <summary>
        /// Click handler: runs the crawl and reports expected failures in a message box
        /// instead of letting them escape to the UI thread.
        /// </summary>
        private void Button1_Click(object sender, EventArgs e)
        {
            try
            {
                Zhuaqu();
            }
            catch (UriFormatException ex)
            {
                ReportFailure(ex);
            }
            catch (WebException ex)
            {
                ReportFailure(ex);
            }
            catch (IOException ex)
            {
                ReportFailure(ex);
            }
            catch (InvalidOperationException ex)
            {
                ReportFailure(ex);
            }
        }

        /// <summary>Shows the exception message to the user in an error dialog.</summary>
        private void ReportFailure(Exception ex)
        {
            MessageBox.Show(this, ex.Message, "Web Crawl", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }

        /// <summary>
        /// Removes HTML tags from <paramref name="Html"/> and then deletes any remaining
        /// '&gt;' characters.
        /// </summary>
        /// <param name="Html">HTML source text.</param>
        /// <returns>The text with tags and leftover '&gt;' characters removed.</returns>
        private string Thtxt(string Html)
        {
            string stripped = TagPattern.Replace(Html, "");

            // NOTE(review): the published listing also had Replace("<", "<") here, which is a
            // no-op and was dropped. Both Replace calls may originally have decoded HTML
            // entities that were lost when the article was rendered — confirm.
            return stripped.Replace(">", "");
        }
    }
}
Result of running the program:
Crawling the text content of a web page