// Fetch the web page's source code when the "read" menu item is clicked.
/// <summary>URL taken from the combo box on the most recent fetch.</summary>
String Wangzhi;

/// <summary>HTML source returned by the most recent fetch.</summary>
String resphtml;

/// <summary>
/// Click handler: downloads the page whose URL is typed into <c>comboBox1</c>,
/// shows its source in <c>textBox2</c>, stores it in <see cref="resphtml"/>,
/// and adds the URL to the combo box's item list.
/// </summary>
/// <param name="sender">The menu item that raised the event.</param>
/// <param name="e">Unused event data.</param>
/// <remarks>
/// NOTE(review): the handler name was reconstructed from a garbled paste;
/// confirm it matches the name wired up in the designer file.
/// Exceptions from an invalid URL or a failed request propagate to the caller.
/// </remarks>
private void ReadWebPageSourceCodeToolStripMenuItem_Click(object sender, EventArgs e)
{
    textBox2.Clear(); // empty the output box before reading
    Wangzhi = comboBox1.Text;

    // Locate the web page by URL and prepare the connection.
    HttpWebRequest rep = (HttpWebRequest)WebRequest.Create(Wangzhi);

    // The page is decoded with the system default code page.
    Encoding htmlencoding = Encoding.Default;

    // Dispose the response and reader even if reading throws, so the
    // underlying connection is always released.
    using (HttpWebResponse resp = (HttpWebResponse)rep.GetResponse())
    using (StreamReader sr = new StreamReader(resp.GetResponseStream(), htmlencoding))
    {
        // Read the stream from beginning to end and convert it to a string.
        resphtml = sr.ReadToEnd();
    }

    textBox2.Text = resphtml;
    comboBox1.Items.Add(comboBox1.Text);
}
// Get images from the web page source and download them to the E: drive.

/// <summary>Number of &lt;img&gt; tags found by the most recent scan.</summary>
public int num = 0;

/// <summary>
/// Matches an &lt;img&gt; tag; the <c>src</c> group captures the value of its
/// src attribute (double-quoted, single-quoted or unquoted).
/// </summary>
/// <remarks>
/// NOTE(review): the pattern in the original paste had lost its leading
/// "&lt;img" part and its group name; this is a reconstruction.
/// </remarks>
private static readonly Regex ImgTagRegex = new Regex(
    @"<img\b[^<>]*?\bsrc\s*=\s*[""']?\s*(?<src>[^\s""'<>]*)[^<>]*?/?\s*>",
    RegexOptions.IgnoreCase);

/// <summary>
/// Click handler: lists every &lt;img&gt; tag found in <see cref="resphtml"/>
/// in <c>listView1</c>, then downloads each image to <c>e://&lt;index&gt;.jpg</c>
/// and appends one result row per image.
/// </summary>
/// <param name="sender">The menu item that raised the event.</param>
/// <param name="e">Unused event data.</param>
/// <remarks>
/// NOTE(review): the handler name was reconstructed from a garbled paste;
/// confirm it matches the name wired up in the designer file.
/// </remarks>
private void ReadTheImageFromTheSourcePageToolStripMenuItem_Click(object sender, EventArgs e)
{
    listView1.Columns.Clear();
    listView1.Items.Clear();
    // NOTE(review): the original column width was lost in the paste; 300 is a placeholder.
    listView1.Columns.Add("link address and picture address", 300);

    string result = resphtml; // page source fetched earlier
    if (string.IsNullOrEmpty(result))
    {
        // No page has been fetched yet, so there is nothing to scan.
        num = 0;
        return;
    }

    // Put every matched <img> tag into the ListView.
    MatchCollection mc = ImgTagRegex.Matches(result);
    foreach (Match match in mc)
    {
        listView1.Items.Add(match.Value);
    }

    num = mc.Count; // how many <img> tags were found

    // Base address used to resolve relative src values; stays null when the
    // stored page URL is missing or not absolute.
    Uri pageUri;
    Uri.TryCreate(Wangzhi, UriKind.Absolute, out pageUri);

    for (int i = 0; i < num; i++)
    {
        string src = mc[i].Groups["src"].Value;
        string path = "e://" + i.ToString() + ".jpg";

        Uri imageUri = null;
        bool resolved = src.Length > 0
            && (pageUri != null
                ? Uri.TryCreate(pageUri, src, out imageUri)
                : Uri.TryCreate(src, UriKind.Absolute, out imageUri));

        string error = resolved
            ? TryDownloadImage(imageUri, path)
            : "the src attribute is empty or not a valid address";

        if (error == null)
        {
            listView1.Items.Add(path + "Picture saved successfully!");
        }
        else
        {
            // Record the failure and continue with the remaining images
            // instead of hiding it behind a generic message.
            listView1.Items.Add(src + " could not be downloaded: " + error);
        }
    }
}

/// <summary>Downloads <paramref name="imageUri"/> to <paramref name="path"/>.</summary>
/// <param name="imageUri">Absolute address of the image.</param>
/// <param name="path">Destination file; an existing file is replaced.</param>
/// <returns><c>null</c> on success, otherwise a description of the failure.</returns>
private static string TryDownloadImage(Uri imageUri, string path)
{
    try
    {
        WebRequest req = WebRequest.Create(imageUri);
        using (WebResponse res = req.GetResponse())
        using (Stream reader = res.GetResponseStream())
        // FileMode.Create truncates an existing file, so a shorter download
        // cannot leave stale bytes from a previous run at the end.
        using (FileStream writer = new FileStream(path, FileMode.Create, FileAccess.Write))
        {
            reader.CopyTo(writer);
        }

        return null;
    }
    catch (WebException ex)
    {
        return ex.Message;
    }
    catch (IOException ex)
    {
        return ex.Message;
    }
    catch (NotSupportedException ex)
    {
        // Raised for schemes WebRequest cannot handle, e.g. "data:" URIs.
        return ex.Message;
    }
    catch (UnauthorizedAccessException ex)
    {
        return ex.Message;
    }
}
} // presumably closes the enclosing class, opened outside this excerpt
} // presumably closes the enclosing namespace, opened outside this excerpt
This C# example implements a simple crawler that downloads the images found on an arbitrary web page. The basic approach works, but there are still some small problems; corrections are welcome.