using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;

namespace imagecollection
{
    /// <summary>
    /// Main form of the image crawler: fetches the HTML of a page, extracts the
    /// <c>src</c> of every <c>&lt;img&gt;</c> tag, and downloads the images hosted on
    /// <see cref="AllowedHost"/> into an <c>img</c> folder next to the executable.
    /// </summary>
    /// <remarks>
    /// NOTE(review): identifiers declared outside this file (the designer controls
    /// <c>txturl</c>, <c>txtimg</c>, <c>txttitle</c>, the click-handler name wired up in the
    /// designer, and the <c>Webhttp.httpget</c> helper) keep the spelling seen in this
    /// listing; confirm their exact casing against the designer/helper sources.
    /// </remarks>
    public partial class Form1 : Form
    {
        /// <summary>Only images served from this host are downloaded.</summary>
        private const string AllowedHost = "www.xxx.com";

        /// <summary>
        /// Folder that receives the downloaded images. It is wiped at the start of every crawl.
        /// Named so that it no longer hides <see cref="System.IO.Path"/> inside the class.
        /// </summary>
        private static readonly string ImageDirectory =
            Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "img");

        /// <summary>Matches an <c>&lt;img&gt;</c> tag and captures its <c>src</c> value in group <c>imgUrl</c>.</summary>
        private static readonly Regex ImgTagRegex = new Regex(
            @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
            RegexOptions.IgnoreCase);

        /// <summary>Matches the page's <c>&lt;title&gt;</c> element (tags included).</summary>
        private static readonly Regex TitleRegex = new Regex(@"<title>.+</title>");

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler of the crawl button: validates the URL box and starts the crawl
        /// on a background thread so the UI stays responsive.
        /// </summary>
        private void Btnshuaqu_click(object sender, EventArgs e)
        {
            string url = txturl.Text.Trim();
            if (string.IsNullOrEmpty(url))
            {
                MessageBox.Show("Please enter a URL");
                return;
            }

            txtimg.AppendText("start the crawl: \r\n");

            Thread crawlThread = new Thread(() => Shuaqu(url)) { IsBackground = true };
            crawlThread.Start();
        }

        /// <summary>
        /// Crawls <paramref name="url"/>: recreates the image folder, fetches the page,
        /// shows its title, starts one download thread per image on <see cref="AllowedHost"/>,
        /// waits for them, and then reports completion. Runs on a background thread; all UI
        /// access is marshalled through <c>Control.Invoke</c>.
        /// </summary>
        /// <param name="url">Address of the page to crawl.</param>
        private void Shuaqu(string url)
        {
            try
            {
                // Start from an empty folder on every crawl.
                if (Directory.Exists(ImageDirectory))
                {
                    Directory.Delete(ImageDirectory, true);
                }
                Directory.CreateDirectory(ImageDirectory);

                // NOTE(review): Webhttp.httpget is defined elsewhere; the meaning of the
                // second and third arguments is not visible from this file.
                string result = Webhttp.httpget(url, null, 3);
                if (string.IsNullOrEmpty(result))
                {
                    // Regex matching on null would throw; report and stop instead.
                    AppendLog("No content was returned for " + url + "\r\n");
                    return;
                }

                string[] imageUrls = GetHtmlImageUrlList(result);
                AppendLog("data has been obtained!" + imageUrls.Length + "\r\n");

                // Show the page title with any double quotes stripped.
                string title = TitleRegex.Match(result).ToString();
                txttitle.Invoke(new Action(() =>
                {
                    txttitle.Text = Regex.Replace(title, @"[\""]+", "");
                }));

                List<Thread> downloads = new List<Thread>();
                foreach (string imageUrl in imageUrls)
                {
                    // src values are frequently relative or malformed; "new Uri" would throw
                    // on those and take the whole crawl thread down, so skip them instead.
                    Uri uri;
                    if (!Uri.TryCreate(imageUrl, UriKind.Absolute, out uri) || uri.Host != AllowedHost)
                    {
                        continue;
                    }

                    // Copy to a per-iteration local so the closure is safe on every compiler version.
                    string current = imageUrl;
                    Thread download = new Thread(() => DownloadAndReport(current)) { IsBackground = true };
                    download.Start();
                    downloads.Add(download);

                    AppendLog(current + "\r\n");
                }

                // Wait for the downloads so that the completion message is actually true.
                foreach (Thread download in downloads)
                {
                    download.Join();
                }

                AppendLog("Full Crawl complete!\r\n");
            }
            catch (IOException ex)
            {
                AppendLog("Crawl failed: " + ex.Message + "\r\n");
            }
            catch (UnauthorizedAccessException ex)
            {
                AppendLog("Crawl failed: " + ex.Message + "\r\n");
            }
            catch (WebException ex)
            {
                AppendLog("Crawl failed: " + ex.Message + "\r\n");
            }
        }

        /// <summary>
        /// Downloads the image at <paramref name="imgpath"/> into the image folder. The file
        /// name is the last path segment of the URL with any query string removed.
        /// </summary>
        /// <param name="imgpath">Absolute URL of the image; may carry a <c>?query</c> part.</param>
        /// <exception cref="WebException">The download failed or the target file could not be written.</exception>
        public void Get_img(string imgpath)
        {
            // Drop the query string so it does not end up in the local file name.
            string[] parts = imgpath.Split('?');
            string name = Path.GetFileName(parts[0]);

            using (WebClient client = new WebClient())
            {
                client.DownloadFile(imgpath, Path.Combine(ImageDirectory, name));
            }
        }

        /// <summary>
        /// Gets the URL of all the pictures in the HTML.
        /// </summary>
        /// <param name="sHtmlText">HTML code</param>
        /// <returns>List of URLs for pictures, in document order (one entry per matched tag).</returns>
        private string[] GetHtmlImageUrlList(string sHtmlText)
        {
            MatchCollection matches = ImgTagRegex.Matches(sHtmlText);
            string[] urls = new string[matches.Count];

            int i = 0;
            foreach (Match match in matches)
            {
                urls[i++] = match.Groups["imgUrl"].Value;
            }

            return urls;
        }

        /// <summary>
        /// Thread entry point for one download: calls <see cref="Get_img"/> and logs a failure
        /// instead of letting the exception escape the thread (which would end the process).
        /// </summary>
        private void DownloadAndReport(string imageUrl)
        {
            try
            {
                Get_img(imageUrl);
            }
            catch (WebException ex)
            {
                AppendLog("Download failed: " + imageUrl + " (" + ex.Message + ")\r\n");
            }
            catch (ArgumentException ex)
            {
                // Raised by the path helpers when the URL yields an unusable file name.
                AppendLog("Download failed: " + imageUrl + " (" + ex.Message + ")\r\n");
            }
        }

        /// <summary>Appends <paramref name="text"/> to the log box from any thread.</summary>
        private void AppendLog(string text)
        {
            // The form may already be closed while background work is still running.
            if (txtimg.IsDisposed || !txtimg.IsHandleCreated)
            {
                return;
            }

            txtimg.Invoke(new Action(() => txtimg.AppendText(text)));
        }
    }
}
#region Download image to Image
/// <summary>
/// Helper for loading a remote picture into memory.
/// </summary>
/// <remarks>
/// NOTE(review): in this listing the method appears outside any type, which C# does not
/// allow, so it is wrapped in a static class here. If it was meant to live inside an
/// existing class, move the method there and drop this wrapper. The method name keeps the
/// spelling seen in the listing; confirm the intended casing.
/// </remarks>
public static class ImageDownloader
{
    /// <summary>
    /// Downloads the resource at <paramref name="url"/> and decodes it as an image.
    /// </summary>
    /// <param name="url">Address of the image to download.</param>
    /// <returns>
    /// A <see cref="System.Drawing.Bitmap"/> copy of the downloaded picture. The caller owns
    /// it and is responsible for disposing it.
    /// </returns>
    /// <exception cref="System.Net.WebException">The download failed.</exception>
    /// <exception cref="System.ArgumentException">The downloaded bytes are not a valid image.</exception>
    public static System.Drawing.Image Urltoimage(string url)
    {
        using (System.Net.WebClient client = new System.Net.WebClient())
        {
            byte[] bytes = client.DownloadData(url);

            using (System.IO.MemoryStream ms = new System.IO.MemoryStream(bytes))
            using (System.Drawing.Image decoded = System.Drawing.Image.FromStream(ms))
            {
                // An Image created by FromStream needs its stream kept open for as long as
                // the Image lives. Returning a Bitmap copy detaches the result from the
                // stream, so the stream can be disposed here safely.
                return new System.Drawing.Bitmap(decoded);
            }
        }
    }
}
#endregion
// C#: crawl a web page for img src image links (including links that carry query parameters) and download the images