Most code found online for collecting the img tags of a page relies on string manipulation, which is tedious to write and tiring to read. Here a WebBrowser control loads the page instead, and the HtmlDocument class removes the string-handling steps entirely: a single call to GetElementsByTagName returns every img element in an HtmlElementCollection, from which each image address can be read.
The code is as follows:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace WindowsFormsApplication1
{
    /// <summary>
    /// Loads a web page in a <see cref="WebBrowser"/> control and saves every image
    /// referenced by an <c>img</c> tag on that page into a local folder.
    /// </summary>
    /// <remarks>
    /// <see cref="WebBrowser"/> wraps an ActiveX control, so <see cref="start"/> has to be
    /// called on an STA thread that runs a Windows Forms message loop; otherwise the
    /// DocumentCompleted event is never raised. Images are downloaded synchronously
    /// inside that event, i.e. on the UI thread.
    /// </remarks>
    public class GatherPic
    {
        // Extensions that are kept as-is when they already end the remote file name.
        private static readonly string[] ImageExtensions =
            new string[] { ".jpg", ".jpeg", ".png", ".gif", ".bmp" };

        // Extension appended when the remote file name has no recognised image extension.
        private const string DefaultExtension = ".jpg";

        // Request timeout in milliseconds.
        private const int DownloadTimeoutMs = 10000;

        private string savePath;
        private string getUrl;
        private WebBrowser wb;
        private int iImgCount;

        /// <summary>Stores the page address and the target folder; nothing is loaded yet.</summary>
        /// <param name="sWebUrl">Address of the page whose images are collected.</param>
        /// <param name="sSavePath">Folder that receives the downloaded images.</param>
        public GatherPic(string sWebUrl, string sSavePath)
        {
            this.getUrl = sWebUrl;
            this.savePath = sSavePath;
        }

        /// <summary>
        /// Starts loading the page. The images are collected later, when the browser
        /// raises DocumentCompleted for the top-level document.
        /// </summary>
        /// <returns>
        /// <c>false</c> (after showing a message box) when no URL was supplied;
        /// <c>true</c> when navigation has been started.
        /// </returns>
        public bool start()
        {
            if (this.getUrl == null || this.getUrl.Trim().Length == 0)
            {
                MessageBox.Show("Please enter a website URL first!");
                return false;
            }

            // Release the browser of a previous run so repeated calls do not leak controls.
            if (this.wb != null)
            {
                this.wb.DocumentCompleted -= DocumentCompleted;
                this.wb.Dispose();
            }

            this.wb = new WebBrowser();
            // Subscribe before navigating so the completion event cannot be missed.
            this.wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
            this.wb.Navigate(this.getUrl);
            return true;
        }

        /// <summary>
        /// Handles <see cref="WebBrowser.DocumentCompleted"/> and starts the image search
        /// once the top-level document has finished loading.
        /// </summary>
        private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (this.wb == null || this.wb.Document == null)
            {
                return;
            }

            // The event also fires for every frame/iframe; only the event whose URL
            // matches the main document means the whole page is ready.
            if (e.Url != this.wb.Document.Url)
            {
                return;
            }

            SearchImgList();
        }

        /// <summary>
        /// Enumerates all <c>img</c> elements of the loaded document and saves each
        /// referenced image into the configured folder. Does nothing when no document
        /// has been loaded yet.
        /// </summary>
        public void SearchImgList()
        {
            if (this.wb == null || this.wb.Document == null)
            {
                this.iImgCount = 0;
                return;
            }

            HtmlElementCollection elemColl = this.wb.Document.GetElementsByTagName("img");
            this.iImgCount = elemColl.Count;

            foreach (HtmlElement elem in elemColl)
            {
                string sImgUrl = elem.GetAttribute("src");
                if (string.IsNullOrEmpty(sImgUrl))
                {
                    continue; // img tag without a source: nothing to download
                }

                SaveImageFromWeb(sImgUrl, this.savePath);
            }
        }

        /// <summary>
        /// Downloads a single remote image and writes it into <paramref name="path"/>.
        /// </summary>
        /// <param name="imgUrl">Absolute http/https address of the image.</param>
        /// <param name="path">Target folder; it is created when it does not exist.</param>
        /// <returns>
        /// 1 when the image was saved; 0 when the address is not a usable http/https URL,
        /// the server did not answer with an <c>image/*</c> content type, or the download
        /// or the file write failed.
        /// </returns>
        public int SaveImageFromWeb(string imgUrl, string path)
        {
            Uri imageUri;
            if (string.IsNullOrEmpty(imgUrl)
                || string.IsNullOrEmpty(path)
                || !Uri.TryCreate(imgUrl, UriKind.Absolute, out imageUri))
            {
                return 0;
            }

            // data:, file:, ftp: ... sources cannot be fetched through HttpWebRequest.
            if (imageUri.Scheme != Uri.UriSchemeHttp && imageUri.Scheme != Uri.UriSchemeHttps)
            {
                return 0;
            }

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(imageUri);
                request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
                request.Timeout = DownloadTimeoutMs;

                using (WebResponse response = request.GetResponse())
                {
                    string contentType = response.ContentType;
                    if (contentType == null
                        || !contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                    {
                        return 0;
                    }

                    Directory.CreateDirectory(path); // no-op when the folder already exists
                    string filePath = Path.Combine(path, BuildFileName(imageUri));

                    using (Stream stream = response.GetResponseStream())
                    using (FileStream fso = new FileStream(filePath, FileMode.Create))
                    {
                        // Copy until the stream ends instead of trusting ContentLength,
                        // which is -1 when the server does not send the header.
                        byte[] buffer = new byte[4096];
                        int read;
                        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            fso.Write(buffer, 0, read);
                        }
                    }

                    return 1;
                }
            }
            catch (WebException)
            {
                return 0;
            }
            catch (UriFormatException)
            {
                return 0;
            }
            catch (IOException)
            {
                // Keep the collection best-effort: one unwritable file must not stop the rest.
                return 0;
            }
            catch (UnauthorizedAccessException)
            {
                return 0;
            }
        }

        /// <summary>
        /// Derives a local file name from the last path segment of the image address
        /// (query string excluded), replaces characters that are invalid in file names
        /// and appends ".jpg" when the name has no known image extension.
        /// </summary>
        private static string BuildFileName(Uri imageUri)
        {
            string absolutePath = imageUri.AbsolutePath;
            string name = Uri.UnescapeDataString(
                absolutePath.Substring(absolutePath.LastIndexOf('/') + 1));

            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }

            if (name.Length == 0)
            {
                name = "image"; // address ended with '/', so there is no segment to reuse
            }

            string extension = Path.GetExtension(name);
            foreach (string known in ImageExtensions)
            {
                if (string.Equals(extension, known, StringComparison.OrdinalIgnoreCase))
                {
                    return name;
                }
            }

            return name + DefaultExtension;
        }
    }
}
// ----------------- Calling code --------------------
// Run this on the UI (STA) thread of a Windows Forms application, e.g. from a
// button click handler: the WebBrowser used internally needs a message loop.
// Make sure the C:\test folder exists.
GatherPic gatherpic = new GatherPic("http://www.baidu.com", @"C:\test");
gatherpic.start();