Sometimes we need to collect information from the web and store it in our own database or on a local disk. The usual tools for this are WebClient, WebRequest, and similar classes. This article covers one specific task: given a URI, collect all the image resources on that page. The source code follows, for everyone to learn from.
/// <summary>
/// Downloads a web page and extracts every image element found on it,
/// rewriting relative image addresses to absolute ones.
/// </summary>
public class Webpageimage
{
    // Matches an image tag that carries a real src attribute. The look-behind keeps
    // look-alike attributes such as data-src from qualifying.
    private static readonly Regex ImgTagRegex = new Regex(
        @"<img\b[^<>]*?(?<![\w\-])src\s*=[^<>]*>",
        RegexOptions.IgnoreCase);

    // Captures the charset token of a meta tag, whether it is written as
    // charset="utf-8" or embedded in content="text/html; charset=utf-8".
    private static readonly Regex MetaCharSetRegex = new Regex(
        @"<meta\b[^>]*?\bcharset\s*=\s*[""']?\s*(?<cs>[\w\-.:]+)",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Gets all the pictures in the web page.
    /// </summary>
    /// <param name="url">Web address of the page.</param>
    /// <param name="charSet">
    /// Page encoding name; pass null or "" to detect it from the page's meta tag.
    /// </param>
    /// <returns>
    /// The HTML of every image tag, each followed by "&lt;br/&gt;" and a line break.
    /// </returns>
    public string GetImages(string url, string charSet)
    {
        string html = GetHtml(url, charSet);
        return GetPictures(html, url);
    }

    /// <summary>
    /// Gets all the pictures in the web page, detecting the page encoding automatically.
    /// </summary>
    /// <param name="url">Web address of the page.</param>
    /// <returns>
    /// The HTML of every image tag, each followed by "&lt;br/&gt;" and a line break.
    /// </returns>
    public string GetImages(string url)
    {
        return GetImages(url, "");
    }

    /// <summary>
    /// Returns the host part of <paramref name="url"/>.
    /// </summary>
    private static string Doman(string url)
    {
        Uri u = new Uri(url);
        return u.Host;
    }

    /// <summary>
    /// Downloads the page and decodes it to text.
    /// </summary>
    /// <param name="url">Web address of the page.</param>
    /// <param name="charSet">
    /// Target page encoding; if null or "", the encoding declared in the page is used.
    /// </param>
    /// <returns>The decoded page content.</returns>
    private string GetHtml(string url, string charSet)
    {
        byte[] buffer;

        // Some pages cannot be downloaded this way (they need cookies, special headers,
        // and so on). Handle those case by case, for example:
        //     client.Headers.Add("Cookie", cookie);
        using (WebClient client = new WebClient())
        {
            // Credentials used to authenticate the request. If the server requires a
            // user name and password, use instead:
            //     client.Credentials = new NetworkCredential(user, password);
            client.Credentials = CredentialCache.DefaultCredentials;
            buffer = client.DownloadData(url);
        }

        // First pass with the default encoding: enough to read the ASCII meta tag.
        string html = Encoding.Default.GetString(buffer);

        Encoding encoding;
        if (!string.IsNullOrEmpty(charSet))
        {
            // An explicit charset is the caller's decision: an unknown name still
            // surfaces as ArgumentException rather than being silently ignored.
            encoding = Encoding.GetEncoding(charSet);
        }
        else
        {
            encoding = DetectEncoding(html);
        }

        if (encoding == null || encoding.Equals(Encoding.Default))
        {
            return html;
        }

        return encoding.GetString(buffer);
    }

    /// <summary>
    /// Reads the charset declared in the page's meta tag.
    /// </summary>
    /// <returns>
    /// The declared encoding, or null when none is declared or the name is unknown.
    /// </returns>
    private static Encoding DetectEncoding(string html)
    {
        Match match = MetaCharSetRegex.Match(html);
        if (!match.Success)
        {
            return null;
        }

        try
        {
            return Encoding.GetEncoding(match.Groups["cs"].Value);
        }
        catch (ArgumentException)
        {
            // The page declared a charset this runtime does not know; keep the
            // default decoding instead of failing the whole download.
            return null;
        }
    }

    /// <summary>
    /// Extracts every image tag from <paramref name="data"/> and makes its src absolute.
    /// </summary>
    /// <param name="data">HTML of the page.</param>
    /// <param name="url">Address of the page, used to resolve relative image paths.</param>
    /// <returns>The rewritten image tags, one per line, each followed by "&lt;br/&gt;".</returns>
    private string GetPictures(string data, string url)
    {
        StringBuilder result = new StringBuilder();
        foreach (Match tag in ImgTagRegex.Matches(data))
        {
            Pictures picture = new Pictures(tag.Value, url);
            result.Append(picture.Gethtml).Append("<br/>").Append(Environment.NewLine);
        }

        return result.ToString();
    }

    /// <summary>
    /// Picture entity: wraps the HTML of one image tag and exposes its attributes.
    /// </summary>
    public class Pictures
    {
        private static readonly Regex SrcRegex = BuildAttributeRegex("src");

        private readonly Uri _baseUri;
        private string _html = string.Empty;

        /// <summary>
        /// Creates the entity and rewrites a relative src to an absolute address.
        /// </summary>
        /// <param name="strHtml">HTML of a single image tag.</param>
        /// <param name="baseUrl">Absolute address of the page the tag came from.</param>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        /// <exception cref="UriFormatException"><paramref name="baseUrl"/> is not an absolute URI.</exception>
        public Pictures(string strHtml, string baseUrl)
        {
            if (strHtml == null)
            {
                throw new ArgumentNullException("strHtml");
            }

            if (baseUrl == null)
            {
                throw new ArgumentNullException("baseUrl");
            }

            _html = strHtml;

            // Keep the complete page address (port and path included) so that
            // page-relative paths such as "img/a.png" resolve correctly.
            _baseUri = new Uri(baseUrl);
            SetSrc();
        }

        /// <summary>HTML of the image tag, with src already made absolute.</summary>
        public string Gethtml
        {
            get { return _html; }
        }

        /// <summary>Value of the alt attribute, or "" when absent.</summary>
        public string Alt
        {
            get { return GetAttribute("alt")[0]; }
        }

        /// <summary>Value of the src attribute, or "" when absent.</summary>
        public string Src
        {
            get { return GetAttribute("src")[0]; }
        }

        /// <summary>
        /// Converts a relative path to an absolute address based on a base path.
        /// </summary>
        /// <param name="baseUrl">Base path (absolute).</param>
        /// <param name="u">Relative path to convert.</param>
        /// <returns>The absolute address.</returns>
        public string Absurl(string baseUrl, string u)
        {
            Uri ub = new Uri(baseUrl);
            Uri ua = new Uri(ub, u);
            return ua.AbsoluteUri;
        }

        /// <summary>
        /// Rewrites each relative src attribute in the tag to an absolute address.
        /// </summary>
        private void SetSrc()
        {
            // A MatchEvaluator is used instead of a replacement string so that '$'
            // characters inside a URL are never interpreted as regex substitutions.
            _html = SrcRegex.Replace(_html, new MatchEvaluator(RewriteSrc));
        }

        /// <summary>
        /// Produces the replacement text for one src attribute match.
        /// </summary>
        private string RewriteSrc(Match match)
        {
            string value = match.Groups["v"].Value;

            // Empty, already absolute http(s) and inline data addresses stay untouched.
            if (value.Length == 0
                || value.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || value.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
                || value.StartsWith("data:", StringComparison.OrdinalIgnoreCase))
            {
                return match.Value;
            }

            Uri absolute;
            if (!Uri.TryCreate(_baseUri, value, out absolute))
            {
                // Not resolvable: leave the attribute exactly as the page wrote it.
                return match.Value;
            }

            return "src=\"" + absolute.AbsoluteUri + "\"";
        }

        /// <summary>
        /// Gets the values of an attribute in the HTML of the tag.
        /// </summary>
        /// <param name="strAttributeName">Attribute name (matched case-insensitively).</param>
        /// <returns>
        /// All values found, in document order; a single "" when the attribute is absent.
        /// </returns>
        private string[] GetAttribute(string strAttributeName)
        {
            List<string> values = new List<string>();
            foreach (Match m in BuildAttributeRegex(strAttributeName).Matches(_html))
            {
                values.Add(m.Groups["v"].Value);
            }

            // Callers index [0] unconditionally, so never return an empty array.
            if (values.Count == 0)
            {
                values.Add("");
            }

            return values.ToArray();
        }

        /// <summary>
        /// Builds a regex matching name="value", name='value' or name=value and
        /// capturing the bare value in group "v".
        /// </summary>
        private static Regex BuildAttributeRegex(string name)
        {
            return new Regex(
                @"(?<![\w\-])" + Regex.Escape(name)
                    + @"\s*=\s*(?:""(?<v>[^""]*)""|'(?<v>[^']*)'|(?<v>[^\s""'<>]+))",
                RegexOptions.IgnoreCase);
        }
    }
}
Call:
string images = new Webpageimage().GetImages("http://www.sina.com");
Results:
That is the entire content of this article; I hope it helps you in your studies.