Note: Images referenced from CSS are not collected yet, and the image handling still needs to be refined.
The code is as follows:
using System;
using System.Configuration;
using System.Data;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
// Additional namespaces used by the image collector
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
<summary>
Acquisition
</summary>
/// <summary>
/// Image collector: downloads a web page, extracts the image URLs found in
/// <c>src</c> attributes and saves each image to disk.
/// Images referenced only from CSS are not collected.
/// </summary>
public class Caiji
{
    /// <summary>
    /// Matches a quoted <c>src</c> attribute whose value ends in .gif, .jpg, .png or .bmp.
    /// Whitespace around <c>=</c> is tolerated, the closing quote must equal the opening
    /// one, and the attribute value is captured in the <c>url</c> group.
    /// </summary>
    private static readonly Regex ImageSrcPattern = new Regex(
        @"src\s*=\s*([""'])(?<url>[^""'\s]+\.(?:gif|jpg|png|bmp))\1",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Creates an instance. All functionality is exposed through static methods;
    /// the constructor is kept so existing <c>new Caiji()</c> callers still compile.
    /// </summary>
    public Caiji()
    {
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/>, collects the images it references
    /// and saves each one to disk.
    /// </summary>
    /// <param name="url">Absolute address of the web page to collect images from.</param>
    /// <param name="chargest">Name of the character encoding used to decode the page, e.g. "utf-8".</param>
    /// <param name="path">
    /// Prefix of every saved file. The generated file name is appended to it directly,
    /// so a directory must end with a directory separator.
    /// </param>
    /// <returns>
    /// An HTML fragment with one entry per image: the image URL followed by either the
    /// path it was saved to or the reason the download failed.
    /// </returns>
    public static string Caijibyurl(string url, string chargest, string path)
    {
        string html = Getsourcetextbyurl(url, chargest);
        List<string> imageUrls = ExtractImageUrls(html, new Uri(url));

        StringBuilder report = new StringBuilder();
        using (WebClient client = new WebClient())
        {
            for (int j = 0; j < imageUrls.Count; j++)
            {
                string imageUrl = imageUrls[j];

                // Read the clock once so all parts of the file name come from the same instant.
                DateTime now = DateTime.Now;

                // Every collected URL ends in a three-letter image extension, so the
                // last four characters are ".ext".
                string extension = imageUrl.Substring(imageUrl.Length - 4, 4);
                string savePath = path + now.Month + now.Day + now.Minute + now.Second + j + extension;

                try
                {
                    client.DownloadFile(new Uri(imageUrl), savePath);
                    report.Append(HttpUtility.HtmlEncode(imageUrl))
                          .Append("<br/> Save path is:")
                          .Append(HttpUtility.HtmlEncode(savePath))
                          .Append("<br/><br/>");
                }
                catch (Exception ex)
                {
                    // Best effort: one failed image must not abort the remaining downloads.
                    // The failure is reported next to the URL it belongs to.
                    report.Append(HttpUtility.HtmlEncode(imageUrl))
                          .Append("<br/> Download failed: ")
                          .Append(HttpUtility.HtmlEncode(ex.Message))
                          .Append("<br/><br/>");
                }
            }
        }

        return report.ToString();
    }

    /// <summary>
    /// Extracts the distinct image URLs referenced by <c>src</c> attributes in
    /// <paramref name="html"/> and resolves them against <paramref name="pageUri"/>.
    /// </summary>
    /// <param name="html">Page source to scan; null or empty yields an empty list.</param>
    /// <param name="pageUri">Absolute address of the page, used to resolve relative image URLs.</param>
    /// <returns>
    /// Absolute http/https image URLs in order of first appearance, without duplicates.
    /// Values that cannot be resolved, or that resolve to another scheme, are skipped.
    /// </returns>
    public static List<string> ExtractImageUrls(string html, Uri pageUri)
    {
        List<string> imageUrls = new List<string>();
        if (string.IsNullOrEmpty(html))
        {
            return imageUrls;
        }

        HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);
        foreach (Match match in ImageSrcPattern.Matches(html))
        {
            // The captured value is used as-is: URL paths can be case-sensitive and may
            // legitimately contain "src" or "=".
            string rawUrl = match.Groups["url"].Value;

            // Uri resolution handles absolute, root-relative, protocol-relative and
            // document-relative references, including page addresses without a path.
            Uri resolved;
            if (!Uri.TryCreate(pageUri, rawUrl, out resolved))
            {
                continue;
            }

            // Only web resources are downloaded; this keeps file:// and similar
            // schemes found in remote HTML away from WebClient.
            if (resolved.Scheme != Uri.UriSchemeHttp && resolved.Scheme != Uri.UriSchemeHttps)
            {
                continue;
            }

            string absoluteUrl = resolved.AbsoluteUri;
            if (seen.Add(absoluteUrl))
            {
                imageUrls.Add(absoluteUrl);
            }
        }

        return imageUrls;
    }

    /// <summary>
    /// Downloads the resource at <paramref name="url"/> and returns its body as text.
    /// </summary>
    /// <param name="url">Address of the resource to download.</param>
    /// <param name="chargest">Name of the character encoding used to decode the response.</param>
    /// <returns>The complete response body decoded with the requested encoding.</returns>
    public static string Getsourcetextbyurl(string url, string chargest)
    {
        WebRequest request = WebRequest.Create(url);
        request.Timeout = 20000; // 20-second timeout

        // Dispose the response, stream and reader so the connection is released
        // even when reading fails.
        using (WebResponse response = request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding(chargest)))
        {
            return reader.ReadToEnd();
        }
    }
}
Usage: for example, to save the collected images to the upload folder:
The code is as follows:
// The mapped folder path is used as the prefix of every saved file name,
// so it is written with a trailing slash.
string path = Server.MapPath("~/upload/");
// The page address must be a quoted string literal.
Response.Write(Caiji.Caijibyurl("http://www.jb51.net", "Utf-8", path));