Originally published: 2009-11-21 (from my Baidu blog; imported with a migration tool)
Please read this first: http://hi.baidu.com/handboy/blog/item/bfef61000a67ea16738b6565.html
// Named group example: (?<g1>...) captures the five letters after "no" ("thing"),
// and the back-reference \1 requires the same text to appear again after "some".
// A single named group is also group number 1, which is why \1 refers to g1.
string x = "Live for nothing,die for something";
Regex r = new Regex(@"^Live for no(?<g1>[a-z]{5}),die for some\1$");
if (r.IsMatch(x))
{
    // Look the captured text up by group name rather than by index.
    Console.WriteLine("Group1 value:" + r.Match(x).Groups["g1"].Value); // output: thing
}
Groups can also be indexed by name. Use the syntax (?<groupname>...) to give a group a name.
// Replacement example: the pattern finds a lowercase word that is immediately
// repeated ("nothing nothing") and replaces the pair with a single copy.
string x = "Live for nothing nothing";
Regex r = new Regex(@"([a-z]+) \1");
if (r.IsMatch(x))
{
    // In a replacement string, "$1" stands for the text captured by group 1.
    x = r.Replace(x, "$1");
    Console.WriteLine("var x:" + x); // output: Live for nothing
}
This removes the duplicated "nothing" from the original string. Outside the pattern (that is, in the replacement string), use "$1" to refer to the first group.
The example below refers to the group by name instead:
// Same de-duplication as a numbered-group replacement, but the group is named g1
// and the replacement string refers to it by name.
string x = "Live for nothing nothing";
Regex r = new Regex(@"(?<g1>[a-z]+) \1");
if (r.IsMatch(x))
{
    // "${g1}" in a replacement string stands for the text captured by group g1.
    x = r.Replace(x, "${g1}");
    Console.WriteLine("var x:" + x); // output: Live for nothing
}
// Non-capturing group example: (?:...) groups the sub-pattern without saving
// what it matched, so the pattern defines no group 1.
string x = "Live for nothing";
Regex r = new Regex(@"^Live for no(?:[a-z]{5})$");
if (r.IsMatch(x))
{
    // Asking for a group that does not exist yields an empty value, not an exception.
    Console.WriteLine("Group1 value:" + r.Match(x).Groups[1].Value); // output: (empty)
}
Adding "?:" at the start of a group marks it as a "non-capturing group": the engine does not save the text matched by that group.
========
Having some free time recently, I brushed up on regular expressions and then wrote this image grabber.
It works by analyzing the common structure of Sina Blog pages: regular expressions find the image URLs, and the images are then downloaded to a local folder automatically. Because it relies on pattern matching, it may stop working if Sina changes its blog page format one day, but the patterns can be adjusted to match again. This is just an example, O(∩_∩)O ha!
WinForm Download preview: http://www.xmaspx.com/Services/FileAttachment.ashx?AttachmentID=51
First:
In the site's root directory, create a folder named DownloadImages.
Front end (SinaImage.aspx):
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="SinaImage.aspx.cs" Inherits="SinaImage" %>
<%-- Image grabber page: TextBox1 holds the URL of a Sina blog article list,
     Button1 starts the download (handled by Button1_Click in the code-behind),
     and TextBox2 shows the image URLs that were processed. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title>Untitled Page</title>
</head>
<body>
    <form id="form1" runat="server">
    <div>
        <asp:TextBox ID="TextBox1" runat="server" Width="495px">http://blog.sina.com.cn/s/articlelist_1270540911_0_1.html</asp:TextBox>
        <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="button" OnClientClick="javascript:alert('Start the download, it may take a few minutes, do not close')" /><br />
        <asp:TextBox ID="TextBox2" runat="server" Height="296px" TextMode="MultiLine" Width="498px"></asp:TextBox></div>
    </form>
</body>
</html>
Code-behind (SinaImage.aspx.cs):
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.UI.WebControls;
/// <summary>
/// Code-behind for SinaImage.aspx. Reads a Sina blog article-list URL from TextBox1,
/// collects the blog-post links on that page, scans every post for image-like URLs
/// and downloads them into the site's DownloadImages folder.
/// </summary>
public partial class SinaImage : System.Web.UI.Page
{
    // Links to individual blog posts on the article-list page. The dots are escaped
    // so that they only match a literal '.'.
    private const string PostUrlPattern = @"http://blog\.sina\.com\.cn/s/blog_\w*\.html";

    // Any absolute http URL found inside a post page. The '-' is placed last in the
    // character class so it is unambiguously a literal hyphen.
    private const string AnyUrlPattern = @"http://([\w-]+\.)+[\w-]+(/[\w./?%&=-]*)?";

    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Click handler for Button1: crawls the article list, downloads every URL that
    /// looks like a user image, lists the processed URLs in TextBox2 and finally
    /// shows the number of successful downloads in a client-side alert.
    /// </summary>
    /// <param name="sender">The control that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        int num = 0;
        StringBuilder log = new StringBuilder();
        TextBox2.Text = "";

        // NOTE(review): the start URL comes straight from user input and is fetched
        // by the server; restrict the allowed hosts if this page is ever exposed publicly.
        ArrayList arrUrl = GetUrl(this.TextBox1.Text, PostUrlPattern);
        for (int i = 0; i < arrUrl.Count; i++)
        {
            string imgPage = arrUrl[i].ToString();
            ArrayList arrImgUrl = GetUrl(imgPage, AnyUrlPattern);
            for (int j = 0; j < arrImgUrl.Count; j++)
            {
                string imgUrl = arrImgUrl[j].ToString();
                if (!IsUserImageUrl(imgUrl))
                {
                    continue;
                }

                log.Append(imgUrl).Append("\n");
                try
                {
                    // Include the post index as well as the URL index so that two posts
                    // processed within the same second cannot overwrite each other's files.
                    DownloadImage(imgUrl, i.ToString() + "_" + j.ToString());
                    num++;
                }
                catch (Exception ex)
                {
                    // Best effort: one broken image must not abort the whole batch,
                    // but the failure is recorded instead of being silently dropped.
                    log.Append("  download failed: ").Append(ex.Message).Append("\n");
                }
            }
        }

        TextBox2.Text = log.ToString();
        ClientScript.RegisterStartupScript(
            this.GetType(),
            "alert",
            "alert('Downloaded " + num.ToString() + " image(s). Open the DownloadImages folder and browse it in thumbnail view.')",
            true);
    }

    /// <summary>
    /// Downloads a single resource into the DownloadImages folder. The file is named
    /// with the current timestamp followed by <paramref name="fileName"/> and ".jpg".
    /// </summary>
    /// <param name="fromUrl">Absolute URL of the image to download.</param>
    /// <param name="fileName">Suffix that makes the file name unique within one run.</param>
    /// <exception cref="WebException">The download failed.</exception>
    protected void DownloadImage(string fromUrl, string fileName)
    {
        string folder = Server.MapPath("DownloadImages/");
        // Create the target folder on first use instead of failing when it is missing.
        Directory.CreateDirectory(folder);

        // MM = month and HH = 24-hour clock; "mm" would be minutes and "hh" the 12-hour clock.
        string savePath = Path.Combine(folder, DateTime.Now.ToString("yyyyMMddHHmmss") + fileName + ".jpg");
        using (WebClient client = new WebClient())
        {
            client.DownloadFile(fromUrl, savePath);
        }
    }

    /// <summary>
    /// Fetches a page (decoded as GB2312) and returns every distinct substring that
    /// matches the given regular expression, in order of first appearance.
    /// </summary>
    /// <param name="web_url">Absolute http URL of the page to fetch.</param>
    /// <param name="p">Regular expression pattern; matched case-insensitively.</param>
    /// <returns>An <see cref="ArrayList"/> of unique matched strings.</returns>
    /// <exception cref="WebException">The page could not be retrieved.</exception>
    protected ArrayList GetUrl(string web_url, string p)
    {
        string all_code;
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(web_url);
        // Dispose the response and the reader even when reading throws.
        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
        {
            all_code = reader.ReadToEnd();
        }

        ArrayList arrUrl = new ArrayList();
        // Tracks the matches already returned so that each URL is reported only once.
        HashSet<string> seen = new HashSet<string>();
        Regex re = new Regex(p, RegexOptions.IgnoreCase);
        foreach (Match match in re.Matches(all_code))
        {
            string name = match.Value;
            if (seen.Add(name))
            {
                arrUrl.Add(name);
            }
        }
        return arrUrl;
    }

    // True when the URL looks like a user-uploaded picture: it must not contain one of
    // the excluded markers ("simg", "sinaimg", ".js") and must contain an image-like keyword.
    private static bool IsUserImageUrl(string url)
    {
        if (url.Contains("simg") || url.Contains("sinaimg") || url.Contains(".js"))
        {
            return false;
        }
        return url.Contains("photo") || url.Contains("image") || url.Contains("img");
    }
}
Image Grabber Web + WinForm