// Collect the href value and the inner text of every anchor element in yourStr.
// (?is): ignore case, and let '.' match newlines so link text may span lines.
// Group 1 captures the optional opening quote; \1 then requires the same closing quote.
// (?:(?!</?a\b).)* consumes characters up to, but not across, the next anchor tag.
Regex reg = new Regex(@"(?is)<a[^>]*?href=(['""]?)(?<url>[^'""\s>]+)\1[^>]*>(?<text>(?:(?!</?a\b).)*)</a>");
MatchCollection mc = reg.Matches(yourStr);
foreach (Match m in mc)
{
    richTextBox2.Text += m.Groups["url"].Value + "\n";  // the href value
    richTextBox2.Text += m.Groups["text"].Value + "\n"; // the content between the opening and closing anchor tags
}
Method 2:
<pre class="brush:c-sharp;">Regex r;
Match m;
// Matches href="value" (quoted) or href=value (an unquoted run of non-whitespace).
// Both alternatives store the value in numbered group 1.
r = new Regex("href\\s*=\\s*(?:\"(?<1>[^\"]*)\"|(?<1>\\S+))",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);
// Walk the matches one at a time instead of materializing a MatchCollection.
for (m = r.Match(inputString); m.Success; m = m.NextMatch())
{
    Console.WriteLine("Found href " + m.Groups[1] + " at " + m.Groups[1].Index);
}
</PRE>
<pre class="brush:c-sharp;">// Extract the src value from every img tag in yourStr.
// Group 1 captures the optional opening quote; \1 requires the same closing quote.
Regex reg = new Regex(@"(?i)<img[^>]*?\ssrc\s*=\s*(['""]?)(?<src>[^'""\s>]+)\1[^>]*>");
MatchCollection mc = reg.Matches(yourStr);
foreach (Match m in mc)
{
    Console.Write(m.Groups["src"].Value + "\n");
}
</PRE>
Method 4:
Extract img src
<pre class= "BRUSH:C-SHARP;" >
/// <summary>
/// Gets the src path of every img tag found in an HTML string.
/// </summary>
/// <param name="htmltext">HTML string text</param>
/// <returns>The picture paths as an array, in document order; empty when no img tag is found</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="htmltext"/> is null.</exception>
public static string[] Gethtmlimageurllist(string htmltext)
{
    // Matches an img tag and captures its src value (quoted or unquoted) in the "imgUrl" group.
    Regex regImg = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);
    // One Match per img tag found in the input.
    MatchCollection matches = regImg.Matches(htmltext);
    int i = 0;
    string[] sUrlList = new string[matches.Count];
    // Iterate through all img tag matches.
    foreach (Match match in matches)
    {
        // Save the captured src path of this img tag into the array.
        sUrlList[i++] = match.Groups["imgUrl"].Value;
    }
    return sUrlList;
}</pre>
Several methods for extracting hyperlink addresses using regular expressions in C# (reposted)