using System.Text; // StringBuilder
using System.Text.RegularExpressions; // regular expressions
// Demo: extract the visible tag names from a fragment of photo-page HTML and
// show them, one per line, in TextBox2. The raw fragment is shown in TextBox1.
//
// The fragment is assembled from one literal per line joined with explicit
// "\n": a regular C# string literal cannot span several source lines.
string strHtml =
    " </ul> <div id=\"photo-tags\">\n" +
    "<ul id='tags'>\n" +
    "<li> <a href='/user/669345/tags/Belgium'>Belgium </a> </li>\n" +
    "<li> <a href='/user/669345/tags/Belgien'>Belgien </a> </li>\n" +
    "<li> <a href='/user/669345/tags/Urlaub'>Urlaub </a> </li>\n" +
    "<li> <a href='/user/669345/tags/Holidays'>Holidays </a> </li>\n" +
    "<li> <a href='/user/669345/tags/Vakanties'>Vakanties </a> </li>\n" +
    "<li> <a href='/user/669345/tags/CenterParcs'>CenterParcs </a> </li>\n" +
    "<li> <a href='/user/669345/tags/10 000 000'>10 000 000 </a> </li>\n" +
    "</ul> </div> <div id=\"photo-info\">";

TextBox1.Text = strHtml;

// The lookbehind pins the match to just after the opening
// <li> <a href='/user/.../tags/...'> markup and the lookahead to just before
// " </a> </li>", so the match itself is only the link text.
Regex re = new Regex("(?<=<li> <a href='/user/[^>]*/tags/[^>]*>).*?(?= </a> </li>)");

// Scan once; an empty collection is the "no match" case.
MatchCollection tagMatches = re.Matches(strHtml);
if (tagMatches.Count > 0)
{
    StringBuilder output = new StringBuilder();
    foreach (Match tagMatch in tagMatches)
    {
        // The pattern has no capturing groups, so the whole match (group 0)
        // is the only value to emit: "<tag> " followed by a newline.
        output.Append(tagMatch.Value).Append(" \n");
    }

    // Appended (not assigned) so text already in TextBox2 is kept.
    TextBox2.Text += output.ToString();
}
else
{
    TextBox2.Text = "no";
}
// Result (contents of TextBox2 after the code above runs):
// Belgium
// Belgien
// Urlaub
// Holidays
// Vakanties
// CenterParcs
// 10 000 000
/// <summary>
/// Obtains the first image from the content of the article, for use as the
/// article's thumbnail.
/// </summary>
/// <param name="articlecontent">
/// HTML content of the article. May be null or empty.
/// </param>
/// <returns>
/// The lower-cased <c>src</c> value of the first <c>&lt;img&gt;</c> tag that
/// matches, or an empty string when the content is null, empty, or contains
/// no matching tag.
/// </returns>
public static string getimageurlfromarticle(string articlecontent)
{
    // Regex matching throws on a null input; treat "no content" as "no image".
    if (string.IsNullOrEmpty(articlecontent))
    {
        return "";
    }

    // The src value may be single-quoted, double-quoted, or unquoted; all
    // three alternatives capture into the same named group "src".
    // NOTE(review): the leading "<img[^>" and the parenthesis closing the
    // (?: ... ) group were missing from the pattern as found and have been
    // restored here - confirm against the original source of this snippet.
    const string imageTagPattern =
        @"<img[^>]+src=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>";

    // Only the first image is needed, so stop at the first match instead of
    // collecting every match in the document.
    Match firstImage = Regex.Match(articlecontent, imageTagPattern, RegexOptions.IgnoreCase);
    if (!firstImage.Success)
    {
        return "";
    }

    // Invariant lower-casing keeps the result independent of the current
    // culture (e.g. Turkish dotless-i rules).
    // NOTE(review): lower-casing can break URLs whose path is case-sensitive;
    // kept because callers may rely on the lower-cased form.
    return firstImage.Groups["src"].Value.ToLowerInvariant();
}