While developing a small logging program that saves its log files as HTML, I inevitably had to manipulate that HTML. I therefore wrote a helper class, HtmlOption, that gathers all of the HTML operations in one place.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace Common
{
    /// <summary>
    /// HTML helpers for the log writer: reduce an HTML fragment to plain text that keeps
    /// only line breaks and images, and mirror the referenced images to a local folder.
    /// </summary>
    /// <remarks>
    /// The published listing this class was recovered from had its tag-like string literals
    /// stripped. The image placeholder tokens and the image-tag pattern below are therefore
    /// reconstructions; <see cref="HtmImage"/> and <see cref="HtmlToString"/> only need to
    /// agree with each other on them.
    /// </remarks>
    public class HtmlOption
    {
        // Stand-ins for "<" and "/>" around a kept image tag. Control characters are used so
        // the tokens survive the later "remove every < and >" pass and cannot be confused
        // with ordinary text.
        private const string ImageOpenToken = "\u0001";
        private const string ImageCloseToken = "\u0002";

        // Any tag-like span.
        private static readonly Regex AnyTag = new Regex(@"<[^>]*>");

        // An <img> tag with a (possibly unquoted) src attribute; the URL is captured as "src".
        // The \s before "src" keeps attributes such as data-src from matching.
        private static readonly Regex ImageTag = new Regex(
            @"<img\b[^>]*?\ssrc\s*=\s*(['""]?)(?<src>[^'""\s>]+)\1[^>]*>",
            RegexOptions.IgnoreCase);

        private static readonly Regex LineBreakTag = new Regex(@"<br\s*/?>", RegexOptions.IgnoreCase);

        // Lazy body match so that text between two separate script blocks is preserved.
        private static readonly Regex ScriptBlock = new Regex(
            @"<script\b[^>]*>[\s\S]*?</script\s*>",
            RegexOptions.IgnoreCase);

        private static readonly Regex CommentBlock = new Regex(@"<!--[\s\S]*?-->");

        private static readonly Regex Entity = new Regex(
            @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Creates an instance. All members are static; the constructor is kept only so that
        /// existing code that instantiates the class continues to compile.
        /// </summary>
        public HtmlOption()
        {
        }

        /// <summary>
        /// Removes every tag from <paramref name="htmlstring"/> except images, which are
        /// replaced by a placeholder of the form
        /// <c>{open}img src="URL" {close}</c> (open = U+0001, close = U+0002).
        /// </summary>
        /// <param name="htmlstring">HTML fragment to process; may be null or empty.</param>
        /// <returns>
        /// The text with tags removed and image placeholders inserted; an empty string when
        /// the input is null or empty. <see cref="HtmlToString"/> turns the placeholders back
        /// into real <c>&lt;img&gt;</c> tags.
        /// </returns>
        public static string HtmImage(string htmlstring)
        {
            if (string.IsNullOrEmpty(htmlstring))
            {
                return string.Empty;
            }

            // Drop any token characters already present so only this method can emit them.
            string cleaned = htmlstring.Replace(ImageOpenToken, string.Empty)
                                       .Replace(ImageCloseToken, string.Empty);

            return AnyTag.Replace(cleaned, new MatchEvaluator(ReplaceTag));
        }

        /// <summary>
        /// Converts HTML to text while keeping line breaks and images, then saves the images
        /// locally via <see cref="SavePic"/>.
        /// </summary>
        /// <param name="Htmlstring">HTML fragment to convert; may be null or empty.</param>
        /// <param name="websiteUrl">Site root used to resolve relative image URLs.</param>
        /// <param name="PicPath">Directory that downloaded images are written to.</param>
        /// <returns>
        /// Text in which the only markup left is <c>&lt;br&gt;</c> and
        /// <c>&lt;img src="..." /&gt;</c>; an empty string when the input is null or empty.
        /// </returns>
        public static string HtmlToString(string Htmlstring, string websiteUrl, string PicPath)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            string text = LineBreakTag.Replace(Htmlstring, "\r\n");
            text = ScriptBlock.Replace(text, string.Empty);

            // Remove whole comments before tag stripping; otherwise a '>' inside a comment
            // ends the "tag" early and the rest of the comment leaks into the output.
            text = CommentBlock.Replace(text, string.Empty);

            // Normalise typographic quotes. The second literal was garbled in the published
            // listing; both curly double quotes are handled here.
            text = text.Replace("\u300A", "\"");
            text = text.Replace("\u201C", "\"");
            text = text.Replace("\u201D", "\"");

            text = HtmImage(text);

            // Leftovers of unterminated or unopened comments.
            text = text.Replace("-->", string.Empty);
            text = Regex.Replace(text, @"<!--.*", string.Empty);

            // Single pass, so "&amp;lt;" becomes "&lt;" and is not decoded a second time.
            text = Entity.Replace(text, new MatchEvaluator(DecodeEntity));

            // Nothing tag-like may survive except what is added back below.
            text = text.Replace("<", string.Empty);
            text = text.Replace(">", string.Empty);

            text = text.Replace("\r\n", "<br>");
            text = text.Replace(ImageOpenToken, "<");
            text = text.Replace(ImageCloseToken, "/>");

            return SavePic(text, websiteUrl, PicPath);
        }

        /// <summary>
        /// Downloads every image referenced by an <c>&lt;img&gt;</c> tag and rewrites that
        /// tag's <c>src</c> to the local relative path <c>pic/{file}</c>.
        /// </summary>
        /// <param name="htmlstring">HTML string to be modified; returned unchanged when null or empty.</param>
        /// <param name="websiteUrl">Site root prefixed to image URLs that do not start with "http".</param>
        /// <param name="PicPath">Directory the image files are saved to.</param>
        /// <returns>
        /// The HTML with the <c>src</c> of each successfully downloaded image replaced. Images
        /// that cannot be downloaded keep their original URL (best effort; a trace warning is
        /// written instead of throwing).
        /// </returns>
        public static string SavePic(string htmlstring, string websiteUrl, string PicPath)
        {
            if (string.IsNullOrEmpty(htmlstring))
            {
                return htmlstring;
            }

            // URL -> local path (null when the download failed), so each image is fetched once.
            Dictionary<string, string> savedImages = new Dictionary<string, string>(StringComparer.Ordinal);
            string timestamp = DateTime.Now.ToString("yyyyMMddHHmmssfff", CultureInfo.InvariantCulture);

            return ImageTag.Replace(htmlstring, delegate(Match tag)
            {
                Group source = tag.Groups["src"];
                string imageUrl = source.Value;

                string localPath;
                if (!savedImages.TryGetValue(imageUrl, out localPath))
                {
                    // The running index keeps names unique within one call even though the
                    // timestamp is shared.
                    string fileName = timestamp
                        + "_" + savedImages.Count.ToString(CultureInfo.InvariantCulture)
                        + GetImageExtension(imageUrl);

                    localPath = TryDownload(ToAbsoluteUrl(websiteUrl, imageUrl), PicPath, fileName)
                        ? "pic/" + fileName
                        : null;
                    savedImages[imageUrl] = localPath;
                }

                if (localPath == null)
                {
                    return tag.Value;
                }

                // Replace only the src value inside this tag, not every occurrence of the
                // URL text in the document.
                int offset = source.Index - tag.Index;
                return tag.Value.Substring(0, offset)
                    + localPath
                    + tag.Value.Substring(offset + source.Length);
            });
        }

        // Maps one tag to its replacement: an image placeholder for <img>, nothing otherwise.
        private static string ReplaceTag(Match tag)
        {
            if (!tag.Value.StartsWith("<img", StringComparison.OrdinalIgnoreCase))
            {
                return string.Empty;
            }

            Match image = ImageTag.Match(tag.Value);
            if (!image.Success)
            {
                return string.Empty;
            }

            return ImageOpenToken + "img src=\"" + image.Groups["src"].Value + "\" " + ImageCloseToken;
        }

        // Decodes one supported character reference; other numeric references are dropped.
        private static string DecodeEntity(Match entity)
        {
            switch (entity.Groups[1].Value.ToLowerInvariant())
            {
                case "quot":
                case "#34":
                    return "\"";
                case "amp":
                case "#38":
                    return "&";
                case "lt":
                case "#60":
                    return "<";
                case "gt":
                case "#62":
                    return ">";
                case "nbsp":
                case "#160":
                    return " ";
                case "iexcl":
                case "#161":
                    return "\u00A1";
                case "cent":
                case "#162":
                    return "\u00A2";
                case "pound":
                case "#163":
                    return "\u00A3";
                case "copy":
                case "#169":
                    return "\u00A9";
                default:
                    return string.Empty;
            }
        }

        // Resolves an image URL against the site root unless it already starts with "http".
        private static string ToAbsoluteUrl(string websiteUrl, string imageUrl)
        {
            if (imageUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase))
            {
                return imageUrl;
            }

            return (websiteUrl ?? string.Empty).TrimEnd('/') + "/" + imageUrl.TrimStart('/');
        }

        // Returns the file extension of the URL's last path segment (including the dot), or
        // an empty string when there is none or it does not look like an extension.
        private static string GetImageExtension(string url)
        {
            int end = url.IndexOfAny(new char[] { '?', '#' });
            string path = end >= 0 ? url.Substring(0, end) : url;

            int slash = path.LastIndexOf('/');
            int dot = path.LastIndexOf('.');
            if (dot <= slash || dot == path.Length - 1)
            {
                return string.Empty;
            }

            string extension = path.Substring(dot);
            if (extension.Length > 5)
            {
                return string.Empty;
            }

            for (int i = 1; i < extension.Length; i++)
            {
                if (!char.IsLetterOrDigit(extension[i]))
                {
                    return string.Empty;
                }
            }

            return extension;
        }

        // Downloads url into directory/fileName; returns false instead of throwing on failure.
        private static bool TryDownload(string url, string directory, string fileName)
        {
            try
            {
                using (WebClient client = new WebClient())
                {
                    client.DownloadFile(url, Path.Combine(directory, fileName));
                }

                return true;
            }
            catch (Exception ex)
            {
                // Saving images is best effort: one bad URL or an unwritable folder must not
                // abort the whole conversion, but the failure should not vanish silently.
                Trace.TraceWarning("HtmlOption.SavePic: could not save '" + url + "': " + ex.Message);
                return false;
            }
        }
    }
}
This class contains essentially all of our HTML-processing code. It is mainly used to filter out HTML markup, save remote images to the local device, and rewrite the image paths in the HTML to point at the local copies.
Author: zhaoyang