C# development notes for a small tool (SaveClassic) -- Part 3: base classes (the HTML helper class HtmlOption)

Source: Internet
Author: User

 

While developing a small logging program that saves its log entries as HTML files, I found that file operations were unavoidable, so I wrote a helper class, HtmlOption, that gathers all of the HTML-related operations in one place.

 

using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

 

namespace Common
{
    /// <summary>
    /// HTML helpers used when saving web content as a local log file: reduce a page to
    /// plain text plus its images, download those images, and point the image tags at
    /// the local copies so the log can be read without a network connection.
    /// </summary>
    public class HtmlOption
    {
        // Temporary stand-ins for '<' and '>' around image tags, so that the images survive
        // the step in HtmlToString that deletes every remaining angle bracket.
        // NOTE(review): the published listing lost the exact placeholder text during
        // extraction. U+300A / U+300B are a reconstruction, chosen because the listing
        // replaces U+300A in the content with a quote just before calling HtmImage.
        private const string ImgPlaceholderOpen = "\u300A";
        private const string ImgPlaceholderClose = "\u300B";

        // Relative folder written into the rewritten src attributes (taken from the original code).
        private const string LocalPicFolder = "pic/";

        private static readonly Regex TagRegex = new Regex(@"<[^>]*>");

        private static readonly Regex ImgRegex =
            new Regex(@"(?i)<img\b[^>]*?src=(['""]?)(?<src>[^'""\s>]+)\1[^>]*>");

        private static readonly Regex ImgTagRegex =
            new Regex(@"<img\b[^>]*>", RegexOptions.IgnoreCase);

        private static readonly Regex SrcAttributeRegex =
            new Regex(@"\bsrc\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))",
                      RegexOptions.IgnoreCase);

        // Non-greedy, so text between two separate script blocks is preserved.
        private static readonly Regex ScriptRegex =
            new Regex(@"<script\b[^>]*>[\s\S]*?</script\s*>", RegexOptions.IgnoreCase);

        // Accepts <br>, <BR>, <br/> and <br />.
        private static readonly Regex LineBreakRegex =
            new Regex(@"<br\s*/?>", RegexOptions.IgnoreCase);

        private static readonly Regex EntityRegex =
            new Regex(@"&(?<name>#\d+|[a-z]+);", RegexOptions.IgnoreCase);

        private static readonly Dictionary<string, string> KnownEntities = BuildEntityTable();

        public HtmlOption()
        {
        }

        /// <summary>
        /// Removes every HTML tag from <paramref name="htmlstring"/> except image tags.
        /// Each image tag that carries a src attribute is reduced to
        /// <c>img src="..." /</c> wrapped in the placeholder brackets U+300A / U+300B;
        /// image tags without a src are removed like any other tag.
        /// </summary>
        /// <param name="htmlstring">HTML to filter; null or empty is returned unchanged.</param>
        /// <returns>The text with tags removed and images kept as placeholders.</returns>
        public static string HtmImage(string htmlstring)
        {
            if (string.IsNullOrEmpty(htmlstring))
            {
                return htmlstring;
            }

            return TagRegex.Replace(htmlstring, new MatchEvaluator(ReplaceTag));
        }

        /// <summary>
        /// Converts HTML into text in which only line breaks and images remain as markup,
        /// then downloads the images and points them at the local copies (see SavePic).
        /// Characters U+300A / U+300B present in the input are replaced by a double quote,
        /// and any angle bracket that is not part of a kept tag is deleted, including
        /// those produced by decoding &amp;lt; and &amp;gt;.
        /// </summary>
        /// <param name="Htmlstring">HTML to convert; null or empty is returned unchanged.</param>
        /// <param name="websiteUrl">Site address used to resolve relative image URLs.</param>
        /// <param name="PicPath">Local directory the images are saved to.</param>
        /// <returns>The filtered HTML string.</returns>
        public static string HtmlToString(string Htmlstring, string websiteUrl, string PicPath)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return Htmlstring;
            }

            Htmlstring = LineBreakRegex.Replace(Htmlstring, "\r\n");
            Htmlstring = ScriptRegex.Replace(Htmlstring, "");

            // Free the placeholder characters before HtmImage starts using them.
            Htmlstring = Htmlstring.Replace(ImgPlaceholderOpen, "\"");
            Htmlstring = Htmlstring.Replace(ImgPlaceholderClose, "\"");

            Htmlstring = HtmImage(Htmlstring);

            // Leftovers of comments that contained '>' and were therefore only partly stripped.
            Htmlstring = Htmlstring.Replace("-->", "");
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "");

            // One pass over all entities, so "&amp;lt;" becomes "&lt;" and is not decoded twice.
            Htmlstring = EntityRegex.Replace(Htmlstring, new MatchEvaluator(DecodeEntity));

            Htmlstring = Htmlstring.Replace("<", "");
            Htmlstring = Htmlstring.Replace(">", "");
            Htmlstring = Htmlstring.Replace("\r\n", "<br>");

            // Turn the image placeholders back into real tags.
            Htmlstring = Htmlstring.Replace(ImgPlaceholderOpen, "<");
            Htmlstring = Htmlstring.Replace(ImgPlaceholderClose, ">");

            return SavePic(Htmlstring, websiteUrl, PicPath);
        }

        /// <summary>
        /// Saves the remote images referenced by <paramref name="htmlstring"/> to the local
        /// device and rewrites each downloaded image's src to "pic/" plus the saved file name.
        /// Files are named from the current time plus the image's position in the document.
        /// Downloading is best-effort: an image that cannot be saved keeps its original src.
        /// </summary>
        /// <param name="htmlstring">HTML string to be modified; null or empty is returned unchanged.</param>
        /// <param name="websiteUrl">Main website address, used for src values that do not start with "http".</param>
        /// <param name="PicPath">Directory the images are saved to; created if it does not exist.</param>
        /// <returns>The processed HTML string.</returns>
        public static string SavePic(string htmlstring, string websiteUrl, string PicPath)
        {
            if (string.IsNullOrEmpty(htmlstring))
            {
                return htmlstring;
            }

            MatchCollection tags = ImgTagRegex.Matches(htmlstring);
            if (tags.Count == 0)
            {
                return htmlstring;
            }

            string stamp = DateTime.Now.ToString(
                "yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);

            StringBuilder output = new StringBuilder(htmlstring.Length);
            int cursor = 0;
            for (int i = 0; i < tags.Count; i++)
            {
                Match tag = tags[i];
                output.Append(htmlstring, cursor, tag.Index - cursor);
                // The index keeps names unique when several images are saved within one second.
                output.Append(LocalizeImageTag(tag.Value, websiteUrl, PicPath, stamp + "_" + i));
                cursor = tag.Index + tag.Length;
            }

            output.Append(htmlstring, cursor, htmlstring.Length - cursor);
            return output.ToString();
        }

        // Maps one matched tag to its replacement: a placeholder for images, nothing otherwise.
        private static string ReplaceTag(Match tag)
        {
            if (!tag.Value.StartsWith("<img", StringComparison.OrdinalIgnoreCase))
            {
                return string.Empty;
            }

            Match image = ImgRegex.Match(tag.Value);
            if (!image.Success)
            {
                return string.Empty;
            }

            return ImgPlaceholderOpen + "img src=\"" + image.Groups["src"].Value + "\" /"
                + ImgPlaceholderClose;
        }

        // Decodes one entity: known ones to their character, other numeric ones to nothing,
        // unknown named ones are left as they are.
        private static string DecodeEntity(Match entity)
        {
            string name = entity.Groups["name"].Value.ToLowerInvariant();

            string decoded;
            if (KnownEntities.TryGetValue(name, out decoded))
            {
                return decoded;
            }

            return name[0] == '#' ? string.Empty : entity.Value;
        }

        // Builds the entity lookup used by DecodeEntity (keys are lower-case, without '&' and ';').
        private static Dictionary<string, string> BuildEntityTable()
        {
            Dictionary<string, string> table = new Dictionary<string, string>();
            table["quot"] = "\"";
            table["#34"] = "\"";
            table["amp"] = "&";
            table["#38"] = "&";
            table["lt"] = "<";
            table["#60"] = "<";
            table["gt"] = ">";
            table["#62"] = ">";
            // NOTE(review): the published listing shows an empty replacement for nbsp; a plain
            // space is used here so that adjacent words are not merged - confirm.
            table["nbsp"] = " ";
            table["#160"] = " ";
            table["iexcl"] = "\u00A1";
            table["#161"] = "\u00A1";
            table["cent"] = "\u00A2";
            table["#162"] = "\u00A2";
            table["pound"] = "\u00A3";
            table["#163"] = "\u00A3";
            table["copy"] = "\u00A9";
            table["#169"] = "\u00A9";
            return table;
        }

        // Downloads the image of one <img> tag and returns the tag with its src rewritten;
        // returns the tag unchanged when it has no usable src or the download fails.
        private static string LocalizeImageTag(string tag, string websiteUrl, string picPath, string baseName)
        {
            Match src = SrcAttributeRegex.Match(tag);
            if (!src.Success)
            {
                return tag;
            }

            Group url = src.Groups["url"];
            string picurl = url.Value.Trim();
            if (picurl.Length == 0)
            {
                return tag;
            }

            string remote = ResolveImageUrl(picurl, websiteUrl);
            string picname = baseName + GetImageExtension(remote);

            try
            {
                Directory.CreateDirectory(picPath);
                using (WebClient client = new WebClient())
                {
                    client.DownloadFile(remote, Path.Combine(picPath, picname));
                }
            }
            catch (Exception ex)
            {
                // Deliberately best-effort: one unreachable image must not abort saving the
                // page. The failure is traced instead of being silently discarded.
                System.Diagnostics.Trace.TraceWarning(
                    "HtmlOption.SavePic: could not save '{0}': {1}", remote, ex.Message);
                return tag;
            }

            // Replace only the src value inside this tag, leaving the rest of the document alone.
            return tag.Substring(0, url.Index) + LocalPicFolder + picname
                + tag.Substring(url.Index + url.Length);
        }

        // Returns the address to download: the src itself when it starts with "http",
        // otherwise the src resolved against the website address.
        private static string ResolveImageUrl(string picurl, string websiteUrl)
        {
            if (picurl.StartsWith("http", StringComparison.OrdinalIgnoreCase))
            {
                return picurl;
            }

            Uri baseUri;
            Uri resolved;
            if (Uri.TryCreate(websiteUrl, UriKind.Absolute, out baseUri)
                && Uri.TryCreate(baseUri, picurl, out resolved))
            {
                return resolved.AbsoluteUri;
            }

            // websiteUrl is not a usable absolute URI: fall back to plain concatenation.
            return (websiteUrl ?? string.Empty).TrimEnd('/') + "/" + picurl.TrimStart('/');
        }

        // Returns the file extension of the URL's last path segment including the dot
        // (for example ".jpeg"), or an empty string when there is no plain extension.
        private static string GetImageExtension(string remoteUrl)
        {
            string path = remoteUrl;
            int cut = path.IndexOfAny(new char[] { '?', '#' });
            if (cut >= 0)
            {
                path = path.Substring(0, cut);
            }

            int dot = path.LastIndexOf('.');
            if (dot < 0 || dot < path.LastIndexOf('/') || dot == path.Length - 1)
            {
                return string.Empty;
            }

            string extension = path.Substring(dot);
            for (int i = 1; i < extension.Length; i++)
            {
                // Letters and digits only, so the result is always safe in a file name.
                if (!char.IsLetterOrDigit(extension[i]))
                {
                    return string.Empty;
                }
            }

            return extension;
        }
    }
}

 

This class contains essentially all of our HTML-processing code. It is mainly used to filter out HTML markup, save images to the local device, and rewrite the image paths in the HTML.

Author zhaoyang

Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.