/// <summary>
/// Converts an HTML fragment to plain text using regular-expression heuristics.
/// </summary>
/// <remarks>
/// This is not an HTML parser: anything between a '&lt;' and the next '&gt;' is
/// treated as a tag and removed. The steps are, in order:
/// whitespace flattening, removal of head/script/style elements, structural tags
/// mapped to tabs and line breaks, removal of remaining tags, entity decoding,
/// and whitespace collapsing.
/// </remarks>
/// <param name="source">HTML markup to convert. Null or empty yields an empty string.</param>
/// <returns>
/// The text content. Table cells are separated by a tab, br/li tags become a single
/// carriage return, and tr/p tags (or any run of two or more breaks) become one CR LF pair.
/// </returns>
private static string HtmlToPlainText(string source)
{
    if (string.IsNullOrEmpty(source))
    {
        return string.Empty;
    }

    // CultureInvariant keeps tag matching stable under cultures with special casing
    // rules (e.g. Turkish dotless i); Singleline lets '.' span any character.
    const RegexOptions options =
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Singleline;

    // Source line breaks and tabs carry no meaning in HTML; flatten them to spaces so
    // that every '\r' and '\t' present later was produced by a tag below.
    string result = source.Replace("\r", " ").Replace("\n", " ").Replace("\t", " ");

    // Drop elements whose content is never visible text. The match is lazy so that text
    // between two separate <script> (or <style>) elements is preserved.
    foreach (string tag in new[] { "head", "script", "style" })
    {
        result = Regex.Replace(
            result,
            @"<\s*" + tag + @"\b[^>]*>.*?<\s*/\s*" + tag + @"\s*>",
            string.Empty,
            options);
    }

    // Table cells are separated by tabs. "\b" stops "td" from matching longer tag names.
    result = Regex.Replace(result, @"<\s*td\b[^>]*>", "\t", options);

    // <br> and <li> start a new line; "[^>]*" also accepts "<br />" and attributes.
    result = Regex.Replace(result, @"<\s*br\b[^>]*>", "\r", options);
    result = Regex.Replace(result, @"<\s*li\b[^>]*>", "\r", options);

    // <tr> and <p> start a new paragraph (double break, collapsed to CR LF below).
    result = Regex.Replace(result, @"<\s*tr\b[^>]*>", "\r\r", options);
    result = Regex.Replace(result, @"<\s*p\b[^>]*>", "\r\r", options);

    // Remove every remaining tag. This must happen before entity decoding so that a
    // decoded "&lt;" is never mistaken for the start of a tag.
    result = Regex.Replace(result, @"<[^>]*>", string.Empty, options);

    // Decode entities in a single pass so replaced text is never rescanned
    // ("&amp;lt;" becomes the literal text "&lt;", not "<"). The pattern only accepts
    // entity-shaped tokens of 2-6 characters, so ordinary text such as "Q&A ok;" is kept.
    result = Regex.Replace(
        result,
        @"&(#[a-z0-9]{1,5}|[a-z][a-z0-9]{1,5});",
        new MatchEvaluator(DecodeHtmlEntity),
        options);

    // Collapse runs of spaces (including those produced by &nbsp;) to a single space.
    result = Regex.Replace(result, " {2,}", " ");

    // Collapse two or more breaks, optionally separated by spaces, into one CR LF.
    // NOTE(review): a single break from <br>/<li> is intentionally left as a bare "\r",
    // matching the original output format; confirm callers expect this mix.
    result = Regex.Replace(result, "\r(?: *\r)+", "\r\n");

    return result;
}

/// <summary>
/// Maps one matched entity to its replacement text: the supported entities
/// (amp, nbsp, lt, gt, #39) are decoded, every other entity is removed.
/// </summary>
/// <param name="match">A match whose first group is the entity name without '&amp;' and ';'.</param>
/// <returns>The decoded character, or an empty string for unsupported entities.</returns>
private static string DecodeHtmlEntity(Match match)
{
    switch (match.Groups[1].Value.ToLowerInvariant())
    {
        case "amp":
            return "&";
        case "nbsp":
            return " ";
        case "lt":
            return "<";
        case "gt":
            return ">";
        case "#39":
            return "'";
        default:
            return string.Empty;
    }
}
// Converts HTML to plain text, including support for the apostrophe entity (&#39;).