I found a class on the Internet that removes HTML tags from content.
import java.util.regex.Pattern;

/**
 * Regex-based helper that strips HTML markup from a string and returns the remaining text.
 *
 * <p>Processing happens in four passes, in this order:
 * <ol>
 *   <li>remove {@code <script>...</script>} elements together with their content,</li>
 *   <li>remove {@code <style>...</style>} elements together with their content,</li>
 *   <li>remove every remaining complete tag ({@code <...>}),</li>
 *   <li>remove a trailing, unterminated tag fragment ({@code <...} with no closing {@code >}).</li>
 * </ol>
 *
 * <p>Character entities such as {@code &amp;} are not decoded, and whitespace between tags is kept
 * as is.
 *
 * <p>NOTE(review): the pasted snippet declared {@code package com.foresee.jk}; re-add that
 * declaration as the first line if this file lives in that package. The class and method names
 * are kept exactly as the snippet spelled them.
 */
public class htmltext {

    /** A whole script element, tolerant of whitespace inside the opening and closing tags. */
    private static final Pattern SCRIPT_BLOCK = Pattern.compile(
            "<\\s*script[^>]*>[\\s\\S]*?<\\s*/\\s*script\\s*>", Pattern.CASE_INSENSITIVE);

    /** A whole style element, tolerant of whitespace inside the opening and closing tags. */
    private static final Pattern STYLE_BLOCK = Pattern.compile(
            "<\\s*style[^>]*>[\\s\\S]*?<\\s*/\\s*style\\s*>", Pattern.CASE_INSENSITIVE);

    /** Any complete tag: '<', one or more non-'>' characters, then '>'. */
    private static final Pattern COMPLETE_TAG = Pattern.compile("<[^>]+>");

    /** A tag that was opened but never closed; only meaningful after complete tags are gone. */
    private static final Pattern UNTERMINATED_TAG = Pattern.compile("<[^>]+");

    /**
     * Removes script blocks, style blocks and all HTML tags from the given markup.
     *
     * @param inputstring the HTML to clean; may be {@code null}
     * @return the input with markup removed, or an empty string when {@code inputstring} is
     *     {@code null}
     */
    public static String html2text(String inputstring) {
        if (inputstring == null) {
            // The original reached the same result by catching the NullPointerException.
            return "";
        }
        // Script and style go first so their bodies are dropped rather than left behind as text.
        String text = SCRIPT_BLOCK.matcher(inputstring).replaceAll("");
        text = STYLE_BLOCK.matcher(text).replaceAll("");
        text = COMPLETE_TAG.matcher(text).replaceAll("");
        // Whatever still starts with '<' has no closing '>' and is cut through to the end.
        text = UNTERMINATED_TAG.matcher(text).replaceAll("");
        return text;
    }

    /** Small demo: prints the text left after stripping the tags from a sample fragment. */
    public static void main(String[] args) {
        String strtext = "<p> <tr> test </tr> <A> vvv </a> </P>";
        System.out.println(html2text(strtext));
    }
}