1 /**2 * Remove HTML tags from text3 *4 * @paraminputstring5 * @return6 */7 Public Staticstring Html2text (String inputstring) {8 if(Stringutils.isempty (inputstring)) {9 return NULL;Ten } OneString Htmlstr =inputstring; AString textstr = ""; - Java.util.regex.Pattern P_script; - Java.util.regex.Matcher M_script; the Java.util.regex.Pattern P_style; - Java.util.regex.Matcher M_style; - Java.util.regex.Pattern p_html; - Java.util.regex.Matcher m_html; + - Java.util.regex.Pattern p_html1; + Java.util.regex.Matcher m_html1; A at Try { -String regex_script = "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>";//define a regular expression for script {or <script[^>]*?>[\\s\\S]*?<\\/script> - // } -String Regex_style = "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>";//a regular expression that defines a style {or <style[^>]*?>[\\s\\S]*?<\\/style> - // } -String regex_html = "<[^>]+>";//Regular expressions that define HTML tags inString REGEX_HTML1 = "<[^>]+"; -P_script =pattern.compile (Regex_script, to pattern.case_insensitive); +M_script =P_script.matcher (HTMLSTR); -Htmlstr = M_script.replaceall ("");//Filter Script Tags the *P_style =Pattern $ . Compile (Regex_style, pattern.case_insensitive);Panax NotoginsengM_style =P_style.matcher (HTMLSTR); -Htmlstr = M_style.replaceall ("");//Filter Style Labels the +p_html =pattern.compile (regex_html, pattern.case_insensitive); Am_html =P_html.matcher (HTMLSTR); theHtmlstr = M_html.replaceall ("");//Filter HTML Tags + -P_HTML1 =Pattern $ . Compile (REGEX_HTML1, pattern.case_insensitive); $M_HTML1 =P_html1.matcher (HTMLSTR); -Htmlstr = M_html1.replaceall ("");//Filter HTML Tags - theTextstr =Htmlstr; - Wuyi //Replacement &nbsp; theTextstr = Textstr.replaceall ("&", ""). ReplaceAll ("nbsp;", ""); - Wu}Catch(Exception e) { -System.err.println ("Html2text:" +e.getmessage ()); About } $ - returnTEXTSTR;//returns a text string -}
/** * Remove HTML tags in text * * @param inputstring * @return * */public static string Html2text (String InputS Tring) {if (Stringutils.isempty (inputstring)) {return null; } String htmlstr = inputstring; String textstr = ""; Java.util.regex.Pattern P_script; Java.util.regex.Matcher M_script; Java.util.regex.Pattern P_style; Java.util.regex.Matcher M_style; Java.util.regex.Pattern p_html; Java.util.regex.Matcher m_html;
Java.util.regex.Pattern P_HTML1; Java.util.regex.Matcher M_HTML1;
Try { String regex_script = "<[\\S]*?SCRIPT[^>] *?>[\\s\\s]*?<[\\s]*?\\/[\\s]*?script[\\s]*?> "; Define a regular expression for script {or <script[^>]*?>[\\s\\S]*?<\\/script> //} String Regex_style = "<[\\s]*?style[^>]*?>[\\s\\s]*?<[\\s]*?\\/[\ \s]*?style[\\s]*?> "; Regular expressions that define a style {or <style[^>]*?>[\\s\\S]*?<\\/style> //} String regex_html = "<[^>]+>"; Regular expressions for defining HTML tags String REGEX_HTML1 = "<[^>]+"; P_script = Pattern.compile (regex_script, &NBS P pattern.case_insensitive); M_script = P_script.matcher (htmlstr); & nbSp HTMLSTR = M_script.replaceall (""); Filter script Tags
P_style = Pattern. Compile (Regex_style, pattern.case_insensitive); M_style = P_style.matcher (HTMLSTR); Htmlstr = M_style.replaceall (""); Filter style Labels
p_html = Pattern.compile (regex_html, pattern.case_insensitive); m_html = P_html.matcher (HTMLSTR); Htmlstr = M_html.replaceall (""); Filter HTML Tags
P_HTML1 = Pattern. Compile (REGEX_HTML1, pattern.case_insensitive); M_HTML1 = P_html1.matcher (HTMLSTR); Htmlstr = M_html1.replaceall (""); Filter HTML Tags
Textstr = Htmlstr;
Replacement &nbsp; Textstr = Textstr.replaceall ("&", ""). ReplaceAll ("nbsp;", "");
} catch (Exception e) {System.err.println ("Html2text:" + e.getmessage ()); }
Return textstr;//returns a text string}
Using regular expressions in Java to filter tags out of HTML