Package cn.anzhuoyue.jfinalBlog.util; Import Org.jsoup.jsoup;import Org.jsoup.nodes.document;import Org.jsoup.nodes.element;import Org.jsoup.safety.whitelist;import Com.jfinal.kit.StringKit; public class Htmlutil {//only plain text can be getText by public static string (string html) {if (html = = null) return null; return Jsoup.clean (HTML, Whitelist.none ()); }//The following tags can be passed through//b, EM, I, strong, U. Plain text public static string getsimplehtml (string html) {if (HTML = = NULL) return null; return Jsoup.clean (HTML, Whitelist.simpletext ()); }//The following tags can be obtained via//a, B, blockquote, BR, cite, code, DD, DL, DT, EM, I, Li, Ol, p, pre, q, small, strike, strong, Sub, SUP, u, ul public static string getbasichtml (string html) {if (html = = null) return null; return Jsoup.clean (HTML, Whitelist.basic ()); }//Add a picture on basic base by public static string Getbasichtmlandimage (string html) {if (HTML = = NULL) return null; return Jsoup.clean (HTML, whitelist.basicwithimages ()); }//The following tags can be obtained via//a, B, Blockquote, BR, caption, cite, code, col, Colgroup, DD, DL, DT, EM, H1, H2, H3, H4, H5, H6, I, IMG, Li, OL, p, pre, q, small, strike, strong, sub, sup, table, TBODY, TD, TFOOT, TH, THEAD, tr, u, ul public static S Tring getfullhtml (String html) {if (html = = null) return null; return Jsoup.clean (HTML, whitelist.relaxed ()); }//Allow only the specified HTML tag public static string Cleartags (string html, String ... 
tags) {Whitelist wl = new Whitel IST (); return Jsoup.clean (HTML, Wl.addtags (tags)); }//keyword plus color public static string Markkeywods (string keywords, string target) {if (Stringkit.notblank (keywords)) {string[] arr = Keywords.split (""); for (string S:arr) {if (Stringkit.notblank (s)) {String temp = ' <span class=\ ' high Light\ ">" + S +"</span>"; if (temp!=null) target = Target.replaceall (s, temp); }}} return target; }//Gets the IMG URL in the article public static String GETIMGSRC (string html) {if (html = = null) return null; Document doc = jsoup.parsebodyfragment (HTML); Element image = Doc.select ("img"). First (); return image = = null? Null:image.attr ("src"); }}
// Utility class written with jsoup.jar (Java: extracting text from HTML)