Filter HTML tags using Java Regular Expressions

Source: Internet
Author: User
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>
 * Title: HTML-related regular expression tool
 * </p>
 * <p>
 * Description: includes HTML tag filtering, HTML special-character escaping,
 * and replacement of a specific HTML tag.
 * </p>
 * <p>
 * Copyright: Copyright (c) 2006
 * </p>
 *
 * @author Hejian
 * @version 1.0
 * @createtime 2006-10-16
 */
public class HtmlRegexpUtil {

    /** Matches every tag that starts with "<" and ends with ">". */
    private static final String regxpForHtml = "<([^>]*)>";

    /** Matches an IMG tag (kept from the original source; not referenced in this class). */
    private static final String regxpForImgTag = "<\\s*img\\s+([^>]*)\\s*>";

    /** Matches the src attribute of an IMG tag (kept from the original source; not referenced in this class). */
    private static final String regxpForImaTagSrcAttrib = "src=\"([^\"]+)\"";

    /** Compiled once so that filterHtml does not recompile the pattern on every call. */
    private static final Pattern HTML_TAG_PATTERN = Pattern.compile(regxpForHtml);

    /**
     * Creates a utility instance; only the instance methods
     * {@link #replaceTag(String)} and {@link #hasSpecialChars(String)} need one.
     */
    public HtmlRegexpUtil() {
    }

    /**
     * Escapes the HTML special characters {@code <}, {@code >}, {@code "} and
     * {@code &} so that the text is displayed literally by a browser.
     *
     * @param input text to escape; may be null
     * @return the escaped text, or {@code input} itself (including null) when
     *         it contains none of the special characters
     */
    public String replaceTag(String input) {
        if (!hasSpecialChars(input)) {
            return input;
        }
        StringBuilder filtered = new StringBuilder(input.length());
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            switch (c) {
                case '<':
                    filtered.append("&lt;");
                    break;
                case '>':
                    filtered.append("&gt;");
                    break;
                case '"':
                    filtered.append("&quot;");
                    break;
                case '&':
                    filtered.append("&amp;");
                    break;
                default:
                    filtered.append(c);
            }
        }
        return filtered.toString();
    }

    /**
     * Tells whether the text contains any of {@code <}, {@code >}, {@code "}
     * or {@code &}.
     *
     * @param input text to inspect; may be null
     * @return true if at least one special character is present; false for
     *         null or empty input
     */
    public boolean hasSpecialChars(String input) {
        if (input == null) {
            return false;
        }
        for (int i = 0; i < input.length(); i++) {
            switch (input.charAt(i)) {
                case '>':
                case '<':
                case '"':
                case '&':
                    // The first hit decides the answer; no need to scan further.
                    return true;
                default:
                    break;
            }
        }
        return false;
    }

    /**
     * Removes every tag that starts with "<" and ends with ">".
     *
     * @param str text to filter; may be null
     * @return the text without tags, or null when {@code str} is null
     */
    public static String filterHtml(String str) {
        if (str == null) {
            return null;
        }
        return HTML_TAG_PATTERN.matcher(str).replaceAll("");
    }

    /**
     * Removes every opening occurrence of the given tag, with or without
     * attributes and including the self-closing form. Closing tags are left
     * in place, as in the original implementation.
     *
     * @param str text to filter; may be null
     * @param tag tag name to remove, matched literally; must not be null
     * @return the filtered text, or null when {@code str} is null
     */
    public static String fiterHtmlTag(String str, String tag) {
        if (str == null) {
            return null;
        }
        // The tag name is quoted so regex metacharacters in it are matched
        // literally. The attribute part is optional so that attribute-less
        // tags are filtered too, while a longer tag name that merely starts
        // with the requested one is not.
        String regxp = "<\\s*" + Pattern.quote(tag) + "(?:\\s+[^>]*)?\\s*/?>";
        return Pattern.compile(regxp).matcher(str).replaceAll("");
    }

    /**
     * Replaces each occurrence of the given tag with
     * {@code startTag + attributeValue + endTag}, where the attribute value
     * is taken from the named attribute of that tag. For example, an IMG tag
     * can be replaced by {@code [img]} + its src value + {@code [/img]}.
     * <p>
     * Only tags that carry at least one attribute are matched. A matched tag
     * that lacks the requested attribute is removed, as in the original
     * implementation.
     *
     * @param str       text to process; may be null
     * @param beforeTag name of the tag to replace, matched literally; must not be null
     * @param tagAttrib name of the attribute whose value is kept; must not be null
     * @param startTag  text emitted before the attribute value
     * @param endTag    text emitted after the attribute value
     * @return the processed text, or null when {@code str} is null
     */
    public static String replaceHtmlTag(String str, String beforeTag,
            String tagAttrib, String startTag, String endTag) {
        if (str == null) {
            return null;
        }
        Pattern patternForTag = Pattern.compile(
                "<\\s*" + Pattern.quote(beforeTag) + "\\s+([^>]*)\\s*>");
        Pattern patternForAttrib = Pattern.compile(
                Pattern.quote(tagAttrib) + "=\"([^\"]+)\"");

        Matcher matcherForTag = patternForTag.matcher(str);
        // StringBuffer is kept here because Matcher.appendReplacement only
        // accepts a StringBuilder from Java 9 onwards.
        StringBuffer sb = new StringBuffer();
        while (matcherForTag.find()) {
            Matcher matcherForAttrib = patternForAttrib.matcher(matcherForTag.group(1));
            String replacement = "";
            if (matcherForAttrib.find()) {
                // Build the replacement directly instead of through a nested
                // appendReplacement, which also copied the attributes that
                // precede the requested one into the output.
                replacement = startTag + matcherForAttrib.group(1) + endTag;
            }
            // quoteReplacement keeps '$' and '\' in the value or the new tags
            // from being interpreted as group references or escapes.
            matcherForTag.appendReplacement(sb, Matcher.quoteReplacement(replacement));
        }
        matcherForTag.appendTail(sb);
        return sb.toString();
    }
}

 

Reprinted: http://aguang520.iteye.com/blog/1056686

Related Article

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.