/**
 * Utility for reading HTML text while discarding {@code <!-- ... -->} comments.
 *
 * <p>A comment may open and close on the same line, occur several times on one line, or span
 * any number of lines. HTML comments do not nest, so a start tag found inside an open comment
 * is treated as ordinary comment text.
 *
 * @author Boyce
 * @version 2013-12-3
 */
public class Htmlcommenthandler {

    /** Token that opens an HTML comment. */
    private static final String COMMENT_START = "<!--";

    /** Token that closes an HTML comment. */
    private static final String COMMENT_END = "-->";

    /**
     * Reads all remaining lines from {@code reader} and returns their content with every HTML
     * comment removed.
     *
     * <p>Lines are concatenated without line separators. A comment that is still open when the
     * input ends swallows the rest of the input. An end tag that appears while no comment is
     * open is kept as literal text.
     *
     * @param reader source of the HTML text; must not be {@code null}. It is read to the end
     *     but not closed by this method.
     * @return the text outside of comments; empty if the input is empty or entirely commented
     * @throws java.io.IOException if reading from {@code reader} fails
     */
    public static String readhtmlcontentwithoutcomment(java.io.BufferedReader reader)
            throws java.io.IOException {
        StringBuilder builder = new StringBuilder();
        // Whether the previous line ended inside a still-open comment.
        boolean inComment = false;
        String line;
        while ((line = reader.readLine()) != null) {
            inComment = appendUncommented(line, inComment, builder);
        }
        return builder.toString();
    }

    /**
     * Appends the parts of {@code line} that lie outside comments to {@code out}.
     *
     * @param line one line of input, without its line terminator
     * @param inComment whether a comment opened on an earlier line is still open
     * @param out receives the uncommented text
     * @return whether a comment is still open at the end of {@code line}
     */
    private static boolean appendUncommented(String line, boolean inComment, StringBuilder out) {
        boolean open = inComment;
        int pos = 0;
        // Every iteration either returns or moves pos past a tag, so the scan always terminates.
        while (pos < line.length()) {
            if (open) {
                int end = line.indexOf(COMMENT_END, pos);
                if (end < 0) {
                    // The rest of the line belongs to the comment.
                    return true;
                }
                pos = end + COMMENT_END.length();
                open = false;
            } else {
                int start = line.indexOf(COMMENT_START, pos);
                if (start < 0) {
                    // No further comment on this line: keep the remainder.
                    out.append(line, pos, line.length());
                    return false;
                }
                out.append(line, pos, start);
                pos = start + COMMENT_START.length();
                open = true;
            }
        }
        return open;
    }
}
// There are, of course, other ways to remove the comments, such as matching them with a regular expression or tracking the start and end tags with a stack.
// The code above has been tested and used in practice; hopefully it is useful to anyone who needs it.