Transferred from: http://java.freesion.com/article/48772295755/
I am developing a system that needs this: a utility that generates a plain-text summary of a specified length (number of characters) from HTML content.
package com.chendaojun.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * Builds a plain-text summary from (possibly incomplete) HTML.
 *
 * <p>The HTML can come from a string, from a file ({@link #getHtml(String)}) or from a
 * MySQL table ({@link #getHtmlFromMysql()}). Summaries are produced by
 * {@link #parseHtml(String)} and {@link #parseHtml(String, int)}.
 */
public class ParseHtml {

    private static final Logger LOG = Logger.getLogger(ParseHtml.class.getName());

    /**
     * Matches one complete tag, from {@code <} up to the nearest {@code >}.
     * A negated character class is used instead of {@code .*?} so that tags
     * spanning several lines are matched as well.
     */
    private static final Pattern COMPLETE_TAG = Pattern.compile("<[^>]*>");

    /** Non-breaking-space entity that online editors tend to insert; removed from summaries. */
    private static final String NBSP_ENTITY = "&nbsp;";

    /** Suffix appended to every summary. */
    private static final String ELLIPSIS = "...";

    /**
     * Small manual experiment; enable one of the commented alternatives to try another source.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ParseHtml ph = new ParseHtml();
        String html = "";

        // Read the article from MySQL and summarise it:
        // html = ph.getHtmlFromMysql();
        // System.out.println(ph.parseHtml(html));
        // System.out.println(ph.parseHtml(html, 300));

        // Read the article from a file (adjust the path) and summarise it:
        // html = ph.getHtml("e:\\1478300.html");
        // System.out.println(ph.parseHtml(html));
        // System.out.println(ph.parseHtml(html, 300));

        // Summarise a literal with an explicit length:
        // html = ph.parseHtml("<p>sdfsdf</p><br><div>sdfsdfsdf</div>", 10);
        // System.out.println(html);

        // Summarise a literal directly; note the unterminated tag at the end.
        html = ph.parseHtml("<P>SDFSDF</P><BR><DIV>SDFSDFSDF</DIV>SDFLKSDFLKSDJFK<DKF");
        System.out.println(html);
    }

    /**
     * Fetches one HTML article (as saved by an online editor) from the local MySQL
     * {@code blog} database.
     *
     * <p>Connection settings and the query are hard-coded. Failures are logged and
     * result in an empty string rather than an exception.
     *
     * @return the value of the {@code text} column of the last row returned by the
     *         query, or an empty string if nothing could be read
     */
    public String getHtmlFromMysql() {
        String url = "jdbc:mysql://localhost:3306/blog";
        String userName = "root";
        String passWord = "root";
        String className = "com.mysql.jdbc.Driver";
        String sql = "select text from blog where id=5";
        String html = "";

        try {
            // Explicit driver registration, kept for drivers that do not self-register.
            Class.forName(className);
            // Resources are closed in reverse order even when the query fails.
            try (Connection conn = DriverManager.getConnection(url, userName, passWord);
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery(sql)) {
                while (rs.next()) {
                    String text = rs.getString("text");
                    // A SQL NULL must not replace the result with a null reference.
                    if (text != null) {
                        html = text;
                    }
                }
            }
        } catch (ClassNotFoundException | SQLException e) {
            LOG.log(Level.WARNING, "Could not load HTML from MySQL", e);
        }
        return html;
    }

    /**
     * Reads an HTML file into a single string.
     *
     * <p>Lines are concatenated without their line terminators. The file is decoded
     * with the platform default charset. Failures are logged; whatever was read up to
     * that point (possibly nothing) is returned.
     *
     * @param filePath path of the file to read
     * @return the file content with line terminators removed, or an empty string if
     *         the path is {@code null} or the file cannot be read
     */
    public String getHtml(String filePath) {
        StringBuilder html = new StringBuilder();
        if (filePath == null) {
            LOG.log(Level.WARNING, "Could not read HTML file: path is null");
            return html.toString();
        }

        try (FileInputStream fis = new FileInputStream(new File(filePath));
             InputStreamReader isr = new InputStreamReader(fis);
             BufferedReader br = new BufferedReader(isr)) {
            String line;
            while ((line = br.readLine()) != null) {
                html.append(line);
            }
        } catch (IOException e) {
            LOG.log(Level.WARNING, "Could not read HTML file " + filePath, e);
        }
        return html.toString();
    }

    /**
     * Strips the markup from arbitrary HTML, which may be incomplete, and appends
     * {@code "..."}.
     *
     * <p>Three steps are applied in order:
     * <ol>
     *   <li>every complete tag ({@code <} up to the nearest {@code >}) is removed;</li>
     *   <li>every {@code &nbsp;} entity is removed;</li>
     *   <li>if a {@code <} is still present it starts a tag that was cut off, so it
     *       and everything after it are dropped.</li>
     * </ol>
     *
     * @param html the HTML to summarise; must not be {@code null}
     * @return the remaining text followed by {@code "..."}
     */
    public String parseHtml(String html) {
        String text = COMPLETE_TAG.matcher(html).replaceAll("").replace(NBSP_ENTITY, "");

        // After step 1 no '<' can be followed by a '>', so the first remaining '<'
        // marks the beginning of a truncated tag that runs to the end of the text.
        int brokenTagStart = text.indexOf('<');
        if (brokenTagStart >= 0) {
            text = text.substring(0, brokenTagStart);
        }
        return text + ELLIPSIS;
    }

    /**
     * Summarises only the first {@code length} characters of the HTML source.
     *
     * <p>The cut is made on the raw HTML before tags are stripped, so the visible
     * text of the summary is usually shorter than {@code length}.
     *
     * @param html   the HTML to summarise; must not be {@code null}
     * @param length number of leading source characters to keep
     * @return the summary of the truncated source, or a fixed explanatory message when
     *         {@code length} is greater than the length of {@code html}
     */
    public String parseHtml(String html, int length) {
        if (html.length() < length) {
            return "The length of the interception is longer than the total file content";
        }
        return parseHtml(html.substring(0, length));
    }
}
Generating a summary from HTML in Java