import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URL;

import org.w3c.tidy.Tidy;
Public ClassHtml2xml {
PrivateStringURL;
PrivateStringOutfilename;
PrivateStringErroutfilename;
PublicHtml2xml (string URL, string outfilename, string erroutfilename ){
This . URL = URL; /// provides an ingress URL
This . outfilename = outfilename; /// export a file
This.Erroutfilename= Erroutfilename;//Zookeeper File
}
Public static void main (string [] ARGs) {
/// metric data: HTML file utl , export the file and upload the file name
system. out . println ( " Program Start Operation ...... " );
html2xml T = New html2xml ( " http: // hswebuat01/Maximo/help.html " ,
"C: \ temp \ html2xml. xml","C: \ temp \ err.txt");
T. Convert ();
System.Out. Println ("End of program operation...... ");
}
Public VoidConvert (){
Url u;
Bufferedinputstream in;
Fileoutputstream out;
tidy = New tidy ();
tidy. setxmlout ( true ); /// notification tidy about HTML scaled to XML
Try{
Tidy. seterrout ( New Printwriter ( New Filewriter ( Erroutfilename ), True )); // Save zookeeper information to a file
U =NewURL (URL);
In = New bufferedinputstream (U. openstream (); /// Create a sequence to input and output streams
Out =NewFileoutputstream (Outfilename);
Tidy. parse (In, out );//Zookeeper File
In. Close ();
Out. Close ();
}Catch(Ioexception e ){
System.Out. Println (This. Tostring () + E. tostring ());
}
}
}
/Files/snowfun/html2xml.zip
/Files/snowfun/jtidy-r938.zip
Key points:
(1) Using BufferedInputStream
(2) Using the third-party Java library class org.w3c.tidy.Tidy (JTidy)
(3) Using URLs
Note:
The org.w3c.tidy package used in this program can be downloaded from http://sourceforge.net/projects/jtidy. Unpack the archive and add the Tidy library found in its build folder to the project's classpath.