This approach targets the Office 2003 binary formats (.doc and .xls). If a Word document contains pictures, they can be parsed (extracted), but they are not displayed in the generated HTML file. Parsing images in Excel workbooks is not supported either.
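The required JAR files are as follows (the code below depends on Apache POI, including its scratchpad module, which provides the HWPF and HTML-converter classes):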
1. PoiUtil.java
package com.wzh.poi;

import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

/**
 * Converts Office 2003 binary documents (.xls / .doc) to HTML files with
 * Apache POI.
 *
 * <p>Created 2015-3-16 17:22:05.
 *
 * @author y
 */
public class PoiUtil {

    /** Encoding used both for serializing the HTML and for writing the output file. */
    private static final String ENCODING = "UTF-8";

    /**
     * Converts an Excel (.xls) workbook to an HTML file.
     *
     * @param fileName   path of the workbook to read
     * @param outputFile path of the HTML file to write
     * @throws FileNotFoundException             if {@code fileName} cannot be opened
     * @throws IOException                       if the workbook cannot be read
     * @throws ParserConfigurationException      if no DOM document builder can be created
     * @throws TransformerConfigurationException if no transformer can be created
     * @throws TransformerException              if the HTML document cannot be serialized
     */
    public static void excelToHtml(String fileName, String outputFile)
            throws FileNotFoundException, IOException, ParserConfigurationException,
            TransformerConfigurationException, TransformerException {
        HSSFWorkbook excelBook;
        // Close the input stream as soon as the workbook has been loaded.
        try (InputStream is = new FileInputStream(fileName)) {
            excelBook = new HSSFWorkbook(is);
        }

        ExcelToHtmlConverter converter = new ExcelToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        converter.setOutputColumnHeaders(false);
        converter.setOutputRowNumbers(false);
        converter.processWorkbook(excelBook);

        String htmlStr = serializeToHtml(converter.getDocument());

        // NOTE(review): the literals of this replace chain were lost in the
        // garbled original (only five empty replace calls survived). They are
        // reconstructed here as the per-sheet headings for the default sheet
        // names -- confirm against the original source.
        htmlStr = htmlStr.replace("<h2>Sheet1</h2>", "")
                .replace("<h2>Sheet2</h2>", "")
                .replace("<h2>Sheet3</h2>", "")
                .replace("<h2>Sheet4</h2>", "")
                .replace("<h2>Sheet5</h2>", "");

        writeFile(htmlStr, outputFile);
    }

    /**
     * Converts a Word (.doc) document to an HTML file.
     *
     * <p>Every picture in the document is also written to a file named by
     * {@link Picture#suggestFullFileName()}; that name is relative, so it is
     * resolved against the current working directory rather than the
     * directory of {@code outputFile}.
     *
     * @param fileName   path of the Word document to read
     * @param outputFile path of the HTML file to write
     * @throws IOException                  if the document cannot be read or a picture
     *                                      cannot be written
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerException         if the HTML document cannot be serialized
     */
    public static void wordToHtml(String fileName, String outputFile)
            throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDoc;
        // Close the input stream as soon as the document has been loaded.
        try (InputStream is = new FileInputStream(fileName)) {
            wordDoc = new HWPFDocument(is);
        }

        WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        converter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] bytes, PictureType pt, String string,
                    float f, float f1) {
                // Reference each picture in the HTML by its suggested file name.
                return string;
            }
        });
        converter.processDocument(wordDoc);

        List<Picture> pics = wordDoc.getPicturesTable().getAllPictures();
        if (null != pics) {
            for (Picture pic : pics) {
                // One stream per picture, closed even if writing fails.
                try (OutputStream picOut = new FileOutputStream(pic.suggestFullFileName())) {
                    pic.writeImageContent(picOut);
                }
            }
        }

        writeFile(serializeToHtml(converter.getDocument()), outputFile);
    }

    /**
     * Writes {@code content} to the file at {@code path} using UTF-8.
     *
     * <p>Failures are logged at {@link Level#SEVERE} and not propagated to the caller.
     *
     * @param content text to write
     * @param path    path of the file to create or overwrite
     */
    public static void writeFile(String content, String path) {
        File file = new File(path);
        try (OutputStream fos = new FileOutputStream(file);
                BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, ENCODING))) {
            bw.write(content);
        } catch (IOException ex) {
            // FileNotFoundException and UnsupportedEncodingException are IOExceptions too.
            Logger.getLogger(PoiUtil.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Serializes a DOM document produced by one of the POI converters to an
     * indented HTML string.
     */
    private static String serializeToHtml(Document htmlDocument)
            throws TransformerException, IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
        // The spec-defined values for these properties are lower-case.
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        // Decode with the encoding the transformer wrote, not the platform default.
        return out.toString(ENCODING);
    }
}
2. Test.java
ImportCom.wzh.poi.PoiUtil;Importjava.io.IOException;ImportJava.util.logging.Level;ImportJava.util.logging.Logger;Importjavax.xml.parsers.ParserConfigurationException;Importjavax.xml.transform.TransformerException;/** To change the license header, choose License Headers in the Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. *//** * * @authory*/ Public classTest {/** * @paramargs the command line arguments*/ Public Static voidMain (string[] args) {Try{poiutil.exceltohtml ("T2.xls", "test.html"); } Catch(IOException ex) {Logger.getlogger (Test.class. GetName ()). log (Level.severe,NULL, ex); } Catch(Parserconfigurationexception ex) {Logger.getlogger (Test.class. GetName ()). log (Level.severe,NULL, ex); } Catch(Transformerexception ex) {Logger.getlogger (Test.class. GetName ()). log (Level.severe,NULL, ex); } } }
Use POI to convert Word and Excel documents to HTML