Use Apache POI to convert Word documents to HTML. Both .doc and .docx files are supported, and the conversion preserves images and styles.
Required Maven dependencies:
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.14</version> </dependency>
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.14</version> </dependency>
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.14</version> </dependency>
<dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>xdocreport</artifactId> <version>1.0.6</version> </dependency>
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.14</version> </dependency>
<dependency> <groupId>org.apache.poi</groupId> <artifactId>ooxml-schemas</artifactId> <version>1.3</version> </dependency>
Conversion code:
Import Org.apache.poi.hwpf.hwpfdocument;import Org.apache.poi.hwpf.converter.wordtohtmlconverter;import Org.apache.poi.hwpf.usermodel.picture;import Org.apache.poi.xwpf.converter.core.basicuriresolver;import Org.apache.poi.xwpf.converter.core.fileimageextractor;import Org.apache.poi.xwpf.converter.xhtml.XHTMLConverter ; Import Org.apache.poi.xwpf.converter.xhtml.xhtmloptions;import org.apache.poi.xwpf.usermodel.XWPFDocument; Import Org.w3c.dom.document;import Javax.xml.parsers.documentbuilderfactory;import Javax.xml.transform.OutputKeys ; Import Javax.xml.transform.transformer;import Javax.xml.transform.transformerfactory;import Javax.xml.transform.dom.domsource;import Javax.xml.transform.stream.streamresult;import Java.io.File;import Java.io.fileinputstream;import Java.io.fileoutputstream;import Java.io.outputstreamwriter;import java.util.List; public class Test {//doc converted to HTML void doctohtml () throws Exception {String sourceFileName = "C:\\doc\\test.d OC "; String TARGETFIlename = "c:\\html\\test.html"; String imagepathstr = "c:\\html\\image\\"; Hwpfdocument worddocument = new Hwpfdocument (new FileInputStream (sourceFileName)); Wordtohtmlconverter wordtohtmlconverter = new Wordtohtmlconverter (Documentbuilderfactory.newinstance (). Newdocumentbuilder (). NewDocument ()); Sets the relative path of the picture Wordtohtmlconverter.setpicturesmanager ((A, B, Suggestedname, D, E), "image" + File.separator + sug Gestedname); Wordtohtmlconverter.processdocument (worddocument); List< Picture> pics = Worddocument.getpicturestable (). 
Getallpictures (); for (picture pic:pics) {//Generate image Pic.writeimagecontent (new FileOutputStream (Imagepathstr + Pic.sugg Estfullfilename ())); } Document HTMLDocument = Wordtohtmlconverter.getdocument (); Domsource Domsource = new Domsource (htmldocument); Streamresult Streamresult = new Streamresult (new File (TargetFileName)); Transformerfactory tf = Transformerfactory.newinstance (); Transformer serializer = Tf.newtransformer (); Serializer.setoutputproperty (outputkeys.encoding, "utf-8"); Serializer.setoutputproperty (outputkeys.indent, "yes"); Serializer.setoutputproperty (Outputkeys.method, "html"); Serializer.transform (Domsource, Streamresult); }//docx converted to HTML public void docxtohtml () throws Exception {String sourceFileName = "C:\\doc\\test.doc"; String TargetFileName = "c:\\html\\test.html"; String imagepathstr = "c:\\html\\image\\ "; FileOutputStream fileoutputstream = null; OutputStreamWriter outputstreamwriter = null; try {xwpfdocument document = new Xwpfdocument (new FileInputStream (sourceFileName)); Xhtmloptions options = Xhtmloptions.create (); The folder Options.setextractor (new Fileimageextractor (IMAGEPATHSTR)) where the picture is stored; The path to the picture in the HTML options. Uriresolver (New Basicuriresolver ("image")); FileOutputStream = new FileOutputStream (targetfilename); OutputStreamWriter = new OutputStreamWriter (FileOutputStream, "utf-8"); Xhtmlconverter Xhtmlconverter = (xhtmlconverter) xhtmlconverter.getinstance (); Xhtmlconverter.convert (document, OutputStreamWriter, options); } finally {if (outputstreamwriter! = null) {outputstreamwriter.close (); } if (FileOutputStream! = null) {fileoutputstream.close (); } } }}
The full source code is available at http://git.oschina.net/xiaoyun_studio/xiaoyun-studio, path: src/main/java/studio/xiaoyun/common/tool/Poitool.java
Use POI to convert Word to HTML