Here are the two classes: the first converts HTML to Word, and the second converts Word to HTML. (A download link for the required jar packages is attached at the bottom.)
package com.wz.poi.wordHtml;
/**
* 2018/4/24
* @author Administrator
*
*/
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class Htmltoword {
/**
* External interface
* @param the path to the Htmlpath HTML file
* @param the path to the Csspath CSS file
* @param the path to the Wordpath Word file (save local path)
* @param code encoding Method (generally utf-8)
* @throws Exception
*/
public void HtmlToWord2 (string htmlpath, String Csspath, String Wordpath, String code) throws Exception {
/*inputstream Bodyis = new FileInputStream ("h:\\mytest\\java\\test_show\\test.html");
InputStream cssis = new FileInputStream ("H:\\mytest\\java\\test_show\\test.css"); */
InputStream Bodyis = new FileInputStream (Htmlpath);
InputStream cssis = new FileInputStream (Csspath);
String BODY = this.getcontent (Bodyis);
String CSS = this.getcontent (cssis);
To spell a standard HTML format document
String content = "InputStream is = new Bytearrayinputstream (content.getbytes (code));
OutputStream OS = new FileOutputStream (Wordpath);
This.inputstreamtoword (is, OS);
}
/**
* Writes is to the corresponding word output stream OS regardless of the catch of the exception, thrown directly
*
* @param is
* @param os
* @throws IOException
*/
private void Inputstreamtoword (InputStream is, OutputStream os) throws IOException {
Poifsfilesystem fs = new Poifsfilesystem ();
Corresponds to Org.apache.poi.hdf.extractor.WordDocument
Fs.createdocument (IS, "worddocument");
Fs.writefilesystem (OS);
Os.close ();
Is.close ();
Fs.close ();
}
/**
* The contents of the input stream are UTF-8 encoded when the text is taken out. Without considering exceptions, throw directly
*
* @param ises
* @return
* @throws IOException
*/
Private String getcontent (InputStream ... ises) throws IOException {
if (ises! = null) {
StringBuilder result = new StringBuilder ();
BufferedReader BR;
String Line;
for (InputStream is:ises) {
br = new BufferedReader (new InputStreamReader (IS, "UTF-8"));
while (line = Br.readline ()) = null) {
Result.append (line);
}
}
return result.tostring ();
}
return null;
}
}
package com.wz.poi.wordHtml;
/**
* 2018/4/24
* @author Administrator
*
*/
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
public class Wordtohtml {
/**
* Called template
* @param args
*/
public static void Main (string[] args) {
try {
Word2003tohtml ("h:\\mytest\\java\\", "Test", ". Doc");
} catch (IOException e) {
TODO auto-generated Catch block
E.printstacktrace ();
} catch (Transformerexception e) {
TODO auto-generated Catch block
E.printstacktrace ();
} catch (Parserconfigurationexception e) {
TODO auto-generated Catch block
E.printstacktrace ();
}
}
/**
* Convert word2003 to HTML file 2017-2-27
* @param wordpath Word file path
* @param wordname Word file name no suffix
* @param suffix Word file suffix
* @throws IOException
* @throws transformerexception
* @throws parserconfigurationexception
*/
public static string word2003tohtml (string wordpath,string wordname,string suffix) throws IOException, Transformerexception, Parserconfigurationexception {
String Htmlpath = wordpath + file.separator + wordname + "_show" + file.separator;
String HTMLName = Wordname + ". html";
Final String ImagePath = Htmlpath + "image" + file.separator;
Determine if an HTML file exists
File Htmlfile = new file (Htmlpath + htmlname);
if (htmlfile.exists ()) {
return Htmlfile.getabsolutepath ();
}
Original Word Document
Final String file = Wordpath + file.separator + wordname + suffix;
InputStream input = new FileInputStream (new file);
Hwpfdocument worddocument = new hwpfdocument (input);
Wordtohtmlconverter wordtohtmlconverter = new Wordtohtmlconverter (Documentbuilderfactory.newinstance (). Newdocumentbuilder (). NewDocument ());
Set the location where the picture is stored
Wordtohtmlconverter.setpicturesmanager (New Picturesmanager () {
Public String savepicture (byte[] content, PictureType PictureType, String suggestedname, float widthinches, float Heighti Nches) {
File Imgpath = new file (ImagePath);
if (!imgpath.exists ()) {//Picture directory does not exist then create
Imgpath.mkdirs ();
}
File File = new file (ImagePath + suggestedname);
try {
OutputStream OS = new FileOutputStream (file);
Os.write (content);
Os.close ();
} catch (FileNotFoundException e) {
E.printstacktrace ();
} catch (IOException e) {
E.printstacktrace ();
}
Path relative path of the picture on the HTML file
Return "image/" + suggestedname;
}
});
Parsing Word documents
Wordtohtmlconverter.processdocument (worddocument);
Document HTMLDocument = Wordtohtmlconverter.getdocument ();
Generate HTML file ancestor folder
File Folder = new file (Htmlpath);
if (!folder.exists ()) {
Folder.mkdirs ();
}
Generate HTML file Address
OutputStream OutStream = new FileOutputStream (htmlfile);
Domsource Domsource = new Domsource (htmldocument);
Streamresult Streamresult = new Streamresult (OutStream);
Transformerfactory factory = Transformerfactory.newinstance ();
Transformer serializer = Factory.newtransformer ();
Serializer.setoutputproperty (outputkeys.encoding, "utf-8");
Serializer.setoutputproperty (outputkeys.indent, "yes");
Serializer.setoutputproperty (Outputkeys.method, "html");
Serializer.transform (Domsource, Streamresult);
Outstream.close ();
return Htmlfile.getabsolutepath ();
}
/**
* 2007 version Word converted to HTML 2017-2-27
* @param wordpath Word file path
* @param wordname Word file name no suffix
* @param suffix Word file suffix
* @return
* @throws IOException
*/
public string word2007tohtml (string wordpath,string wordname,string suffix) throws IOException {
String Htmlpath = wordpath + file.separator + wordname + "_show" + file.separator;
String HTMLName = Wordname + ". html";
String ImagePath = Htmlpath + "image" + file.separator;
Determine if an HTML file exists
File Htmlfile = new file (Htmlpath + htmlname);
if (htmlfile.exists ()) {
return Htmlfile.getabsolutepath ();
}
Word file
File Wordfile = new file (Wordpath + file.separator + wordname + suffix);
1) Load Word document to generate Xwpfdocument object
InputStream in = new FileInputStream (wordfile);
Xwpfdocument document = new Xwpfdocument (in);
2) Parse the XHTML configuration (set Iuriresolver here to set the directory for picture storage)
File Imgfolder = new file (ImagePath);
Xhtmloptions options = Xhtmloptions.create ();
Options.setextractor (New Fileimageextractor (Imgfolder));
Path relative path of picture in HTML
Options. Uriresolver (New Basicuriresolver ("image"));
Options.setignorestylesifunused (FALSE);
Options.setfragment (TRUE);
3) Convert Xwpfdocument to XHTML
Generate HTML file ancestor folder
File Folder = new file (Htmlpath);
if (!folder.exists ()) {
Folder.mkdirs ();
}
OutputStream out = new FileOutputStream (htmlfile);
Xhtmlconverter.getinstance (). Convert (document, out, options);
return Htmlfile.getabsolutepath ();
}
}
Attached is the Baidu Netdisk download link:
Link: https://pan.baidu.com/s/1t_jXUq3CuhZo9j_UI4URAQ Password: R2qi
Using POI to convert between Word and HTML