Converting between Word and HTML with Apache POI

Source: Internet
Author: User

Here are two classes: the first converts HTML to Word, and the second converts Word to HTML. A download link for most of the required JAR packages is attached at the end.

package com.wz.poi.wordHtml;

/**
* 2018/4/24
* @author Administrator
*
*/

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;

import org.apache.poi.poifs.filesystem.POIFSFileSystem;

public class Htmltoword {

/**
* External interface
* @param the path to the Htmlpath HTML file
* @param the path to the Csspath CSS file
* @param the path to the Wordpath Word file (save local path)
* @param code encoding Method (generally utf-8)
* @throws Exception
*/
public void HtmlToWord2 (string htmlpath, String Csspath, String Wordpath, String code) throws Exception {
/*inputstream Bodyis = new FileInputStream ("h:\\mytest\\java\\test_show\\test.html");
InputStream cssis = new FileInputStream ("H:\\mytest\\java\\test_show\\test.css"); */
InputStream Bodyis = new FileInputStream (Htmlpath);
InputStream cssis = new FileInputStream (Csspath);
String BODY = this.getcontent (Bodyis);
String CSS = this.getcontent (cssis);
To spell a standard HTML format document
String content = "InputStream is = new Bytearrayinputstream (content.getbytes (code));
OutputStream OS = new FileOutputStream (Wordpath);
This.inputstreamtoword (is, OS);
}

/**
* Writes is to the corresponding word output stream OS regardless of the catch of the exception, thrown directly
*
* @param is
* @param os
* @throws IOException
*/
private void Inputstreamtoword (InputStream is, OutputStream os) throws IOException {
Poifsfilesystem fs = new Poifsfilesystem ();
Corresponds to Org.apache.poi.hdf.extractor.WordDocument
Fs.createdocument (IS, "worddocument");
Fs.writefilesystem (OS);
Os.close ();
Is.close ();
Fs.close ();
}

/**
* The contents of the input stream are UTF-8 encoded when the text is taken out. Without considering exceptions, throw directly
*
* @param ises
* @return
* @throws IOException
*/
Private String getcontent (InputStream ... ises) throws IOException {
if (ises! = null) {
StringBuilder result = new StringBuilder ();
BufferedReader BR;
String Line;
for (InputStream is:ises) {
br = new BufferedReader (new InputStreamReader (IS, "UTF-8"));
while (line = Br.readline ()) = null) {
Result.append (line);
}
}
return result.tostring ();
}
return null;
}


}

package com.wz.poi.wordHtml;

/**
* 2018/4/24
* @author Administrator
*
*/

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

public class Wordtohtml {

/**
* Called template
* @param args
*/
public static void Main (string[] args) {
try {
Word2003tohtml ("h:\\mytest\\java\\", "Test", ". Doc");
} catch (IOException e) {
TODO auto-generated Catch block
E.printstacktrace ();
} catch (Transformerexception e) {
TODO auto-generated Catch block
E.printstacktrace ();
} catch (Parserconfigurationexception e) {
TODO auto-generated Catch block
E.printstacktrace ();
}
}

/**
* Convert word2003 to HTML file 2017-2-27
* @param wordpath Word file path
* @param wordname Word file name no suffix
* @param suffix Word file suffix
* @throws IOException
* @throws transformerexception
* @throws parserconfigurationexception
*/
public static string word2003tohtml (string wordpath,string wordname,string suffix) throws IOException, Transformerexception, Parserconfigurationexception {
String Htmlpath = wordpath + file.separator + wordname + "_show" + file.separator;
String HTMLName = Wordname + ". html";
Final String ImagePath = Htmlpath + "image" + file.separator;

Determine if an HTML file exists
File Htmlfile = new file (Htmlpath + htmlname);
if (htmlfile.exists ()) {
return Htmlfile.getabsolutepath ();
}

Original Word Document
Final String file = Wordpath + file.separator + wordname + suffix;
InputStream input = new FileInputStream (new file);

Hwpfdocument worddocument = new hwpfdocument (input);
Wordtohtmlconverter wordtohtmlconverter = new Wordtohtmlconverter (Documentbuilderfactory.newinstance (). Newdocumentbuilder (). NewDocument ());
Set the location where the picture is stored
Wordtohtmlconverter.setpicturesmanager (New Picturesmanager () {
Public String savepicture (byte[] content, PictureType PictureType, String suggestedname, float widthinches, float Heighti Nches) {
File Imgpath = new file (ImagePath);
if (!imgpath.exists ()) {//Picture directory does not exist then create
Imgpath.mkdirs ();
}
File File = new file (ImagePath + suggestedname);
try {
OutputStream OS = new FileOutputStream (file);
Os.write (content);
Os.close ();
} catch (FileNotFoundException e) {
E.printstacktrace ();
} catch (IOException e) {
E.printstacktrace ();
}
Path relative path of the picture on the HTML file
Return "image/" + suggestedname;
}
});

Parsing Word documents
Wordtohtmlconverter.processdocument (worddocument);
Document HTMLDocument = Wordtohtmlconverter.getdocument ();

Generate HTML file ancestor folder
File Folder = new file (Htmlpath);
if (!folder.exists ()) {
Folder.mkdirs ();
}

Generate HTML file Address
OutputStream OutStream = new FileOutputStream (htmlfile);

Domsource Domsource = new Domsource (htmldocument);
Streamresult Streamresult = new Streamresult (OutStream);

Transformerfactory factory = Transformerfactory.newinstance ();
Transformer serializer = Factory.newtransformer ();
Serializer.setoutputproperty (outputkeys.encoding, "utf-8");
Serializer.setoutputproperty (outputkeys.indent, "yes");
Serializer.setoutputproperty (Outputkeys.method, "html");

Serializer.transform (Domsource, Streamresult);

Outstream.close ();

return Htmlfile.getabsolutepath ();
}

/**
* 2007 version Word converted to HTML 2017-2-27
* @param wordpath Word file path
* @param wordname Word file name no suffix
* @param suffix Word file suffix
* @return
* @throws IOException
*/
public string word2007tohtml (string wordpath,string wordname,string suffix) throws IOException {
String Htmlpath = wordpath + file.separator + wordname + "_show" + file.separator;
String HTMLName = Wordname + ". html";
String ImagePath = Htmlpath + "image" + file.separator;

Determine if an HTML file exists
File Htmlfile = new file (Htmlpath + htmlname);
if (htmlfile.exists ()) {
return Htmlfile.getabsolutepath ();
}

Word file
File Wordfile = new file (Wordpath + file.separator + wordname + suffix);

1) Load Word document to generate Xwpfdocument object
InputStream in = new FileInputStream (wordfile);
Xwpfdocument document = new Xwpfdocument (in);

2) Parse the XHTML configuration (set Iuriresolver here to set the directory for picture storage)
File Imgfolder = new file (ImagePath);
Xhtmloptions options = Xhtmloptions.create ();
Options.setextractor (New Fileimageextractor (Imgfolder));
Path relative path of picture in HTML
Options. Uriresolver (New Basicuriresolver ("image"));
Options.setignorestylesifunused (FALSE);
Options.setfragment (TRUE);

3) Convert Xwpfdocument to XHTML
Generate HTML file ancestor folder
File Folder = new file (Htmlpath);
if (!folder.exists ()) {
Folder.mkdirs ();
}
OutputStream out = new FileOutputStream (htmlfile);
Xhtmlconverter.getinstance (). Convert (document, out, options);

return Htmlfile.getabsolutepath ();
}

}

Baidu Netdisk download link for the attachment:

Link: https://pan.baidu.com/s/1t_jXUq3CuhZo9j_UI4URAQ Password: R2qi

Converting between Word and HTML with Apache POI

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.