使用POI將doc檔案轉換為html

來源:互聯網
上載者:User

標籤:xhtml   jsoup   運算式   share   stack   com   builder   parser   span   

 

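需要的jar包(其中一部分是依賴包,可以使用maven下載):從下面程式碼的import可以看出,需要Apache POI(含hwpf模組)、jsoup,以及轉換PDF時用到的Flying Saucer(org.xhtmlrenderer)與iText(com.lowagie)。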

doc檔案轉換為html檔案

package com.gsww.sxzz.controller.service;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.PicturesManager;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.hwpf.usermodel.Picture;import org.apache.poi.hwpf.usermodel.PictureType;import org.jsoup.Jsoup; import org.w3c.dom.Document;import javax.xml.parsers.DocumentBuilderFactory;import javax.xml.parsers.ParserConfigurationException;import javax.xml.transform.OutputKeys;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerException;import javax.xml.transform.TransformerFactory;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.stream.StreamResult;import java.io.*;import java.util.List;/** * Created by Carey on 15-2-2. */public class docTohtml {    public static void main(String argv[]) {        try {            convert2Html("D:\\b.doc","D:\\1.html");        } catch (Exception e) {            e.printStackTrace();        }    }    //輸出html檔案     public static void writeFile(String content, String path) {        FileOutputStream fos = null;         BufferedWriter bw = null;        org.jsoup.nodes.Document doc = Jsoup.parse(content);        String styleOld=doc.getElementsByTag("style").html();        //統一字型格式為宋體        styleOld=styleOld.replaceAll("font-family:.+(?=;\\b)", "font-family:SimSun");                doc.getElementsByTag("head").empty();        doc.getElementsByTag("head").append("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"></meta>");        doc.getElementsByTag("head").append(" <style type=\"text/css\"></style>");        doc.getElementsByTag("style").append(styleOld);        /*Regex查詢字型內容:font-family:.+(?=;\b)*/        System.out.println(content);        content=doc.html();        content=content.replace("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">", "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"></meta>");        try {            File 
file = new File(path);            fos = new FileOutputStream(file);            bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));            bw.write(content);        } catch (FileNotFoundException fnfe) {            fnfe.printStackTrace();        } catch (IOException ioe) {            ioe.printStackTrace();        } finally {            try {                if (bw != null)                    bw.close();                if (fos != null)                    fos.close();            } catch (IOException ie) {            }        }    }    //word 轉 html     public static void convert2Html(String fileName, String outPutFile)            throws TransformerException, IOException,            ParserConfigurationException {        HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));         //相容2007 以上版本//        XSSFWorkbook  xssfwork=new XSSFWorkbook(new FileInputStream(fileName));        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(                DocumentBuilderFactory.newInstance().newDocumentBuilder()                        .newDocument());        wordToHtmlConverter.setPicturesManager( new PicturesManager()        {            public String savePicture( byte[] content,                                       PictureType pictureType, String suggestedName,                                       float widthInches, float heightInches )            {                return "test/"+suggestedName;            }        } );        wordToHtmlConverter.processDocument(wordDocument);                //save pictures        List pics=wordDocument.getPicturesTable().getAllPictures();        if(pics!=null){            for(int i=0;i<pics.size();i++){                Picture pic = (Picture)pics.get(i);                System.out.println();                try {                    pic.writeImageContent(new FileOutputStream("D:/test/"                            + 
pic.suggestFullFileName()));                } catch (FileNotFoundException e) {                    e.printStackTrace();                }            }        }        Document htmlDocument = wordToHtmlConverter.getDocument();                ByteArrayOutputStream out = new ByteArrayOutputStream();        DOMSource domSource = new DOMSource(htmlDocument);               StreamResult streamResult = new StreamResult(out);        TransformerFactory tf = TransformerFactory.newInstance();        Transformer serializer = tf.newTransformer();             serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");        serializer.setOutputProperty(OutputKeys.INDENT, "yes");        serializer.setOutputProperty(OutputKeys.METHOD, "HTML");        serializer.transform(domSource, streamResult);        out.close();        writeFile(new String(out.toByteArray()), outPutFile);    }}

遇到的問題:doc轉換為html時,文件中繪製的圖形線條不會被轉換過來,只有放在table表格中的內容才會被轉換為span標籤。如果需要底線(下劃線)的效果,可以放一個table儲存格並只設定下邊框,這樣就可以完整地轉換為html了。

 

將html轉換為pdf

package com.gsww.sxzz.controller.service;import com.lowagie.text.pdf.BaseFont;import org.xhtmlrenderer.pdf.ITextFontResolver;import org.xhtmlrenderer.pdf.ITextRenderer;import java.io.File;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.OutputStream;/** * Created by Carey on 15-2-2. */public class htmlToPdf {    public boolean convertHtmlToPdf(String inputFile, String outputFile)             {                try {                OutputStream     os = new FileOutputStream(outputFile);              ITextRenderer renderer = new ITextRenderer();                String url = new File(inputFile).toURI().toURL().toString();                renderer.setDocument(url);                // 解決中文支援問題                ITextFontResolver fontResolver = renderer.getFontResolver();                /*fontResolver.addFont("C:\\Windows\\Fonts\\simsunb.ttf", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);                     *///宋體檔案的相對路徑                fontResolver.addFont("C:\\Windows\\Fonts\\simsun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);                                    renderer.getSharedContext().setBaseURL("file:/D:/");                renderer.layout();                renderer.createPDF(os);                os.flush();                os.close();        } catch (Exception e) {            // TODO Auto-generated catch block            e.printStackTrace();        }              return true;    }     public   static  void  main(String [] args){         htmlToPdf html2Pdf =new htmlToPdf();         try {             html2Pdf.convertHtmlToPdf("D:\\1.html","D:\\index.pdf");         } catch (Exception e) {             e.printStackTrace();         }     }}

 

使用POI將doc檔案轉換為html

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里云無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.