Calling OpenOffice from Java to convert Office documents to PDF (openoffice, pdf)
Introduction:
In real projects we often need to convert Office documents to PDF files from Java. The usual approach is OpenOffice + JODConverter, which can convert Microsoft Office documents.
OpenOffice is available for both Windows and Linux, so there is no need to worry if the production environment runs Linux.
1. The JAR dependencies required for the OpenOffice integration, using Maven as an example:
<dependency>
    <groupId>com.artofsolving</groupId>
    <artifactId>jodconverter</artifactId>
    <version>2.2.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>jurt</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>ridl</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>juh</artifactId>
    <version>3.0.1</version>
</dependency>
<dependency>
    <groupId>org.openoffice</groupId>
    <artifactId>unoil</artifactId>
    <version>3.0.1</version>
</dependency>
<!-- JODConverter 2.2.1 must be used with exactly this version of slf4j-jdk14;
     otherwise the logging calls in its source code report errors (an annoyingly basic problem). -->
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>slf4j-jdk14</artifactId>
    <version>1.4.3</version>
</dependency>
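2. The conversion code. The only prerequisite is that an OpenOffice instance is running as a service and listening on port 8100 (for example, started with `soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard`).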
Public void convert (File sourceFile, File targetFile) {try {// 1: Open the connection OpenOfficeConnection connection = new SocketOpenOfficeConnection (8100); connection. connect (); DocumentConverter converter = new OpenOfficeDocumentConverter (connection); // 2: Get Format DocumentFormatRegistry factory = new BasicDocumentFormatRegistry (); DocumentFormat inputDocumentFormat = factory. getFormatByFileExtension (getExtensionName (sourceFile. getAbsolutePath (); DocumentFormat outputDocumentFormat = factory. getFormatByFileExtension (getExtensionName (targetFile. getAbsolutePath (); // 3: Convert to converter. convert (sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);} catch (ConnectException e) {log.info ("document conversion PDF failed ");}}
3. Note: JODConverter 2.2.1 may report an error when converting documents saved by Office 2007 or later, such as xxx.docx. (Starting with Office 2007, Word documents are saved as xxx.docx instead of xxx.doc.)
Inspecting the JODConverter source code shows why: `public DocumentFormat getFormatByFileExtension(String extension)` in `BasicDocumentFormatRegistry` finds no DocumentFormat for the xxx.docx format, because no format is registered for that extension.
Source code of the BasicDocumentFormatRegistry class:
// // JODConverter - Java OpenDocument Converter // Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com> // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // http://www.gnu.org/copyleft/lesser.html // package com.artofsolving.jodconverter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; public class BasicDocumentFormatRegistry implements DocumentFormatRegistry { private List/*<DocumentFormat>*/ documentFormats = new ArrayList(); public void addDocumentFormat(DocumentFormat documentFormat) { documentFormats.add(documentFormat); } protected List/*<DocumentFormat>*/ getDocumentFormats() { return documentFormats; } /** * @param extension the file extension * @return the DocumentFormat for this extension, or null if the extension is not mapped */ public DocumentFormat getFormatByFileExtension(String extension) { if (extension == null) { return null; } String lowerExtension = extension.toLowerCase(); for (Iterator it = documentFormats.iterator(); it.hasNext();) { DocumentFormat format = (DocumentFormat) it.next(); if (format.getFileExtension().equals(lowerExtension)) { return format; } } return null; } public DocumentFormat getFormatByMimeType(String mimeType) { for (Iterator it = documentFormats.iterator(); it.hasNext();) { DocumentFormat format = (DocumentFormat) it.next(); if (format.getMimeType().equals(mimeType)) { return format; } } return null; } }
DefaultDocumentFormatRegistry, the default subclass of BasicDocumentFormatRegistry, registers the following file formats:
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;

/**
 * A {@link BasicDocumentFormatRegistry} whose constructor registers a fixed
 * set of formats: pdf, swf, xhtml, html, odt, sxw, doc, rtf, wpd, txt, wiki,
 * ods, sxc, xls, csv, tsv, odp, sxi, ppt, odg and svg.
 *
 * <p>No format is registered for the extensions docx, xlsx or pptx.
 */
public class DefaultDocumentFormatRegistry extends BasicDocumentFormatRegistry {

    /** Registers every format listed in the class description, in that order. */
    public DefaultDocumentFormatRegistry() {
        // --- Formats created without a DocumentFamily argument ---
        final DocumentFormat pdf = new DocumentFormat("Portable Document Format", "application/pdf", "pdf");
        pdf.setExportFilter(DocumentFamily.DRAWING, "draw_pdf_Export");
        pdf.setExportFilter(DocumentFamily.PRESENTATION, "impress_pdf_Export");
        pdf.setExportFilter(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
        pdf.setExportFilter(DocumentFamily.TEXT, "writer_pdf_Export");
        addDocumentFormat(pdf);

        final DocumentFormat swf = new DocumentFormat("Macromedia Flash", "application/x-shockwave-flash", "swf");
        swf.setExportFilter(DocumentFamily.DRAWING, "draw_flash_Export");
        swf.setExportFilter(DocumentFamily.PRESENTATION, "impress_flash_Export");
        addDocumentFormat(swf);

        final DocumentFormat xhtml = new DocumentFormat("XHTML", "application/xhtml+xml", "xhtml");
        xhtml.setExportFilter(DocumentFamily.PRESENTATION, "XHTML Impress File");
        xhtml.setExportFilter(DocumentFamily.SPREADSHEET, "XHTML Calc File");
        xhtml.setExportFilter(DocumentFamily.TEXT, "XHTML Writer File");
        addDocumentFormat(xhtml);

        // --- Text family ---
        // HTML is treated as Text when supplied as input, but as an output it is also
        // available for exporting Spreadsheet and Presentation formats
        final DocumentFormat html = new DocumentFormat("HTML", DocumentFamily.TEXT, "text/html", "html");
        html.setExportFilter(DocumentFamily.PRESENTATION, "impress_html_Export");
        html.setExportFilter(DocumentFamily.SPREADSHEET, "HTML (StarCalc)");
        html.setExportFilter(DocumentFamily.TEXT, "HTML (StarWriter)");
        addDocumentFormat(html);

        final DocumentFormat odt = new DocumentFormat("OpenDocument Text", DocumentFamily.TEXT, "application/vnd.oasis.opendocument.text", "odt");
        odt.setExportFilter(DocumentFamily.TEXT, "writer8");
        addDocumentFormat(odt);

        final DocumentFormat sxw = new DocumentFormat("OpenOffice.org 1.0 Text Document", DocumentFamily.TEXT, "application/vnd.sun.xml.writer", "sxw");
        sxw.setExportFilter(DocumentFamily.TEXT, "StarOffice XML (Writer)");
        addDocumentFormat(sxw);

        // Only the "doc" extension is registered for Microsoft Word.
        final DocumentFormat doc = new DocumentFormat("Microsoft Word", DocumentFamily.TEXT, "application/msword", "doc");
        doc.setExportFilter(DocumentFamily.TEXT, "MS Word 97");
        addDocumentFormat(doc);

        final DocumentFormat rtf = new DocumentFormat("Rich Text Format", DocumentFamily.TEXT, "text/rtf", "rtf");
        rtf.setExportFilter(DocumentFamily.TEXT, "Rich Text Format");
        addDocumentFormat(rtf);

        // WordPerfect is registered without any export filter.
        final DocumentFormat wpd = new DocumentFormat("WordPerfect", DocumentFamily.TEXT, "application/wordperfect", "wpd");
        addDocumentFormat(wpd);

        final DocumentFormat txt = new DocumentFormat("Plain Text", DocumentFamily.TEXT, "text/plain", "txt");
        // set FilterName to "Text" to prevent OOo from trying to display the "ASCII Filter Options" dialog
        // alternatively FilterName could be "Text (encoded)" and FilterOptions used to set encoding if needed
        txt.setImportOption("FilterName", "Text");
        txt.setExportFilter(DocumentFamily.TEXT, "Text");
        addDocumentFormat(txt);

        final DocumentFormat wikitext = new DocumentFormat("MediaWiki wikitext", "text/x-wiki", "wiki");
        wikitext.setExportFilter(DocumentFamily.TEXT, "MediaWiki");
        addDocumentFormat(wikitext);

        // --- Spreadsheet family ---
        final DocumentFormat ods = new DocumentFormat("OpenDocument Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.oasis.opendocument.spreadsheet", "ods");
        ods.setExportFilter(DocumentFamily.SPREADSHEET, "calc8");
        addDocumentFormat(ods);

        final DocumentFormat sxc = new DocumentFormat("OpenOffice.org 1.0 Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.sun.xml.calc", "sxc");
        sxc.setExportFilter(DocumentFamily.SPREADSHEET, "StarOffice XML (Calc)");
        addDocumentFormat(sxc);

        // Only the "xls" extension is registered for Microsoft Excel.
        final DocumentFormat xls = new DocumentFormat("Microsoft Excel", DocumentFamily.SPREADSHEET, "application/vnd.ms-excel", "xls");
        xls.setExportFilter(DocumentFamily.SPREADSHEET, "MS Excel 97");
        addDocumentFormat(xls);

        final DocumentFormat csv = new DocumentFormat("CSV", DocumentFamily.SPREADSHEET, "text/csv", "csv");
        csv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
        csv.setImportOption("FilterOptions", "44,34,0");  // Field Separator: ','; Text Delimiter: '"'
        csv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
        csv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "44,34,0");
        addDocumentFormat(csv);

        final DocumentFormat tsv = new DocumentFormat("Tab-separated Values", DocumentFamily.SPREADSHEET, "text/tab-separated-values", "tsv");
        tsv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
        tsv.setImportOption("FilterOptions", "9,34,0");  // Field Separator: '\t'; Text Delimiter: '"'
        tsv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
        tsv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "9,34,0");
        addDocumentFormat(tsv);

        // --- Presentation family ---
        final DocumentFormat odp = new DocumentFormat("OpenDocument Presentation", DocumentFamily.PRESENTATION, "application/vnd.oasis.opendocument.presentation", "odp");
        odp.setExportFilter(DocumentFamily.PRESENTATION, "impress8");
        addDocumentFormat(odp);

        final DocumentFormat sxi = new DocumentFormat("OpenOffice.org 1.0 Presentation", DocumentFamily.PRESENTATION, "application/vnd.sun.xml.impress", "sxi");
        sxi.setExportFilter(DocumentFamily.PRESENTATION, "StarOffice XML (Impress)");
        addDocumentFormat(sxi);

        // Only the "ppt" extension is registered for Microsoft PowerPoint.
        final DocumentFormat ppt = new DocumentFormat("Microsoft PowerPoint", DocumentFamily.PRESENTATION, "application/vnd.ms-powerpoint", "ppt");
        ppt.setExportFilter(DocumentFamily.PRESENTATION, "MS PowerPoint 97");
        addDocumentFormat(ppt);

        // --- Drawing family ---
        final DocumentFormat odg = new DocumentFormat("OpenDocument Drawing", DocumentFamily.DRAWING, "application/vnd.oasis.opendocument.graphics", "odg");
        odg.setExportFilter(DocumentFamily.DRAWING, "draw8");
        addDocumentFormat(odg);

        final DocumentFormat svg = new DocumentFormat("Scalable Vector Graphics", "image/svg+xml", "svg");
        svg.setExportFilter(DocumentFamily.DRAWING, "draw_svg_Export");
        addDocumentFormat(svg);
    }
}
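Solution: provide a modified copy of the BasicDocumentFormatRegistry class (same package and class name, so that it takes the place of the library's version) and change its `public DocumentFormat getFormatByFileExtension(String extension)` method: if the extension contains "doc", the DocumentFormat registered for "doc" is used, and likewise for "ppt" and "xls".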
//
// JODConverter - Java OpenDocument Converter
// Copyright (C) 2004-2007 - Mirko Nasato <mirko@artofsolving.com>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
// http://www.gnu.org/copyleft/lesser.html
//
package com.artofsolving.jodconverter;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

/**
 * Rewritten BasicDocumentFormatRegistry: a list-backed registry of
 * {@link DocumentFormat} entries whose extension lookup also maps the newer
 * Office extensions (docx, pptx, xlsx, ...) onto the formats registered for
 * "doc", "ppt" and "xls".
 *
 * @author HuGuangJun
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

    /**
     * Base extensions that any extension containing them is mapped to,
     * checked in this order (first match wins).
     */
    private static final String[] OFFICE_BASE_EXTENSIONS = {"doc", "ppt", "xls"};

    /** Registered formats, in insertion order. */
    private List/*<DocumentFormat>*/ documentFormats = new ArrayList();

    /**
     * Appends a format to the registry.
     *
     * @param documentFormat the format to register
     */
    public void addDocumentFormat(DocumentFormat documentFormat) {
        documentFormats.add(documentFormat);
    }

    /**
     * @return the internal list of registered formats (not a copy)
     */
    protected List/*<DocumentFormat>*/ getDocumentFormats() {
        return documentFormats;
    }

    /**
     * Returns the first registered format whose file extension equals the
     * normalized form of {@code extension}. Normalization lower-cases the
     * extension and then maps anything containing "doc", "ppt" or "xls" to
     * exactly that base extension, so "docx", "DOCX" and "doc" all resolve to
     * the format registered for "doc".
     *
     * @param extension the file extension
     * @return the DocumentFormat for this extension, or null if the extension
     *         is not mapped
     */
    public DocumentFormat getFormatByFileExtension(String extension) {
        if (extension == null) {
            return null;
        }
        String lowerExtension = normalizeExtension(extension);
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            if (format.getFileExtension().equals(lowerExtension)) {
                return format;
            }
        }
        return null;
    }

    /**
     * Returns the first registered format whose MIME type equals
     * {@code mimeType}.
     *
     * @param mimeType the MIME type to look up
     * @return the matching DocumentFormat, or null if none is registered
     */
    public DocumentFormat getFormatByMimeType(String mimeType) {
        for (Iterator it = documentFormats.iterator(); it.hasNext();) {
            DocumentFormat format = (DocumentFormat) it.next();
            if (format.getMimeType().equals(mimeType)) {
                return format;
            }
        }
        return null;
    }

    /** Lower-cases the extension and collapses Office variants to doc/ppt/xls. */
    private static String normalizeExtension(String extension) {
        // Lower-case first so that "DOCX" is recognised too, and use a fixed locale
        // so the result does not depend on the JVM default (e.g. the Turkish dotless i).
        String lower = extension.toLowerCase(Locale.ENGLISH);
        for (int i = 0; i < OFFICE_BASE_EXTENSIONS.length; i++) {
            if (lower.indexOf(OFFICE_BASE_EXTENSIONS[i]) >= 0) {
                return OFFICE_BASE_EXTENSIONS[i];
            }
        }
        return lower;
    }
}
That is all for this article. I hope it is helpful for your learning, and I hope you will continue to support this site.