Java conversion to various file types in Word

Source: Internet
Author: User

The program uses the POI, JACOB, OpenOffice (JODConverter), ICEpdf and iText libraries; the JAR packages can be found by searching the Internet.

PS: When using OpenOffice for the conversion (document to PDF, and from there to pictures), OpenOffice must be installed locally and its service must be running:

After installing OpenOffice

1. After installing OpenOffice, enter the default installation directory.

cd "C:\Program Files (x86)\OpenOffice 4\program"

Then run:

soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard

2. Check that the service started successfully

2.1 Find the PID of the process listening on the port

netstat -ano | findstr "8100"

2.2 Look up the name of the program that owns that PID

tasklist | findstr "<PID value>"

Without further ado, here is the code:

package com.officefileparser.utils;

import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import javax.imageio.ImageIO;

import org.apache.commons.io.FilenameUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hslf.model.Picture;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.usermodel.PictureData;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.icepdf.core.pobjects.Document;
import org.icepdf.core.pobjects.Page;
import org.icepdf.core.util.GraphicsRenderingHints;

import com.artofsolving.jodconverter.DefaultDocumentFormatRegistry;
import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.DocumentFormat;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
	 public class Officefileparserutils {/** * use POI to read doc file * @param docurl * doc file path * @return * A String representing the contents of the doc file
		* @throws IOException */public static void Readoc (String docpath) throws ioexception{FileInputStream in;
		in = new FileInputStream (Docpath);
	Wordextractor extractor = new Wordextractor (in);	Hwpfdocument hwpfdocument = new Hwpfdocument (in);
	 An element is a piece of content string[] paratexts = Extractor.getparagraphtext ();      for (int i=0 i<paratexts.length; i++) {     //a period of deposit in       &N Bsp &NBSP;SYSTEM.OUT.PRINTLN (/* "Paragraph" + (i+1) + ":" +*/paratexts[i]);        } }/**      * use POI to write Word documents      * @param destfile Purpose Address      * @param filecon   Pending write string      */    public void Exportdoc (String de Stfile,string Filecon) {        try {           //doc content   & nbsp
        Bytearrayinputstream Bais = new Bytearrayinputstream (Filecon.getbytes ());
            Poifsfilesystem fs = new Poifsfilesystem ();             DirectoryEntry directory = Fs.getroot ();        &NBSP;
    Directory.createdocument ("Worddocument", Bais);
            FileOutputStream ostream = new FileOutputStream (destfile);
            Fs.writefilesystem (ostream);
            bais.close ();
            ostream.close ();        } catch (IOException e) {            e.printstacktrace (); &nbsp ;      }    }/** * use PDFBox to read PDF Document information * @param pdfpath PDF file path * @return PDF information String * @th
		Rows Exception */public static string Getpdftext (String pdfpath) throws Exception {Boolean sort = false;
		int startpage = 1;
		int endpage = 10;
		PDDocument document = null;
		File File = new file (Pdfpath); if (!file.exists ()) {System.out.println (File.getabsolutepath () +) does not exist ...
			");
		Return "";
		} System.out.println (file);
			Try {try {document = Pddocument.load (file); } CATCH (Malformedurlexception e) {} pdftextstripper stripper = new Pdftextstripper ();
			Stripper.setsortbyposition (sort);
			Stripper.setstartpage (StartPage);
			Stripper.setendpage (EndPage);
		return Stripper.gettext (document);
			catch (Exception e) {e.printstacktrace ();
		Return "";
			finally {if (document!= null) {document.close ();
			}}//function checks whether the file is ppt public static Boolean checkfile (file file) {Boolean isppt = false;
			String filename = File.getname ();
			String suffixname = null; if (filename!= null && filename.indexof (".")!=-1) {suffixname = filename.substring (Filename.indexof ("."))
				;
				System.out.println (Suffixname);
				if (Suffixname.equals (". ppt")) {isppt = true;
			return isppt;
			else {return isppt; /** * Use POI to extract all content of ppt * @param pptpath * @return * @throws ioexception/public static String READPP TAll (String pptpath) throws ioexception{PowerpointextractoR powerpointextractor = new Powerpointextractor (Pptpath);
		System.out.println (Powerpointextractor.gettext ());
	return Powerpointextractor.gettext (); /** * Use POI to extract PPT content * @param pptpath * @return * @throws ioexception * * public static HASHMAP Readppto Nebyone (String pptpath,string outpath) throws ioexception{content = new//stringbuffer (""); StringBuffer the
		Content of ppt InputStream InputStream = new FileInputStream (Pptpath); Hashmap<integer, string> contentpagemap =new Hashmap<integer, string> ();
		 //constructs a Powerpoint document from a input stream.
		
		Slideshow slideshow = new Slideshow (new Hslfslideshow (InputStream));
		To get each slide, think of a slides element as a ppt  slide[] slides = slideshow.getslides ();
			for (int i = 0; i < slides.length i++) {//Get PPT page number pagenum int pagenum = Slides[i].getslidenumber ();
Read the contents of a slide (including the title), in order to get the text content of the slide, establish textrun,textrun to represent a ppt textrun[] textruns = Slides[i].gettextruns ();			System.out.println ("The first" +pagenum+ "Zhang");
Read the title of a slide             String title=slides[i].gettitle (); *            //get system line break             String lineseparator = (St Ring) java.security.AccessController.doPrivileged (                    N
EW sun.security.action.GetPropertyAction ("Line.separator"));             System.out.print (lineseparator);///If there is no title in a ppt, take the contents of the first text box as the title   &NB Sp         if (title = null) {            title = Textruns[0].gettext (). Re 
Placeall ("[\n\r]", "");/Remove line break            }             
			            SYSTEM.OUT.PRINTLN ("title:" +title); for (int j = 0; J < Textruns.length J + +) {//Get content in a text box for ppt text String text = Textruns[j].gettext (). ReplaceAll ("[\n\r]", "");
				Contentpagemap.put (i, text);
				System.out.print (text);
			System.out.println ();
		}//Extract all pictures in ppt//Get PPT page number pagenum file OutDir = new file (Outpath);
		if (!outdir.exists ()) {Outdir.mkdir ();
		} extractimageofppt (Pptpath,outpath);
	return contentpagemap; ///Use POI to extract pictures in ppt private static void Extractimageofppt (String pptpath,string outpath) throws IOException {//sli
			The Desshow class represents a ppt file slideshow ppt = new Slideshow (new Hslfslideshow (Pptpath));
			An element in the Sildes array represents a ppt slide[] slides = ppt.getslides ();
			Extract all pictures contained in the presentation picturedata[] pdata = Ppt.getpicturedata ();
				for (int i = 0; i < pdata.length i++) {PictureData pict = pdata[i];
				Picture data byte[] data = Pict.getdata ();
				int type = Pict.gettype ();
				String ext;
					Switch (type) {case Picture.JPEG:ext = '. jpg ';
				Break
					Case Picture.PNG:ext = ". PNG";
				Break Case PictUre.
					Wmf:ext = ". Wmf";
				Break
					Case Picture.EMF:ext = ". EMF";
				Break
					Case Picture.PICT:ext = ". PICT";
				Break
				Default:continue;
				FileOutputStream out = new FileOutputStream (outpath+ "pict_" + i + ext);
				Out.write (data);
			Out.close ();
		 /** * Use OpenOffice to convert Doc's various types of files to PDF format * @param docpath * @param pdfpath * @throws connectexception */public static void Doc2pdf (String docpath, String pdfpath) throws Connectexception {          File Inputfile = new file (docpath);//Pre-turn file           File outputfile = new file (Pdfpath); PDF file           openofficeconnection connection = new socketopenofficeconnection (8100);           connection.connect ()//Set up connection           Documentconverter Co Nverter = new Openofficedocumentconverter (connection);           DEFAUltdocumentformatregistry Formatreg = new Defaultdocumentformatregistry ();               Documentformat txt = formatreg.getformatbyfileextension ("ODT");//Set file format           Documentformat PDF = formatreg.getformatbyfileextension ("PDF");/Set file format   &N Bsp       Converter.convert (inputfile, TXT, outputfile, pdf)/File conversion           CONNEC Tion.disconnect ()//close connection        } /**  * use OpenOffice to turn doc documents into pictures, it works well, thinking: first doc document into PDF, and then convert the PDF to a picture  * @param docpath  * @param imgdirpath  */public static void Doc2imags (String do CPath, String imgdirpath) {   string pdfpath =string.format ("%s%s.pdf",  filenameutils.getfullpath ( Docpath), Filenameutils.getbasename (Docpath));      try {      System.out.println (Filenameutils.getfullpath (Docpath) + " " +file Nameutils.getbasename (DoCPath));
		        doc2pdf (Docpath, Pdfpath);        pdf2imgs (Pdfpath, Imgdirpath);          file PDF =  new File (pdfpath);          if (Pdf.isfile ()) {           /pdf.delete () &nbsp
		 ;       &NBSP}       &nbsp} catch (Connectexception e) {      &NBSP ;  e.printstacktrace ();     &NBSP} catch (Exception e) {         e.printstacktrace ();     &NBSP}    }/**   * convert a PDF to a picture using icepdf and return the picture name    * @param pdfpath    * @param imagepath   * @return Returns the name of the converted picture    * @throws exception   */  public s Tatic list<string> Pdf2imgs (String pdfpath, String imgdirpath) throws Exception {      Document do Cument = new Document ();
	      Document.setfile (Pdfpath);       FLOAT scale = 5f;//magnification       float rotation = 0f;//rotation degree       Li st<string> imgnames = new arraylist<string> ();       int pagenum = Document.getnumberofpages ();       File imgdir = new file (Imgdirpath);        if (!imgdir.exists ()) {          Imgdir.mkdirs ()       }       for (int i = 0; i < pagenum; i++) {          BufferedImage image = (bufferedimage) document.getpageimage (i, Graphicsrenderinghints.screen,             &NBS P
	     Page.boundary_cropbox, rotation, scale);         renderedimage rendimage = image;            try {              String FilePath = Imgdirpath + File.separator + i + ". jpg";   &NBSp           File File = new file (FilePath);               Imageio.write (rendimage, "JPG", file);               Imgnames.add (Filenameutils.getname (FilePath));          } catch (IOException e) {              e.prints Tacktrace ();               return null;           {          Image.flush ();      }       document.dispose ();       return imgnames;  }  /**  * use Jacob to turn ppt into pictures  * @param inputfile  * @param imgfilepath  */PU Blic static synchronized void ppt2img (String inputfile,string imgfilepath) {      &NBSP;SYSTEM.OUT.PRINTL
	 N ("ppt2pdf========== entry");
	        activexcomponent app = null;        try {            app = new Activexcomponent ("PowerPoint.Application");
	       &NBSP} catch (Exception e) {           e.printstacktrace ();
	       &NBSP}        system.out.println ("--------------------");
	        app.setproperty ("Visible", true);
	        dispatch ppts = App.getproperty ("Presentations"). Todispatch ();
	       &NBSP;SYSTEM.OUT.PRINTLN ("ppt2pdf========== ready to open PPT document");
	       &NBSP;SYSTEM.OUT.PRINTLN (new File (Inputfile). exists ());        dispatch ppt = Dispatch.call (ppts,                 &N Bsp                   "Open",               & nbsp                    inputfile,   &NBsp                                true,//read Only                                 &NB Sp  true,//untitled Specify whether the file has a title                         &NBSP ;          true//withwindow Specify whether file is visible                 &NBSP ;
	                   Todispatch ();
	       &NBSP;SYSTEM.OUT.PRINTLN ("ppt2pdf========== prepare to convert PPT document");        dispatch.call (ppt, "SaveCopyAs", imgfilepath,17);//convert to JPG           & nbsp               &NBSP;SYSTEM.OUT.PRINTLN ("ppt2pdf========== prepare to close PPT document");
	               dispatch.call (ppt, "close");     &NBSP;
	  app.invoke ("Quit");   &NBSP}}



Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.