Apache's POI project can be used to work with MS Office documents; a .NET port of it is hosted on CodePlex. The POI project creates and maintains Java APIs for reading and writing the OOXML and OLE2 file formats. Most MS Office documents are in the OLE2 format. POI supports Outlook through its HSMF subproject, Visio through its HDGF subproject, and Publisher through its HPBF subproject.
A simple example of extracting text from a Word document with POI:
Both the poi-3.7.jar and poi-scratchpad-3.7.jar packages must be added to the classpath.
The code is as follows:
package msoffice;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;

/**
 * Demonstrates two ways of extracting text from a Word document with POI's
 * HWPF API: all at once, or section by section.
 */
public class Word {

    /**
     * Extracts the entire text content of the document in one call.
     *
     * @param is stream positioned at the start of the document; the caller
     *           remains responsible for closing it
     * @return the text returned by {@link WordExtractor#getText()}
     * @throws IOException if the document cannot be read
     */
    public static String readDoc1(InputStream is) throws IOException {
        WordExtractor extractor = new WordExtractor(is);
        return extractor.getText();
    }

    /**
     * Walks the document section by section, paragraph by paragraph and
     * character run by character run, printing the text of every run to
     * standard output (no separator is added between runs).
     *
     * @param is stream positioned at the start of the document; the caller
     *           remains responsible for closing it
     * @throws IOException if the document cannot be read
     */
    public static void readDoc2(InputStream is) throws IOException {
        HWPFDocument doc = new HWPFDocument(is);
        Range range = doc.getRange();
        for (int x = 0; x < range.numSections(); x++) {
            Section section = range.getSection(x);
            for (int y = 0; y < section.numParagraphs(); y++) {
                Paragraph paragraph = section.getParagraph(y);
                for (int z = 0; z < paragraph.numCharacterRuns(); z++) {
                    CharacterRun run = paragraph.getCharacterRun(z);
                    String text = run.text();
                    System.out.print(text);
                }
            }
        }
    }

    /**
     * Runs both extraction variants against a fixed sample file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File file = new File("/home/orisun/1.doc");
        try {
            // Each extraction consumes the stream, so the file is opened twice.
            FileInputStream fin = new FileInputStream(file);
            try {
                String cont = readDoc1(fin);
                System.out.println(cont);
            } finally {
                // Close even when extraction fails, so the handle is not leaked.
                fin.close();
            }

            fin = new FileInputStream(file);
            try {
                readDoc2(fin);
            } finally {
                fin.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
A sample of extracting text from a PowerPoint (PPT) file with POI:
The code is as follows:
package msoffice;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.usermodel.SlideShow;

/**
 * Demonstrates two ways of extracting text from a PowerPoint file with POI's
 * HSLF API: all at once, or slide by slide.
 */
public class PPT {

    /**
     * Extracts the entire text content of the slide show in one call.
     *
     * @param is stream positioned at the start of the file; the caller
     *           remains responsible for closing it
     * @return the text returned by {@link PowerPointExtractor#getText()}
     * @throws IOException if the file cannot be read
     */
    public static String readDoc1(InputStream is) throws IOException {
        PowerPointExtractor extractor = new PowerPointExtractor(is);
        return extractor.getText();
    }

    /**
     * Reads the slide show one slide at a time, printing each slide's title
     * followed by the text of each of its text runs (the title is among them)
     * to standard output.
     *
     * @param is stream positioned at the start of the file; the caller
     *           remains responsible for closing it
     * @throws IOException if the file cannot be read
     */
    public static void readDoc2(InputStream is) throws IOException {
        SlideShow slideShow = new SlideShow(new HSLFSlideShow(is));
        Slide[] slides = slideShow.getSlides();
        for (Slide slide : slides) {
            // Title of the slide.
            String title = slide.getTitle();
            System.out.println("title:" + title);
            // Full content of the slide, title included.
            TextRun[] runs = slide.getTextRuns();
            for (TextRun run : runs) {
                System.out.println(run.getText());
            }
        }
    }

    /**
     * Runs both extraction variants against a fixed sample file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File file = new File("/home/orisun/2.ppt");
        try {
            // Each extraction consumes the stream, so the file is opened twice.
            FileInputStream fin = new FileInputStream(file);
            try {
                String cont = readDoc1(fin);
                System.out.println(cont);
            } finally {
                // Close even when extraction fails, so the handle is not leaked.
                fin.close();
            }

            fin = new FileInputStream(file);
            try {
                readDoc2(fin);
            } finally {
                fin.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
An Excel file corresponds to one workbook, and a workbook consists of multiple sheets.
A simple example of extracting Excel content with POI:
The code is as follows:
package msoffice;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;

import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Row;

/**
 * Demonstrates two ways of reading an Excel (.xls) file with POI's HSSF API:
 * dumping all text at once, or walking rows and cells to compute a value.
 */
public class Excel {

    /** Zero-based column holding the value that is multiplied by the weight. */
    // NOTE(review): this is the same column as WEIGHT_COLUMN, so getAvg
    // computes sum(x*x)/sum(x) rather than a true weighted average. One of
    // the two indexes is presumably meant to be a different column; confirm
    // against the spreadsheet layout before changing it.
    private static final int SCORE_COLUMN = 4;

    /** Zero-based column holding the weight. */
    private static final int WEIGHT_COLUMN = 4;

    /**
     * Extracts the entire text content of the workbook in one call, with
     * formula results (not formula text) and sheet names included.
     *
     * @param is stream positioned at the start of the file; the caller
     *           remains responsible for closing it
     * @return the text returned by {@link ExcelExtractor#getText()}
     * @throws IOException if the file cannot be read
     */
    public static String readDoc1(InputStream is) throws IOException {
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(is));
        ExcelExtractor extractor = new ExcelExtractor(wb);
        extractor.setFormulasNotResults(false);
        extractor.setIncludeSheetNames(true);
        return extractor.getText();
    }

    /**
     * Reads the first sheet row by row and returns
     * sum(weight * score) / sum(weight) over all rows.
     *
     * @param is stream positioned at the start of the file; the caller
     *           remains responsible for closing it
     * @return the weighted average; {@code NaN} when the weights sum to zero
     *         with a zero numerator (for example, a sheet without rows)
     * @throws IOException           if the file cannot be read
     * @throws IllegalStateException if a required cell is missing or is not
     *                               of numeric type
     */
    public static double getAvg(InputStream is) throws IOException {
        HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(is));
        // Only the first sheet of the workbook is considered.
        HSSFSheet sheet = wb.getSheetAt(0);
        double molecule = 0.0;
        double denominator = 0.0;
        // Traverse the sheet by rows.
        Iterator<Row> rowIter = sheet.rowIterator();
        while (rowIter.hasNext()) {
            HSSFRow row = (HSSFRow) rowIter.next();
            double score = numericValue(row, SCORE_COLUMN);
            double weight = numericValue(row, WEIGHT_COLUMN);
            denominator += weight;
            molecule += weight * score;
        }
        return molecule / denominator;
    }

    /**
     * Returns the numeric value of the cell at {@code column} in {@code row},
     * rejecting missing and non-numeric cells.
     */
    private static double numericValue(HSSFRow row, int column) {
        HSSFCell cell = row.getCell(column);
        // getCell returns null for a cell that was never defined.
        if (cell == null || cell.getCellType() != HSSFCell.CELL_TYPE_NUMERIC) {
            throw new IllegalStateException("Number type is wrong!"
                    + " (row " + row.getRowNum() + ", column " + column + ")");
        }
        return cell.getNumericCellValue();
    }

    /**
     * Runs both variants against a fixed sample file. Exits with status -2
     * when the sheet contains a missing or non-numeric cell where a number is
     * required.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File file = new File("/home/orisun/3.xls");
        try {
            // Each read consumes the stream, so the file is opened twice.
            FileInputStream fin = new FileInputStream(file);
            try {
                String cont = readDoc1(fin);
                System.out.println(cont);
            } finally {
                // Close even when reading fails, so the handle is not leaked.
                fin.close();
            }

            fin = new FileInputStream(file);
            try {
                System.out.println("Weighted average score" + getAvg(fin));
            } finally {
                fin.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (IllegalStateException e) {
            // Bad cell data: report and terminate here, at the program
            // boundary, rather than inside the reusable getAvg method.
            System.err.println(e.getMessage());
            System.exit(-2);
        }
    }
}