Parse the PDF file and display it.
Parsing PDF files requires the PDFBox JAR package, which can be downloaded from: http://pdfbox.apache.org/download.html
The following is the PDF parsing code:
package com.lingjoin.extractors;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.Date;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.util.PDFTextStripper;

import com.lingjoin.paser.LingJoinFile;
/**
* PDF parser
*
* @ Author ansj
*
*/
Public class extends extractor extends actextractor {
Private string getcontent (lingjoinfile f ){
// Todo auto-generated method stub
Pddocument Doc = NULL;
Try {
Doc = pddocument. Load (f );
Required textstripper stripper = new required textstripper ();
/**
* Set file information
*/
This. setlingjoinfileinfo (F, Doc
. Getdocumentinformation ());
Return stripper. gettext (DOC );
} Catch (filenotfoundexception e ){
// Todo auto-generated Catch Block
E. printstacktrace ();
} Catch (ioexception e ){
// Todo auto-generated Catch Block
E. printstacktrace ();
} Finally {
If (Doc! = NULL ){
Try {
Doc. Close ();
} Catch (ioexception e ){
// Todo auto-generated Catch Block
E. printstacktrace ();
}
}
}
Return "";
}
Private bufferedreader getcontentreader (lingjoinfile f ){
Return new bufferedreader (New stringreader (this. getcontent (F )));
}
/**
*
* Project name: filepaser
* Class description: sets the file information.
* Created by ansj
* Creation Time: 04:27:57
* Modify remarks:
* @ Version
*/
Private void setlingjoinfileinfo (lingjoinfile F, pddocumentinformation info ){
If (info. getauthor ()! = NULL ){
F. setlauthor (info. getauthor ());
}
}
Public void paserfiletoreader (lingjoinfile f) throws exception {
F. setlcontentreader (this. getcontentreader (f ));
}
Public void paserfiletostring (lingjoinfile f) throws exception {
// Todo auto-generated method stub
F. setlcontent (this. getcontent (f ));
}
Public extends Extractor (integer typeflag ){
// Todo auto-generated constructor stub
This. typeflag = typeflag;
}
Private integer typeflag = NULL;
Public integer gettypeflag (){
// Todo auto-generated method stub
Return typeflag;
}
}
The above is implemented using PDFBox, but it contains only part of the code. It also uses a dynamic library (.so file) to read images.
Source: http://download.csdn.net/detail/dahuaishu2010_/5292196