Java
import java.io.*;
import java.util.Date;

import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
/**/ /**
* <P> title: PDF extraction </P>
* <P> Description: Email: chris@matrix.org.cn </P>
* <P> copyright: matrix copyright (c) 2003 </P>
* <P> company: matrix.org.cn </P>
* @ Author Chris
* @ Version 1.0, who use this example PLS remain the declare
*/
Public Class Pdfextracter {
Public Extends extracter () {
}
Public String gettextfrompdf (string filename) throws exception
{
String temp = Null ;
Pddocument. nbsp1_document. Null ;
Fileinputstream Is = New Fileinputstream (filename );
Pdfparser parser = New Pdfparser ( Is );
Parser. parse ();
Invalid document. nbsp = Parser. getpddocument .);
Bytearrayoutputstream Out = New Bytearrayoutputstream ();
Outputstreamwriter writer = New Outputstreamwriter ( Out );
Pdftextstripper Stripper = New Extends textstripper ();
Stripper. writetext (invalid document. getdocument.), writer );
Writer. Close ();
Byte [] Contents = Out . Tobytearray ();
String TS = New String (contents );
System. Out . Println ( " The string length is " + Contents. Length + " \ N " );
Return TS;
}
Public Static Void Main (string ARGs [])
{
Pdfextracter pf = New Extends extracter ();
Pddocument. nbsp1_document. nbsp = Null ;
Try {
String TS=PF. gettextfrompdf ("C: \ a.pdf");
System.Out. Println (TS );
}
Catch (Exception E)
{
E. printstacktrace ();
}
}
}
The equivalent C# code is as follows.
First add references to the required classes:
// Load a PDF, extract its text with PDFTextStripper, and always release the document.
// NOTE(review): "4.20 " is the path exactly as it appears in the source and looks garbled;
// substitute the real PDF path before running.
PDDocument doc = PDDocument.load("4.20 ");
try
{
    // doc.getDocument();
    PDFTextStripper stripper = new PDFTextStripper();
    // Keep the extracted text; the original call discarded the return value.
    string text = stripper.getText(doc);
}
finally
{
    // Close the document even if text extraction throws.
    doc.close();
}
the latest version of the local data flow operation
reference: http://pdfhome.hope.com.cn/