Using Java to read data from PDF files

Step 1: Download PDFBox 0.7.2. One download location is http://pdfhome.hope.com.cn/Resource.aspx?cid=63844604-5253-4ae1-b023-258c9e324061&rid=20cd8f94-1cee-40b6-a3df-0ef024f8e0d2 . Unzip the archive and put PDFBox-0.7.2.jar and PDFBox-0.7.2-log4j.jar from its lib directory on your classpath. (The source code and JAR files are also provided in the attachment below for your use.)

Step 2: Write a simple program that reads a PDF file (PdfReader.java):

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import org.pdfbox.pdmodel.PDDocument;
Import orgshortbox. util. writable textstripper; public class pdfreader {public void readfdf (string file) throws exception {
// Sort or not
Boolean sort = false;
// PDF file name
String pdffile = file;
// Enter the text file name
String textfile = NULL;
// Encoding method
String encoding = "UTF-8 ";
// Start page Extraction
Int startpage = 1;
// End number of extracted pages
Int endpage = integer. max_value;
// File input stream to generate a text file
Writer output = NULL;
// PDF document stored in memory
Pddocument document = NULL;
Try {
Try {
// First load the file as a URL, and then load the file from the local file system if an exception occurs. //
URL url = new URL (pdffile); // note that the parameter is not a previous version of URL. But a file.
Document = pddocument. Load (pdffile );
// Obtain the PDF file name
String filename = URL. GetFile ();
// Name the generated TXT file with the original PDF name
If (filename. Length ()> 4 ){
File outputfile = new file (filename. substring (0, filename
. Length ()-4)
+ ". Txt ");
Textfile = outputfile. getname ();
}
} Catch (malformedurlexception e ){
// If an exception occurs when loading as a URL, load the file from the file system. // note that the parameter is not a URL. But a file in a previous version.
Document = pddocument. Load (pdffile );
If (pdffile. Length ()> 4) {textfile = pdffile. substring (0, pdffile. Length ()-4)
+ ". Txt ";
}
}
// File input stream, which is written into the file inverted textfile
Output = new outputstreamwriter (New fileoutputstream (textfile ),
Encoding );
// Extract textstripper to extract text
Optional textstripper stripper = NULL;
Stripper = new jsontextstripper ();
// Set whether to sort
Stripper. setsortbyposition (SORT );
// Set the start page
Stripper. setstartpage (startpage );
// Set the end page
Stripper. setendpage (endpage );
// Call javastextstripper's writetext to extract and output the text
Stripper. writetext (document, output );
} Finally {
If (output! = NULL ){
// Close the output stream
Output. Close ();
}
If (document! = NULL ){
// Close the PDF document
Document. Close ();
}
}
}/**
* @ Param ARGs
*/
Public static void main (string [] ARGs ){
// Todo auto-generated method stub
Pdfreader = new pdfreader ();
Try {
// Obtain the springguide.pdf content from the edrive
Pdfreader. readfdf ("E: // springguide.pdf ");
} Catch (exception e ){
E. printstacktrace ();
}
} This completes reading data from PDF. Generate a TXT file with the same name in the directory where your PDF file is located.
The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion;
products and services mentioned on this page have no relationship with Alibaba Cloud. If you
find the content of the page confusing, please send us an email, and we will handle the problem
within 5 days of receiving your email.
If you find any instances of plagiarism from the community, please send an email to:
info-contact@alibabacloud.com
and provide relevant evidence. A staff member will contact you within 5 working days.