/*
* Use Lucene and XPDF to process PDF files
* */
package PDFBox;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Converts a PDF file to a plain-text file by running the external Xpdf
 * {@code pdftotext} command-line tool as a child process.
 *
 * <p>The converter executable is looked up in a configurable directory
 * (see {@link #setConvertor_stored_path(String)}). Every conversion method
 * blocks until the child process has finished and throws an
 * {@link IOException} if it could not be started or exited with a non-zero
 * status.
 */
public class Pdf2test {

    /** File name of the converter executable inside the converter directory. */
    private static final String CONVERTOR_NAME = "pdftotext";

    /**
     * Encoding name handed to the converter through {@code -enc}.
     * NOTE(review): presumably the local Xpdf installation must be configured
     * with GBK support for this to work - confirm against the deployment.
     */
    private static final String OUTPUT_ENCODING = "GBK";

    /** The PDF file to convert. */
    private final File pdffile;

    /** Directory that holds the converter executable; defaults to {@code E:\xpdf}. */
    private String convertorStoredPath = "E:\\xpdf";

    /**
     * Creates a converter for the PDF file at the given path.
     *
     * @param pdffile path of the PDF file to convert
     * @throws IOException declared for source compatibility with existing callers
     */
    public Pdf2test(String pdffile) throws IOException {
        this(new File(pdffile));
    }

    /**
     * Creates a converter for the given PDF file.
     *
     * @param file the PDF file to convert; must not be {@code null}
     */
    public Pdf2test(File file) {
        if (file == null) {
            throw new NullPointerException("file");
        }
        this.pdffile = file;
    }

    /**
     * Converts the PDF to a text file stored next to it, keeping the PDF's
     * layout. The target has the same name with the {@code .pdf} extension
     * replaced by {@code .txt} ({@code .txt} is appended when the source has
     * no {@code .pdf} extension), so the source file is never overwritten.
     *
     * @throws IOException if the converter cannot be run or reports failure
     */
    public void toTextFile() throws IOException {
        toTextFile(defaultTargetFile(), true);
    }

    /**
     * Converts the PDF to the text file at the given path, keeping the PDF's layout.
     *
     * @param targetfile path of the text file to write
     * @throws IOException if the converter cannot be run or reports failure
     */
    public void toTextFile(String targetfile) throws IOException {
        toTextFile(new File(targetfile), true);
    }

    /**
     * Converts the PDF to the text file at the given path.
     *
     * @param targetfile path of the text file to write
     * @param isLayout   {@code true} to keep the layout of the PDF in the text output
     * @throws IOException if the converter cannot be run or reports failure
     */
    public void toTextFile(String targetfile, boolean isLayout) throws IOException {
        toTextFile(new File(targetfile), isLayout);
    }

    /**
     * Converts the PDF to the given text file, keeping the PDF's layout.
     *
     * @param targetfile the text file to write
     * @throws IOException if the converter cannot be run or reports failure
     */
    public void toTextFile(File targetfile) throws IOException {
        toTextFile(targetfile, true);
    }

    /**
     * Converts the PDF to the given text file and waits for the converter to finish.
     *
     * @param targetfile the text file to write; must not be {@code null} and
     *                   must not be the PDF file itself
     * @param isLayout   {@code true} to keep the layout of the PDF in the text output
     * @throws IOException if the target is the source PDF, the converter cannot
     *                     be started, it exits with a non-zero status, or the
     *                     calling thread is interrupted while waiting
     */
    public void toTextFile(File targetfile, boolean isLayout) throws IOException {
        if (targetfile == null) {
            throw new NullPointerException("targetfile");
        }
        // Writing the text output over the PDF would destroy the source document.
        if (targetfile.getAbsoluteFile().equals(pdffile.getAbsoluteFile())) {
            throw new IOException("Target file must differ from the source PDF: " + targetfile);
        }

        String[] cmd = getCmd(targetfile, isLayout);
        Process process = new ProcessBuilder(cmd).redirectErrorStream(true).start();
        try {
            // The converter reads nothing from stdin; close it so it cannot wait on us.
            process.getOutputStream().close();
            // Drain the merged stdout/stderr so the child cannot block on a full pipe.
            drain(process.getInputStream());
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                throw new IOException(cmd[0] + " exited with status " + exitCode
                        + " while converting " + pdffile);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while waiting for " + cmd[0], e);
        } finally {
            process.destroy();
        }
    }

    /**
     * Returns the directory in which the converter executable is looked up.
     *
     * @return the converter directory as currently configured
     */
    public String getConvertor_stored_path() {
        return convertorStoredPath;
    }

    /**
     * Sets the directory in which the converter executable is looked up.
     * Surrounding whitespace is removed and a trailing backslash is appended
     * when the path does not already end with one.
     *
     * @param path the converter directory; must not be {@code null}
     */
    public void setConvertor_stored_path(String path) {
        if (path == null) {
            throw new NullPointerException("path");
        }
        String trimmed = path.trim();
        if (!trimmed.endsWith("\\")) {
            trimmed = trimmed + "\\";
        }
        this.convertorStoredPath = trimmed;
    }

    /**
     * Builds the argument vector used to launch the converter.
     *
     * @param targetfile the text file to write
     * @param isLayout   {@code true} to pass {@code -layout}
     * @return the executable path followed by its arguments; never contains
     *         empty entries
     */
    private String[] getCmd(File targetfile, boolean isLayout) {
        List<String> cmd = new ArrayList<String>();
        cmd.add(converterExecutable());
        // Only add the flag when requested: an empty argument would be taken
        // by the converter as a positional file name.
        if (isLayout) {
            cmd.add("-layout");
        }
        // Output text encoding.
        cmd.add("-enc");
        cmd.add(OUTPUT_ENCODING);
        // Do not print any messages or errors.
        cmd.add("-q");
        // Do not insert page breaks between pages.
        cmd.add("-nopgbrk");
        cmd.add(pdffile.getAbsolutePath());
        cmd.add(targetfile.getAbsolutePath());
        return cmd.toArray(new String[cmd.size()]);
    }

    /** Joins the converter directory and executable name, adding a separator if needed. */
    private String converterExecutable() {
        String dir = convertorStoredPath;
        if (dir.length() == 0 || dir.endsWith("\\") || dir.endsWith("/")) {
            return dir + CONVERTOR_NAME;
        }
        return dir + "\\" + CONVERTOR_NAME;
    }

    /** Derives the default output file: the PDF's sibling with a {@code .txt} extension. */
    private File defaultTargetFile() {
        String name = pdffile.getName();
        int extLength = ".pdf".length();
        boolean hasPdfExtension = name.length() > extLength
                && name.regionMatches(true, name.length() - extLength, ".pdf", 0, extLength);
        String base = hasPdfExtension ? name.substring(0, name.length() - extLength) : name;
        return new File(pdffile.getAbsoluteFile().getParentFile(), base + ".txt");
    }

    /** Reads the stream to its end, discarding the data, then closes it. */
    private static void drain(InputStream in) throws IOException {
        try {
            byte[] buffer = new byte[4096];
            while (in.read(buffer) != -1) {
                // Discard: the converter runs in quiet mode, output is not needed.
            }
        } finally {
            in.close();
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code Pdf2test <pdf-file> [text-file] [converter-dir]}. Without
     * {@code text-file} the output is written next to the PDF with a
     * {@code .txt} extension; without {@code converter-dir} the default
     * converter directory is used.
     *
     * @param args command-line arguments as described above
     */
    public static void main(String[] args) {
        if (args.length < 1 || args.length > 3) {
            System.err.println("Usage: Pdf2test <pdf-file> [text-file] [converter-dir]");
            System.exit(2);
            return;
        }
        try {
            Pdf2test p2t = new Pdf2test(args[0]);
            if (args.length == 3) {
                p2t.setConvertor_stored_path(args[2]);
            }
            if (args.length >= 2) {
                p2t.toTextFile(args[1]);
            } else {
                p2t.toTextFile();
            }
        } catch (IOException e) {
            System.err.println("Conversion failed: " + e.getMessage());
            System.exit(1);
        }
    }
}
// Use Lucene and XPDF to process PDF files