# Extract and print the text of every page of a PDF using the legacy
# pdfminer / pdfminer3k API (PDFDocument is imported from pdfminer.pdfparser
# and wired to the parser by hand).
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfparser import PDFDocument
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice

# Open the PDF in binary mode. The `with` block guarantees the handle is
# closed even if parsing fails; everything that reads from the parser must
# therefore run inside it.
with open("banreport.pdf", "rb") as fp:
    # Create the PDF parser on top of the open file.
    parser = PDFParser(fp)

    # Create the object that represents the PDF document.
    doc = PDFDocument()

    # Connect the parser and the document object to each other.
    parser.set_document(doc)
    doc.set_parser(parser)

    # Initialize the document; the argument is presumably the password
    # (empty here) -- confirm against the installed pdfminer version.
    doc.initialize("")

    # Create the PDF resource manager.
    resource = PDFResourceManager()

    # Layout-analysis parameters (library defaults).
    laparam = LAParams()

    # Create the aggregator device that collects each page's layout.
    device = PDFPageAggregator(resource, laparams=laparam)

    # Create the page interpreter that feeds pages into the device.
    interpreter = PDFPageInterpreter(resource, device=device)

    # Walk over the pages exposed by the document object.
    for page in doc.get_pages():
        # Have the interpreter process the page...
        interpreter.process_page(page)

        # ...then fetch the resulting layout from the aggregator.
        layout = device.get_result()

        # Print every layout element that exposes text.
        for out in layout:
            if hasattr(out, "get_text"):
                print(out.get_text())
# Reading a PDF document with Python