PDF.js lets you view PDF documents directly in an HTML page. It is a very powerful open-source PDF reading and parsing library that renders PDF files onto a canvas. PDF.js mainly consists of two library files, pdf.js and pdf.worker.js: the former is responsible for the API, and the latter for the core parsing.
First, include the pdf.js file: <script type="text/javascript" src="pdf.js"></script>
Most of the PDF.js API is promise-based; the PDFJS.getDocument(url) method returns a promise:
PDFJS.getDocument('helloworld.pdf').then(function (pdf) { /* the loaded document is available as pdf here */ });
A PDF is parsed through pdf.getPage(pageNumber), which also returns a promise, so you can parse the PDF page by page:
pdf.getPage(1).then(function (page) { /* the loaded page is available as page here */ });
Website address: http://mozilla.github.io/pdf.js/
Render page
Each PDF page has its own viewport, which defines the size in pixels (at 72 DPI) and the initial rotation. By default, the viewport is scaled to the original size of the PDF, but you can change this by modifying the viewport. When a viewport is created, an initial transformation matrix is also created; it takes into account the desired scale and rotation, and it converts the coordinate system (the 0,0 point of a PDF document is at the bottom left, whereas the canvas 0,0 point is at the top left).
var scale = 1.5;
var viewport = page.getViewport(scale);
var canvas = document.getElementById('the-canvas');
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;
var renderContext = { canvasContext: context, viewport: viewport };
page.render(renderContext);
You can also customize the canvas size:
var desiredWidth = 100; /* example target width in pixels */
var viewport = page.getViewport(1);
var scale = desiredWidth / viewport.width;
var scaledViewport = page.getViewport(scale);
An example from the official documentation:
var url = '//cdn.mozilla.net/pdfjs/helloworld.pdf';
PDFJS.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';
var loadingTask = PDFJS.getDocument(url);
loadingTask.promise.then(function (pdf) {
  console.log('PDF loaded');
  var pageNumber = 1;
  pdf.getPage(pageNumber).then(function (page) {
    console.log('Page loaded');
    var scale = 1.5;
    var viewport = page.getViewport(scale);
    var canvas = document.getElementById('the-canvas');
    var context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    var renderContext = { canvasContext: context, viewport: viewport };
    var renderTask = page.render(renderContext);
    renderTask.then(function () {
      console.log('Page rendered');
    });
  });
}, function (reason) {
  console.error(reason);
});
In addition, larger PDF files can be loaded in base 64 encoding, for example:
varPdfdata =Atob (' Jvberi0xljckcjegmcbvymogicugzw50cnkgcg9pbnqkpdwkicavvhlwzsavq2f0ywxvzwog ' + ' ic9qywdlcyayidagugo+ Pgplbmrvymokcjigmcbvymokpdwkicavvhlwzsavugfnzxmkicav ' + ' twvkawfcb3ggwyawidagmjawidiwmcbdciagl0nvdw50idekicavs2lkcybbidmgmcbsif0k ' + ' Pj4kzw5kb2jqcgozidagb2jqcjw8ciagl1r5cgugl1bhz2ukicavugfyzw50idigmcbsciag ' + ' l1jlc291cmnlcya8paogicagl0zvbnqgpdwkicagicagl0yxidqgmcbsiaogicagpj4kica+ ' + ' pgogic9db250zw50cya1idagugo+ Pgplbmrvymokcjqgmcbvymokpdwkicavvhlwzsavrm9u ' + ' Daogic9tdwj0exblic9uexblmqogic9cyxnlrm9udcavvgltzxmtum9tyw4kpj4kzw5kb2jq ' + ' Cgo1idagb2jqicalihbhz2ugy29udgvudao8paogic9mzw5ndgggndqkpj4kc3ryzwftckju ' + ' Cjcwiduwifreci9gmsaxmibuzgoosgvsbg8sihdvcmxkiskgvgokrvqkzw5kc3ryzwftcmvu ' + ' zg9iagokehjlzgowidykmdawmdawmdawmca2ntuznsbmiaowmdawmdawmdewidawmdawig4g ' + ' Cjawmdawmdawnzkgmdawmdagbiakmdawmdawmde3myawmdawmcbuiaowmdawmdawmzaxidaw ' + ' mdawig4gcjawmdawmdazodagmdawmdagbiakdhjhawxlcgo8paogic9taxplidykicavum9v ' + ' dcaxidagugo+ pgpzdgfydhhyzwykndkyciulru9g ');
PDFJS.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';
var loadingTask = PDFJS.getDocument({ data: pdfData });
loadingTask.promise.then(function (pdf) {
  console.log('PDF loaded');
  var pageNumber = 1;
  pdf.getPage(pageNumber).then(function (page) {
    console.log('Page loaded');
    var scale = 1.5;
    var viewport = page.getViewport(scale);
    var canvas = document.getElementById('the-canvas');
    var context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    var renderContext = { canvasContext: context, viewport: viewport };
    var renderTask = page.render(renderContext);
    renderTask.then(function () {
      console.log('Page rendered');
    });
  });
}, function (reason) {
  console.error(reason);
});
Handling PDF page turning (previous/next page):
// If absolute URL from the remote server is provided, configure the CORS
// header on the server.
var url = '//cdn.mozilla.net/pdfjs/tracemonkey.pdf';

// The workerSrc property shall be specified.
PDFJS.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

var pdfDoc = null,
    pageNum = 1,
    pageRendering = false,
    pageNumPending = null,
    scale = 0.8,
    canvas = document.getElementById('the-canvas'),
    ctx = canvas.getContext('2d');

/**
 * Get page info from document, resize canvas accordingly, and render page.
 * @param num Page number.
 */
function renderPage(num) {
  pageRendering = true;
  pdfDoc.getPage(num).then(function (page) {
    var viewport = page.getViewport(scale);
    canvas.height = viewport.height;
    canvas.width = viewport.width;
    var renderContext = { canvasContext: ctx, viewport: viewport };
    var renderTask = page.render(renderContext);
    renderTask.promise.then(function () {
      pageRendering = false;
      if (pageNumPending !== null) {
        renderPage(pageNumPending);
        pageNumPending = null;
      }
    });
  });
  document.getElementById('page_num').textContent = num;
}

function queueRenderPage(num) {
  if (pageRendering) {
    pageNumPending = num;
  } else {
    renderPage(num);
  }
}

function onPrevPage() {
  if (pageNum <= 1) {
    return;
  }
  pageNum--;
  queueRenderPage(pageNum);
}
document.getElementById('prev').addEventListener('click', onPrevPage);

function onNextPage() {
  if (pageNum >= pdfDoc.numPages) {
    return;
  }
  pageNum++;
  queueRenderPage(pageNum);
}
document.getElementById('next').addEventListener('click', onNextPage);

PDFJS.getDocument(url).then(function (pdfDoc_) {
  pdfDoc = pdfDoc_;
  document.getElementById('page_count').textContent = pdfDoc.numPages;
  renderPage(pageNum);
});
About using the page object:
Looking at the parsed result, we can see the methods that this object provides:
Method |
Return |
getAnnotations |
A promise that is resolved with an {Array} of the annotation objects. |
getTextContent |
A promise that is resolved with a TextContent object that represents the page text content. |
getViewport |
Contains 'width' and 'height' properties along with transforms required for rendering. |
render |
An object that contains the promise, which is resolved when the page finishes rendering. |
We can try calling the getTextContent method and printing out the result:
pdf.getPage(1).then(function (page) { page.getTextContent().then(function (textContent) { console.log(textContent); }); });
The output format is roughly as follows:
{ "items": [ { "str": "XXX", "dir": "XXX", "width": xxx, "height": xxx, "transform": [ 48, 0, 0, 48, 45.32495, 679.04 ], "fontName": "g_d0_f1" }, { "str": "", "dir": "ltr", "width": 9.600000000000001, "height": 2304, "transform": [ 48, 0, 0, 48, 285.325, 679.04 ], "fontName": "g_d0_f2" } ], "styles": { "g_d0_f1": { "fontFamily": "monospace", "ascent": 1.05810546875, "descent": -0.26171875, "vertical": false }, "g_d0_f2": { "fontFamily": "sans-serif", "ascent": 0.74365234375, "descent": -0.25634765625 } } }
PDF.js can extract the string, position, and font of each piece of text on every page.
viewer.js: http://mozilla.github.io/pdf.js/web/viewer.html — In the official viewer.html, the bottom layer is a canvas whose content is the same as the PDF (it is produced by the page.render method described above). On top of that layer is a text layer: the positions and styles of the text are obtained through page.getTextContent(), and the layer is then overlaid on the canvas.
We can use the official viewer.html demo directly, then adjust its styles and remove the features we do not need — simple and crude. We only need to append a parameter to the link, for example: http://xxxx/viewer.html?file=xxxx.pdf
Viewing PDF files online: how to use PDF.js