Find the coordinates of a keyword in a PDF file

Source: Internet
Author: User
Tags gety stringbuffer

1. Function description

The code below reads a PDF file, searches a specified page for a specified keyword, and returns the coordinates of that keyword on the page.

First, add the iText libraries (itextpdf.jar and itext.jar) to the project's classpath.

Get the number of pages in a PDF
Pdfreader reader= New Pdfreader (Generatepath);
int pagecount= reader.getnumberofpages ();


/**
Look for the specified text content in the file
@param reader
@param pagenumber
@param keyword
@return
@throws Exception
*/
public static Matchitem Matchpage (Com.itextpdf.text.pdf.PdfReader Reader,
Integer pagenumber, String keyword) throws Exception {
Keywordpositionlistener Renderlistener = new Keywordpositionlistener ();
Renderlistener.setkeyword (keyword);
Pdfreadercontentparser parse = new Pdfreadercontentparser (reader);
Rectangle Rectangle = reader.getpagesize (pagenumber);
Renderlistener.setpagenumber (pagenumber);
Renderlistener.setcurpagesize (Rectangle);
Parse.processcontent (PageNumber, Renderlistener);
Matchitem Matchitem = findkeyworditems (renderlistener, keyword);
The current page is not found, to the previous page query
if (null = = Matchitem) {pagenumber =pagenumber-1; Renderlistener.setpagenumber (pagenumber); Renderlistener.setcurpagesize (Rectangle); Parse.processcontent (PageNumber, Renderlistener); Matchitem = Findkeyworditems (renderlistener, keyword); } matchitem.setpagenum (pagenumber); return matchitem; }

/**
Find a matching keyword block
@param renderlistener
@param keyword
@return
*/
@SuppressWarnings ({"Unchecked", "Rawtypes"})
public static Matchitem Findkeyworditems (Keywordpositionlistener Renderlistener,
String keyword) {
First determine if there are any keywords on this page
List AllItems = Renderlistener.getallitems ();//All blocks list
StringBuffer sbtemp = new StringBuffer ("");
for (int i = 0; i < allitems.size (); i++) {//To concatenate all the block contents of a page together to form a string.
Sbtemp.append (((Matchitem) allitems.get (i)). GetContent ());
}
if (sbtemp.tostring (). LASTINDEXOF (keyword) = = 1) {//page-composed string no keywords, direct return
Return Renderlistener.getmatches (). get (0);
}
First case: An item with a keyword that exactly matches the block content
List matches = Renderlistener.getmatches ();
The second case: multiple blocks of content into a keyword, then one by one to match, assembled into a keyword
Sbtemp = new StringBuffer ("");
List tempitems = new ArrayList ();
for (int i = 0; i < allitems.size (); i++) {
1, the keyword exists in a block 2, the continuous block assembly = Keyword 3, avoid a block exact match keyword
Keywords China Mobile and block for middle, country, mobile
Key words Chinese people and block for China People's Republic this situation can not be resolved, nor allowed to exist
if (Keyword.indexof ((Matchitem) allitems.get (i)). GetContent ())! =-1
&&!keyword.equals (((Matchitem) allitems.get (i))
. GetContent ())) {
Tempitems.add (Allitems.get (i));
Sbtemp.append (((Matchitem) allitems.get (i)). GetContent ());
if (Keyword.indexof (sbtemp.tostring ()) = =-1) {//If the string and keywords are staged
When no longer matches
Sbtemp = new StringBuffer (
((Matchitem) allitems.get (i)). GetContent ());
Tempitems.clear ();
Tempitems.add (Allitems.get (i));
}
}
The third case: The keyword exists in the block
for (int j = 0; J < Allitems.size (); j + +) {
if ((Matchitem) Allitems.get (j)). GetContent (). INDEXOF (keyword)! =-1
&&!keyword.equals ((Matchitem) Allitems.get (j))
. GetContent ())) {
Matches.add (Allitems.get (j));
}
}
}
Return (Matchitem) matches.get (0);
}




import com.itextpdf.awt.geom.Rectangle2D;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;
import java.util.ArrayList;
import java.util.List;
public class Keywordpositionlistener implements Renderlistener {


Private list<matchitem> matches = new arraylist<matchitem> ();
Private list<matchitem> AllItems = new arraylist<matchitem> ();
Private Rectangle curpagesize;


/**
* Matching Keywords
*/
Private String keyword;
/**
* Matches the current page
*/
Private Integer pagenumber;


public void Begintextblock () {
Do nothing
}


public void RenderText (Textrenderinfo renderinfo) {
String content = Renderinfo.gettext ();
Content = Content.replace ("<", ""). Replace ("", "" "). Replace (" ("," "). Replace (" ("," ")." ("", "" "). Replace (" \ "", ""). ", "")
. replace (">", ""). Replace ("", "" "). Replace (") "," ""). Replace (")", ""). Replaces (",", ""). Replace (".", "" ")
. Replace (":", ""). Replace (":", ""). Replace ("", "");
Rectangle2d.float Textrectangle = Renderinfo.getdescentline (). Getboundingrectange ();
Matchitem item = new Matchitem ();
Item.setcontent (content);
Item.setpagenum (pagenumber);
Item.setpagewidth (Curpagesize.getwidth ());
Item.setpageheight (Curpagesize.getheight ());
Item.setx ((float) textrectangle.getx ());
Item.sety ((float) textrectangle.gety ());
if (content!=null && content!= "") {
if (content.equalsignorecase (keyword)) {
Matches.add (item);
}
}else{
Item.setcontent ("empty string");
}
Allitems.add (item);//Save All items First
}


public void Endtextblock () {
Do nothing
}


public void RenderImage (Imagerenderinfo renderinfo) {
Do nothing
}


/**
* Set the current page that needs to be matched
* @param pagenumber
*/
public void Setpagenumber (Integer pagenumber) {
This.pagenumber = pagenumber;
}


/**
* Set keywords that need to be matched, ignoring case
* @param keyword
*/
public void Setkeyword (String keyword) {
This.keyword = keyword;
}


/**
* Returns a list of matching results
* @return
*/
Public list<matchitem> getmatches () {
return matches;
}


void Setcurpagesize (Rectangle rect) {
This.curpagesize = rect;
}


Public list<matchitem> Getallitems () {
return allitems;
}


public void Setallitems (list<matchitem> allitems) {
This.allitems = AllItems;
}
}


/**
 * One text block found on a PDF page: its content, the x/y coordinates of the
 * block, and the number and size of the page it is on. All values are
 * {@code null} until set.
 */
public class MatchItem {
    private Integer pageNum;
    private Float x;
    private Float y;
    private Float pageWidth;
    private Float pageHeight;
    private String content;

    public Integer getPageNum() {
        return pageNum;
    }

    public void setPageNum(Integer pageNum) {
        this.pageNum = pageNum;
    }

    public Float getX() {
        return x;
    }

    public void setX(Float x) {
        this.x = x;
    }

    public Float getY() {
        return y;
    }

    public void setY(Float y) {
        this.y = y;
    }

    public Float getPageWidth() {
        return pageWidth;
    }

    public void setPageWidth(Float pageWidth) {
        this.pageWidth = pageWidth;
    }

    public Float getPageHeight() {
        return pageHeight;
    }

    public void setPageHeight(Float pageHeight) {
        this.pageHeight = pageHeight;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    @Override
    public String toString() {
        return "MatchItem [pageNum=" + pageNum + ", x=" + x + ", y=" + y
                + ", pageWidth=" + pageWidth + ", pageHeight=" + pageHeight
                + ", content=" + content + "]";
    }
}

After calling this method (matchPage), you have the coordinates of the keyword.



Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.