Six implementation methods of Lucene range search

Source: Internet
Author: User
Tags bitset

 

 

Six implementation methods of range search

When you want to restrict a query by some rule (such as a time range), Lucene offers many ways to implement it. More choices mean more flexibility, but also more opportunities to choose badly. The following code demonstrates the usage and performance of six filtering approaches and adds a recommendation for each.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;

/**
* Test the filter performance.
* Tests show that rangequery-based queries are slow. Filter-based queries provide greater flexibility and speed.
*/
Public class filterperformancetests
{

Private Static indexreader reader;
Private Static indexsearcher searcher;

Public static void main (string [] ARGs) throws exception
{
Reader = indexreader. Open ("/indexes/Enron ");
Searcher = new indexsearcher (Reader );

/// The query to run in all our tests
Termquery TQ = new termquery (new term ("contents", "drink "));

// The filter criteria used by all tests
String filterfield = "date ";
String lowerrange = "20000101 ";
String upperrange = "20001012 ";
Int numqueriespertest = 100;

Timequery ("plain term query", TQ, null, numqueriespertest );

/*
* Method 1: booleanquery with mandatory termquery and rangequery
*
* Average Time: 22.5 MS
* Changed search result score: Yes
* Note: not recommended.
* If the range of rangequery search is too large, the system throws the "too many clses" exception!
* Due to the lack of filter term, its performance is unsatisfactory.
* However, this method is used a lot, because it is the only method that can fully use queryparser.
* Syntax. It is used by people who have not read the Lucene API.
*/
Rangequery rq1 = new rangequery (new term (filterfield, lowerrange ),
New term (filterfield, upperrange), true );
Booleanquery bq1 = new booleanquery ();
Bq1.add (New booleanclause (TQ, booleanclause. occur. Must ));
Bq1.add (New booleanclause (rq1, booleanclause. occur. Must ));
Timequery ("booleanquery with range", bq1, null, numqueriespertest );

/*
* Method 2: termquery with filter passed to searcher
*
* Average Time: 4.53 MS
* Changed search result score: No
* Annotation: recommended.
*/
Rangefilter rf2 = new rangefilter (filterfield, lowerrange, upperrange, true, true );
Timequery ("query + rangefilter", TQ, rf2, numqueriespertest );

/*
* Method 3: filteredquery using termquery and rangefilter
*
* Average Time: 4.38 MS
* Changed search result score: No
* Annotation: recommended. The fastest choice and the most flexible use, because many filters can be encapsulated in a query.
*/
Rangefilter Rf3 = new rangefilter (filterfield, lowerrange, upperrange, true, true );
Filteredquery fq3 = new filteredquery (TQ, Rf3 );
Timequery ("filteredquery with rangefilter", fq3, null, numqueriespertest );

/*
* Method 4: booleanquery with mandatory termquery and constantscorequery (takes a filter)
*
* Average Time: 4.85 MS
* Changed search result score: Yes
* Annotation: A Way to query a filter table. (Score is a constant ).
* This is not a true filter because it will be used as a query for running.
*/
Rangefilter rf4 = new rangefilter (filterfield, lowerrange, upperrange, true, true );
Constantscorequery csq4 = new constantscorequery (rf4 );
Booleanquery bq4 = new booleanquery ();
Bq4.add (New booleanclause (TQ, booleanclause. occur. Must ));
Bq4.add (New booleanclause (csq4, booleanclause. occur. Must ));
Timequery ("constantscorequery", bq4, null, numqueriespertest );

/*
* Method 5: booleanquery with mandatory termquery and constantscorerangequery
*
* Average Time: 4.68 MS
* Changed search result score: Yes
* Annotation: A simple encoding method. Constantscorequery encapsulates a rangefilter internally.
*/
Constantscorerangequery crq5 = new constantscorerangequery (filterfield, lowerrange, upperrange, true, true );
Booleanquery bq5 = new booleanquery ();
Bq5.add (New booleanclause (TQ, booleanclause. occur. Must ));
Bq5.add (New booleanclause (crq5, booleanclause. occur. Must ));
Timequery ("constantscorerangequery", bq5, null, numqueriespertest );

/*
* Method 6: termquery with filter of queryfilter wrapping a rangequery
*
* Average Time: 0.94 MS
* Changed search result score: No
* Note: This query performs faster, but it is unfair to other queries. Because queryfilter has a rangequery,
* It caches the query results and stores them in a bitset. This bitset is reused in loop search.
* In a more simulated test or in a real environment, the requirements for setting and filtering applications change,
* This cache is meaningless and is not recommended.
* In addition, the use of the internal rangequery will also lead to "too many clauses" exceptions.
*
**/
Rangequery rq6 = new rangequery (new term (filterfield, lowerrange ),
New term (filterfield, upperrange), true );
Queryfilter qf6 = new queryfilter (rq6 );
Timequery ("query + queryfilter wrapping a rangequery", TQ, qf6, numqueriespertest );

Searcher. Close ();
Reader. Close ();
}

Private Static void timequery (string querytype, query TQ,
Filter filter, int numloops) throws ioexception
{
Long start = system. currenttimemillis ();
Int numdocs = 0;
Int top3docs [] = new int [3];
Float top3scores [] = new float [3];
For (int l = 0; L <numloops; l ++)
{
Hits H = searcher. Search (TQ, filter );
Numdocs = H. Length ();

For (INT I = 0; I <math. Min (top3docs. length, numdocs); I ++)
{
H.doc (I). Get ("title ");
If (L = 0)
{
Top3docs [I] = H. ID (I );
Top3scores [I] = H. Score (I );
}
}
}
Long end = system. currenttimemillis ();
Float Ave = (float) (end-Start)/(float) numloops;
System. Out. println (querytype + "took AVG" + Ave + "millis found. numdocs =" + numdocs );
System. Out. Print ("/t Top Docs [score] = ");
For (INT I = 0; I <top3docs. length; I ++)
{
System. Out. Print (top3docs [I] + "[" + top3scores [I] + "]/t ");
}
System. Out. println ();
}

}

From http://hi.baidu.com/got_from_jlu/blog/item/54ccc8c7a0f862c039db49b0.html

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any of the content confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.