For work, I needed to implement full-text search across a large number of websites using PHP. The most popular full-text search engine library is Lucene, a subproject of Apache Jakarta. It provides a simple and practical API with which you can perform full-text retrieval over any text-based data, including database content.
Because PHP itself supports calling external Java classes, you can first write a class in Java that implements two methods by invoking the Lucene API:
* public String createIndex(String indexDir_path,String dataDir_path)
* public String searchword(String ss,String index_path)
Here, createIndex is the method that creates the index. It takes two parameters, indexDir_path (the directory where the index files are written) and dataDir_path (the directory containing the files to be indexed), and returns a string listing the indexed files. The other method, searchword, searches the index stored in the directory given by index_path for the keyword passed in as ss, and returns all the files that were retrieved.
Here is the source code; it is very simple and you can use it as a reference: Txtfileindexer.java
package testlucene;

import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
public class Txtfileindexer ... {
Public String test () ... {
Return "test is OK Hohoho";
}
/**//**
* @param args
*/
public string CreateIndex (string indexdir_path,string datadir_path) throws Exception ... {
String result = "";
File Indexdir = new file (Indexdir_path);
File DataDir = new file (Datadir_path);
Analyzer Luceneanalyzer = new StandardAnalyzer ();
file[] datafiles = Datadir.listfiles ();
IndexWriter indexwriter = new IndexWriter (indexdir,luceneanalyzer,true);
Long starttime = new Date (). GetTime ();
for (int i=0 i < datafiles.length; i++) ... {
if (datafiles [I].isfile () && datafiles[i].getname (). EndsWith (". html")) ... {
result = = "Indexing file" + datafiles[i].getcanonicalpath () + "<br/>";
Document document = new document ();
Reader Txtreader = new FileReader (datafiles[i]);
Document.add (Field.text ("path", Datafiles [I].getcanonicalpath ()));
Document.add (Field.text ("Contents", Txtreader));
Indexwriter.adddocument (document);
}
}
Indexwriter.optimize ();
Indexwriter.close ();
Long endtime = new Date (). GetTime ();
result = "It takes" + (Endtime-starttime)
+ ' milliseconds to ' create index for the ' files in directory '
+ Datadir.getpath ();
return result;
}
public string Searchword (string ss,string index_path) throws Exception ... {
String querystr = SS;
String result = "result:<br/>";
This is the directory of hosts the Lucene index
File Indexdir = new file (Index_path);
Fsdirectory directory = fsdirectory.getdirectory (indexdir,false);
Indexsearcher searcher = new Indexsearcher (directory);
if (!indexdir.exists ()) ... {
result = "The Lucene index isn't exist";
return result;
}
Term Term = new Term ("Contents", Querystr.tolowercase ());
Termquery lucenequery = new Termquery (term);
Hits Hits = Searcher.search (lucenequery);
for (int i = 0; i < hits.length (); i++) ... {
Document document = Hits.doc (i);
result = "<br/><a href= ' getfile.php?w=" +ss+ "&f=" +document.get ("path") + "' >file:" + document.get (" Path ") +" </a>n ";
}
return result;
}
}