php通過調用lucene庫實現檢索應用。注意為下面安裝的php,tomcat,java,apache添加path路徑。
被檢索的資料存放在mysql資料庫中。這些資料是使用python從網路上爬取的,已儲存為json格式,可以直接用java讀取並插入資料庫。
用到的jar包。
1、gson-2.2.1.jar(用於直接將json格式轉化為java對象)
2、lucene-core-3.0.2.jar(lucene庫)
3、mysql-connector-java-5.1.37-bin.jar (java串連mysql驅動庫)
4、LuceneExample.jar(自己編寫的使用Lucene的樣本庫)
第一步:
安裝Java,配置好環境變數。將上面的jar包拷貝到java虛擬機器運行環境jre\lib\ext目錄下,這樣在虛擬機器運行時就會自動載入這些庫了。
第二步:
安裝tomcat。由於php調用java需要用到php-java-bridge,而下載下來的是JavaBridge.war,所以需要用tomcat來將下載的JavaBridge.war檔案解壓一下,具體方法為:將JavaBridge.war放在tomcat的webapps\目錄,啟動tomcat,此時tomcat會自動解壓webapps\下的JavaBridge.war,產生JavaBridge檔案夾,將這個檔案夾拷貝到第三步的apache運行目錄(htdocs\)。
第三步:
安裝php,安裝apache,將第二步中得到的JavaBridge檔案夾拷貝到htdocs\目錄。(JavaBridge檔案夾裡包含一些類似標頭檔的東西)
第四步:
環境已經基本建立好了,下面開始具體實施。
1、登入到mysql終端,利用SQL指令建立tiku資料庫,建立math資料庫表。
create database tiku;
use tiku;
create table math(index_num int(11) primary key not null auto_increment,question text not null,answer text);
建完表後可以 desc math; 查看一下。
2、終端中進入載入資料代碼的目錄D:\soft\yangyang\luc,資料檔案為out.data,為json格式。
編譯LoadData.java並執行,
javac LoadData.java
java LoadData
此時在mysql終端中查看tiku資料庫的math表,即可看到資料已存入資料庫表math中。
select * from math limit 2;
3、編譯產生自己編寫的使用Lucene的樣本庫
同樣在上述目錄中執行下面命令
javac LuceneExample.java 編譯產生class檔案
jar -cvf LuceneExample.jar LuceneExample.class 打包class檔案
接著將LuceneExample.jar檔案拷貝至上面說的jre\lib\ext目錄下。
4、編寫服務檔案
進入apache運行目錄htdocs\編寫test.php檔案來調用上面的LuceneExample.jar庫實現檢索。具體代碼見附錄
第五步:
開啟服務環境,查看運行結果:
1、首先保證開啟mysql服務,可以在cmd中運行命令 net start mysql
2、進入第三步拷貝到htdocs\下的JavaBridge\WEB-INF\lib目錄,雙擊運行JavaBridge.jar程式(如果雙擊無法運行,請在cmd中進入該目錄手動執行 java -jar JavaBridge.jar),選擇8080連接埠,確定。
3、進入apache的bin目錄下雙擊ApacheMonitor.exe開啟apache服務。
4、此時在瀏覽器中輸入 http://localhost/test.php 即可看到查詢結果(若為亂碼,請修改編碼方式為UTF-8)。
LoadData.java源碼
import java.io.BufferedReader;import java.io.InputStreamReader;import java.io.FileInputStream;import com.google.gson.reflect.TypeToken;import java.lang.reflect.Type;import com.google.gson.Gson;import java.sql.Connection;import java.sql.DriverManager;import java.sql.PreparedStatement;import java.util.ArrayList;import java.util.List;public class LoadData { public static void loadJson(String fileName){ try{ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "utf-8")); String data = br.readLine(); Gson gson = new Gson(); /* List<TiMu> tt = new ArrayList<TiMu>(); tt.add(new TiMu("fff", "fsdfsd")); tt.add(new TiMu("444", "3333232")); System.out.println(gson.toJson(tt)); */ Type type = new TypeToken<List<TiMu>>(){}.getType(); List<TiMu> ps = gson.fromJson(data, type); System.out.println(ps.toString()); //JDBC Section Class.forName("com.mysql.jdbc.Driver").newInstance(); Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/tiku", "root", "buptmm"); String sql="insert into math(question,answer) values(?,?)"; PreparedStatement preStmt =conn.prepareStatement(sql); for(TiMu ti:ps){ preStmt.setString(1,ti.getQ()); preStmt.setString(2,ti.getA()); preStmt.executeUpdate(); } } catch (Exception e) { e.printStackTrace(); } } public static void main(String[] args) { LoadData.loadJson("out.data"); }}public class TiMu{ private String Q; private String A; public TiMu(){} public TiMu(String q, String a){ this.Q = q; this.A = a; } public String getQ(){ return Q; } public void setQ(String Q){ this.Q = Q; } public String getA(){ return A; } public void setA(String A){ this.A = A; }}
LuceneExample.java源碼
import java.io.File;import java.sql.Connection;import java.sql.DriverManager;import java.sql.ResultSet;import java.sql.Statement;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriter.MaxFieldLength;import org.apache.lucene.index.Term;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.FuzzyQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.SimpleFSDirectory;import org.apache.lucene.util.Version;public class LuceneExample { public static final File INDEX_DIRECTORY = new File("./"); public void createIndex() { System.out.println("-- Indexing --"); try { //JDBC Section Class.forName("com.mysql.jdbc.Driver").newInstance(); //Assuming database bookstore exists Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/tiku", "root", "buptmm"); Statement stmt = conn.createStatement(); String sql = "select question from math"; ResultSet rs = stmt.executeQuery(sql); //Lucene Section Directory directory = new SimpleFSDirectory(INDEX_DIRECTORY); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); IndexWriter iWriter = new IndexWriter(directory, analyzer, true,MaxFieldLength.UNLIMITED); //Looping through resultset and adding to index file int count = 0; while(rs.next()) { Document doc = new Document(); //System.out.println("question=" + rs.getString("question")); doc.add(new Field("question", rs.getString("question"), Field.Store.YES, Field.Index.ANALYZED )); //doc.add(new Field("book_title", rs.getString("book_title"), Field.Store.YES, Field.Index.ANALYZED )); //doc.add(new 
Field("book_details", rs.getString("book_details"), Field.Store.YES, Field.Index.ANALYZED )); //Adding doc to iWriter iWriter.addDocument(doc); count++; } System.out.println(count+" record indexed"); //Closing iWriter iWriter.optimize(); iWriter.commit(); iWriter.close(); //Closing JDBC connection rs.close(); stmt.close(); conn.close(); } catch (Exception e) { e.printStackTrace(); } } public String search(String keyword) { System.out.println("-- Seaching --"); String result = ""; try { //Searching IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIRECTORY), true); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); //MultiFieldQueryParser is used to search multiple fields //String[] filesToSearch = {"question"}; QueryParser mqp = new QueryParser(Version.LUCENE_30, "question" , analyzer); Query query = mqp.parse(keyword);//search the given keyword //Query query = new FuzzyQuery(new Term("question", keyword), 0.01f); System.out.println("query >> " + keyword); TopDocs hits = searcher.search(query, 5); // run the query System.out.println("Results found >> " + hits.totalHits); for (int i = 0; i < hits.totalHits; i++) { Document doc = searcher.doc(hits.scoreDocs[i].doc);//get the next document System.out.println(doc.get("question")); result = doc.get("question"); break; } } catch (Exception e) { e.printStackTrace(); } return result; } public String getResult(String que){ //String que = search(question); if(que.equals("")) return ""; try{ //JDBC Section Class.forName("com.mysql.jdbc.Driver").newInstance(); Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/tiku", "root", "buptmm"); Statement stmt = conn.createStatement(); String sql = "select answer from math where question='" + que + "'"; System.out.println("sql = " + sql); ResultSet rs = stmt.executeQuery(sql); while(rs.next()) { return rs.getString("answer"); } } catch (Exception e) { e.printStackTrace(); } return ""; } 
public static void main(String[] args) { LuceneExample obj = new LuceneExample(); //creating index obj.createIndex(); //searching keyword //System.out.println("a1: " + obj.search("")); //using wild card serach String question = obj.search("5"); System.out.println("a1: " + question); System.out.println("a1: " + obj.getResult(question)); //using logical operator //obj.search("data1 OR data2"); //obj.search("data1 AND data2"); }}
test.php源碼
<?php
// Demo page: asks the Java LuceneExample class (through php-java-bridge) for
// the question that best matches a keyword, then prints it with its answer.

// Declare the charset up front so the Chinese text is not shown as mojibake.
header('Content-Type: text/html; charset=utf-8');

require_once("JavaBridge/java/Java.inc");

// Use the posted keyword when there is one; otherwise fall back to the demo keyword.
$keyword = (isset($_POST['question']) && $_POST['question'] !== '')
    ? $_POST['question']
    : "王阿姨";

$tf = new Java('LuceneExample');
// NOTE: this rebuilds the whole index on every request, which is fine for a
// demo but should be done once, offline, for any real data set.
$tf->createIndex();

$q = java_values($tf->search($keyword));
// Escape before printing: question/answer text may contain '<', '>' or '&'.
print "題目:" . htmlspecialchars((string)$q, ENT_QUOTES, 'UTF-8');
print "<br/>";
$a = java_values($tf->getResult($q));
print "答案:" . htmlspecialchars((string)$a, ENT_QUOTES, 'UTF-8');
?>
json資料
[{"Q": "王阿姨買了3千克龍眼和8千克西瓜一共花了46元。已知1千克西瓜的價錢正好是1千克龍眼的1/5。龍眼和西瓜的單價分別是多少元。(5分)", "A": "設1千克西瓜的價錢是x,那麼龍眼的價錢是5x,3*5x+8x=46,x=2,所以龍眼的單價是10元,西瓜的單價是2元"},{"Q": "有13個乒乓球,有12個品質相同,另有一個較輕一點,如果用天平稱,至少稱 次保證能找出這個乒乓球.", "A": "http://www.tiku.cn/q/1010405.html"}, {"Q": "有9瓶鈣片,次品的一瓶少了4片.用天平至少稱 次可以保證找出次品.", "A": "http://www.tiku.cn/q/1010406.html"}]