I. Object-oriented programming
The code is divided into two parts; refer to the search-object design and constraints and to the service-layer design.
Package news;
/**
 * <p>Title: News search engine</p>
 * <p>Copyright: Copyright (c) 2003</p>
 * <p>Company: </p>
 * @author computer 99630 Shen
 * @version 1.0
 * Download: http://www.codefans.net
 */
Import java.io.IOException;
Import Org.apache.lucene.analysis.cn.ChineseAnalyzer; Import org.apache.lucene.document.Document; Import Org.apache.lucene.document.Field; Import Org.apache.lucene.index.IndexWriter;
public class Index {
IndexWriter _writer = null; Index () throws Exception {_writer = new IndexWriter ("C:\\news\\index", New Chineseanaly Zer (), true); }
/** * Add each piece of news to the index * @param URL of a news message * @param title of Title * @throws java.lang.Exception */void Addnews (String URL, String title) throws Exception {document _doc = new Document (); _doc.add (Field.text ("title", title)); _doc.add (field.unindexed ("url", url)); _writer.adddocument (_doc); }
/** * Optimize and clean resources * @throws java.lang.Exception */void Close () throws Exception {_writer.optimize (); _writer.close (); } }
Package news;
/**
 * <p>Title: News search engine</p>
 * <p>Copyright: Copyright (c) 2003</p>
 * <p>Company: </p>
 * @author computer 99630 Shen
 * @version 1.0
 * Download: http://www.codefans.net
 */
Import Java.util.Iterator; Import Java.util.Vector;
Import Com.heaton.bot.HTMLPage; Import Com.heaton.bot.HTTP; Import Com.heaton.bot.Link;
public class Htmlparse {HTTP _http = null; Public Htmlparse (http http) {_http = http; }
/** * Index Web page after parsing */public void start () { try { HtmlPage _page = new HtmlPage (_http); _page.open (_http.geturl (), NULL); Vector _links = _page.getlinks (); Index _index = new Index (); Iterator _it = _links.iterator (); int n = 0; while (_it.hasnext ()) { link _link = (link) _ It.next (); String _herf = input (_link.gethref (). Trim ()); String _title = input (_link.getprompt (). Trim ()); _index. Addnews (_herf, _title); n++; } System.out.println ("Total scan to" + N + "news"); _index.close (); } catch (Exception ex) { System.out.println (ex); & nbsp; } } /** * Solve Chinese problem in Java * @param str input Chinese * @return Decoded Chinese */public static string input (String str) { string temp = null; if (str! = null) { try { temp = new String (str.getbytes ("iso8859_1")); } catch (Exception e) { } } return temp; }
}
Package news;
/** * <p>title: News search Engine </p>
* <p>copyright:copyright (c) 2003</p> * <p>company: </p>
* @version 1.0 * @Download: http://www.codefans.net */
Import Com.heaton.bot.HTTP; Import Com.heaton.bot.HTTPSocket; Import com.heaton.bot.ISpiderReportable; Import com.heaton.bot.IWorkloadStorable; Import Com.heaton.bot.Spider; Import Com.heaton.bot.SpiderInternalWorkload;
public class Searcher implements Ispiderreportable { public static void main (string[] args) thro WS Exception { iworkloadstorable wl = new Spiderinternalworkload (); Searcher _searc her = new Searcher (); spider _spider = new Spider (_searcher, "/HTTP/ Www.chenshen.com/index.html ", new Httpsocket (), (+), WL); _spider.setmaxbody (100); _spider.start (); } Public boolean foundinternallink (String URL) { return false, } public Boo Lean foundexternallink (string url) { return false; } public Boolean foundotherlink (string URL) { return false; } public void Processpage (http http) { System.ou T. println ("Scan page:" + http.geturl ()); New Htmlparse (HTTP). Start (); } public void Completepage (http http, boolean error) { } public Boolean getremovequery () {&NB sp; return true; } public void Spidercomplete () { }}
Package newsserver;
Import java.io.IOException; Import Java.io.PrintWriter; Import javax.servlet.ServletException; Import Javax.servlet.http.HttpServlet; Import Javax.servlet.http.HttpServletRequest; Import Javax.servlet.http.HttpServletResponse;
Import Org.apache.lucene.analysis.Analyzer; Import Org.apache.lucene.analysis.cn.ChineseAnalyzer; Import org.apache.lucene.document.Document; Import Org.apache.lucene.queryParser.QueryParser; Import org.apache.lucene.search.Hits; Import Org.apache.lucene.search.IndexSearcher; Import Org.apache.lucene.search.Query;
/** * <p>title: News search Engine </p>
* <p>copyright:copyright (c) 2003</p> * <p>company: </p>
* @version 1.0 * @Download: http://www.codefans.net */
Public class Results extends HttpServlet {private static final String Content_Type = "text/html; CHARSET=GBK "; //INITIALIZE Global variables public void init () throws servletexception { }
//process The HTTP Get request public void doget (HttpServletRequest request, httpservletresponse response ) throws Servletexception, IOException { String QC = Request.getparameter ("Querycontent"); if (QC = = null) { QC = ""; } Else { QC = input (QC); } Response.setcontenttype (Content_Type); PrintWriter out = Response.getwriter (); try { Search (QC, out); } catch ( Exception ex) { System.out.println (ex.getmessage ()); } }
public void Search (String QC, printwriter out) throws Exception { //create an index from the index directory Indexsearcher _searcher = new Indexsearcher ("C:\\news\\index"); //creation of standard analyzers Analyzer Analyzer = new Chineseanalyzer (); //Query conditions String line = QC; //Query is an abstract class query query = Queryparser.parse (line, "title", analyzer);
out.println ("
Final int hits_per_page = 10; for (int start = 0; start < Hits.length (); start + = Hits_per_page) { int end = Math.min (Hits.length (), start + hits_per_page); for (int i = start; i < end; i++) { Document Doc = Hits.doc (i); String url = doc.get ("url"); if (url! = null) { Out.println ((i + 1) + "<a href=" + URL + "' >" +   ; Replace (Doc.get ("title"), QC) + " </a><br> "); } else { System.out.println ("not Found! "); } } } out.println ("< /body>
public string input (String str) {string temp = null; if (str = null) {try {temp = new String (str.getbytes ("iso8859_1")); } catch (Exception e) {}} return temp; }
public string Replace (string title, string keyword) {return title.replaceall (keyword, "<font color= ' Red ' >" + ke Yword + "</font>"); };
Clean up resources public void Destroy () {}}
Team) object-oriented design