Java course design: a search engine for the college website that crawls pages, builds an index (requires Chinese word segmentation), ranks results (optional), performs searches, and displays result summaries. Either a GUI or a Web interface is acceptable.

First, team introduction
| Student ID | Class | Name | Introduction |
| --- | --- | --- | --- |
| 201621123049 | Network 1612 | [Group leader] Wondsching | Passionate about cyber security |
| 201621123047 | Network 1612 | Chen Fangyi | Thoughtful, deep, and capable |
| 201621044079 | Network 1612 | Han Ye | Strong learning ability, a Korean-style cute person |
| 201621123055 | Network 1612 | Liu Bing | Known in the Fifth Community as "Brother Fa"; strong hands-on ability |
| 201621123054 | Network 1612 | Zhang Morning | In charge of all school clubs; football level 6 |
Second, project Git address

Code Cloud (Gitee) address

Third, project Git commit record
Fourth, main technologies used in the project

- HttpClient and Jsoup for crawling and parsing, with multi-threading (a sketch appears in the key code section below)
- Database access using the DAO pattern
- IKAnalyzer and Lucene for Chinese word segmentation and full-text indexing
- JavaScript/jQuery, Bootstrap, and the Web (JSP) front end

Fifth, highlights of the project

- Fault-tolerant handling is fairly complete
- The interface is attractive
- Query speed remains fast even though a large number of configuration files are used

Sixth, project function framework and main function flowchart
Seventh, operation of the project
Eighth, key code of the project
```java
// Crawl the entry page of the college site and collect the links in the navigation menu
try {
    Document doc = Jsoup.connect("http://cec.jmu.edu.cn/").get();
    Elements links = doc.select(".menu0_0_");
    for (Element link : links) {
        lis1.add(oriurl + link.attr("href"));
    }
} catch (IOException e1) {
    e1.printStackTrace();
}
```
```java
// Crawl a column page: collect the article links on it and, from the paging links,
// generate a URL for every page of the column
try {
    CloseableHttpResponse response = httpClient.execute(httpget, context);
    try {
        HttpEntity entity = response.getEntity();
        Document doc = Jsoup.parse(EntityUtils.toString(entity, "UTF8"));
        Elements links = doc.select(".c124907");
        for (Element link : links) {
            lis1.add(url + link.attr("href"));
        }
        // The paging link carries the total page count (group 1) and the column id (group 2)
        String pattern = "\\?a2t=([0-9]{1,})&a2p=[0-9]{1,}&a2c=10&urltype=tree.TreeTempUrl&wbtreeid=([0-9]{1,})";
        Elements links1 = doc.select("a[href]");
        for (Element link1 : links1) {
            String line = link1.attr("href");
            Pattern r = Pattern.compile(pattern);
            Matcher m = r.matcher(line);
            if (m.find()) {
                // System.out.println("Found value: " + m.group(0));
                int j = Integer.parseInt(m.group(1));
                if (j > 7) {
                    // Generate the URL of every page of this column
                    for (int k = 1; k < j + 1; k++) {
                        lis.add("?a2t=" + String.valueOf(j) + "&a2p=" + String.valueOf(k)
                                + "&a2c=10&urltype=tree.TreeTempUrl&wbtreeid=" + m.group(2));
                    }
                } else {
                    lis.add(m.group(0));
                }
            }
        }
    } finally {
        response.close();
    }
} catch (IOException e) {
    e.printStackTrace();
}
```
```java
// Crawl one page of a column and collect the article links on it
CloseableHttpResponse response = httpClient.execute(httpget, context);
try {
    HttpEntity entity = response.getEntity();
    Document doc = Jsoup.parse(EntityUtils.toString(entity, "utf8"));
    Elements links = doc.select(".c124907");
    for (Element link : links) {
        lis.add(link.attr("href"));
    }
```
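The technology list above mentions a multi-threaded crawler, but that part is not included in the key code. Below is a minimal sketch, not the project's actual implementation, of how the collected page URLs could be fetched concurrently with an ExecutorService; the class name ParallelCrawler, the pageUrls parameter, and the thread-pool size are all assumptions.

```java
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

// Hypothetical sketch: fetch many pages in parallel with a fixed thread pool
public class ParallelCrawler {
    // A single CloseableHttpClient is thread-safe and can be shared by all workers
    private final CloseableHttpClient httpClient = HttpClients.createDefault();

    public void crawlAll(List<String> pageUrls) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(8); // pool size is an assumption
        for (String url : pageUrls) {
            pool.submit(() -> {
                try (CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
                    Document doc = Jsoup.parse(EntityUtils.toString(response.getEntity(), "UTF-8"));
                    // In the real project the page would be parsed and stored via the DAO;
                    // here we only print the title
                    System.out.println(url + " -> " + doc.title());
                } catch (Exception e) {
                    e.printStackTrace(); // a failing page should not stop the other workers
                }
            });
        }
        pool.shutdown();
        pool.awaitTermination(10, TimeUnit.MINUTES);
    }
}
```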
```java
// Crawl an article page, then wrap its URL, title and content in a Crawl entity
// and save it to the database through the DAO
try {
    HttpEntity entity = response.getEntity();
    Document doc = Jsoup.parse(EntityUtils.toString(entity, "utf8"));

    Crawl crawl = new Crawl(httpget.getURI().toString(), doc.title().toString(), title);
    CrawlDaoImpl test = new CrawlDaoImpl();
    try {
        if (bool) {
            test.add(crawl);
            System.out.println(httpget.toString() + " added successfully");
        } else {
            System.out.println("add failed");
```
```properties
jdbc.url=jdbc:mysql://localhost:3306/test
jdbc.username=root
jdbc.password=root
jdbc.driver=com.mysql.jdbc.Driver
```
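The DAO code below relies on a DBUtils helper that is not shown in the report. A minimal sketch of what it might look like, assuming it loads the jdbc.properties keys above from the classpath and exposes the getConnection() and close(rs, ps, conn) methods used by the DAO:

```java
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;

public class DBUtils {
    private static final Properties PROPS = new Properties();

    static {
        try (InputStream in = DBUtils.class.getClassLoader().getResourceAsStream("jdbc.properties")) {
            PROPS.load(in);
            Class.forName(PROPS.getProperty("jdbc.driver")); // register the MySQL driver
        } catch (Exception e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    public static Connection getConnection() throws SQLException {
        return DriverManager.getConnection(
                PROPS.getProperty("jdbc.url"),
                PROPS.getProperty("jdbc.username"),
                PROPS.getProperty("jdbc.password"));
    }

    // Close the JDBC resources in reverse order, ignoring individual failures
    public static void close(ResultSet rs, Statement ps, Connection conn) {
        try { if (rs != null) rs.close(); } catch (SQLException ignored) { }
        try { if (ps != null) ps.close(); } catch (SQLException ignored) { }
        try { if (conn != null) conn.close(); } catch (SQLException ignored) { }
    }
}
```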
```java
// Look up a single crawled record by its primary key
@Override
public Crawl findById(int id) throws SQLException {
    Connection conn = null;
    PreparedStatement ps = null;
    ResultSet rs = null;
    Crawl p = null;
    String sql = "select url,abs,description from crawl where id=?";
    try {
        conn = DBUtils.getConnection();
        ps = conn.prepareStatement(sql);
        ps.setInt(1, id);
        rs = ps.executeQuery();
        if (rs.next()) {
            p = new Crawl();
            p.setId(id);
            p.setUrl(rs.getString(1));
            p.setAbs(rs.getString(2));
            p.setDescription(rs.getString(3));
        }
    } catch (SQLException e) {
        e.printStackTrace();
        throw new SQLException("*");
    } finally {
        DBUtils.close(rs, ps, conn);
    }
    return p;
}
```
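The index-building code below calls dao.findAll(), which is not included in the report. A possible implementation that would sit in CrawlDaoImpl alongside findById and follow the same pattern; the exact column list in the SQL is an assumption:

```java
// Load every crawled record so it can be indexed by Lucene
@Override
public List<Crawl> findAll() throws SQLException {
    List<Crawl> list = new ArrayList<>();
    String sql = "select id,url,abs,description from crawl"; // column names assumed
    Connection conn = null;
    PreparedStatement ps = null;
    ResultSet rs = null;
    try {
        conn = DBUtils.getConnection();
        ps = conn.prepareStatement(sql);
        rs = ps.executeQuery();
        while (rs.next()) {
            Crawl p = new Crawl();
            p.setId(rs.getInt(1));
            p.setUrl(rs.getString(2));
            p.setAbs(rs.getString(3));
            p.setDescription(rs.getString(4));
            list.add(p);
        }
    } catch (SQLException e) {
        e.printStackTrace();
        throw new SQLException("*");
    } finally {
        DBUtils.close(rs, ps, conn);
    }
    return list;
}
```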
```java
public class IndexManager {
    @Test
    public void createIndex() throws Exception {
        // Collect the crawled data from the database
        CrawlDao dao = new CrawlDaoImpl();
        List<Crawl> list = dao.findAll();
        // Wrap each record in a Lucene Document
        List<Document> docList = new ArrayList<>();
        Document document;
        for (Crawl crawl : list) {
            document = new Document();
            // Store.YES: the field value is stored in the index and can be returned with the hit
            Field id = new IntField("id", crawl.getId(), Store.YES);
            Field url = new StoredField("url", crawl.getUrl());
            Field abs = new StoredField("abs", crawl.getAbs());
            Field description = new TextField("description", crawl.getDescription(), Store.YES);
            document.add(id);
            document.add(url);
            document.add(abs);
            document.add(description);
            docList.add(document);
        }
        // Create the analyzer: the standard analyzer could be used,
        // but IKAnalyzer is chosen for Chinese word segmentation
        // Analyzer analyzer = new StandardAnalyzer();
        Analyzer analyzer = new IKAnalyzer();
        // Create the IndexWriter
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
        // Location of the index library on disk
        File indexFile = new File("c:\\test1\\aaa\\");
        Directory directory = FSDirectory.open(indexFile);
        IndexWriter writer = new IndexWriter(directory, cfg);
        // Write every Document into the index library through the IndexWriter
        for (Document doc : docList) {
            writer.addDocument(doc);
        }
        writer.close();
    }
}
```
```java
public class IndexSearch {
    List<Crawl> lis1 = new ArrayList<>();

    public List doSearch(Query query) throws InvalidTokenOffsetsException {
        // Create the IndexSearcher, pointing at the index library built by IndexManager
        try {
            File indexFile = new File("c:\\test1\\aaa\\");
            Directory directory = FSDirectory.open(indexFile);
            IndexReader reader = DirectoryReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);
            // Search the index library; the second parameter is the number of top records to return
            TopDocs topDocs = searcher.search(query, 20);
            // Total number of records matching the query
            int count = topDocs.totalHits;
            // ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            String filed = "description";
            TopDocs top = searcher.search(query, 100);
            QueryScorer score = new QueryScorer(query, filed); // scorer used for highlighting
            // Custom highlight tag
            SimpleHTMLFormatter fors = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");
            Highlighter highlighter = new Highlighter(fors, score);
            // highlighter.setMaxDocCharsToAnalyze(10); // limit the number of characters analyzed for highlighting
            for (ScoreDoc sd : topDocs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                String description = doc.get(filed);
                // All token information of the analyzed field can be obtained from the TokenStream
                TokenStream token = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc,
                        "description", new IKAnalyzer(true));
                // Build a fragmenter from the scorer; choose the most appropriate fragmenter here
                Fragmenter fragment = new SimpleSpanFragmenter(score);
                highlighter.setTextFragmenter(fragment);
                // highlighter.setTextFragmenter(new SimpleFragmenter()); // sets the number of characters returned per fragment
                // Get the highlighted fragment; its length can be limited
                String str = highlighter.getBestFragment(token, description);
                Crawl crawl = new Crawl();
                crawl.setDescription(str);
                crawl.setAbs(doc.get("abs"));
                crawl.setUrl(doc.get("url"));
                lis1.add(crawl);
            }
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return lis1;
    }
}
```
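The report does not show how the Query passed to doSearch is built. One plausible way, assuming the classic QueryParser from Lucene 4.10.3 and the "description" field indexed above; the class name SearchDemo, the sample keyword, and the Crawl getters are only for illustration:

```java
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class SearchDemo {
    public static void main(String[] args) throws Exception {
        // Segment the user's keywords with the same analyzer used at index time
        Analyzer analyzer = new IKAnalyzer();
        QueryParser parser = new QueryParser(Version.LUCENE_4_10_3, "description", analyzer);
        Query query = parser.parse("计算机工程学院"); // sample keyword, purely illustrative
        List<Crawl> results = new IndexSearch().doSearch(query);
        for (Crawl c : results) {
            // Each hit carries the URL, the abstract, and the highlighted description
            System.out.println(c.getUrl() + " : " + c.getDescription());
        }
    }
}
```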
<div id="test"> </div> <form action="./query2.jsp" method="GET"> <div class="search-wrapper"> <div class="input-holder"> <input type="text" class="search-input" placeholder="" name="name"/> <button class="search-icon" onclick="searchToggle(this, event);"><span></span></button> </div> <span class="close" onclick="searchToggle(this, event);"></span> <div class="result-container"> </div> </div> </form>
<script src= "Js/jquery-1.11.0.min.js" type= "Text/javascript" ></script><script type= "text/ JavaScript > function searchtoggle (obj, evt) {var container = $ (obj). Closest ('. Search-wrapper '); if (!container.hasclass (' active ')) {container.addclass (' active '); Evt.preventdefault (); } else if (Container.hasclass (' active ') && $ (obj). Closest ('. Input-holder '). Length = = 0) {Contai Ner.removeclass (' active '); Container.find ('. Search-input '). Val ('); Container.find ('. Result-container '). FadeOut ((), function () {$ (this). empty ();}); }} function Submitfn (obj, evt) {value = $ (obj). Find ('. Search-input '). Val (). Trim (); _html = "Yup yup! Your search text sounds like this: "; if (!value.length) {_html = "Yup yup! Add some text friend:D "; } else{_html + = "<b>" + value + "</b>"; } $ (obj). Find ('. Result-container '). html (' <span> ' + _html + ' </span> '); $ (obj). Find ('. Result-container '). FadeIn (100); Evt.preventdefault (); }</script>
<script type="text/javascript">$(function(){ var Count = "<%=i %>";//记录条数 var tmp = "<%=test %>"; var PageSize=5;//设置每页示数目 var PageCount=Math.ceil(Count/PageSize);//计算总页数 var currentPage =1;//当前页,默认为1。 //造个简单的分页按钮 for(var i=1;i<=PageCount;i++){ if(PageCount==1){ }//如果页数为1的话,那么我们就是不分页 else{ var pageN='<li style=\"font-size:30px\"><a href="?name='+tmp+'&a='+i+'">'+i+'</a></li>'; $('.pagination').append(pageN); } } //显示默认页(第一页)}); </script>
```jsp
<% String d = request.getParameter("a");
   out.print(d + "<br>");
   int b = 0; int k = 0;
   // Show the first page (five results) when no page parameter "a" is present
   if (i != 0 && d == null) {
       for (Crawl crawl : lis) {
           if (5 > k && k >= 0) {
               out.print("
```
Ninth, areas to improve and new ideas

Variable naming is not very standardized. By modifying only a few parameters, the project could be adapted into a search engine that crawls other websites.
Team member task assignments
| Name | Task |
| --- | --- |
| Wondsching | Retrieval with Lucene and IKAnalyzer, part of the front end, and the module interfaces |
| Chen Fangyi | Crawling and parsing with HttpClient and Jsoup, part of the database work |
| Han Ye | Storing the content parsed by Jsoup using the database DAO pattern, part of the front end and the logo design |
| Liu Bing | Design of the front-end interface with Bootstrap and JSP, and back-end code implementation |
| Zhang Morning | Design of the front-end interface with jQuery and JSP, and back-end code implementation |
Java Course design Team (search engine)