Brief introduction
Sphinx is an open-source full-text search engine that handles English text out of the box, so a stand-alone Sphinx build is already enough for English full-text indexing.
Sometimes, however, we also need Chinese word segmentation, and this is where SCWS comes in. Alternatively, Coreseek can be used to build a Chinese index; Coreseek's core is Sphinx.
1. Environment
PHP 5.5.23 + CentOS 6.5 + MySQL 5.1.73 (mysql Ver 14.14 Distrib 5.1.73)
2. Installing Sphinx
# Download, build and install Sphinx 2.3.1-beta under /usr/local/sphinx.
cd /usr/local/src
wget http://sphinxsearch.com/files/sphinx-2.3.1-beta.tar.gz
tar -zxf sphinx-2.3.1-beta.tar.gz
cd sphinx-2.3.1-beta

# Run exactly ONE of the two configure commands below.
# First configuration: MySQL was built from source under /usr/local/mysql.
./configure --prefix=/usr/local/sphinx --with-mysql=/usr/local/mysql
# Second configuration: MySQL was installed with yum (headers/libs in system paths).
# NOTE(review): on 64-bit CentOS the client libraries are usually in /usr/lib64/mysql/ - confirm.
# ./configure --prefix=/usr/local/sphinx --with-mysql-includes=/usr/include/mysql/ --with-mysql-libs=/usr/lib/mysql/

make && make install
First configure command: use this one if MySQL was installed from the source package.
Second configure command: use this one if MySQL was installed with yum.
3. Installing libsphinxclient
4. Installing the Sphinx PECL extension
# Build the PECL "sphinx" extension against the libsphinxclient installed
# under /usr/local/sphinx/libsphinxclient, then enable it in PHP.
wget https://pecl.php.net/get/sphinx-1.3.3.tgz
tar -zxf sphinx-1.3.3.tgz
cd sphinx-1.3.3
phpize
./configure --with-sphinx=/usr/local/sphinx/libsphinxclient --with-php-config=php-config
make && make install
# Register the extension with PHP.
echo extension=sphinx.so > /etc/php.d/sphinx.ini
5. Installing SCWS and its PHP extension
# Build and install the SCWS library under /usr/local/scws.
wget http://www.xunsearch.com/scws/down/scws-1.2.2.tar.bz2
tar -jxf scws-1.2.2.tar.bz2
cd scws-1.2.2
./configure --prefix=/usr/local/scws/
make && make install

# Build the bundled PHP extension (phpext/ lives inside the SCWS source tree).
cd ./phpext/
phpize
./configure --with-php-config=php-config
make && make install

# Enable the extension and set its defaults: UTF-8 charset and the
# directory that holds the dictionary and rule files.
echo extension = scws.so > /etc/php.d/scws.ini
echo scws.default.charset = utf-8 >> /etc/php.d/scws.ini
echo scws.default.fpath = /usr/local/scws/etc >> /etc/php.d/scws.ini
6. Installing the SCWS dictionary
# Download the simplified-Chinese UTF-8 dictionary and unpack it into the
# directory configured as scws.default.fpath.
wget http://www.xunsearch.com/scws/down/scws-dict-chs-utf8.tar.bz2
tar xvjf scws-dict-chs-utf8.tar.bz2 -C /usr/local/scws/etc/
7. Create the database tables and insert sample data
-- Sample schema and data for the Sphinx index: `userinfo` rows reference
-- `users` rows through `userinfo`.`userid` = `users`.`id`.

DROP TABLE IF EXISTS `userinfo`;
CREATE TABLE `userinfo` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `userid` int(11) unsigned NOT NULL DEFAULT '0',
  `addtime` datetime NOT NULL,
  -- NOTE(review): the original varchar length was lost in extraction; 255 is a safe stand-in.
  `post` varchar(255) NOT NULL DEFAULT '',
  `summary` text NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=17 DEFAULT CHARSET=utf8;

LOCK TABLES `userinfo` WRITE;
INSERT INTO `userinfo` VALUES
  (NULL, 1, '2012-06-01 00:24:54', 'OPS director', 'Ops'),
  (NULL, 2, '2014-08-19 10:24:54', 'Big account Manager', 'client'),
  (NULL, 3, '2015-08-01 12:24:54', 'Senior DBA Engineer', 'DBA'),
  (NULL, 4, '2013-08-01 00:24:54', 'Advanced PHP engineer', 'php');
UNLOCK TABLES;

DROP TABLE IF EXISTS `users`;
CREATE TABLE `users` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  -- NOTE(review): the original varchar length was lost in extraction; 255 is a safe stand-in.
  `username` varchar(255) NOT NULL DEFAULT '',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

LOCK TABLES `users` WRITE;
INSERT INTO `users` VALUES
  (NULL, 'Zhang San'),
  (NULL, 'Zhang Si'),
  (NULL, 'John Doe'),
  (NULL, 'Harry');
UNLOCK TABLES;
8. Configuring Sphinx
# sphinx.conf: one MySQL-backed source ("users"), one plain index built from
# it, plus indexer and searchd settings.

source users
{
    type                = mysql

    sql_host            = localhost
    sql_user            = root
    sql_pass            = 123456
    sql_db              = test
    sql_port            = 3306  # optional, default is 3306

    sql_query_pre       = SET NAMES utf8
    sql_query_pre       = SET SESSION query_cache_type=OFF

    # The first column (a.id) is the document id; the rest become fields/attributes.
    sql_query           = SELECT a.id, a.userid, b.username, UNIX_TIMESTAMP(a.addtime) AS addtime, a.post, a.summary FROM userinfo a LEFT JOIN users b ON a.userid = b.id

    sql_attr_uint       = userid
    sql_field_string    = username
    sql_field_string    = post
    sql_attr_timestamp  = addtime

    sql_ranged_throttle = 0

    # NOTE(review): group_id and date_added are not selected by sql_query above;
    # they look like leftovers from the sample config - confirm and remove.
    sql_attr_uint       = group_id
    sql_attr_timestamp  = date_added

    sql_ranged_throttle = 0
}

# Inherits everything from "users" and only overrides the throttle.
source src1throttled : users
{
    sql_ranged_throttle = 100
}

indexer
{
    mem_limit           = 128M
}

searchd
{
    listen              = 9312
    log                 = /usr/local/sphinx/var/log/searchd.log
    query_log           = /usr/local/sphinx/var/log/query.log
    read_timeout        = 5
    client_timeout      = 300
    max_children        = 30
    persistent_connections_limit = 30
    pid_file            = /usr/local/sphinx/var/log/searchd.pid
    seamless_rotate     = 1
    preopen_indexes     = 1
    unlink_old          = 1
    mva_updates_pool    = 1M
    max_packet_size     = 8M
    max_filters         = 256
    max_filter_values   = 4096
    max_batch_queries   = 32
    workers             = threads  # for RT to work
}

index users
{
    source              = users
    path                = /usr/local/sphinx/var/data/users
    docinfo             = extern
    mlock               = 0
    morphology          = none
    min_word_len        = 1
    html_strip          = 1

    # Character folding table. A multi-line value needs a trailing backslash
    # on every line except the last.
    charset_table = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, \
        A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6, \
        U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101, \
        U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109, \
        U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, \
        U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, \
        U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, \
        U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, \
        U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, \
        U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144, \
        U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B, \
        U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153, \
        U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, \
        U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, \
        U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, \
        U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, \
        U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, \
        U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C, \
        U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, \
        U+05D0..U+05EA, U+0531..U+0556->U+0561..U+0586, U+0561..U+0587, U+0621..U+063A, U+01B9, \
        U+01BF, U+0640..U+064A, U+0660..U+0669, U+066E, U+066F, U+0671..U+06D3, U+06F0..U+06FF, \
        U+0904..U+0939, U+0958..U+095F, U+0960..U+0963, U+0966..U+096F, U+097B..U+097F, \
        U+0985..U+09B9, U+09CE, U+09DC..U+09E3, U+09E6..U+09EF, U+0A05..U+0A39, U+0A59..U+0A5E, \
        U+0A66..U+0A6F, U+0A85..U+0AB9, U+0AE0..U+0AE3, U+0AE6..U+0AEF, U+0B05..U+0B39, \
        U+0B5C..U+0B61, U+0B66..U+0B6F, U+0B71, U+0B85..U+0BB9, U+0BE6..U+0BF2, U+0C05..U+0C39, \
        U+0C66..U+0C6F, U+0C85..U+0CB9, U+0CDE..U+0CE3, U+0CE6..U+0CEF, U+0D05..U+0D39, U+0D60, \
        U+0D61, U+0D66..U+0D6F, U+0D85..U+0DC6, U+1900..U+1938, U+1946..U+194F, U+A800..U+A805, \
        U+A807..U+A822, U+0386->U+03B1, U+03AC->U+03B1, U+0388->U+03B5, U+03AD->U+03B5, \
        U+0389->U+03B7, U+03AE->U+03B7, U+038A->U+03B9, U+0390->U+03B9, U+03AA->U+03B9, \
        U+03AF->U+03B9, U+03CA->U+03B9, U+038C->U+03BF, U+03CC->U+03BF, U+038E->U+03C5, \
        U+03AB->U+03C5, U+03B0->U+03C5, U+03CB->U+03C5, U+03CD->U+03C5, U+038F->U+03C9, \
        U+03CE->U+03C9, U+03C2->U+03C3, U+0391..U+03A1->U+03B1..U+03C1, \
        U+03A3..U+03A9->U+03C3..U+03C9, U+03B1..U+03C1, U+03C3..U+03C9, U+0E01..U+0E2E, \
        U+0E30..U+0E3A, U+0E40..U+0E45, U+0E47, U+0E50..U+0E59, U+A000..U+A48F, U+4E00..U+9FBF, \
        U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF, U+2F800..U+2FA1F, U+2E80..U+2EFF, \
        U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF, U+3040..U+309F, U+30A0..U+30FF, \
        U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, U+3130..U+318F, U+A000..U+A48F, \
        U+A490..U+A4CF

    # Index every character listed in ngram_chars as its own 1-gram.
    ngram_len     = 1
    ngram_chars   = U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF, \
        U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF, \
        U+3040..U+309F, U+30A0..U+30FF, U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, \
        U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF
}

common
{
}
10. Start Sphinx
# Stop any running search daemon, rebuild every index defined in the
# config, then start searchd again with the same config.
pkill searchd
/usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf --all
/usr/local/sphinx/bin/searchd --config /usr/local/sphinx/etc/sphinx.conf
11. Testing
<?php
/**
 * End-to-end test: segment a search phrase with SCWS, join the resulting
 * words into an OR-ed extended-mode query, and run it against the Sphinx
 * "users" index through the PECL SphinxClient.
 */

$key   = "Zhang San";
$index = "users"; // must match the index name declared in sphinx.conf

// --- Word segmentation with SCWS ---
$so = scws_new();
$so->set_charset('utf-8');
$so->add_dict(ini_get('scws.default.fpath') . '/dict.utf8.xdb');
$so->set_rule(ini_get('scws.default.fpath') . '/rules.utf8.ini');
$so->set_ignore(true);
$so->set_multi(false);
$so->set_duality(false);
// Extra plain-text dictionary in the current directory.
$so->add_dict('./dd.txt', SCWS_XDICT_TXT);
$so->send_text($key);

$words_array = $so->get_result();
if (!is_array($words_array)) {
    // Nothing was segmented; avoid iterating over a non-array value.
    $words_array = array();
}

// Build "(w1)|(w2)|..." so that any segmented word may match.
$words = "";
foreach ($words_array as $v) {
    $words = $words . '|(' . $v['word'] . ')';
}

// Add the whole word
# $words = '(' . $key . ')' . $words;
$words = trim($words, '|');
$so->close();

echo '<p>input: ' . $key . '</p>';
echo '<p>participle: ' . $words . '</p>';

// --- Query Sphinx ---
$sc = new SphinxClient();
$sc->setServer('127.0.0.1', 9312);
# $sc->setMatchMode(SPH_MATCH_ALL);
$sc->setMatchMode(SPH_MATCH_EXTENDED); // extended syntax is needed for the "|" operator
$sc->setArrayResult(true);

$res = $sc->query($words, $index);
print_r($res);
Reference:
http://www.ibm.com/developerworks/cn/opensource/os-sphinx/
http://ourmysql.com/archives/965
http://www.cnblogs.com/yjf512/p/3581869.html
http://www.coreseek.cn/docs/coreseek_4.1-sphinx_2.0.1-beta.html
http://blog.51yip.com/mysql/1658.html
PHP full-text retrieval based on Sphinx + SCWS Chinese word segmentation