Unigram (single-character) word segmentation splits a sentence so that every character becomes its own token; adjacent characters are not combined, so each character is treated much like a word in English. In UTF-8, the first byte of a character tells you how long it is: if its value is not greater than 192, the character occupies 1 byte; if it is greater than 192 but less than 224, the character occupies 2 bytes; otherwise it occupies 3 bytes (or 4 bytes when the first byte is 240 or greater). To use unigram segmentation with MySQL full-text search, add ft_min_word_len=1 to the MySQL my.ini file.
The code is as follows:
/**
 * Unigram (single-character) word segmentation.
 *
 * Splits a string into tokens suitable for full-text indexing: every
 * multi-byte (e.g. Chinese) character becomes its own token, while a run of
 * consecutive single-byte characters is kept together as one word.
 * You can run the MySQL statement  show variables like '%ft%'  to view the
 * MySQL full-text search related settings.
 *
 * @access global
 * @param string  $str    text to segment; HTML tags are stripped and the
 *                        result is trimmed before processing
 * @param boolean $unique whether to remove duplicate values
 * @param boolean $merge  whether to append the converted-digit tokens
 *                        (ASCII digit <-> Chinese numeral) to the result
 * @return array list of token strings; empty when the input is empty after
 *               stripping. With $merge the keys are renumbered by
 *               array_merge(); without it the keys may be non-contiguous.
 */
function seg_word($str, $unique = false, $merge = true)
{
    $str = trim(strip_tags($str));
    if (strlen($str) == 0) {
        return array();
    }

    // Characters that are filtered out (extend the list as needed).
    $search = array(
        // ASCII punctuation and control characters
        ',', '/', '\\', '.', ';', ':', '"', '!', '~', '`', '^', '(', ')',
        '?', '-', "\t", "\n", '\'', '<', '>', "\r", "\r\n", '$', '&', '%',
        '#', '@', '+', '=', '{', '}', '[', ']',
        // Full-width and CJK punctuation
        '.', '。', ',', '!', ';', '“', '”', '‘', '’', '[', ']', '、',
        '—', ' ', '《', '》', '-', '…', '【', '】', ':',
    );

    // ASCII digit => Chinese numeral. Used in both directions so a search
    // for either form of a digit can match.
    $numpairs = array(
        '1' => '一', '2' => '二', '3' => '三', '4' => '四', '5' => '五',
        '6' => '六', '7' => '七', '8' => '八', '9' => '九', '0' => '零',
    );

    // NOTE(review): alab_num() is defined elsewhere; presumably it
    // normalizes digits in the string -- confirm against its definition.
    $str = alab_num($str);

    // Filtered characters become blanks so they act as word separators.
    $str = str_replace($search, ' ', $str);

    // Measure after filtering: multi-byte punctuation shrinks to one byte.
    $strlen = strlen($str);

    $i = 0;          // byte offset into $str
    $k = 0;          // index of the token currently being built
    $prechar = 0;    // previous character: 0 = blank, 1 = single-byte, 2 = multi-byte
    $result = array();
    $annex = array();

    // Bounded by length rather than by ord() returning 0, so the loop never
    // reads past the end of the string and does not stop at a NUL byte.
    while ($i < $strlen) {
        $ord = ord($str[$i]);

        if ($ord <= 0xC0) {
            // 1-byte character.
            // Blanks and control characters (below 33) end the current token.
            if ($ord < 33) {
                $prechar = 0;
                $i++;
                $k++;
                continue;
            }

            $char = $str[$i];

            // Append the Chinese numeral for an ASCII digit.
            if (isset($numpairs[$char])) {
                $annex[] = $numpairs[$char];
            }

            // A single-byte character directly after a multi-byte one
            // starts a new token; otherwise it extends the current word.
            if ($prechar == 2) {
                $k++;
            }
            $result[$k] = isset($result[$k]) ? $result[$k] . $char : $char;

            $prechar = 1;
            $i++;
        } else {
            // Multi-byte character; the lead byte gives the sequence length.
            if ($ord < 0xE0) {
                $step = 2;
            } elseif ($ord < 0xF0) {
                $step = 3;
            } else {
                $step = 4;
            }
            $c = substr($str, $i, $step);

            // Append the ASCII digit for a Chinese numeral. Compare against
            // false explicitly because the key for '0' is itself falsy.
            $key = array_search($c, $numpairs, true);
            if ($key !== false) {
                $annex[] = (string) $key;
            }

            // Every multi-byte character is its own token: open a new slot
            // unless the previous character was a blank (slot still unused).
            if ($prechar != 0) {
                $k++;
            }
            $result[$k] = isset($result[$k]) ? $result[$k] . $c : $c;

            $prechar = 2;
            $i += $step;
        }
    }

    if ($merge) {
        $result = array_merge($result, $annex);
    }

    return $unique ? array_unique($result) : $result;
}