/** * Fix the method of finding binary method * Chinese Pinyin first Letter tool class * Note: The English string: Unchanged returns (including numbers) eg. abc123 = abc123 * Chinese string: Returns the pinyin first character eg. Test String = = CSZFC * Chinese-English mixed string: Returns the pinyin first character and English eg. I i i j = wiwj * eg. * $py = new str2py (); * $result = $py->getinitials (' Ah, just the hungry fly just fine I saw you oh flat to people is he UV I want to one in '); */ Class Str2py { Private $_pinyins = Array ( 176161 = ' A ', 176197 = ' B ', 178193 = ' C ', 180238 = ' D ', 182234 = ' E ', 183162 = ' F ', 184193 = ' G ', 185254 = ' H ', 187247 = ' J ', 191166 = ' K ', 192172 = ' L ', 194232 = ' M ', 196195 = ' N ', 197182 = ' O ', 197190 = ' P ', 198218 = ' Q ', 200187 = ' R ', 200246 = ' S ', 203250 = ' T ', 205218 = ' W ', 206244 = ' X ', 209185 = ' Y ', 212209 = ' Z ', ); Private $_charset = null; /** * constructor, specifying the required encoding Default:utf-8 * Support Utf-8, gb2312 * * @param unknown_type $charset */ Public function __construct ($charset = ' utf-8 ') { $this->_charset = $charset; } /** * Chinese string substr * * @param string $str * @param int $start * @param int $len * @return String */ Private Function _msubstr ($str, $start, $len) { $start = $start * 2; $len = $len * 2; $strlen = strlen ($STR); $result = "; for ($i = 0; $i < $strlen; $i + +) { if ($i >= $start && $i < ($start + $len)) { if (Ord (substr ($str, $i, 1)) > 129) $result. = substr ($str, $i, 2); else $result. = substr ($str, $i, 1); } if (Ord (substr ($str, $i, 1)) > 129) $i + +; } return $result; } /** * string is divided into arrays (Chinese characters or one character units) * * @param string $str * @return Array */ Private Function _cutword ($STR) { $words = Array (); while ($str! = "") { if ($this->_isascii ($STR)) {/* Non-Chinese */ $words [] = $str [0]; $str = substr ($str, strlen ($str [0]); }else{ $word = $this->_msubstr ($str, 0, 1); $words [] = $word; $str = substr ($str, strlen ($word)); } } return $words; } /** * Determines whether a character is an ASCII character * * @param string $char * @return BOOL */ Private Function _isascii ($char) { Return (Ord (substr ($char, 0,1)) < 160); } /** * Determines whether the first 3 characters of a string are ASCII characters * * @param string $str * @return BOOL */ Private Function _isasciis ($STR) { $len = strlen ($str) >= 3? 3:2; $chars = Array (); for ($i = 1; $i < $len-1; $i + +) { $chars [] = $this->_isascii ($str [$i])? ' Yes ': ' No '; } $result = Array_count_values ($chars); if (Empty ($result [' no ')]) { return true; } return false; } /** * Get the Pinyin first character of Chinese characters string * * @param string $str * @return String */ Public Function getinitials ($STR) { if (empty ($STR)) return '; if ($this->_isascii ($str [0]) && $this->_isasciis ($str)) { return $str; } $result = Array (); if ($this->_charset = = ' Utf-8 ') { $str = Iconv (' utf-8 ', ' gb2312 ', $str); } $words = $this->_cutword ($STR); foreach ($words as $word) { if ($this->_isascii ($word)) {/* Non-Chinese */ $result [] = $word; Continue } $code = Ord (substr ($word, 0,1)) * + ord (substr ($word, 1, 1)); /* Get pinyin initials a--z*/ if ($i = $this->_search ($code))! =-1) { $result [] = $this->_pinyins[$i]; } } Return Strtoupper (Implode (", $result)); } Private Function _getchar ($ASCII) { if ($ascii >= && $ascii <= 57) { return Chr ($ASCII); /* Number */ }elseif ($ascii >=65 && $ascii <=90) { return Chr ($ASCII); /* a--z*/ }elseif ($ascii >=97 && $ascii <=122) { Return Chr ($ascii-32); /* a--z*/ }else{ Return '-'; /* Other */ } } /** * Find the phonetic characters (gb2312) corresponding to the required Chinese characters (binary method) * * @param int $code * @return int */ Private Function _search ($code) { $data = Array_keys ($this->_pinyins); $lower = 0; $upper = sizeof ($data)-1; $middle = (int) round (($lower + $upper)/2); if ($code < $data [0]) return-1; for (;;) { if ($lower > $upper) { return $data [$lower-1]; } $tmp = (int) round (($lower + $upper)/2); if (!isset ($data [$tmp])) { return $data [$middle]; }else{ $middle = $tmp; } if ($data [$middle] < $code) { $lower = (int) $middle + 1; }else if ($data [$middle] = = $code) { return $data [$middle]; }else{ $upper = (int) $middle-1; } } } } ?> |