<?php
/**
 * Pinyin-initial helper for Chinese text.
 *
 * Converts every GB2312 level-1 Chinese character in a string to the upper-case
 * first letter of its pinyin reading. The lookup relies on the fact that
 * GB2312 level-1 hanzi (0xB0A1 - 0xD7F9) are ordered by pinyin, so each initial
 * letter corresponds to one contiguous range of internal codes.
 *
 * Behaviour:
 *  - Pure ASCII input (letters, digits, punctuation) is returned unchanged,
 *    e.g. "abc123" => "abc123".
 *  - Chinese input yields one initial per character, e.g. a five character
 *    Chinese phrase yields five letters.
 *  - Mixed input yields the initials of the Chinese characters plus the ASCII
 *    characters, and the whole result is upper-cased, e.g. "<wo>i<wo>j" => "WIWJ".
 *  - Characters with no pinyin range (symbols, level-2 hanzi, anything above
 *    0xD7F9) are omitted from the result.
 *
 * Usage:
 *   $py     = new str2PY();            // input is UTF-8 (default)
 *   $result = $py->getInitials($text);
 *
 *   $py     = new str2PY('gb2312');    // input is already GB2312 encoded
 */
class str2PY
{
    /**
     * Highest internal code (lead byte * 1000 + trail byte) of a GB2312
     * level-1 hanzi, i.e. 0xD7F9. Codes above it are not ordered by pinyin.
     */
    const MAX_LEVEL1_CODE = 215249;

    /**
     * Lower bound of each pinyin-initial range.
     *
     * Key: lead byte * 1000 + trail byte of the first GB2312 character whose
     * pinyin starts with the letter given as value. Keys must stay sorted in
     * ascending order because _search() performs a binary search on them.
     * There are no entries for I, U and V: no pinyin reading starts with them.
     *
     * @var array
     */
    private $_pinyins = array(
        176161 => 'A',
        176197 => 'B',
        178193 => 'C',
        180238 => 'D',
        182234 => 'E',
        183162 => 'F',
        184193 => 'G',
        185254 => 'H',
        187247 => 'J',
        191166 => 'K',
        192172 => 'L',
        194232 => 'M',
        196195 => 'N',
        197182 => 'O',
        197190 => 'P',
        198218 => 'Q',
        200187 => 'R',
        200246 => 'S',
        203250 => 'T',
        205218 => 'W',
        206244 => 'X',
        209185 => 'Y',
        212209 => 'Z',
    );

    /**
     * Normalised (lower-case, trimmed) name of the input encoding.
     *
     * @var string
     */
    private $_charset = 'utf-8';

    /**
     * Constructor; selects the encoding of the strings passed to getInitials().
     *
     * @param string $charset 'utf-8' (default; 'utf8' is accepted too) or
     *                        'gb2312'. The name is matched case-insensitively.
     *                        Any value other than UTF-8 means "input is already
     *                        GB2312" and no conversion is performed.
     */
    public function __construct($charset = 'utf-8')
    {
        $this->_charset = strtolower(trim((string) $charset));
    }

    /**
     * Tells whether the first byte of the given string is treated as a
     * single-byte (non-Chinese) character.
     *
     * GB2312 lead bytes start at 0xA1, so every byte below 160 is handled as
     * a stand-alone character.
     *
     * @param string $char
     * @return bool
     */
    private function _isAscii($char)
    {
        return ord(substr($char, 0, 1)) < 160;
    }

    /**
     * Splits a GB2312 byte string into an array of characters: one byte for
     * single-byte characters, two bytes for Chinese characters.
     *
     * A dangling lead byte at the very end of the string is returned as a
     * one-byte element.
     *
     * @param string $str GB2312 encoded string
     * @return array
     */
    private function _cutWord($str)
    {
        $words  = array();
        $length = strlen($str);
        $pos    = 0;

        // Single pass over the bytes; no repeated re-slicing of the string.
        while ($pos < $length) {
            $width   = $this->_isAscii($str[$pos]) ? 1 : 2;
            $words[] = substr($str, $pos, $width);
            $pos    += $width;
        }

        return $words;
    }

    /**
     * Returns the pinyin initials of a string.
     *
     * @param string $str Text in the encoding given to the constructor.
     * @return string The input itself when it is pure ASCII; otherwise the
     *                upper-cased concatenation of ASCII characters and pinyin
     *                initials. An empty string is returned for empty input or
     *                when the UTF-8 to GB2312 conversion fails (for example
     *                when the text contains characters outside GB2312).
     */
    public function getInitials($str)
    {
        $str = (string) $str;

        // Compare against '' instead of using empty(): "0" is valid input.
        if ($str === '') {
            return '';
        }

        // Only a string without any high byte is pure ASCII; inspect all of
        // it so that mixed strings starting with ASCII are still converted.
        if (!preg_match('/[\x80-\xFF]/', $str)) {
            return $str;
        }

        if ($this->_charset === 'utf-8' || $this->_charset === 'utf8') {
            $str = iconv('UTF-8', 'GB2312', $str);
            if ($str === false) {
                return '';
            }
        }

        $result = array();
        foreach ($this->_cutWord($str) as $word) {
            if ($this->_isAscii($word)) {
                // Non-Chinese character: keep as is (upper-cased at the end).
                $result[] = $word;
                continue;
            }

            // Internal code: lead byte * 1000 + trail byte (0 when missing).
            $code = ord(substr($word, 0, 1)) * 1000 + ord(substr($word, 1, 1));

            $key = $this->_search($code);
            if ($key !== -1) {
                $result[] = $this->_pinyins[$key];
            }
        }

        return strtoupper(implode('', $result));
    }

    /**
     * Finds the pinyin range an internal GB2312 code belongs to (binary
     * search for the greatest table key that is less than or equal to $code).
     *
     * @param int $code lead byte * 1000 + trail byte
     * @return int The matching key of $_pinyins, or -1 when the code lies
     *             outside the pinyin-ordered level-1 area.
     */
    private function _search($code)
    {
        $keys = array_keys($this->_pinyins);

        if ($code < $keys[0] || $code > self::MAX_LEVEL1_CODE) {
            return -1;
        }

        $lower = 0;
        $upper = count($keys) - 1;

        while ($lower <= $upper) {
            $middle = (int) (($lower + $upper) / 2);

            if ($keys[$middle] === $code) {
                return $keys[$middle];
            }

            if ($keys[$middle] < $code) {
                $lower = $middle + 1;
            } else {
                $upper = $middle - 1;
            }
        }

        // $upper now indexes the last key smaller than $code; it is >= 0
        // because $code >= $keys[0] was checked above.
        return $keys[$upper];
    }
}