<? Php /** * Tool for the first letter of Chinese character and Pinyin * Note: The English string does not change and returns (including numbers) eg. abc123 => abc123 * Chinese character string: return the first character of pinyin, for example, Hunan => HN * Chinese-English mixed string: return the first character of the Chinese alphabet and the English eg. I j => WIWJ * Eg. * $ Py = new str2PY (); * * $ Result = $ py-> getInitials ('Jay Chou '); * * // Obtain the first letter * $ Result = $ py-> getFirstString ('ABC'); // * $ Resutl = $ py-> getFirstString ("Yunqi community"); // y * */
/**
 * Converts Chinese text to the upper-case initials of its pinyin, using the
 * GB2312 internal code of each character.
 *
 * Strings made only of single-byte characters are returned unchanged.
 */
class str2py
{
    /**
     * Lookup table: first GB2312 code of each pinyin initial => that initial.
     *
     * A code is (lead byte * 1000 + trail byte), e.g. 0xB0A1 => 176161.
     * Keys must stay in ascending order because _search() binary-searches them.
     *
     * NOTE(review): these boundaries are the standard GB2312 level-1 pinyin
     * ranges; confirm against the project's original table if one is available.
     *
     * @var array
     */
    private $_pinyins = array(
        176161 => 'A',
        176197 => 'B',
        178193 => 'C',
        180238 => 'D',
        182234 => 'E',
        183162 => 'F',
        184193 => 'G',
        185254 => 'H',
        187247 => 'J',
        191166 => 'K',
        192172 => 'L',
        194232 => 'M',
        196195 => 'N',
        197182 => 'O',
        197190 => 'P',
        198218 => 'Q',
        200187 => 'R',
        200246 => 'S',
        203250 => 'T',
        205218 => 'W',
        206244 => 'X',
        209185 => 'Y',
        212209 => 'Z',
    );

    /**
     * Normalised (lower-case, trimmed) name of the input encoding.
     *
     * @var string|null
     */
    private $_charset = null;

    /**
     * Constructor; selects the encoding of the strings passed to this object.
     *
     * 'utf-8' (default, case-insensitive, 'utf8' also accepted) input is
     * converted to GB2312 before lookup; any other value is treated as
     * already being GB2312-encoded.
     *
     * @param string $charset
     */
    public function __construct($charset = 'utf-8')
    {
        $this->_charset = strtolower(trim((string) $charset));
    }

    /**
     * Substring for double-byte encoded text.
     *
     * $start and $len are expressed in double-byte units (each is multiplied
     * by two to get a byte window). Every byte position inside the window
     * contributes one character: two bytes when the byte value is above 129,
     * otherwise one byte.
     *
     * @param string $str
     * @param int    $start
     * @param int    $len
     * @return string
     */
    private function _msubstr($str, $start, $len)
    {
        $start  = $start * 2;
        $end    = $start + $len * 2;
        $strlen = strlen($str);
        $result = '';

        // Nothing is appended once $i reaches $end, so scanning stops there.
        for ($i = 0; $i < $strlen && $i < $end; $i++) {
            $isDoubleByte = ord($str[$i]) > 129;

            if ($i >= $start) {
                $result .= $isDoubleByte ? substr($str, $i, 2) : $str[$i];
            }

            if ($isDoubleByte) {
                $i++; // skip the trail byte
            }
        }

        return $result;
    }

    /**
     * Splits a string into an array of characters (one element per
     * single-byte character or per double-byte character).
     *
     * @param string $str
     * @return array
     */
    private function _cutWord($str)
    {
        $words = array();

        // substr() returns false instead of '' at end-of-string before PHP 8.
        while ($str !== '' && $str !== false) {
            if ($this->_isAscii($str)) {
                /* Non-Chinese: consume one byte */
                $word = $str[0];
            } else {
                /* Double-byte character: consume up to two bytes */
                $word = $this->_msubstr($str, 0, 1);
            }

            $words[] = $word;
            $str     = substr($str, strlen($word));
        }

        return $words;
    }

    /**
     * Determines whether the first byte of $char is treated as a single-byte
     * (non-Chinese) character, i.e. its value is below 160.
     *
     * @param string $char
     * @return bool
     */
    private function _isAscii($char)
    {
        return ord(substr($char, 0, 1)) < 160;
    }

    /**
     * Determines whether every byte of the string is a single-byte
     * (non-Chinese) character according to _isAscii().
     *
     * The whole string is inspected so that text such as "ab" followed by
     * Chinese characters is not mistaken for a plain English string.
     *
     * @param string $str
     * @return bool
     */
    private function _isAsciis($str)
    {
        $length = strlen($str);

        for ($i = 0; $i < $length; $i++) {
            if (!$this->_isAscii($str[$i])) {
                return false;
            }
        }

        return true;
    }

    /**
     * Returns the pinyin initials of a string.
     *
     * - A string without any Chinese (double-byte) character is returned
     *   unchanged.
     * - Otherwise each Chinese character is replaced by its pinyin initial,
     *   single-byte characters are kept, and the result is upper-cased.
     *   Characters whose code lies below the first table entry are dropped.
     * - An empty/null input, or input that cannot be converted to GB2312,
     *   yields an empty string.
     *
     * NOTE(review): codes above the last level-1 GB2312 character all resolve
     * to 'Z' because the table has no upper bound; confirm whether rarely used
     * (level-2) characters should be skipped instead.
     *
     * @param string $str
     * @return string
     */
    public function getInitials($str)
    {
        if ($str === null || $str === false) {
            return '';
        }

        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        if ($this->_isAsciis($str)) {
            return $str;
        }

        if ($this->_charset === 'utf-8' || $this->_charset === 'utf8') {
            $str = iconv('utf-8', 'gb2312', $str);
            if ($str === false) {
                // Input contains characters that GB2312 cannot represent.
                return '';
            }
        }

        $result = array();

        foreach ($this->_cutWord($str) as $word) {
            if ($this->_isAscii($word)) {
                /* Non-Chinese */
                $result[] = $word;
                continue;
            }

            if (strlen($word) < 2) {
                // Truncated double-byte character at the end of the input.
                continue;
            }

            $code = ord($word[0]) * 1000 + ord($word[1]);

            /* Get the A-Z pinyin initial */
            $key = $this->_search($code);
            if ($key !== -1) {
                $result[] = $this->_pinyins[$key];
            }
        }

        return strtoupper(implode('', $result));
    }

    /**
     * Returns the first character of getInitials($str), upper-cased, or an
     * empty string when there is none.
     *
     * @param string $str
     * @return string
     */
    public function getFirstString($str)
    {
        // Convert Chinese characters into letters first
        $initials = $this->getInitials($str);

        if ($initials === '') {
            return '';
        }

        return strtoupper(substr($initials, 0, 1));
    }

    /**
     * Maps an ASCII code to a display character: digits and upper-case
     * letters are returned as-is, lower-case letters are upper-cased and
     * everything else becomes '-'.
     *
     * Not called by the other methods of this class.
     *
     * @param int $ascii
     * @return string
     */
    private function _getChar($ascii)
    {
        if ($ascii >= 48 && $ascii <= 57) {
            return chr($ascii); /* digit */
        } elseif ($ascii >= 65 && $ascii <= 90) {
            return chr($ascii); /* A-Z */
        } elseif ($ascii >= 97 && $ascii <= 122) {
            return chr($ascii - 32); /* a-z */
        } else {
            return '-'; /* other */
        }
    }

    /**
     * Binary search for the table key that covers a GB2312 code.
     *
     * Returns the largest key of $_pinyins that is less than or equal to
     * $code, or -1 when $code is below the first key.
     *
     * @param int $code
     * @return int
     */
    private function _search($code)
    {
        $keys = array_keys($this->_pinyins);

        if ($code < $keys[0]) {
            return -1;
        }

        $lower = 0;
        $upper = count($keys) - 1;

        while ($lower <= $upper) {
            $middle = (int) floor(($lower + $upper) / 2);

            if ($keys[$middle] === $code) {
                return $keys[$middle];
            }

            if ($keys[$middle] < $code) {
                $lower = $middle + 1;
            } else {
                $upper = $middle - 1;
            }
        }

        // $lower now points just past the last key that is smaller than $code.
        return $keys[$lower - 1];
    }
}