This article describes how to use PHP to convert Chinese characters into their Pinyin initials. Main features: a clear structure that is easy to modify, maintain, and extend; English strings are returned unchanged (including digits); Chinese strings return the first letter of each character's Pinyin.
This article describes how to use PHP to convert Chinese characters into their Pinyin initials. Main features: a clear structure that is easy to modify, maintain, and extend; English strings are returned unchanged (including digits); Chinese strings return the first letter of each character's Pinyin.
Many implementations of this can be found on the Internet, and they all share the same principle. To meet our requirements, we packaged one of them as a class file. Main features: a clear structure that is easy to modify, maintain, and extend; English strings are returned unchanged (including digits); Chinese strings return the first letter of each character's Pinyin; mixed Chinese-English strings return the Pinyin initials together with the English characters. The lookup uses a binary search, which fixes the bug where the letter Z was misread as Y. It is worth bookmarking, so it is recorded here for later study.
The code is as follows:
/**
* Modified by @ 2013-05-07
* Solution
* Tool for the first letter of Chinese Character and Pinyin
* Note: The English string does not change and returns (including numbers) eg. abc123 => abc123
* Chinese character string: return the first character of Pinyin. For example, test string => CSZFC
* Chinese-English mixed string: return the first character of the Chinese alphabet and the English eg. I j => WIWJ
* Eg.
* $ Py = new str2PY ();
* $ Result = $ py-> getInitials ('Ah, it's just like ELE. Me, I saw it. You know, it's just him uv, I Want To Be ');
*/
Class str2PY
{
Private $ _ pinyins = array (
176161 => 'A ',
176197 => 'B ',
178193 => 'C ',
180238 => 'D ',
182234 => 'E ',
183162 => 'F ',
184193 => 'G ',
185254 => 'h ',
187247 => 'J ',
191166 => 'k ',
192172 => 'l ',
194232 => 'M ',
196195 => 'n ',
197182 => 'O ',
197190 => 'P ',
198218 => 'Q ',
200187 => 'R ',
200246 =>'s ',
203250 => 'T ',
205218 => 'w ',
206244 => 'x ',
209185 => 'y ',
212209 => 'Z ',
);
Private $ _ charset = null;
/**
* Constructor, specifying the required encoding default: UTF-8
* UTF-8 and gb2312 supported
*
* @ Param unknown_type $ charset
*/
Public function _ construct ($ charset = 'utf-8 ')
{
$ This-> _ charset = $ charset;
}
/**
* Chinese character string substr
*
* @ Param string $ str
* @ Param int $ start
* @ Param int $ len
* @ Return string
*/
Private function _ msubstr ($ str, $ start, $ len)
{
$ Start = $ start * 2;
$ Len = $ len * 2;
$ Strlen = strlen ($ str );
$ Result = '';
For ($ I = 0; $ I <$ strlen; $ I ++ ){
If ($ I >=$ start & $ I <($ start + $ len )){
If (ord (substr ($ str, $ I, 1)> 129) $ result. = substr ($ str, $ I, 2 );
Else $ result. = substr ($ str, $ I, 1 );
}
If (ord (substr ($ str, $ I, 1)> 129) $ I ++;
}
Return $ result;
}
/**
* The string is partitioned into arrays (Chinese characters or characters in units)
*
* @ Param string $ str
* @ Return array
*/
Private function _ cutWord ($ str)
{
$ Words = array ();
While ($ str! = "")
{
If ($ this-> _ isAscii ($ str) {/* non-Chinese */
$ Words [] = $ str [0];
$ Str = substr ($ str, strlen ($ str [0]);
} Else {
$ Word = $ this-> _ msubstr ($ str, 0, 1 );
$ Words [] = $ word;
$ Str = substr ($ str, strlen ($ word ));
}
}
Return $ words;
}
/**
* Determines whether the character is an ascii character.
*
* @ Param string $ char
* @ Return bool
*/
Private function _ isAscii ($ char)
{
Return (ord (substr ($ char, 160) <);
}
/**
* Determines whether the first three characters of a string are ascii characters.
*
* @ Param string $ str
* @ Return bool
*/
Private function _ isAsciis ($ str)
{
$ Len = strlen ($ str)> = 3? 3: 2;
$ Chars = array ();
For ($ I = 1; $ I <$ len-1; $ I ++ ){
$ Chars [] = $ this-> _ isAscii ($ str [$ I])? 'Yes': 'no ';
}
$ Result = array_count_values ($ chars );
If (empty ($ result ['no']) {
Return true;
}
Return false;
}
/**
* Obtain the first character of a Chinese string in Chinese.
*
* @ Param string $ str
* @ Return string
*/
Public function getInitials ($ str)
{
If (empty ($ str) return '';
If ($ this-> _ isAscii ($ str [0]) & $ this-> _ isAsciis ($ str )){
Return $ str;
}
$ Result = array ();
If ($ this-> _ charset = 'utf-8 '){
$ Str = iconv ('utf-8', 'gb2312', $ str );
}
$ Words = $ this-> _ cutWord ($ str );
Foreach ($ words as $ word)
{
If ($ this-> _ isAscii ($ word) {/* non-Chinese */
$ Result [] = $ word;
Continue;
}
$ Code = ord (substr ($ word, 0, 1) * 1000 + ord (substr ($ word, 1, 1 ));
/* Get the A--Z of the first letter of Pinyin */
If ($ I = $ this-> _ search ($ code ))! =-1 ){
$ Result [] = $ this-> _ pinyins [$ I];
}
}
Return strtoupper (implode ('', $ result ));
}
Private function _ getChar ($ ascii)
{
If ($ ascii >=48 & $ ascii <= 57 ){
Return chr ($ ascii);/* Number */
} Elseif ($ ascii >=65 & $ ascii <= 90 ){
Return chr ($ ascii);/X A--Z */
} Elseif ($ ascii >=97 & $ ascii <= 122 ){
Return chr ($ ascii-32);/* a -- z */
} Else {
Return '-';/* Other */
}
}
/**
* Search for the expected Chinese Character inner code (gb2312) corresponding to the pinyin character (bipartite)
*
* @ Param int $ code
* @ Return int
*/
Private function _ search ($ code)
{
$ Data = array_keys ($ this-> _ pinyins );
$ Lower = 0;
$ Upper = sizeof ($ data)-1;
$ Middle = (int) round ($ lower + $ upper)/2 );
If ($ code <$ data [0]) return-1;
For (;;){
If ($ lower> $ upper ){
Return $ data [$ lower-1];
}
$ Tmp = (int) round ($ lower + $ upper)/2 );
If (! Isset ($ data [$ tmp]) {
Return $ data [$ middle];
} Else {
$ Middle = $ tmp;
}
If ($ data [$ middle] <$ code ){
$ Lower = (int) $ middle + 1;
} Else if ($ data [$ middle] = $ code ){
Return $ data [$ middle];
} Else {
$ Upper = (int) $ middle-1;
}
}
}
}
?>