In a recent project, PHP was used as the development language for the front-end data layer. To sort data alphabetically, the pinyin of Chinese text had to be extracted, so I wrote a script for this project that converts Chinese characters to pinyin. The script is fairly simple and thoroughly commented, so I will not explain it further here and will go straight to the code. Usage:
- $py = new PinYin();
- $all_py = $py->get_all_py("吴国伟"); // returns ['wu', 'guo', 'wei']; to get a string, join the array: join('', $all_py)
- $first_py = $py->get_first_py($all_py); // returns "wgw"
- $first_letter = $py->get_first_letter($all_py); // returns "w"
Source code:
-
/**
 * +------------------------------------------------------
 * Convert Chinese characters to pinyin (PHP)
 * +------------------------------------------------------
 * Usage:
 *   $py           = new PinYin();
 *   $all_py       = $py->get_all_py("吴国伟");          // ['wu', 'guo', 'wei']; use join('', $all_py) for a string
 *   $first_py     = $py->get_first_py($all_py);        // "wgw"
 *   $first_letter = $py->get_first_letter($all_py);    // "w"
 *
 * How it works: the input is converted to GB2312, each double-byte
 * character is turned into a signed 16-bit-style code
 * (lead * 256 + trail - 65536), and that code is looked up in a table of
 * ascending thresholds, one per pinyin syllable. Codes outside
 * [-20319, -10247] have no entry and map to an empty string. Each character
 * gets exactly one reading; polyphonic characters are not disambiguated.
 * +------------------------------------------------------
 */
class PinYin
{
    /**
     * Pinyin syllable => first GB2312 code (lead * 256 + trail - 65536) that
     * belongs to that syllable. Entries MUST stay in ascending code order:
     * zh_to_py() scans them sequentially and stops at the first threshold
     * greater than the code being looked up.
     */
    private $dict_list = array(
        'a' => -20319, 'ai' => -20317, 'an' => -20304, 'ang' => -20295, 'ao' => -20292,
        'ba' => -20283, 'bai' => -20265, 'ban' => -20257, 'bang' => -20242, 'bao' => -20230, 'bei' => -20051, 'ben' => -20036, 'beng' => -20032, 'bi' => -20026, 'bian' => -20002, 'biao' => -19990, 'bie' => -19986, 'bin' => -19982, 'bing' => -19976, 'bo' => -19805, 'bu' => -19784,
        'ca' => -19775, 'cai' => -19774, 'can' => -19763, 'cang' => -19756, 'cao' => -19751, 'ce' => -19746, 'ceng' => -19741, 'cha' => -19739, 'chai' => -19728, 'chan' => -19725, 'chang' => -19715, 'chao' => -19540, 'che' => -19531, 'chen' => -19525, 'cheng' => -19515, 'chi' => -19500, 'chong' => -19484, 'chou' => -19479, 'chu' => -19467, 'chuai' => -19289, 'chuan' => -19288, 'chuang' => -19281, 'chui' => -19275, 'chun' => -19270, 'chuo' => -19263, 'ci' => -19261, 'cong' => -19249, 'cou' => -19243, 'cu' => -19242, 'cuan' => -19238, 'cui' => -19235, 'cun' => -19227, 'cuo' => -19224,
        'da' => -19218, 'dai' => -19212, 'dan' => -19038, 'dang' => -19023, 'dao' => -19018, 'de' => -19006, 'deng' => -19003, 'di' => -18996, 'dian' => -18977, 'diao' => -18961, 'die' => -18952, 'ding' => -18783, 'diu' => -18774, 'dong' => -18773, 'dou' => -18763, 'du' => -18756, 'duan' => -18741, 'dui' => -18735, 'dun' => -18731, 'duo' => -18722,
        'e' => -18710, 'en' => -18697, 'er' => -18696,
        'fa' => -18526, 'fan' => -18518, 'fang' => -18501, 'fei' => -18490, 'fen' => -18478, 'feng' => -18463, 'fo' => -18448, 'fou' => -18447, 'fu' => -18446,
        'ga' => -18239, 'gai' => -18237, 'gan' => -18231, 'gang' => -18220, 'gao' => -18211, 'ge' => -18201, 'gei' => -18184, 'gen' => -18183, 'geng' => -18181, 'gong' => -18012, 'gou' => -17997, 'gu' => -17988, 'gua' => -17970, 'guai' => -17964, 'guan' => -17961, 'guang' => -17950, 'gui' => -17947,
        'gun' => -17931, 'guo' => -17928,
        'ha' => -17922, 'hai' => -17759, 'han' => -17752, 'hang' => -17733, 'hao' => -17730, 'he' => -17721, 'hei' => -17703, 'hen' => -17701, 'heng' => -17697, 'hong' => -17692, 'hou' => -17683, 'hu' => -17676, 'hua' => -17496, 'huai' => -17487, 'huan' => -17482, 'huang' => -17468, 'hui' => -17454,
        'hun' => -17433, 'huo' => -17427,
        'ji' => -17417, 'jia' => -17202, 'jian' => -17185, 'jiang' => -16983, 'jiao' => -16970, 'jie' => -16942, 'jin' => -16915, 'jing' => -16733, 'jiong' => -16708, 'jiu' => -16706, 'ju' => -16689, 'juan' => -16664, 'jue' => -16657, 'jun' => -16647,
        'ka' => -16474, 'kai' => -16470, 'kan' => -16465, 'kang' => -16459, 'kao' => -16452, 'ke' => -16448, 'ken' => -16433, 'keng' => -16429, 'kong' => -16427, 'kou' => -16423, 'ku' => -16419, 'kua' => -16412, 'kuai' => -16407, 'kuan' => -16403, 'kuang' => -16401, 'kui' => -16393, 'kun' => -16220, 'kuo' => -16216,
        'la' => -16212, 'lai' => -16205, 'lan' => -16202, 'lang' => -16187, 'lao' => -16180, 'le' => -16171, 'lei' => -16169, 'leng' => -16158, 'li' => -16155, 'lia' => -15959, 'lian' => -15958, 'liang' => -15944, 'liao' => -15933, 'lie' => -15920, 'lin' => -15915, 'ling' => -15903, 'liu' => -15889,
        'long' => -15878, 'lou' => -15707, 'lu' => -15701, 'lv' => -15681, 'luan' => -15667, 'lue' => -15661, 'lun' => -15659, 'luo' => -15652,
        'ma' => -15640, 'mai' => -15631, 'man' => -15625, 'mang' => -15454, 'mao' => -15448, 'me' => -15436, 'mei' => -15435, 'men' => -15419, 'meng' => -15416, 'mi' => -15408, 'mian' => -15394, 'miao' => -15385, 'mie' => -15377, 'min' => -15375, 'ming' => -15369, 'miu' => -15363, 'mo' => -15362, 'mou' => -15183, 'mu' => -15180,
        'na' => -15165, 'nai' => -15158, 'nan' => -15153, 'nang' => -15150, 'nao' => -15149, 'ne' => -15144, 'nei' => -15143, 'nen' => -15141, 'neng' => -15140, 'ni' => -15139, 'nian' => -15128, 'niang' => -15121, 'niao' => -15119, 'nie' => -15117, 'nin' => -15110, 'ning' => -15109, 'niu' => -14941,
        'nong' => -14937, 'nu' => -14933, 'nv' => -14930, 'nuan' => -14929, 'nue' => -14928, 'nuo' => -14926,
        'o' => -14922, 'ou' => -14921,
        'pa' => -14914, 'pai' => -14908, 'pan' => -14902, 'pang' => -14894, 'pao' => -14889, 'pei' => -14882, 'pen' => -14873, 'peng' => -14871, 'pi' => -14857, 'pian' => -14678, 'piao' => -14674, 'pie' => -14670, 'pin' => -14668, 'ping' => -14663, 'po' => -14654, 'pu' => -14645,
        'qi' => -14630, 'qia' => -14594, 'qian' => -14429, 'qiang' => -14407, 'qiao' => -14399, 'qie' => -14384, 'qin' => -14379, 'qing' => -14368, 'qiong' => -14355, 'qiu' => -14353, 'qu' => -14345, 'quan' => -14170, 'que' => -14159, 'qun' => -14151,
        'ran' => -14149, 'rang' => -14145, 'rao' => -14140, 're' => -14137, 'ren' => -14135, 'reng' => -14125, 'ri' => -14123, 'rong' => -14122, 'rou' => -14112, 'ru' => -14109, 'ruan' => -14099, 'rui' => -14097, 'run' => -14094, 'ruo' => -14092,
        'sa' => -14090, 'sai' => -14087, 'san' => -14083, 'sang' => -13917, 'sao' => -13914, 'se' => -13910, 'sen' => -13907, 'seng' => -13906, 'sha' => -13905, 'shai' => -13896, 'shan' => -13894, 'shang' => -13878, 'shao' => -13870, 'she' => -13859, 'shen' => -13847, 'sheng' => -13831, 'shi' => -13658, 'shou' => -13611, 'shu' => -13601, 'shua' => -13406, 'shuai' => -13404, 'shuan' => -13400, 'shuang' => -13398, 'shui' => -13395, 'shun' => -13391, 'shuo' => -13387, 'si' => -13383, 'song' => -13367, 'sou' => -13359, 'su' => -13356, 'suan' => -13343, 'sui' => -13340, 'sun' => -13329, 'suo' => -13326,
        'ta' => -13318, 'tai' => -13147, 'tan' => -13138, 'tang' => -13120, 'tao' => -13107, 'te' => -13096, 'teng' => -13095, 'ti' => -13091, 'tian' => -13076, 'tiao' => -13068, 'tie' => -13063, 'ting' => -13060, 'tong' => -12888, 'tou' => -12875, 'tu' => -12871, 'tuan' => -12860, 'tui' => -12858, 'tun' => -12852, 'tuo' => -12849,
        'wa' => -12838, 'wai' => -12831, 'wan' => -12829, 'wang' => -12812, 'wei' => -12802, 'wen' => -12607, 'weng' => -12597, 'wo' => -12594, 'wu' => -12585,
        'xi' => -12556, 'xia' => -12359, 'xian' => -12346, 'xiang' => -12320, 'xiao' => -12300, 'xie' => -12120, 'xin' => -12099, 'xing' => -12089, 'xiong' => -12074, 'xiu' => -12067, 'xu' => -12058, 'xuan' => -12039, 'xue' => -11867, 'xun' => -11861,
        'ya' => -11847, 'yan' => -11831, 'yang' => -11798, 'yao' => -11781, 'ye' => -11604, 'yi' => -11589, 'yin' => -11536, 'ying' => -11358, 'yo' => -11340, 'yong' => -11339, 'you' => -11324, 'yu' => -11303, 'yuan' => -11097, 'yue' => -11077, 'yun' => -11067,
        'za' => -11055, 'zai' => -11052, 'zan' => -11045, 'zang' => -11041, 'zao' => -11038, 'ze' => -11024, 'zei' => -11020, 'zen' => -11019, 'zeng' => -11018, 'zha' => -11014, 'zhai' => -10838, 'zhan' => -10832, 'zhang' => -10815, 'zhao' => -10800, 'zhe' => -10790, 'zhen' => -10780, 'zheng' => -10764, 'zhi' => -10587, 'zhong' => -10544, 'zhou' => -10533, 'zhu' => -10519, 'zhua' => -10331, 'zhuai' => -10329, 'zhuan' => -10328, 'zhuang' => -10322, 'zhui' => -10315, 'zhun' => -10309, 'zhuo' => -10307, 'zi' => -10296, 'zong' => -10281, 'zou' => -10274, 'zu' => -10270, 'zuan' => -10262,
        'zui' => -10260, 'zun' => -10256, 'zuo' => -10254
    );

    /**
     * Get the pinyin of every character, e.g. '张三丰' ==> ['zhang', 'san', 'feng'].
     *
     * Single-byte characters (ASCII letters, digits, spaces, ...) are passed
     * through unchanged as one-character elements; double-byte characters
     * with no table entry become ''.
     *
     * @param string $chinese Text to convert.
     * @param string $charset Encoding of $chinese. Anything other than
     *                        'gb2312' (case-insensitive) is treated as UTF-8
     *                        and converted to GB2312 first.
     * @return array One element per input character.
     */
    public function get_all_py($chinese, $charset = 'utf-8')
    {
        $chinese = (string) $chinese;
        // trim() tolerates values such as 'utf-8 ' or 'GB2312 ' coming from config.
        if (strcasecmp(trim((string) $charset), 'gb2312') !== 0) {
            $chinese = $this->utf8_to_gb2312($chinese);
        }
        return $this->zh_to_pys($chinese);
    }

    /**
     * Get the initials of all syllables, e.g. ['zhang', 'san', 'feng'] ==> 'zsf'.
     * Null / empty elements are skipped.
     *
     * @param array $all_pys Result of get_all_py().
     * @return string
     */
    public function get_first_py($all_pys)
    {
        if (empty($all_pys)) {
            return '';
        }
        $result = array();
        foreach ($all_pys as $one) {
            if ((string) $one === '') {
                continue;
            }
            $result[] = substr($one, 0, 1);
        }
        return join('', $result);
    }

    /**
     * Get the first letter of the first non-empty syllable,
     * e.g. ['zhang', 'san', 'feng'] ==> 'z'.
     *
     * @param array $all_pys Result of get_all_py().
     * @return string '' when there is no non-empty element.
     */
    public function get_first_letter($all_pys)
    {
        if (empty($all_pys)) {
            return '';
        }
        foreach ($all_pys as $one) {
            if ((string) $one === '') {
                continue;
            }
            return substr($one, 0, 1);
        }
        return '';
    }

    /**
     * Convert a UTF-8 string to GB2312.
     *
     * The previous helper was written for a single integer code point but was
     * handed the whole string; it only worked through PHP < 8 loose
     * string/int comparison and returned '' for every input on PHP 8.
     *
     * @param string $text UTF-8 text.
     * @return string GB2312 bytes, or '' if iconv() cannot convert the input.
     */
    private function utf8_to_gb2312($text)
    {
        if ($text === '') {
            return '';
        }
        $converted = iconv('UTF-8', 'GB2312', $text);
        return $converted === false ? '' : $converted;
    }

    /**
     * Map one character code to its pinyin.
     *
     * @param int    $num   Single-byte value (1..159) or a double-byte code
     *                      computed as lead * 256 + trail - 65536.
     * @param string $blank Value returned for codes that have no table entry.
     * @return string
     */
    private function zh_to_py($num, $blank = '')
    {
        if ($num > 0 && $num < 160) {
            // Single-byte character: keep it as is.
            return chr($num);
        }
        if ($num < -20319 || $num > -10247) {
            return $blank;
        }
        $result = $blank;
        // Thresholds are ascending: the last one not greater than $num wins.
        foreach ($this->dict_list as $py => $code) {
            if ($code > $num) {
                break;
            }
            $result = $py;
        }
        return $result;
    }

    /**
     * Walk a GB2312 byte string and convert every character to pinyin.
     *
     * @param string $chinese GB2312-encoded bytes.
     * @return array
     */
    private function zh_to_pys($chinese)
    {
        $result = array();
        $len = strlen($chinese);
        for ($i = 0; $i < $len; $i++) {
            $p = ord($chinese[$i]);
            if ($p > 160) {
                // Lead byte of a double-byte character: consume the trail byte.
                if ($i + 1 >= $len) {
                    // Truncated character at end of input; emit a blank
                    // instead of computing a code from a missing byte.
                    $result[] = '';
                    break;
                }
                $q = ord($chinese[++$i]);
                $p = $p * 256 + $q - 65536;
            }
            $result[] = $this->zh_to_py($p);
        }
        return $result;
    }
}
|