PHP Chinese character to pinyin source code (GB2312 class library, supporting about 6000 Chinese characters)

Source: Internet
Author: User
PHP Chinese character to pinyin source code (GB2312 class library, supporting about 6000 Chinese characters)

In a recent project, PHP was used as the development language for the front-end data layer. To sort data alphabetically, the pinyin of Chinese text has to be extracted, so I wrote a script for converting Chinese characters to pinyin for this project. The script is relatively simple and its comments are detailed, so I will not explain it further here and will go straight to the code.

Usage:

  1. $py = new PinYin();
  2. $all_py = $py->get_all_py("吴国伟"); // returns ['wu', 'guo', 'wei']; to get a string, join the array: join('', $all_py)
  3. $first_py = $py->get_first_py($all_py); // returns 'wgw'
  4. $first_letter = $py->get_first_letter($all_py); // returns 'w'



Source code:

  /**
   * Converts Chinese characters to pinyin using the GB2312 level-1 code table.
   *
   * Usage:
   *   $py           = new PinYin();
   *   $all_py       = $py->get_all_py("吴国伟");          // ['wu', 'guo', 'wei']
   *   $first_py     = $py->get_first_py($all_py);        // 'wgw'
   *   $first_letter = $py->get_first_letter($all_py);    // 'w'
   *
   * Use join('', $all_py) to obtain the full pinyin as a single string.
   */
  class PinYin
  {
      /**
       * Pinyin syllable => first GB2312 code of that syllable, where a code is
       * (lead byte * 256 + trail byte - 65536). The entries MUST stay sorted by
       * ascending code: zh_to_py() scans them in order and stops at the first
       * code greater than the one being looked up.
       */
      private $dict_list = [
          'a' => -20319, 'ai' => -20317, 'an' => -20304, 'ang' => -20295, 'ao' => -20292,
          'ba' => -20283, 'bai' => -20265, 'ban' => -20257, 'bang' => -20242, 'bao' => -20230,
          'bei' => -20051, 'ben' => -20036, 'beng' => -20032, 'bi' => -20026, 'bian' => -20002,
          'biao' => -19990, 'bie' => -19986, 'bin' => -19982, 'bing' => -19976, 'bo' => -19805,
          'bu' => -19784,
          'ca' => -19775, 'cai' => -19774, 'can' => -19763, 'cang' => -19756, 'cao' => -19751,
          'ce' => -19746, 'ceng' => -19741, 'cha' => -19739, 'chai' => -19728, 'chan' => -19725,
          'chang' => -19715, 'chao' => -19540, 'che' => -19531, 'chen' => -19525, 'cheng' => -19515,
          'chi' => -19500, 'chong' => -19484, 'chou' => -19479, 'chu' => -19467, 'chuai' => -19289,
          'chuan' => -19288, 'chuang' => -19281, 'chui' => -19275, 'chun' => -19270, 'chuo' => -19263,
          'ci' => -19261, 'cong' => -19249, 'cou' => -19243, 'cu' => -19242, 'cuan' => -19238,
          'cui' => -19235, 'cun' => -19227, 'cuo' => -19224,
          'da' => -19218, 'dai' => -19212, 'dan' => -19038, 'dang' => -19023, 'dao' => -19018,
          'de' => -19006, 'deng' => -19003, 'di' => -18996, 'dian' => -18977, 'diao' => -18961,
          'die' => -18952, 'ding' => -18783, 'diu' => -18774, 'dong' => -18773, 'dou' => -18763,
          'du' => -18756, 'duan' => -18741, 'dui' => -18735, 'dun' => -18731, 'duo' => -18722,
          'e' => -18710, 'en' => -18697, 'er' => -18696,
          'fa' => -18526, 'fan' => -18518, 'fang' => -18501, 'fei' => -18490, 'fen' => -18478,
          'feng' => -18463, 'fo' => -18448, 'fou' => -18447, 'fu' => -18446,
          'ga' => -18239, 'gai' => -18237, 'gan' => -18231, 'gang' => -18220, 'gao' => -18211,
          'ge' => -18201, 'gei' => -18184, 'gen' => -18183, 'geng' => -18181, 'gong' => -18012,
          'gou' => -17997, 'gu' => -17988, 'gua' => -17970, 'guai' => -17964, 'guan' => -17961,
          'guang' => -17950, 'gui' => -17947, 'gun' => -17931, 'guo' => -17928,
          'ha' => -17922, 'hai' => -17759, 'han' => -17752, 'hang' => -17733, 'hao' => -17730,
          'he' => -17721, 'hei' => -17703, 'hen' => -17701, 'heng' => -17697, 'hong' => -17692,
          'hou' => -17683, 'hu' => -17676, 'hua' => -17496, 'huai' => -17487, 'huan' => -17482,
          'huang' => -17468, 'hui' => -17454, 'hun' => -17433, 'huo' => -17427,
          'ji' => -17417, 'jia' => -17202, 'jian' => -17185, 'jiang' => -16983, 'jiao' => -16970,
          'jie' => -16942, 'jin' => -16915, 'jing' => -16733, 'jiong' => -16708, 'jiu' => -16706,
          'ju' => -16689, 'juan' => -16664, 'jue' => -16657, 'jun' => -16647,
          'ka' => -16474, 'kai' => -16470, 'kan' => -16465, 'kang' => -16459, 'kao' => -16452,
          'ke' => -16448, 'ken' => -16433, 'keng' => -16429, 'kong' => -16427, 'kou' => -16423,
          'ku' => -16419, 'kua' => -16412, 'kuai' => -16407, 'kuan' => -16403, 'kuang' => -16401,
          'kui' => -16393, 'kun' => -16220, 'kuo' => -16216,
          'la' => -16212, 'lai' => -16205, 'lan' => -16202, 'lang' => -16187, 'lao' => -16180,
          'le' => -16171, 'lei' => -16169, 'leng' => -16158, 'li' => -16155, 'lia' => -15959,
          'lian' => -15958, 'liang' => -15944, 'liao' => -15933, 'lie' => -15920, 'lin' => -15915,
          'ling' => -15903, 'liu' => -15889, 'long' => -15878, 'lou' => -15707, 'lu' => -15701,
          'lv' => -15681, 'luan' => -15667, 'lue' => -15661, 'lun' => -15659, 'luo' => -15652,
          'ma' => -15640, 'mai' => -15631, 'man' => -15625, 'mang' => -15454, 'mao' => -15448,
          'me' => -15436, 'mei' => -15435, 'men' => -15419, 'meng' => -15416, 'mi' => -15408,
          'mian' => -15394, 'miao' => -15385, 'mie' => -15377, 'min' => -15375, 'ming' => -15369,
          'miu' => -15363, 'mo' => -15362, 'mou' => -15183, 'mu' => -15180,
          'na' => -15165, 'nai' => -15158, 'nan' => -15153, 'nang' => -15150, 'nao' => -15149,
          'ne' => -15144, 'nei' => -15143, 'nen' => -15141, 'neng' => -15140, 'ni' => -15139,
          'nian' => -15128, 'niang' => -15121, 'niao' => -15119, 'nie' => -15117, 'nin' => -15110,
          'ning' => -15109, 'niu' => -14941, 'nong' => -14937, 'nu' => -14933, 'nv' => -14930,
          'nuan' => -14929, 'nue' => -14928, 'nuo' => -14926,
          'o' => -14922, 'ou' => -14921,
          'pa' => -14914, 'pai' => -14908, 'pan' => -14902, 'pang' => -14894, 'pao' => -14889,
          'pei' => -14882, 'pen' => -14873, 'peng' => -14871, 'pi' => -14857, 'pian' => -14678,
          'piao' => -14674, 'pie' => -14670, 'pin' => -14668, 'ping' => -14663, 'po' => -14654,
          'pu' => -14645,
          'qi' => -14630, 'qia' => -14594, 'qian' => -14429, 'qiang' => -14407, 'qiao' => -14399,
          'qie' => -14384, 'qin' => -14379, 'qing' => -14368, 'qiong' => -14355, 'qiu' => -14353,
          'qu' => -14345, 'quan' => -14170, 'que' => -14159, 'qun' => -14151,
          'ran' => -14149, 'rang' => -14145, 'rao' => -14140, 're' => -14137, 'ren' => -14135,
          'reng' => -14125, 'ri' => -14123, 'rong' => -14122, 'rou' => -14112, 'ru' => -14109,
          'ruan' => -14099, 'rui' => -14097, 'run' => -14094, 'ruo' => -14092,
          'sa' => -14090, 'sai' => -14087, 'san' => -14083, 'sang' => -13917, 'sao' => -13914,
          'se' => -13910, 'sen' => -13907, 'seng' => -13906, 'sha' => -13905, 'shai' => -13896,
          'shan' => -13894, 'shang' => -13878, 'shao' => -13870, 'she' => -13859, 'shen' => -13847,
          'sheng' => -13831, 'shi' => -13658, 'shou' => -13611, 'shu' => -13601, 'shua' => -13406,
          'shuai' => -13404, 'shuan' => -13400, 'shuang' => -13398, 'shui' => -13395, 'shun' => -13391,
          'shuo' => -13387, 'si' => -13383, 'song' => -13367, 'sou' => -13359, 'su' => -13356,
          'suan' => -13343, 'sui' => -13340, 'sun' => -13329, 'suo' => -13326,
          'ta' => -13318, 'tai' => -13147, 'tan' => -13138, 'tang' => -13120, 'tao' => -13107,
          'te' => -13096, 'teng' => -13095, 'ti' => -13091, 'tian' => -13076, 'tiao' => -13068,
          'tie' => -13063, 'ting' => -13060, 'tong' => -12888, 'tou' => -12875, 'tu' => -12871,
          'tuan' => -12860, 'tui' => -12858, 'tun' => -12852, 'tuo' => -12849,
          'wa' => -12838, 'wai' => -12831, 'wan' => -12829, 'wang' => -12812, 'wei' => -12802,
          'wen' => -12607, 'weng' => -12597, 'wo' => -12594, 'wu' => -12585,
          'xi' => -12556, 'xia' => -12359, 'xian' => -12346, 'xiang' => -12320, 'xiao' => -12300,
          'xie' => -12120, 'xin' => -12099, 'xing' => -12089, 'xiong' => -12074, 'xiu' => -12067,
          'xu' => -12058, 'xuan' => -12039, 'xue' => -11867, 'xun' => -11861,
          'ya' => -11847, 'yan' => -11831, 'yang' => -11798, 'yao' => -11781, 'ye' => -11604,
          'yi' => -11589, 'yin' => -11536, 'ying' => -11358, 'yo' => -11340, 'yong' => -11339,
          'you' => -11324, 'yu' => -11303, 'yuan' => -11097, 'yue' => -11077, 'yun' => -11067,
          'za' => -11055, 'zai' => -11052, 'zan' => -11045, 'zang' => -11041, 'zao' => -11038,
          'ze' => -11024, 'zei' => -11020, 'zen' => -11019, 'zeng' => -11018, 'zha' => -11014,
          'zhai' => -10838, 'zhan' => -10832, 'zhang' => -10815, 'zhao' => -10800, 'zhe' => -10790,
          'zhen' => -10780, 'zheng' => -10764, 'zhi' => -10587, 'zhong' => -10544, 'zhou' => -10533,
          'zhu' => -10519, 'zhua' => -10331, 'zhuai' => -10329, 'zhuan' => -10328, 'zhuang' => -10322,
          'zhui' => -10315, 'zhun' => -10309, 'zhuo' => -10307, 'zi' => -10296, 'zong' => -10281,
          'zou' => -10274, 'zu' => -10270, 'zuan' => -10262, 'zui' => -10260, 'zun' => -10256,
          'zuo' => -10254,
      ];

      /**
       * Returns the pinyin of every character in the input as an array,
       * e.g. '张三丰' ==> ['zhang', 'san', 'feng'].
       *
       * Single-byte characters are returned unchanged, one array element per
       * byte; double-byte characters outside the table yield an empty string.
       *
       * @param string $chinese Text to convert.
       * @param string $charset Encoding of $chinese. 'gb2312' (any letter case)
       *                        means the bytes are used as-is; any other value is
       *                        treated as UTF-8 and converted to GB2312 first.
       * @return array List of pinyin syllables / pass-through characters.
       */
      public function get_all_py($chinese, $charset = 'utf-8')
      {
          $chinese = (string) $chinese;

          // Compare case-insensitively so 'GB2312' is not re-encoded by mistake.
          if (strtolower(trim((string) $charset)) != 'gb2312') {
              $chinese = $this->_U2_Utf8_Gb($chinese);
          }

          return $this->zh_to_pys($chinese);
      }

      /**
       * Joins the first letter of every syllable,
       * e.g. ['zhang', 'san', 'feng'] ==> 'zsf'.
       *
       * @param array $all_pys Result of get_all_py().
       * @return string Concatenated initials; '' when there is nothing to use.
       */
      public function get_first_py($all_pys)
      {
          // is_array() first: count() on a non-array throws in PHP 8.
          if (!is_array($all_pys) || count($all_pys) <= 0) {
              return '';
          }

          $result = [];
          foreach ($all_pys as $one) {
              // Skip null and empty entries.
              if (is_null($one) || strlen($one) <= 0) {
                  continue;
              }
              $result[] = substr($one, 0, 1);
          }

          return join('', $result);
      }

      /**
       * Returns the first letter of the first non-empty syllable,
       * e.g. ['zhang', 'san', 'feng'] ==> 'z'.
       *
       * @param array $all_pys Result of get_all_py().
       * @return string A single character, or '' when no entry is usable.
       */
      public function get_first_letter($all_pys)
      {
          if (!is_array($all_pys) || count($all_pys) <= 0) {
              return '';
          }

          foreach ($all_pys as $one) {
              if (is_null($one) || strlen($one) <= 0) {
                  continue;
              }
              return substr($one, 0, 1);
          }

          return '';
      }

      /**
       * Converts a UTF-8 string to GB2312.
       *
       * The previous version compared the whole string against integer
       * code-point limits before calling iconv(); with PHP 8 comparison rules a
       * non-numeric string failed every branch and the result was always empty.
       * The string is now passed to iconv() directly.
       *
       * @param string $_C UTF-8 encoded text.
       * @return string GB2312 bytes, or '' when iconv() reports failure.
       */
      private function _U2_Utf8_Gb($_C)
      {
          $_String = (string) $_C;
          if ($_String === '') {
              return '';
          }

          $converted = iconv('UTF-8', 'GB2312', $_String);

          return $converted === false ? '' : $converted;
      }

      /**
       * Maps one character code to its pinyin.
       *
       * @param int    $num   Single-byte value (1..159), or the double-byte code
       *                      computed as lead * 256 + trail - 65536.
       * @param string $blank Value returned for codes outside the table.
       * @return string The character itself for single-byte input, the matching
       *                syllable for table codes, $blank otherwise.
       */
      private function zh_to_py($num, $blank = '')
      {
          if ($num > 0 && $num < 160) {
              return chr($num);
          }

          if ($num < -20319 || $num > -10247) {
              return $blank;
          }

          // The last entry whose starting code is <= $num is the match.
          $result = $blank;
          foreach ($this->dict_list as $py => $code) {
              if ($code > $num) {
                  break;
              }
              $result = $py;
          }

          return $result;
      }

      /**
       * Walks a GB2312 byte string and converts each character to pinyin.
       *
       * @param string $chinese GB2312 encoded bytes.
       * @return array One element per decoded character.
       */
      private function zh_to_pys($chinese)
      {
          $chinese = (string) $chinese;
          $length  = strlen($chinese);
          $result  = [];

          for ($i = 0; $i < $length; $i++) {
              $p = ord($chinese[$i]);

              // Bytes above 160 are the first half of a two-byte character.
              if ($p > 160) {
                  if ($i + 1 >= $length) {
                      // Lead byte with no trail byte: emit a blank, not a bogus syllable.
                      $result[] = '';
                      break;
                  }
                  $q = ord($chinese[++$i]);
                  $p = $p * 256 + $q - 65536;
              }

              $result[] = $this->zh_to_py($p);
          }

          return $result;
      }
  }



Class Library, PHP

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.