This code was adapted from an existing implementation and involves little technical difficulty: it fixes the errors in the original code and adds a few commonly needed features. The class currently provides the following:
1. Basic conversion of Chinese strings to pinyin
2. A customizable separator between pinyin syllables
3. An option to capitalize the first letter of each pinyin syllable
4. Customizable readings for polyphonic characters (characters with more than one pronunciation), supplied as exception phrases
5. Customizable handling of special characters encountered during conversion, such as Chinese punctuation
Output examples (each input line is followed by its converted output; the Chinese inputs are reconstructed from the pinyin output, because the original listing had been machine-translated):
引发 System.Web.UI.Control.Init 事件以对页进行初始化。
Yin-Fa System.Web.UI.Control.Init Shi-Jian-Yi-Dui-Ye-Jin-Xing-Chu-Shi-Hua
你好!重庆和深圳的特色各是什么呢?
Ni Hao! Chong Qing He Shen Zhen De Te Se Ge Shi Shen Me Ne?
The code is as follows:
/// <summary>
/// Converts Chinese text to pinyin by looking up each character's GB2312 code
/// in a table of syllable start codes.
/// </summary>
/// <remarks>
/// <para>
/// Requires the <c>System</c>, <c>System.Collections</c>, <c>System.Globalization</c>
/// and <c>System.Text</c> namespaces.
/// </para>
/// <para>
/// Only GB2312 level-1 characters (codes 0xB0A1-0xD7F9) are ordered by pinyin, so only
/// those can be converted. Double-byte characters above that range are copied to the
/// output unchanged; double-byte characters below it (for example full-width
/// punctuation) are dropped, except the full-width question mark, which is kept.
/// </para>
/// <para>
/// The "gb2312" encoding must be available. On .NET Core / .NET 5+ the caller has to
/// register the code-pages encoding provider before converting non-ASCII text.
/// </para>
/// </remarks>
public sealed class Chs2Pinyin
{
    /// <summary>
    /// Highest GB2312 level-1 code (0xD7F9) expressed the same way as the entries of
    /// <see cref="pv"/>, i.e. <c>high * 256 + low - 65536</c>.
    /// </summary>
    private const int LastLevel1Code = -10247;

    /// <summary>
    /// First GB2312 code (as <c>high * 256 + low - 65536</c>) of every pinyin syllable,
    /// in ascending order. Entry <c>i</c> pairs with <c>ps[i]</c>.
    /// </summary>
    private static readonly int[] pv = new int[]
    {
        -20319, -20317, -20304, -20295, -20292, -20283, -20265, -20257, -20242, -20230, -20051, -20036, -20032,
        -20026, -20002, -19990, -19986, -19982, -19976, -19805, -19784, -19775, -19774, -19763, -19756, -19751,
        -19746, -19741, -19739, -19728, -19725, -19715, -19540, -19531, -19525, -19515, -19500, -19484, -19479,
        -19467, -19289, -19288, -19281, -19275, -19270, -19263, -19261, -19249, -19243, -19242, -19238, -19235,
        -19227, -19224, -19218, -19212, -19038, -19023, -19018, -19006, -19003, -18996, -18977, -18961, -18952,
        -18783, -18774, -18773, -18763, -18756, -18741, -18735, -18731, -18722, -18710, -18697, -18696, -18526,
        -18518, -18501, -18490, -18478, -18463, -18448, -18447, -18446, -18239, -18237, -18231, -18220, -18211,
        -18201, -18184, -18183, -18181, -18012, -17997, -17988, -17970, -17964, -17961, -17950, -17947, -17931,
        -17928, -17922, -17759, -17752, -17733, -17730, -17721, -17703, -17701, -17697, -17692, -17683, -17676,
        -17496, -17487, -17482, -17468, -17454, -17433, -17427, -17417, -17202, -17185, -16983, -16970, -16942,
        -16915, -16733, -16708, -16706, -16689, -16664, -16657, -16647, -16474, -16470, -16465, -16459, -16452,
        -16448, -16433, -16429, -16427, -16423, -16419, -16412, -16407, -16403, -16401, -16393, -16220, -16216,
        -16212, -16205, -16202, -16187, -16180, -16171, -16169, -16158, -16155, -15959, -15958, -15944, -15933,
        -15920, -15915, -15903, -15889, -15878, -15707, -15701, -15681, -15667, -15661, -15659, -15652, -15640,
        -15631, -15625, -15454, -15448, -15436, -15435, -15419, -15416, -15408, -15394, -15385, -15377, -15375,
        -15369, -15363, -15362, -15183, -15180, -15165, -15158, -15153, -15150, -15149, -15144, -15143, -15141,
        -15140, -15139, -15128, -15121, -15119, -15117, -15110, -15109, -14941, -14937, -14933, -14930, -14929,
        -14928, -14926, -14922, -14921, -14914, -14908, -14902, -14894, -14889, -14882, -14873, -14871, -14857,
        -14678, -14674, -14670, -14668, -14663, -14654, -14645, -14630, -14594, -14429, -14407, -14399, -14384,
        -14379, -14368, -14355, -14353, -14345, -14170, -14159, -14151, -14149, -14145, -14140, -14137, -14135,
        -14125, -14123, -14122, -14112, -14109, -14099, -14097, -14094, -14092, -14090, -14087, -14083, -13917,
        -13914, -13910, -13907, -13906, -13905, -13896, -13894, -13878, -13870, -13859, -13847, -13831, -13658,
        -13611, -13601, -13406, -13404, -13400, -13398, -13395, -13391, -13387, -13383, -13367, -13359, -13356,
        -13343, -13340, -13329, -13326, -13318, -13147, -13138, -13120, -13107, -13096, -13095, -13091, -13076,
        -13068, -13063, -13060, -12888, -12875, -12871, -12860, -12858, -12852, -12849, -12838, -12831, -12829,
        -12812, -12802, -12607, -12597, -12594, -12585, -12556, -12359, -12346, -12320, -12300, -12120, -12099,
        -12089, -12074, -12067, -12058, -12039, -11867, -11861, -11847, -11831, -11798, -11781, -11604, -11589,
        -11536, -11358, -11340, -11339, -11324, -11303, -11097, -11077, -11067, -11055, -11052, -11045, -11041,
        -11038, -11024, -11020, -11019, -11018, -11014, -10838, -10832, -10815, -10800, -10790, -10780, -10764,
        -10587, -10544, -10533, -10519, -10331, -10329, -10328, -10322, -10315, -10309, -10307, -10296, -10281,
        -10274, -10270, -10262, -10260, -10256, -10254
    };

    /// <summary>
    /// Pinyin syllables, lower case, in the same order as <see cref="pv"/>.
    /// </summary>
    private static readonly string[] ps = new string[]
    {
        "a", "ai", "an", "ang", "ao", "ba", "bai", "ban", "bang", "bao",
        "bei", "ben", "beng", "bi", "bian", "biao", "bie", "bin", "bing", "bo",
        "bu", "ca", "cai", "can", "cang", "cao", "ce", "ceng", "cha", "chai",
        "chan", "chang", "chao", "che", "chen", "cheng", "chi", "chong", "chou", "chu",
        "chuai", "chuan", "chuang", "chui", "chun", "chuo", "ci", "cong", "cou", "cu",
        "cuan", "cui", "cun", "cuo", "da", "dai", "dan", "dang", "dao", "de",
        "deng", "di", "dian", "diao", "die", "ding", "diu", "dong", "dou", "du",
        "duan", "dui", "dun", "duo", "e", "en", "er", "fa", "fan", "fang",
        "fei", "fen", "feng", "fo", "fou", "fu", "ga", "gai", "gan", "gang",
        "gao", "ge", "gei", "gen", "geng", "gong", "gou", "gu", "gua", "guai",
        "guan", "guang", "gui", "gun", "guo", "ha", "hai", "han", "hang", "hao",
        "he", "hei", "hen", "heng", "hong", "hou", "hu", "hua", "huai", "huan",
        "huang", "hui", "hun", "huo", "ji", "jia", "jian", "jiang", "jiao", "jie",
        "jin", "jing", "jiong", "jiu", "ju", "juan", "jue", "jun", "ka", "kai",
        "kan", "kang", "kao", "ke", "ken", "keng", "kong", "kou", "ku", "kua",
        "kuai", "kuan", "kuang", "kui", "kun", "kuo", "la", "lai", "lan", "lang",
        "lao", "le", "lei", "leng", "li", "lia", "lian", "liang", "liao", "lie",
        "lin", "ling", "liu", "long", "lou", "lu", "lv", "luan", "lue", "lun",
        "luo", "ma", "mai", "man", "mang", "mao", "me", "mei", "men", "meng",
        "mi", "mian", "miao", "mie", "min", "ming", "miu", "mo", "mou", "mu",
        "na", "nai", "nan", "nang", "nao", "ne", "nei", "nen", "neng", "ni",
        "nian", "niang", "niao", "nie", "nin", "ning", "niu", "nong", "nu", "nv",
        "nuan", "nue", "nuo", "o", "ou", "pa", "pai", "pan", "pang", "pao",
        "pei", "pen", "peng", "pi", "pian", "piao", "pie", "pin", "ping", "po",
        "pu", "qi", "qia", "qian", "qiang", "qiao", "qie", "qin", "qing", "qiong",
        "qiu", "qu", "quan", "que", "qun", "ran", "rang", "rao", "re", "ren",
        "reng", "ri", "rong", "rou", "ru", "ruan", "rui", "run", "ruo", "sa",
        "sai", "san", "sang", "sao", "se", "sen", "seng", "sha", "shai", "shan",
        "shang", "shao", "she", "shen", "sheng", "shi", "shou", "shu", "shua", "shuai",
        "shuan", "shuang", "shui", "shun", "shuo", "si", "song", "sou", "su", "suan",
        "sui", "sun", "suo", "ta", "tai", "tan", "tang", "tao", "te", "teng",
        "ti", "tian", "tiao", "tie", "ting", "tong", "tou", "tu", "tuan", "tui",
        "tun", "tuo", "wa", "wai", "wan", "wang", "wei", "wen", "weng", "wo",
        "wu", "xi", "xia", "xian", "xiang", "xiao", "xie", "xin", "xing", "xiong",
        "xiu", "xu", "xuan", "xue", "xun", "ya", "yan", "yang", "yao", "ye",
        "yi", "yin", "ying", "yo", "yong", "you", "yu", "yuan", "yue", "yun",
        "za", "zai", "zan", "zang", "zao", "ze", "zei", "zen", "zeng", "zha",
        "zhai", "zhan", "zhang", "zhao", "zhe", "zhen", "zheng", "zhi", "zhong", "zhou",
        "zhu", "zhua", "zhuai", "zhuan", "zhuang", "zhui", "zhun", "zhuo", "zi", "zong",
        "zou", "zu", "zuan", "zui", "zun", "zuo"
    };

    /// <summary>
    /// Punctuation characters intended to be excluded from processing.
    /// </summary>
    /// <remarks>
    /// NOTE(review): no method of this class reads this table, and its contents were
    /// reconstructed from a garbled listing - confirm the entries before relying on it.
    /// </remarks>
    private static readonly string[] bd = new string[]
    {
        "，", "。", "“", "”", "‘", "’", "￥", "$", "（", "「", "『", "）", "」", "』",
        "［", "〖", "【", "］", "〗", "】", "—", "…", "《", "＜", "》", "＞"
    };

    /// <summary>Backing store for <see cref="Phrase"/>; created on first read.</summary>
    private static Hashtable _phrase;

    /// <summary>Cached GB2312 encoding; resolved on first use by <see cref="Gb2312"/>.</summary>
    private static Encoding _gb2312;

    /// <summary>
    /// Gets or sets the table of exception phrases. Each key is a Chinese phrase and each
    /// value is its space-separated pinyin, which overrides the per-character lookup
    /// (used for characters with more than one reading).
    /// </summary>
    /// <remarks>
    /// Reading the property while it is unset (or after it was set to <c>null</c>)
    /// creates the default table.
    /// </remarks>
    public static Hashtable Phrase
    {
        get
        {
            if (_phrase == null)
            {
                Hashtable defaults = new Hashtable();
                defaults.Add("重庆", "Chong Qing");
                defaults.Add("深圳", "Shen Zhen");
                defaults.Add("什么", "Shen Me");
                _phrase = defaults;
            }

            return _phrase;
        }

        set
        {
            _phrase = value;
        }
    }

    /// <summary>
    /// Gets the GB2312 encoding used to obtain each character's double-byte code.
    /// </summary>
    /// <remarks>
    /// A fixed encoding is used instead of <c>Encoding.Default</c> so the result does not
    /// depend on the machine's ANSI code page (or on UTF-8 being the default).
    /// Resolved lazily so that pure-ASCII input never needs the encoding.
    /// </remarks>
    private static Encoding Gb2312
    {
        get
        {
            if (_gb2312 == null)
            {
                _gb2312 = Encoding.GetEncoding("gb2312");
            }

            return _gb2312;
        }
    }

    /// <summary>
    /// Converts the specified Chinese string to pinyin.
    /// </summary>
    /// <param name="chs">The Chinese string to convert.</param>
    /// <param name="separator">
    /// Text inserted between consecutive pinyin syllables; <c>null</c> is treated as empty.
    /// </param>
    /// <param name="initialCap">
    /// <c>true</c> to capitalize the first letter of each looked-up syllable. Exception
    /// phrases from <see cref="Phrase"/> are emitted exactly as stored.
    /// </param>
    /// <returns>
    /// The pinyin form of <paramref name="chs"/>, or an empty string when the input is
    /// <c>null</c> or empty. Single-byte characters are copied through unchanged.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The input contains non-ASCII characters and the "gb2312" encoding is not available.
    /// </exception>
    public static string Convert(string chs, string separator, bool initialCap)
    {
        if (string.IsNullOrEmpty(chs))
        {
            return "";
        }

        if (separator == null)
        {
            separator = "";
        }

        // Substitute exception phrases first. Tabs stand in for the separator so the
        // substituted ASCII text passes through the loop below untouched; they are
        // turned into the real separator at the very end.
        foreach (DictionaryEntry entry in Phrase)
        {
            string pinyin = entry.Value.ToString().Replace(' ', '\t');
            chs = chs.Replace(entry.Key.ToString(), "\t" + pinyin + "\t");
        }

        // Invariant casing keeps the output identical under every thread culture.
        TextInfo textInfo = CultureInfo.InvariantCulture.TextInfo;
        StringBuilder result = new StringBuilder(chs.Length * 4);

        // True while the previously emitted character was single-byte; a syllable that
        // directly follows such a character is not prefixed with the separator.
        bool afterSingleByte = false;

        foreach (char current in chs)
        {
            // ASCII is single-byte in GB2312, so it needs no encoding round trip.
            if (current < 0x80)
            {
                result.Append(current);
                afterSingleByte = true;
                continue;
            }

            string text = current.ToString();
            byte[] bytes = Gb2312.GetBytes(text);
            if (bytes.Length == 1)
            {
                result.Append(current);
                afterSingleByte = true;
                continue;
            }

            // The full-width question mark is kept as is. It deliberately leaves
            // afterSingleByte untouched, as the original implementation did.
            if (text == "\uFF1F")
            {
                AppendToken(result, text, separator, afterSingleByte);
                continue;
            }

            int code = bytes[0] * 256 + bytes[1] - 65536;
            if (code > LastLevel1Code)
            {
                // Beyond the pinyin-ordered level-1 range there is no table entry;
                // keep the character instead of mislabelling it with the last syllable.
                AppendToken(result, text, separator, afterSingleByte);
            }
            else
            {
                int index = FindSyllableIndex(code);
                if (index >= 0)
                {
                    string syllable = ps[index];
                    if (initialCap)
                    {
                        syllable = textInfo.ToTitleCase(syllable);
                    }

                    AppendToken(result, syllable, separator, afterSingleByte);
                }
            }

            afterSingleByte = false;
        }

        result.Replace("\t", separator);
        return result.ToString();
    }

    /// <summary>
    /// Converts the specified Chinese string to pinyin without capitalizing syllables.
    /// </summary>
    /// <param name="chs">The Chinese string to convert.</param>
    /// <param name="separator">Text inserted between consecutive pinyin syllables.</param>
    /// <returns>The pinyin form of <paramref name="chs"/>.</returns>
    public static string Convert(string chs, string separator)
    {
        return Convert(chs, separator, false);
    }

    /// <summary>
    /// Converts the specified Chinese string to pinyin with no separator.
    /// </summary>
    /// <param name="chs">The Chinese string to convert.</param>
    /// <param name="initialCap">
    /// <c>true</c> to capitalize the first letter of each looked-up syllable.
    /// </param>
    /// <returns>The pinyin form of <paramref name="chs"/>.</returns>
    public static string Convert(string chs, bool initialCap)
    {
        return Convert(chs, "", initialCap);
    }

    /// <summary>
    /// Converts the specified Chinese string to pinyin with no separator and no
    /// capitalization.
    /// </summary>
    /// <param name="chs">The Chinese string to convert.</param>
    /// <returns>The pinyin form of <paramref name="chs"/>.</returns>
    public static string Convert(string chs)
    {
        return Convert(chs, "");
    }

    /// <summary>
    /// Returns the index of the last table entry whose start code is not greater than
    /// <paramref name="code"/>, or -1 when the code precedes the first entry.
    /// </summary>
    private static int FindSyllableIndex(int code)
    {
        for (int i = pv.Length - 1; i >= 0; i--)
        {
            if (pv[i] <= code)
            {
                return i;
            }
        }

        return -1;
    }

    /// <summary>
    /// Appends <paramref name="token"/>, preceded by the separator unless the output is
    /// still empty or the previous character was single-byte.
    /// </summary>
    private static void AppendToken(StringBuilder output, string token, string separator, bool afterSingleByte)
    {
        if (output.Length > 0 && !afterSingleByte)
        {
            output.Append(separator);
        }

        output.Append(token);
    }
}