/* Valid only for GB2312-encoded input. */
/*
 * Boundary table for Chinese-character pronunciation: each entry is the
 * 16-bit code value at which a new pinyin syllable starts. Entry i is paired
 * by index with str_pin[i]. The values are listed in ascending order, which
 * the binary search in Get_pin relies on.
 */
static const unsigned short code_pin[] = {
0XB0A1,0XB0A3,0XB0B0,0XB0B9,0XB0BC,0XB0C5,0XB0D7,0XB0DF,0XB0EE,0XB0FA,0XB1AD,0XB1BC,0XB1C0,0XB1C6,
0xb1de,0xb1ea,0xb1ee,0xb1f2,0xb1f8,0xb2a3,0xb2b8,0xb2c1,0xb2c2,0xb2cd,0xb2d4,0xb2d9,0xb2de,0xb2e3,
0XB2E5,0XB2F0,0XB2F3,0XB2FD,0XB3AC,0XB3B5,0XB3BB,0XB3C5,0XB3D4,0XB3E4,0XB3E9,0XB3F5,0XB4A7,0XB4A8,
0XB4AF,0XB4B5,0XB4BA,0XB4C1,0XB4C3,0XB4CF,0XB4D5,0XB4D6,0XB4DA,0XB4DD,0XB4E5,0XB4E8,0XB4EE,0XB4F4,
0XB5A2,0XB5B1,0XB5B6,0XB5C2,0XB5C5,0XB5CC,0XB5DF,0XB5EF,0XB5F8,0XB6A1,0XB6AA,0XB6AB,0XB6B5,0XB6BC,
0XB6CB,0XB6D1,0XB6D5,0XB6DE,0XB6EA,0XB6F7,0XB6F8,0XB7A2,0XB7AA,0XB7BB,0XB7C6,0XB7D2,0XB7E1,0XB7F0,
0XB7F1,0XB7F2,0XB8C1,0XB8C3,0XB8C9,0XB8D4,0XB8DD,0XB8E7,0XB8F8,0XB8F9,0XB8FB,0XB9A4,0XB9B3,0XB9BC,
0XB9CE,0XB9D4,0XB9D7,0XB9E2,0XB9E5,0XB9F5,0XB9F8,0XB9FE,0XBAA1,0XBAA8,0XBABB,0XBABE,0XBAC7,0XBAD9,
0xbadb,0xbadf,0xbae4,0xbaed,0xbaf4,0xbba8,0xbbb1,0xbbb6,0xbbc4,0xbbd2,0xbbe7,0xbbed,0xbbf7,0xbcce,
0XBCDF,0XBDA9,0XBDB6,0XBDD2,0XBDED,0XBEA3,0XBEBC,0XBEBE,0XBECF,0XBEE8,0XBEEF,0XBEF9,0XBFA6,0XBFAA,
0XBFAF,0XBFB5,0XBFBC,0XBFC0,0XBFCF,0XBFD3,0XBFD5,0XBFD9,0XBFDD,0XBFE4,0XBFE9,0XBFED,0XBFEF,0XBFF7,
0xc0a4,0xc0a8,0xc0ac,0xc0b3,0xc0b6,0xc0c5,0xc0cc,0xc0d5,0xc0d7,0xc0e2,0xc0e5,0xc1a9,0xc1aa,0xc1b8,
0xc1c3,0xc1d0,0xc1d5,0xc1e1,0xc1ef,0xc1fa,0xc2a5,0xc2ab,0xc2bf,0xc2cd,0xc2d3,0xc2d5,0xc2dc,0xc2e8,
0XC2F1,0XC2F7,0XC3A2,0XC3A8,0XC3B4,0XC3B5,0XC3C5,0XC3C8,0XC3D0,0XC3DE,0XC3E7,0XC3EF,0XC3F1,0XC3F7,
0XC3FD,0XC3FE,0XC4B1,0XC4B4,0XC4C3,0XC4CA,0XC4CF,0XC4D2,0XC4D3,0XC4D8,0XC4D9,0XC4DB,0XC4DC,0XC4DD,
0XC4E8,0XC4EF,0XC4F1,0XC4F3,0XC4FA,0XC4FB,0XC5A3,0XC5A7,0XC5AB,0XC5AE,0XC5AF,0XC5B0,0XC5B2,0XC5B6,
0XC5B7,0XC5BE,0XC5C4,0XC5CA,0XC5D2,0XC5D7,0XC5DE,0XC5E7,0XC5E9,0XC5F7,0XC6AA,0XC6AE,0XC6B2,0XC6B4,
0xc6b9,0xc6c2,0xc6cb,0xc6da,0xc6fe,0xc7a3,0xc7b9,0xc7c1,0xc7d0,0xc7d5,0xc7e0,0xc7ed,0xc7ef,0xc7f7,
0xc8a6,0xc8b1,0xc8b9,0xc8bb,0xc8bf,0xc8c4,0xc8c7,0xc8c9,0xc8d3,0xc8d5,0xc8d6,0xc8e0,0xc8e3,0xc8ed,
0xc8ef,0xc8f2,0xc8f4,0xc8f6,0xc8f9,0xc8fd,0xc9a3,0xc9a6,0xc9aa,0xc9ad,0xc9ae,0xc9af,0xc9b8,0xc9ba,
0XC9CA,0XC9D2,0XC9DD,0XC9E9,0XC9F9,0XCAA6,0XCAD5,0XCADF,0XCBA2,0XCBA4,0XCBA8,0XCBAA,0XCBAD,0XCBB1,
0XCBB5,0XCBB9,0XCBC9,0XCBD1,0XCBD4,0XCBE1,0XCBE4,0XCBEF,0XCBF2,0XCBFA,0XCCA5,0XCCAE,0XCCC0,0XCCCD,
0XCCD8,0XCCD9,0XCCDD,0XCCEC,0XCCF4,0XCCF9,0XCCFC,0XCDA8,0XCDB5,0XCDB9,0XCDC4,0XCDC6,0XCDCC,0XCDCF,
0XCDDA,0XCDE1,0XCDE3,0XCDF4,0XCDFE,0XCEC1,0XCECB,0XCECE,0XCED7,0XCEF4,0XCFB9,0XCFC6,0XCFE0,0XCFF4,
0XD0A8,0XD0BD,0XD0C7,0XD0D6,0XD0DD,0XD0E6,0XD0F9,0XD1A5,0XD1AB,0XD1B9,0XD1C9,0XD1EA,0XD1FB,0XD2AC,
0XD2BB,0XD2F0,0XD3A2,0XD3B4,0XD3B5,0XD3C4,0XD3D9,0XD4A7,0XD4BB,0XD4C5,0XD4D1,0XD4D4,0XD4DB,0XD4DF,
0XD4E2,0XD4F0,0XD4F4,0XD4F5,0XD4F6,0XD4FA,0XD5AA,0XD5B0,0XD5C1,0XD5D0,0XD5DA,0XD5E4,0XD5F4,0XD6A5,
0xd6d0,0xd6db,0xd6e9,0xd7a5,0xd7a7,0xd7a8,0xd7ae,0xd7b5,0xd7bb,0xd7bd,0xd7c8,0xd7d7,0xd7de,0xd7e2,
0XD7EA,0XD7EC,0XD7F0,0XD7F2};
/*
 * Pinyin syllable table, paired by index with code_pin: str_pin[i] is the
 * pronunciation of every character whose code lies in
 * [code_pin[i], code_pin[i + 1]).
 *
 * The two tables must have the same number of entries.
 * Entry 137 used to read "June", which is not a pinyin syllable; it is the
 * syllable "Jun" (it sits between "Jue" and "Ka").
 * NOTE(review): the letter casing of the entries is inconsistent ("AI", "an",
 * "Ang", ...). It is kept exactly as found because callers receive these
 * strings verbatim - confirm the intended casing before normalising.
 */
static const char *const str_pin[] = {
"A", "AI", "an", "Ang", "AO", "ba", "Bai", "ban", "bang", "Bao", "bei", "Ben", "Beng", "Bi", "Bian", "Biao",
"Bie", "Bin", "Bing", "Bo", "Bu", "Ca", "Cai", "Can", "Cang", "Cao", "Ce", "CEng", "Cha", "Chai", "Chan",
"Chang", "Chao", "Che", "Chen", "Cheng", "Chi", "Chong", "Chou", "Chu", "Chuai", "Chuan", "Chuang", "Chui",
"Chun", "Chuo", "CI", "Cong", "Cou", "cu", "Cuan", "Cui", "Cun", "CuO", "Da", "Dai", "Dan", "Dang", "DAO",
"De", "Deng", "Di", "Dian", "Diao", "Die", "Ding", "diu", "Dong", "Dou", "du", "Duan", "DUI", "Dun", "Duo",
"E", "en", "er", "fa", "Fan", "Fang", "Fei", "Fen", "Feng", "Fo", "Fou", "Fu", "Ga", "Gai", "Gan", "gang",
"Gao", "GE", "gei", "Gen", "Geng", "Gong", "Gou", "gu", "Gua", "Guai", "Guan", "Guang", "GUI", "Gun",
"Guo", "Ha", "Hai", "Han", "hang", "Hao", "he", "hei", "hen", "Heng", "Hong", "Hou", "Hu", "Hua", "Huai",
"Huan", "Huang", "Hui", "Hun", "Huo", "Ji", "Jia", "Jian", "Jiang", "Jiao", "Jie", "Jin", "Jing",
"Jiong", "JIU", "Ju", "Juan", "Jue", "Jun", "Ka", "Kai", "Kan", "Kang", "Kao", "Ke", "Ken", "Keng",
"Kong", "Kou", "Ku", "Kua", "Kuai", "Kuan", "Kuang", "Kui", "Kun", "Kuo", "La", "Lai", "LAN", "Lang",
"Lao", "Le", "lei", "Leng", "Li", "Lia", "Lian", "Liang", "Liao", "lie", "Lin", "Ling", "Liu", "Long", "Lou",
"Lu", "LV", "Luan", "Lue", "Lun", "Luo", "Ma", "Mai", "Man", "Mang", "Mao", "Me", "Mei", "Men", "Meng",
"Mi", "Mian", "Miao", "Mie", "Min", "Ming", "Miu", "Mo", "MoU", "Mu", "NA", "Nai", "Nan", "Nang", "Nao", "NE",
"Nei", "nen", "Neng", "ni", "Nian", "Niang", "Niao", "nie", "nin", "Ning", "Niu", "Nong", "Nu", "NV", "Nuan",
"Nue", "Nuo", "O", "ou", "pa", "pai", "Pan", "Pang", "Pao", "Pei", "pen", "Peng", "PI", "Pian", "Piao", "Pie",
"Pin", "ping", "po", "Pu", "qi", "QIA", "Qian", "Qiang", "Qiao", "Qie", "Qin", "Qing", "Qiong", "Qiu", "Qu",
"Quan", "que", "Qun", "ran", "rang", "Rao", "Re", "Ren", "Reng", "Ri", "Rong", "Rou", "Ru", "Ruan", "Rui",
"Run", "Ruo", "sa", "Sai", "san", "sang", "Sao", "se", "Sen", "Seng", "Sha", "Shai", "Shan", "Shang", "Shao",
"She", "Shen", "Sheng", "Shi", "Shou", "Shu", "Shua", "Shuai", "Shuan", "Shuang", "Shui", "Shun", "Shuo",
"Si", "song", "Sou", "su", "Suan", "Sui", "Sun", "suo", "ta", "Tai", "Tan", "Tang", "Tao", "TE", "Teng",
"Ti", "Tian", "Tiao", "Tie", "Ting", "Tong", "Tou", "tu", "Tuan", "Tui", "Tun", "Tuo", "WA", "Wai", "Wan",
"Wang", "Wei", "Wen", "Weng", "Wo", "WU", "XI", "Xia", "Xian", "Xiang", "Xiao", "Xie", "Xin", "Xing",
"Xiong", "Xiu", "Xu", "Xuan", "Xue", "Xun", "Ya", "Yan", "Yang", "Yao", "Ye", "Yi", "Yin", "ying", "Yo",
"Yong", "You", "Yu", "Yuan", "Yue", "Yun", "Za", "Zai", "Zan", "Zang", "Zao", "Ze", "Zei", "Zen", "Zeng",
"Zha", "Zhai", "Zhan", "Zhang", "Zhao", "Zhe", "Zhen", "Zheng", "Zhi", "Zhong", "Zhou", "Zhu", "Zhua",
"Zhuai", "Zhuan", "Zhuang", "Zhui", "Zhun", "Zhuo", "Zi", "Zong", "Zou", "zu", "Zuan", "Zui", "Zun", "Zuo"};
/*
 * Number of entries in code_pin.
 * The element size is taken from the array itself so the count stays correct
 * if the element type of code_pin ever changes.
 * NOTE(review): size_t is used here above the file's #include lines; confirm
 * that a header declaring it is seen before this definition.
 */
static const size_t Size_array = sizeof (code_pin) / sizeof (code_pin[0]);
#include <cstddef>
#include <iostream>
#include <string>
using std::string;
using std::cout;
using std::endl;
/* Get a pinyin for a Chinese character */
Const char* Get_pin (unsigned short Char_zh)
{
size_t low = 0, high = size_array-1;
size_t index;
while (high-low! = 1)
{
index = (low + high)/2;
if (code_pin[index] = = Char_zh) return Str_pin[index];
if (Code_pin[index] < Char_zh) low = index;
else High = index;
}
return Str_pin[code_pin[high] <= Char_zh High:low];
}
/* Convert Chinese and English mixed strings into pinyin form */
String Str_to_pin (String const &input)
{
string result;
unsigned short Char_zh;
int inputlength = Input.length ();
unsigned char high, low;
for (int i = 0; i < inputlength; ++i)
{
High = Input[i];
if (High < 0x80) Result.append (1, high);
Else
{
low = Input[++i];
Char_zh = (High << 8) + low;
Result.append (Get_pin (Char_zh));
}
}
return result;
}
/* Convert Chinese and English mixed strings into pinyin form, followed by a space after each pinyin */
String Str_to_pin_space (String const &input)
{
string result;
unsigned short Char_zh;
int inputlength = Input.length ();
unsigned char high, low;
for (int i = 0; i < inputlength; ++i)
{
High = Input[i];
if (High < 0x80)
{
if (i > 0 && input[i-1] < 0) result.append (1, ");
Result.append (1, high);
}
Else
{
if (i > 0) result.append (1, ");
low = Input[++i];
Char_zh = (High << 8) + low;
Result.append (Get_pin (Char_zh));
}
}
return result;
}
/*
Converts the Chinese and English mixed strings into pinyin and places a space between each successive part.
The input must satisfy only the above three characters, the correctness is guaranteed by the caller.
*/
String Str_to_pin_space_ (String const &input)
{
string result (1, ");
unsigned short Char_zh;
int inputlength = Input.length ();
unsigned char high, low;
for (int i = 0; i < inputlength; ++i)
{
High = Input[i];
if (High < 58)
{
if (i > 0 && (input[i-1] < 0 | | input[i-1] > 57))
{
Result.append (1, ");
}
Result.append (1, high);
}
else if (High > up && High < 123)
{
if (i > 0 && (input[i-1] < 0 | | input[i-1] < 58))
{
Result.append (1, ");
}
Result.append (1, high);
}
Else
{
if (i > 0) result.append (1, ");
low = Input[++i];
Char_zh = (High << 8) + low;
Result.append (Get_pin (Char_zh));
}
}
return result;
}
/* Test Code */
int main ()
{
cout << str_to_pin_space_ ("Zhong Hua People's DFD a152 left 35gdaf Republic") << Endl;
return 0;
}
/* Learn the Endless blog */