Python acquires pinyin instance program for Chinese characters

Source: Internet
Author: User
Tags ord


So with the following def, do not know if you have a more efficient way?

The code is as follows Copy Code

#coding: Utf-8
#基于python2.6
    table = ' a,-20319;ai,-20317;an,-20304;ang,-20295;ao,-20292;ba,-20283;bai,-20265;ban,-20257; bang,-20242;bao,-20230;bei,-20051;ben,-20036;beng,-20032;bi,-20026;bian,-20002;biao,-19990;bie,-19986;bin,- 19982;bing,-19976;bo,-19805;bu,-19784;ca,-19775;cai,-19774;can,-19763;cang,-19756;cao,-19751;ce,-19746;ceng,- 19741;cha,-19739;chai,-19728;chan,-19725;chang,-19715;chao,-19540;che,-19531;chen,-19525;cheng,-19515;chi,- 19500;chong,-19484;chou,-19479;chu,-19467;chuai,-19289;chuan,-19288;chuang,-19281;chui,-19275;chun,-19270;chuo , -19263;ci,-19261;cong,-19249;cou,-19243;cu,-19242;cuan,-19238;cui,-19235;cun,-19227;cuo,-19224;da,-19218;dai, -19212;dan,-19038;dang,-19023;dao,-19018;de,-19006;deng,-19003;di,-18996;dian,-18977;diao,-18961;die,-18952; ding,-18783;diu,-18774;dong,-18773;dou,-18763;du,-18756;duan,-18741;dui,-18735;dun,-18731;duo,-18722;e,-18710; En,-18697;er,-18696;fa,-18526;fan,-18518;fang,-18501;fei,-18490;fen,-18478;feng,-18463;fo,-18448;fou,-18447;fu , -18446;ga,-18239;gai,-18237;gan,-18231;gang,-18220;gao,-18211;ge,-18201;gei,-18184;gen,-18183;geng,-18181;gong,-18012;gou,- 17997;gu,-17988;gua,-17970;guai,-17964;guan,-17961;guang,-17950;gui,-17947;gun,-17931;guo,-17928;ha,-17922;hai , -17759;han,-17752;hang,-17733;hao,-17730;he,-17721;hei,-17703;hen,-17701;heng,-17697;hong,-17692;hou,-17683; hu,-17676;hua,-17496;huai,-17487;huan,-17482;huang,-17468;hui,-17454;hun,-17433;huo,-17427;ji,-17417;jia,- 17202;jian,-17185;jiang,-16983;jiao,-16970;jie,-16942;jin,-16915;jing,-16733;jiong,-16708;jiu,-16706;ju,-16689 ; juan,-16664;jue,-16657;jun,-16647;ka,-16474;kai,-16470;kan,-16465;kang,-16459;kao,-16452;ke,-16448;ken,-16433 ; keng,-16429;kong,-16427;kou,-16423;ku,-16419;kua,-16412;kuai,-16407;kuan,-16403;kuang,-16401;kui,-16393;kun,- 16220;kuo,-16216;la,-16212;lai,-16205;lan,-16202;lang,-16187;lao,-16180;le,-16171;lei,-16169;leng,-16158;li,- 16155;lia,-15959;lian,-15958;liang,-15944;liao,-15933;lie,-15920;lin,-15915;ling,-15903;liu,-15889;long,-15878 ; lou,-15707;lu,-15701;lv,-15681;luan,-15667;lue,-15661;lun,-15659;luo,-15652;ma,-15640;mai,-15631;man,-15625;mang,-15454;mao,- 15448;me,-15436;mei,-15435;men,-15419;meng,-15416;mi,-15408;mian,-15394;miao,-15385;mie,-15377;min,-15375;ming , -15369;miu,-15363;mo,-15362;mou,-15183;mu,-15180;na,-15165;nai,-15158;nan,-15153;nang,-15150;nao,-15149;ne,- 15144;nei,-15143;nen,-15141;neng,-15140;ni,-15139;nian,-15128;niang,-15121;niao,-15119;nie,-15117;nin,-15110; Ning,-15109;niu,-14941;nong,-14937;nu,-14933;nv,-14930;nuan,-14929;nue,-14928;nuo,-14926;o,-14922;ou,-14921;pa , -14914;pai,-14908;pan,-14902;pang,-14894;pao,-14889;pei,-14882;pen,-14873;peng,-14871;pi,-14857;pian,-14678; piao,-14674;pie,-14670;pin,-14668;ping,-14663;po,-14654;pu,-14645;qi,-14630;qia,-14594;qian,-14429;qiang,- 14407;qiao,-14399;qie,-14384;qin,-14379;qing,-14368;qiong,-14355;qiu,-14353;qu,-14345;quan,-14170;que,-14159; qun,-14151;ran,-14149;rang,-14145;rao,-14140;re,-14137;ren,-14135;reng,-14125;ri,-14123;rong,-14122;rou,-14112 ; ru,-14109;ruan,-14099; rui,-14097;run,-14094;ruo,-14092;sa,-14090;sai,-14087;san,-14083;sang,-13917;sao,-13914;se,-13910;sen,-13907; seng,-13906;sha,-13905;shai,-13896;shan,-13894;shang,-13878;shao,-13870;she,-13859;shen,-13847;sheng,-13831; shi,-13658;shou,-13611;shu,-13601;shua,-13406;shuai,-13404;shuan,-13400;shuang,-13398;shui,-13395;shun,-13391; shuo,-13387;si,-13383;song,-13367;sou,-13359;su,-13356;suan,-13343;sui,-13340;sun,-13329;suo,-13326;ta,-13318; tai,-13147;tan,-13138;tang,-13120;tao,-13107;te,-13096;teng,-13095;ti,-13091;tian,-13076;tiao,-13068;tie,- 13063;ting,-13060;tong,-12888;tou,-12875;tu,-12871;tuan,-12860;tui,-12858;tun,-12852;tuo,-12849;wa,-12838;wai, -12831;wan,-12829;wang,-12812;wei,-12802;wen,-12607;weng,-12597;wo,-12594;wu,-12585;xi,-12556;xia,-12359;xian, -12346;xiang,-12320;xiao,-12300;xie,-12120;xin,-12099;xing,-12089;xiong,-12074;xiu,-12067;xu,-12058;xuan,- 12039;xue,-11867;xun,-11861;ya,-11847;yan,-11831;yang,-11798;yao,-11781;ye,-11604;yi,-11589;yin,-11536;ying,- 11358;yo,-11340;yong,-11339;you,-11324;yu,-11303;yuan,-11097;yue,-11077;yun,-11067;za,-11055;zai,-11052;zan,-11045;zang,-11041;zao, -11038;ze,-11024;zei,-11020;zen,-11019;zeng,-11018;zha,-11014;zhai,-10838;zhan,-10832;zhang,-10815;zhao,-10800 ; zhe,-10790;zhen,-10780;zheng,-10764;zhi,-10587;zhong,-10544;zhou,-10533;zhu,-10519;zhua,-10331;zhuai,-10329; Zhuan,-10328;zhuang,-10322;zhui,-10315;zhun,-10309;zhuo,-10307;zi,-10296;zong,-10281;zou,-10274;zu,-10270;zuan , -10262;zui,-10260;zun,-10256;zuo,-10254 '
def Pinyin (num):
Global table
If Num>0 & num<160:
return Chr (NUM)
V=table.split (';')
For i in Xrange (Len (v) -1,-1,-1):
S=v[i].split (', ')
if Int (s[1]) <=num:
return S[0]
Break

if __name__ = = "__main__":
Chinese = ' I'm the Thorn bird who you are '
I=0
while (I<len (Chinese)-1):
p = Ord (chinese[i:i+1])
if (p>160):
I+=1
Q = Ord (chinese[i:i+1])
p = p*256+q-65536
I+=1
Print Pinyin (p)


Above we encoding also can carry on the optimization processing, as follows

The code is as follows Copy Code

#!/usr/bin/python
#-*-coding:cp936-*-

Import Sys
Import re
Import string

Class Cconvert:
def __init__ (self):
"Load data Table"
Try
Fp=open ("Convert.txt")
Except IOError:
print ' Can ' t load data from Data.txt/nplease Make sure this file exists.
Sys.exit (1)
Else
Self.data=fp.read ()
Fp.close ()

def convert (self, strin):
"Convert GBK to PinYin"
Length, Stroutkey, Stroutvalue, I=len (Strin), "", "", 0
While I<length:
If i==length-1:
Stroutkey+=strin[i:i+1]+ ""
Stroutvalue+=strin[i:i+1]+ ""
Break
Code1, Code2=ord (strin[i:i+1]), Ord (strin[i+1:i+2])
If code1>=0x81 and Code1<=0xfe and code2>=0x40 and Code2<=0xfe and code2!=0x7f:
Strtemp=self.getindex (strin[i:i+2])
Strlength=len (strtemp)
If strlength<2:strlength=2
Stroutkey+=string.center (strin[i:i+2], strlength) + ""
Stroutvalue+=string.center (strtemp, strlength) + ""
I+=1;
Else
Stroutkey+=strin[i:i+1]+ ""
Stroutvalue+=strin[i:i+1]+ ""
I+=1
return [Stroutvalue, Stroutkey]

def getindex (self, Strin):
"Convert single GBK to PinYin from index"
Pos=re.search ("^" +strin+ "([0-9a-za-z]+)", Self.data, re. M
If Pos==none:
Return Strin
Else
return Pos.group (1)

Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.