Packagecn.chh.utils;/*** Get the first letter of each character *@authorCHH *@since2013-01-21 * @bugs does not support Polyphone processing*/ Public classPinyinconv {//Simplified Chinese encoding range from B0A1 (45217) to F7fe (63486) Private Static intBEGIN = 45217; Private Static intEND = 63486; //according to the initials, this table is the first character that appears in the GB2312, meaning "Ah" is the first character representing the first letter A. //i, U, v do not do consonants, custom rules follow the preceding letters Private Static Char[] chartable = {' Ah ', ' baa ', ' rub ', ' lap ', ' moth ', ' hair ', ' karma ', ' ha ', ' Ha ', ' hit ', ' Kerala ', ' garbage ', ' Mom ', ' take ', ' oh ', ' pa ', ' period ', ' yes ', ' Satan ', ' collapse ', ' collapse ', ' Collapse ', ' dig ', ' ever ', ' press ', ' turn ', }; //26 Letter Range corresponds to 27 endpoints//GB2312 code Chinese character interval decimal notation Private Static int[] table =New int[27]; //corresponding to the first letter interval table Private Static Char[] initialtable = {' A ', ' B ', ' C ', ' d ', ' e ', ' f ', ' G ', ' H ', ' H ', ' J ', ' K ', ' l ', ' m ', ' n ', ' o ', ' P ', ' Q ', ' R ', ' s ', ' t ', ' t ', ' t ', ' W ', ' x ', ' y ', ' z ', }; //Initialize Static { for(inti = 0; I < 26; i++) {Table[i]= Gbvalue (Chartable[i]);//get the GB2312 code of the first-letter range endpoint table, decimal. 
} table[+] = END;//End of interval table } /*** One of the most important ways to return a character string based on a string containing Chinese characters is the following: a character read, judge, output*/ Public Staticstring Cn2py (String sourcestr) {string Result= ""; intStrlength =sourcestr.length (); inti; Try { for(i = 0; i < strlength; i++) {Result+=char2initial (Sourcestr.charat (i)); } } Catch(Exception e) {Result= ""; } returnResult; } /*** Input character, get his initials, English letter return corresponding uppercase letter, other non-Simplified Chinese characters returned ' 0 ' **/ Private Static CharChar2initial (Charch) { //handling of English letters: lowercase letters are converted to uppercase, uppercase and direct return if(Ch >= ' a ' && ch <= ' Z ') return(Char) (CH-' a ' + ' a '); if(Ch >= ' A ' && ch <= ' Z ') returnch; //processing of non-English letters: convert to the first letter, and then determine whether within the Code table range,//if not, return directly. //If so, it is judged within the Code table. intGB = Gbvalue (CH);//Kanji Conversion First letter if(GB < BEGIN) | | (GB > END))//before the Code table interval, return directly returnch; inti; for(i = 0; i <; i++) {//judge the matching code table interval, match to break, judge the interval shape such as "[,)" if(GB >= Table[i] && (GB < table[i+1])) Break; } if(Gb==end) {//Make up the right end of the GB2312 intervalI=25; } returnInitialtable[i];//in the Code table interval, return the first letter } /*** Take out the Chinese character code cn kanji*/ Private Static intGbvalue (CharCH) {//converts a Chinese character (GB2312) to a decimal representation. String str =NewString (); STR+=ch; Try { byte[] bytes = Str.getbytes ("GB2312"); if(Bytes.length < 2) return0; return(Bytes[0] << 8 & 0xff00) + (Bytes[1] & 0xFF); } Catch(Exception e) {return0; } } Public Static voidMain (string[] args)throwsException {System.out.println (cn2py (Beijing)); } }
Java Tools: PinyinConv