package info.gnuhpc;
/**
 * @author gnuhpc email: warmbupt@gmail.com blog: http://gnuhpc.info
 * @date 2010-1-22
 * @bugs Polyphonic characters (characters with more than one reading) are not supported.
 */
public class PinyinConv {
    // Maps every character of a string to the first letter of its pinyin by
    // locating the character's GB2312 code inside a table of 26 letter intervals.

    // The encoding range of Simplified Chinese in GB2312 runs from
    // 0xB0A1 (45217) to 0xF7FE (63486).
    // NOTE(review): codes above 0xD7F9 belong to GB2312 level 2, which is
    // presumably not ordered by pinyin, so every such character falls into
    // the last interval ('z') - confirm before relying on results there.
    private static final int BEGIN = 45217;
    private static final int END = 63486;

    // For each letter, the first Chinese character in GB2312 whose pinyin
    // starts with that letter. 'i', 'u' and 'v' never start a syllable, so
    // their slots repeat the boundary character of the preceding letter.
    // Unicode escapes keep this file independent of the source encoding.
    private static final char[] chartable = {
        '\u554a', // a
        '\u82ad', // ba
        '\u64e6', // ca
        '\u642d', // da
        '\u86fe', // e
        '\u53d1', // fa
        '\u5676', // ga
        '\u54c8', // ha
        '\u54c8', // (i) follows h
        '\u51fb', // ji
        '\u5580', // ka
        '\u5783', // la
        '\u5988', // ma
        '\u62ff', // na
        '\u54e6', // o
        '\u556a', // pa
        '\u671f', // qi
        '\u7136', // ran
        '\u6492', // sa
        '\u584c', // ta
        '\u584c', // (u) follows t
        '\u584c', // (v) follows t
        '\u6316', // wa
        '\u6614', // xi
        '\u538b', // ya
        '\u531d', // za
    };

    // 26 letter intervals need 27 endpoints; each entry is the GB2312 code of
    // the matching chartable character, the last entry is END.
    private static final int[] table = new int[27];

    // Initial returned for each interval of the table above.
    private static final char[] initialtable = {
        'a', 'b', 'c', 'd', 'e', 'f', 'g',
        'h', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
        't', 't', 'w', 'x', 'y', 'z',
    };

    // Build the interval endpoint table once, when the class is loaded.
    static {
        for (int i = 0; i < 26; i++) {
            table[i] = gbValue(chartable[i]);
        }
        table[26] = END;
    }

    // ------------------------ Public method area ------------------------

    /**
     * Returns the pinyin initials of a string that may contain Chinese characters.
     *
     * English letters are returned in upper case, Simplified Chinese
     * characters are replaced by the lower-case first letter of their pinyin,
     * and every other character is copied through unchanged.
     *
     * @param SourceStr the text to convert; may be null
     * @return the converted text, or an empty string when SourceStr is null
     */
    public static String cn2py(String SourceStr) {
        if (SourceStr == null) {
            return "";
        }
        int length = SourceStr.length();
        StringBuilder result = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            result.append(Char2Initial(SourceStr.charAt(i)));
        }
        return result.toString();
    }

    // ------------------------ Private method area ------------------------

    /**
     * Converts a single character to its initial.
     *
     * @param ch the character to convert
     * @return the upper-case letter for an English letter, the lower-case
     *         pinyin initial for a character inside the GB2312 Chinese range,
     *         and ch itself for anything else
     */
    private static char Char2Initial(char ch) {
        // English letters: lower case is converted to upper case, upper case
        // is returned as it is.
        if (ch >= 'a' && ch <= 'z') {
            return (char) (ch - 'a' + 'A');
        }
        if (ch >= 'A' && ch <= 'Z') {
            return ch;
        }

        // Anything outside the Chinese code range is returned unchanged.
        int gb = gbValue(ch);
        if (gb < BEGIN || gb > END) {
            return ch;
        }

        // The intervals are half-open, [table[i], table[i + 1]), so the very
        // last code of the range has to be assigned to the last interval by hand.
        if (gb == END) {
            return initialtable[25];
        }
        for (int i = 0; i < 26; i++) {
            if (gb >= table[i] && gb < table[i + 1]) {
                return initialtable[i];
            }
        }
        // Not reachable while the table spans BEGIN..END; kept as a safe fallback.
        return ch;
    }

    /**
     * Returns the GB2312 code of a character as a number.
     *
     * @param ch the character to encode
     * @return the two encoded bytes combined as (high byte * 256 + low byte),
     *         or 0 when the character does not encode to at least two bytes
     *         or the GB2312 charset is not available
     */
    private static int gbValue(char ch) {
        try {
            byte[] bytes = String.valueOf(ch).getBytes("GB2312");
            if (bytes.length < 2) {
                return 0;
            }
            // Mask both bytes so that the sign bit does not leak into the result.
            return ((bytes[0] << 8) & 0xff00) + (bytes[1] & 0xff);
        } catch (java.io.UnsupportedEncodingException e) {
            return 0;
        }
    }

    /** Small demo: prints the conversion of a sample sentence. */
    public static void main(String[] args) throws Exception {
        System.out.println(cn2py("Chongqing attaches great importance to the development of the IT industry. Most foreign companies, such as IBM, are stationed in the mountain city "));
    }
}