Extracting the Pinyin initial (first letter) of Chinese characters (Java version)

Source: Internet
Author: User

Package info. gnuhpc;

/**
 * Converts text containing Simplified Chinese characters into the string of
 * their Pinyin initials, based on the two-byte GB2312 code of each character.
 *
 * <p>Known limitation: polyphonic characters (characters with more than one
 * reading) are not supported; each character maps to exactly one initial.
 *
 * @author gnuhpc (email: warmbupt@gmail.com, blog: http://gnuhpc.info)
 * @since 2010-1-22
 */
public class PinyinConv {
    /** Lowest two-byte GB2312 code handled as a Chinese character (0xB0A1 = 45217). */
    private static final int BEGIN = 0xB0A1;

    /** Highest two-byte GB2312 code handled as a Chinese character (0xF7FE = 63486). */
    private static final int END = 0xF7FE;

    /**
     * GB2312 code of the first character filed under each initial, in ascending
     * order. Entry {@code k} opens the half-open range that ends where entry
     * {@code k + 1} begins; the last range runs up to and including {@link #END}.
     * There are no entries for i, u and v because no Pinyin syllable starts with
     * those letters.
     */
    private static final int[] RANGE_STARTS = {
        0xB0A1, // a
        0xB0C5, // b
        0xB2C1, // c
        0xB4EE, // d
        0xB6EA, // e
        0xB7A2, // f
        0xB8C1, // g
        0xB9FE, // h
        0xBBF7, // j
        0xBFA6, // k
        0xC0AC, // l
        0xC2E8, // m
        0xC4C3, // n
        0xC5B6, // o
        0xC5BE, // p
        0xC6DA, // q
        0xC8BB, // r
        0xC8F6, // s
        0xCBFA, // t
        0xCDDA, // w
        0xCEF4, // x
        0xD1B9, // y
        0xD4D1, // z
    };

    /** Initial reported for each range in {@link #RANGE_STARTS} (same index). */
    private static final char[] INITIALS = "abcdefghjklmnopqrstwxyz".toCharArray();

    // ------------------------ Public method area ------------------------

    /**
     * Returns the initials of the given text, one output character per input
     * character.
     *
     * <ul>
     *   <li>ASCII letters are returned in upper case.</li>
     *   <li>Characters whose GB2312 code lies in the handled range are replaced
     *       by their lower-case Pinyin initial.</li>
     *   <li>Every other character is copied unchanged.</li>
     * </ul>
     *
     * @param SourceStr the text to convert; must not be {@code null}
     * @return the converted text, with the same length as {@code SourceStr}
     * @throws NullPointerException if {@code SourceStr} is {@code null}
     */
    public static String cn2py(String SourceStr) {
        int length = SourceStr.length();
        // StringBuilder avoids re-copying the partial result on every character.
        StringBuilder initials = new StringBuilder(length);
        for (int pos = 0; pos < length; pos++) {
            initials.append(toInitial(SourceStr.charAt(pos)));
        }
        return initials.toString();
    }

    // ------------------------ Private method area ------------------------

    /**
     * Maps a single character to its initial.
     *
     * @param ch the character to convert
     * @return the upper-case letter for an ASCII letter, the lower-case Pinyin
     *     initial for a character inside the handled GB2312 range, otherwise
     *     {@code ch} itself
     */
    private static char toInitial(char ch) {
        // ASCII letters: lower case is folded to upper case, upper case is kept.
        if (ch >= 'a' && ch <= 'z') {
            return (char) (ch - 'a' + 'A');
        }
        if (ch >= 'A' && ch <= 'Z') {
            return ch;
        }

        int gb = gbValue(ch);
        // Outside the handled code range (including "not encodable"): pass through.
        if (gb < BEGIN || gb > END) {
            return ch;
        }

        // Find the last range whose start is <= gb. The scan always stops because
        // gb >= BEGIN and BEGIN equals RANGE_STARTS[0]; gb == END lands in the
        // final range, so the right edge of the code area is included.
        // NOTE(review): every code from 0xD4D1 up to END is reported as 'z'.
        // Presumably only the first part of that span is really ordered by
        // pronunciation - confirm against the GB2312 layout before relying on it.
        int idx = RANGE_STARTS.length - 1;
        while (gb < RANGE_STARTS[idx]) {
            idx--;
        }
        return INITIALS[idx];
    }

    /**
     * Encodes a character in GB2312 and returns its two bytes as one integer
     * (first byte in the high 8 bits, second byte in the low 8 bits).
     *
     * @param ch the character to encode
     * @return the combined two-byte code, or {@code 0} when the encoding yields
     *     fewer than two bytes or the GB2312 charset is not available
     */
    private static int gbValue(char ch) {
        final byte[] bytes;
        try {
            bytes = String.valueOf(ch).getBytes("GB2312");
        } catch (java.io.UnsupportedEncodingException e) {
            // Charset missing in this runtime: report "no code" so the caller
            // passes the character through unchanged.
            return 0;
        }
        if (bytes.length < 2) {
            return 0;
        }
        // Mask before shifting: Java bytes are signed and would sign-extend.
        return ((bytes[0] & 0xff) << 8) | (bytes[1] & 0xff);
    }

    /**
     * Small demo: prints the conversion of a sample sentence.
     *
     * <p>NOTE(review): the sample text looks like a machine translation of a
     * Chinese sentence; replace it with Chinese text to see Pinyin initials.
     *
     * @param args unused
     * @throws Exception never thrown by the current body; kept for signature
     *     compatibility
     */
    public static void main(String[] args) throws Exception {
        System.out.println(cn2py("Chongqing attaches great importance to the development of the IT industry. Most foreign companies, such as IBM, are stationed in the mountain city "));
    }
}

Related Article

Contact Us

The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.