In one project, records had to be searchable by the pinyin initials of a field such as a person's name. For example, to find a person named "Zhang San" you can simply enter 'zs'. The following utility class was written for this purpose:
/**
 * Reduces text to the lowercase pinyin initials of the Chinese characters it contains.
 *
 * <p>GB 2312-80 divides its Chinese characters into two levels. Level 1 holds the 3755
 * commonly used characters in rows 16-55, sorted by pinyin. Level 2 holds 3008 less
 * common characters in rows 56-87, sorted by radical and stroke count. Because only
 * level 1 is ordered by pronunciation, this class can only resolve level-1 characters.
 * For the compound initials zh, ch and sh only the first letter (z, c, s) is returned.
 */
public class StringUtil {

    /**
     * Charset used to obtain the two-byte GB code of a character. It is pinned explicitly
     * because the platform default (often UTF-8) would yield unrelated bytes.
     */
    private static final java.nio.charset.Charset GB2312 =
            java.nio.charset.Charset.forName("GB2312");

    /** Offset between a GB code byte and its row/cell number (0xA0). */
    static final int GB_SP_DIFF = 160;

    /**
     * Row-cell codes (row * 100 + cell) at which each pinyin initial starts within the
     * level-1 area. The final entry (5600) is an exclusive end sentinel, so this array
     * has exactly one more element than {@link #firstLetter}.
     */
    static final int[] secPosValueList = {
        1601, 1637, 1833, 2078, 2274, 2302, 2433, 2594, 2787,
        3106, 3212, 3472, 3635, 3722, 3730, 3858, 4027, 4086,
        4390, 4558, 4684, 4925, 5249, 5600 };

    /**
     * Initial for each range in {@link #secPosValueList}; entry i covers
     * [secPosValueList[i], secPosValueList[i + 1]). There are no entries for i, u or v.
     */
    static final char[] firstLetter = {
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j',
        'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
        't', 'w', 'x', 'y', 'z' };

    /**
     * Builds the pinyin-initial abbreviation of a string.
     *
     * <p>The input is lower-cased first. ASCII characters and characters that GB2312
     * cannot encode are copied through unchanged. Characters with a two-byte GB code are
     * replaced by the result of {@link #convert(byte[])}.
     *
     * @param oriStr the text to abbreviate; may be {@code null}
     * @return the abbreviated text, or an empty string for {@code null} or empty input
     */
    public static String getFirstLetter(String oriStr) {
        if (oriStr == null || oriStr.isEmpty()) {
            return "";
        }
        // Locale.ROOT keeps the result independent of the JVM default locale.
        String str = oriStr.toLowerCase(java.util.Locale.ROOT);
        StringBuilder buffer = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (ch < 128) {
                // Plain ASCII (including NUL) is never a Chinese character.
                buffer.append(ch);
                continue;
            }
            byte[] gbCode = String.valueOf(ch).getBytes(GB2312);
            if (gbCode.length == 2) {
                buffer.append(convert(gbCode));
            } else {
                // Not representable in GB2312 (encoded as a single '?'): keep the character.
                buffer.append(ch);
            }
        }
        return buffer.toString();
    }

    /**
     * Returns the pinyin initial of the character with the given GB code.
     *
     * <p>Each of the two bytes minus 160 gives the row and the cell; row * 100 + cell is
     * the row-cell code that is looked up in {@link #secPosValueList}. Example: the
     * character "ni" (you) has GB code 0xC4 0xE3; subtracting 0xA0 gives 0x24 / 0x43,
     * i.e. 36 and 67, so the code is 3667, which falls in the range for 'n'.
     *
     * <p>The argument array is not modified.
     *
     * @param bytes the GB code; only the first two bytes are examined
     * @return the lowercase initial, or '-' if the input is {@code null}, shorter than
     *         two bytes, or not a level-1 Chinese character
     */
    static char convert(byte[] bytes) {
        char result = '-';
        if (bytes == null || bytes.length < 2) {
            return result;
        }
        // Mask to 0..255 first: Java bytes are signed, and GB code bytes are >= 0xA1.
        int row = (bytes[0] & 0xFF) - GB_SP_DIFF;
        int cell = (bytes[1] & 0xFF) - GB_SP_DIFF;
        if (row < 1 || row > 94 || cell < 1 || cell > 94) {
            // Outside the 94 x 94 GB 2312 grid; reject so a bad cell cannot alias a valid code.
            return result;
        }
        int secPosValue = row * 100 + cell;
        for (int i = 0; i < firstLetter.length; i++) {
            if (secPosValue >= secPosValueList[i] && secPosValue < secPosValueList[i + 1]) {
                result = firstLetter[i];
                break;
            }
        }
        return result;
    }

    /** Small demonstration; the Chinese literals are escaped so the source encoding does not matter. */
    public static void main(String[] args) {
        System.out.println(StringUtil.getFirstLetter("I love u"));
        // "wo ai Beijing Tiananmen" written in Chinese characters
        System.out.println(StringUtil.getFirstLetter("\u6211\u7231\u5317\u4eac\u5929\u5b89\u95e8"));
        // "I love " followed by "Beijing Tiananmen" in Chinese characters
        System.out.println(StringUtil.getFirstLetter("I love \u5317\u4eac\u5929\u5b89\u95e8"));
    }
}
Running output (the input is lower-cased before conversion, so all output is lower case):
i love u
wabjtam
i love bjtam