Using pinyin4j
pinyin4j is a powerful toolkit for converting Chinese characters to pinyin. It can produce pinyin in a variety of output formats to suit different needs. Let's take a look at how to use it.
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
Public class PingYingChange {
/**
* Obtain the first letter of the Chinese character string, which is the same as the English character
*
* @ Param chinese character string
* @ Return first letter of Chinese pinyin
*/
Public static String cn2FirstSpell (String chinese ){
StringBuffer pybf = new StringBuffer ();
Char [] arr = chinese. toCharArray ();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat ();
DefaultFormat. setCaseType (HanyuPinyinCaseType. LOWERCASE );
DefaultFormat. setToneType (HanyuPinyinToneType. WITHOUT_TONE );
For (int I = 0; I <arr. length; I ++ ){
If (arr [I]> 128 ){
Try {
String [] _ t = PinyinHelper. toHanyuPinyinStringArray (arr [I], defaultFormat );
If (_ t! = Null ){
Pybf. append (_ t [0]. charAt (0 ));
}
} Catch (BadHanyuPinyinOutputFormatCombination e ){
E. printStackTrace ();
}
} Else {
Pybf. append (arr [I]);
}
}
Return pybf. toString (). replaceAll ("\ W", ""). trim ();
}
/**
* Obtain the Chinese character string pinyin, with English characters unchanged
*
* @ Param chinese character string
* @ Return Chinese pinyin
*/
Public static String cn2Spell (String chinese ){
StringBuffer pybf = new StringBuffer ();
Char [] arr = chinese. toCharArray ();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat ();
DefaultFormat. setCaseType (HanyuPinyinCaseType. LOWERCASE );
DefaultFormat. setToneType (HanyuPinyinToneType. WITHOUT_TONE );
For (int I = 0; I <arr. length; I ++ ){
If (arr [I]> 128 ){
Try {
Pybf. append (PinyinHelper. toHanyuPinyinStringArray (arr [I], defaultFormat) [0]);
} Catch (BadHanyuPinyinOutputFormatCombination e ){
E. printStackTrace ();
}
} Else {
Pybf. append (arr [I]);
}
}
Return pybf. toString ();
}
Public static String cnToSpell (String chines ){
String pinyinName = "";
StringBuffer strbuf = new StringBuffer ();
Char [] nameChar = chines. toCharArray ();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat ();
DefaultFormat. setCaseType (HanyuPinyinCaseType. LOWERCASE );
DefaultFormat. setToneType (HanyuPinyinToneType. WITHOUT_TONE );
For (int I = 0; I <nameChar. length; I ++ ){
Char name = quanbianban (nameChar [I]);
NameChar [I] = name;
If (128 <nameChar [I]) {
Try {
Strbuf. append (PinyinHelper. toHanyuPinyinStringArray (nameChar [I], defaultFormat) [0]. charAt (0 ));
} Catch (BadHanyuPinyinOutputFormatCombination e ){
E. printStackTrace ();
}
} Else {
Strbuf. append (nameChar [I]);
}
}
PinyinName = strbuf. toString ();
Return pinyinName;
}
Public static char quanbianban (char quan ){
Switch (quan ){
Case '0 ':
Return '0 ';
Case '1 ':
Return '1 ';
Case '2 ':
Return '2 ';
Case '3 ':
Return '3 ';
Case '4 ':
Return '4 ';
Case '5 ':
Return '5 ';
Case '6 ':
Return '6 ';
Case '7 ':
Return '7 ';
Case '8 ':
Return '8 ';
Case '9 ':
Return '9 ';
Default:
Return quan;
}
}
/**
* String set conversion string (separated by commas)
* @ Author wyh
* @ Param stringSet
* @ Return
*/
Public static String makeStringByStringSet (Set <String> stringSet ){
StringBuilder str = new StringBuilder ();
Int I = 0;
For (String s: stringSet ){
If (I = stringSet. size ()-1 ){
Str. append (s );
} Else {
Str. append (s + ",");
}
I ++;
}
Return str. toString (). toLowerCase ();
}
/**
* Obtain the pinyin set
* @ Author wyh
* @ Param src
* @ Return Set <String>
*/
Public static Set <String> getPinyin (String src ){
If (src! = Null &&! Src. trim (). inclusignorecase ("")){
Char [] srcChar;
SrcChar = src. toCharArray ();
// Output class in Chinese pinyin format
HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat ();
// Output settings, Case sensitivity, and phonetic notation
HanYuPinOutputFormat. setCaseType (HanyuPinyinCaseType. LOWERCASE );
HanYuPinOutputFormat. setToneType (HanyuPinyinToneType. WITHOUT_TONE );
HanYuPinOutputFormat. setVCharType (HanyuPinyinVCharType. WITH_V );
String [] [] temp = new String [src. length ()] [];
For (int I = 0; I <srcChar. length; I ++ ){
Char c = srcChar [I];
// Convert Chinese or a-z or A-Z to PinYin (my requirement is to retain Chinese or a-z or A-Z)
If (String. valueOf (c). matches ("[\ u4E00-\ u9FA5] + ")){
Try {
Temp [I] = PinyinHelper. toHanyuPinyinStringArray (srcChar [I], hanYuPinOutputFormat );
} Catch (BadHanyuPinyinOutputFormatCombination e ){
E. printStackTrace ();
}
} Else if (int) c >=65 & (int) c <= 90) | (int) c >=97 & (int) c <= 122 )){
Temp [I] = new String [] {String. valueOf (srcChar [I])};
} Else {
Temp [I] = new String [] {""};
}
}
String [] pingyinArray = Exchange (temp );
Set <String> pinyinSet = new HashSet <String> ();
For (int I = 0; I <pingyinArray. length; I ++ ){
PinyinSet. add (pingyinArray [I]);
}
Return pinyinSet;
}
Return null;
}
/**
* Recursion
* @ Author wyh
* @ Param strJaggedArray
* @ Return
*/
Public static String [] Exchange (String [] [] strJaggedArray ){
String [] [] temp = DoExchange (strJaggedArray );
Return temp [0];
}
/**
* Recursion
* @ Author wyh
* @ Param strJaggedArray
* @ Return
*/
Private static String [] [] DoExchange (String [] [] strJaggedArray ){
Int len = strJaggedArray. length;
If (len> = 2 ){
Int len1 = strJaggedArray [0]. length;
Int len2 = strJaggedArray [1]. length;
Int newlen = len1 * len2;
String [] temp = new String [newlen];
Int Index = 0;
For (int I = 0; I <len1; I ++ ){
For (int j = 0; j <len2; j ++ ){
Temp [Index] = strJaggedArray [0] [I] + strJaggedArray [1] [j];
Index ++;
}
}
String [] [] newArray = new String [len-1] [];
For (int I = 2; I <len; I ++ ){
NewArray [I-1] = strJaggedArray [I];
}
NewArray [0] = temp;
Return DoExchange (newArray );
} Else {
Return strJaggedArray;
}
}
Public static void main (String [] args) throws Exception {
HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat ();
// UPPERCASE: upper case (ZHONG)
// LOWERCASE: LOWERCASE (zhong)
Format. setCaseType (HanyuPinyinCaseType. LOWERCASE );
// WITHOUT_TONE: No phonetic symbol (zhong)
// WITH_TONE_NUMBER: 1-4 digits indicating the English logo (zhong4)
// WITH_TONE_MARK: Use the phonetic symbol directly (WITH_U_UNICODE is required; otherwise, an exception occurs) (zh ng)
Format. setToneType (HanyuPinyinToneType. WITH_TONE_MARK );
// WITH_V: v represents U (nv)
// WITH_U_AND_COLON: Use "u:" to indicate u (nu :)
// WITH_U_UNICODE: directly use U (n u)
Format. setVCharType (HanyuPinyinVCharType. WITH_U_UNICODE );
String [] pinyin = PinyinHelper. toHanyuPinyinStringArray ('weight', format );
System. out. println (PinyinHelper. toHanyuPinyinStringArray ('weight', format) [1]);
System. out. println (PingYingChange. cnToSpell ("Zhenjiang abc "));
System. out. println (PingYingChange. quanbianban ('O '));
String x = "who told me that the student was born with an expert ";
System. out. println (cn2FirstSpell (x ));
System. out. println (cn2Spell (x ));
String str = "Shan tianfang ";
System. out. println (makeStringByStringSet (getPinyin (str )));
System. out. println ("");
}
}