Using the pinyin4j library

Source: Internet
Author: User

Using the pinyin4j library
pinyin4j is a capable Java toolkit for converting Chinese characters into Hanyu Pinyin. It can produce the pinyin in several output formats (letter case, tone notation, and the way the letter "ü" is written) to suit different needs. The example class below shows how to use it.

import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

Public class PingYingChange {

/**
* Obtain the first letter of the Chinese character string, which is the same as the English character
*
* @ Param chinese character string
* @ Return first letter of Chinese pinyin
*/
Public static String cn2FirstSpell (String chinese ){
StringBuffer pybf = new StringBuffer ();
Char [] arr = chinese. toCharArray ();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat ();
DefaultFormat. setCaseType (HanyuPinyinCaseType. LOWERCASE );
DefaultFormat. setToneType (HanyuPinyinToneType. WITHOUT_TONE );
For (int I = 0; I <arr. length; I ++ ){
If (arr [I]> 128 ){
Try {
String [] _ t = PinyinHelper. toHanyuPinyinStringArray (arr [I], defaultFormat );
If (_ t! = Null ){
Pybf. append (_ t [0]. charAt (0 ));
}
} Catch (BadHanyuPinyinOutputFormatCombination e ){
E. printStackTrace ();
}
} Else {
Pybf. append (arr [I]);
}
}
Return pybf. toString (). replaceAll ("\ W", ""). trim ();
}

/**
* Obtain the Chinese character string pinyin, with English characters unchanged
*
* @ Param chinese character string
* @ Return Chinese pinyin
*/
Public static String cn2Spell (String chinese ){
StringBuffer pybf = new StringBuffer ();
Char [] arr = chinese. toCharArray ();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat ();
DefaultFormat. setCaseType (HanyuPinyinCaseType. LOWERCASE );
DefaultFormat. setToneType (HanyuPinyinToneType. WITHOUT_TONE );
For (int I = 0; I <arr. length; I ++ ){
If (arr [I]> 128 ){
Try {
Pybf. append (PinyinHelper. toHanyuPinyinStringArray (arr [I], defaultFormat) [0]);
} Catch (BadHanyuPinyinOutputFormatCombination e ){
E. printStackTrace ();
}
} Else {
Pybf. append (arr [I]);
}
}
Return pybf. toString ();
}
Public static String cnToSpell (String chines ){

String pinyinName = "";
StringBuffer strbuf = new StringBuffer ();
Char [] nameChar = chines. toCharArray ();
HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat ();
DefaultFormat. setCaseType (HanyuPinyinCaseType. LOWERCASE );
DefaultFormat. setToneType (HanyuPinyinToneType. WITHOUT_TONE );
For (int I = 0; I <nameChar. length; I ++ ){
Char name = quanbianban (nameChar [I]);
NameChar [I] = name;
If (128 <nameChar [I]) {
Try {
Strbuf. append (PinyinHelper. toHanyuPinyinStringArray (nameChar [I], defaultFormat) [0]. charAt (0 ));
} Catch (BadHanyuPinyinOutputFormatCombination e ){
E. printStackTrace ();
}
} Else {
Strbuf. append (nameChar [I]);
}
}

PinyinName = strbuf. toString ();

Return pinyinName;
}

Public static char quanbianban (char quan ){
Switch (quan ){

Case '0 ':
Return '0 ';

Case '1 ':
Return '1 ';

Case '2 ':
Return '2 ';

Case '3 ':
Return '3 ';

Case '4 ':
Return '4 ';

Case '5 ':
Return '5 ';

Case '6 ':
Return '6 ';

Case '7 ':
Return '7 ';

Case '8 ':
Return '8 ';

Case '9 ':
Return '9 ';

Default:
Return quan;

}
}
/**
* String set conversion string (separated by commas)
* @ Author wyh
* @ Param stringSet
* @ Return
*/
Public static String makeStringByStringSet (Set <String> stringSet ){
StringBuilder str = new StringBuilder ();
Int I = 0;
For (String s: stringSet ){
If (I = stringSet. size ()-1 ){
Str. append (s );
} Else {
Str. append (s + ",");
}
I ++;
}
Return str. toString (). toLowerCase ();
}

/**
* Obtain the pinyin set
* @ Author wyh
* @ Param src
* @ Return Set <String>
*/
Public static Set <String> getPinyin (String src ){
If (src! = Null &&! Src. trim (). inclusignorecase ("")){
Char [] srcChar;
SrcChar = src. toCharArray ();
// Output class in Chinese pinyin format
HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat ();

// Output settings, Case sensitivity, and phonetic notation
HanYuPinOutputFormat. setCaseType (HanyuPinyinCaseType. LOWERCASE );
HanYuPinOutputFormat. setToneType (HanyuPinyinToneType. WITHOUT_TONE );
HanYuPinOutputFormat. setVCharType (HanyuPinyinVCharType. WITH_V );

String [] [] temp = new String [src. length ()] [];
For (int I = 0; I <srcChar. length; I ++ ){
Char c = srcChar [I];
// Convert Chinese or a-z or A-Z to PinYin (my requirement is to retain Chinese or a-z or A-Z)
If (String. valueOf (c). matches ("[\ u4E00-\ u9FA5] + ")){
Try {
Temp [I] = PinyinHelper. toHanyuPinyinStringArray (srcChar [I], hanYuPinOutputFormat );
} Catch (BadHanyuPinyinOutputFormatCombination e ){
E. printStackTrace ();
}
} Else if (int) c >=65 & (int) c <= 90) | (int) c >=97 & (int) c <= 122 )){
Temp [I] = new String [] {String. valueOf (srcChar [I])};
} Else {
Temp [I] = new String [] {""};
}
}
String [] pingyinArray = Exchange (temp );
Set <String> pinyinSet = new HashSet <String> ();
For (int I = 0; I <pingyinArray. length; I ++ ){
PinyinSet. add (pingyinArray [I]);
}
Return pinyinSet;
}
Return null;
}

/**
* Recursion
* @ Author wyh
* @ Param strJaggedArray
* @ Return
*/
Public static String [] Exchange (String [] [] strJaggedArray ){
String [] [] temp = DoExchange (strJaggedArray );
Return temp [0];
}

/**
* Recursion
* @ Author wyh
* @ Param strJaggedArray
* @ Return
*/
Private static String [] [] DoExchange (String [] [] strJaggedArray ){
Int len = strJaggedArray. length;
If (len> = 2 ){
Int len1 = strJaggedArray [0]. length;
Int len2 = strJaggedArray [1]. length;
Int newlen = len1 * len2;
String [] temp = new String [newlen];
Int Index = 0;
For (int I = 0; I <len1; I ++ ){
For (int j = 0; j <len2; j ++ ){
Temp [Index] = strJaggedArray [0] [I] + strJaggedArray [1] [j];
Index ++;
}
}
String [] [] newArray = new String [len-1] [];
For (int I = 2; I <len; I ++ ){
NewArray [I-1] = strJaggedArray [I];
}
NewArray [0] = temp;
Return DoExchange (newArray );
} Else {
Return strJaggedArray;
}
}

Public static void main (String [] args) throws Exception {
HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat ();

// UPPERCASE: upper case (ZHONG)
// LOWERCASE: LOWERCASE (zhong)
Format. setCaseType (HanyuPinyinCaseType. LOWERCASE );

// WITHOUT_TONE: No phonetic symbol (zhong)
// WITH_TONE_NUMBER: 1-4 digits indicating the English logo (zhong4)
// WITH_TONE_MARK: Use the phonetic symbol directly (WITH_U_UNICODE is required; otherwise, an exception occurs) (zh ng)
Format. setToneType (HanyuPinyinToneType. WITH_TONE_MARK );

// WITH_V: v represents U (nv)
// WITH_U_AND_COLON: Use "u:" to indicate u (nu :)
// WITH_U_UNICODE: directly use U (n u)
Format. setVCharType (HanyuPinyinVCharType. WITH_U_UNICODE );
String [] pinyin = PinyinHelper. toHanyuPinyinStringArray ('weight', format );
System. out. println (PinyinHelper. toHanyuPinyinStringArray ('weight', format) [1]);
System. out. println (PingYingChange. cnToSpell ("Zhenjiang abc "));
System. out. println (PingYingChange. quanbianban ('O '));
String x = "who told me that the student was born with an expert ";
System. out. println (cn2FirstSpell (x ));
System. out. println (cn2Spell (x ));
String str = "Shan tianfang ";
System. out. println (makeStringByStringSet (getPinyin (str )));
System. out. println ("");
}
}

 

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please email us and we will handle the problem within 5 days of receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.