Recently, in order to increase the matching rate of queries, our CMS system needed pinyin search fields. On the internet I found pinyin4j, an open-source Java class library that converts Chinese characters to Hanyu Pinyin (and supports polyphonic characters). I looked at its demo and decided to use it. Because in actual use I need to consider every permutation and combination of the readings of polyphonic characters, the following code supports polyphonic characters.
[Java]
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
Public class pinyin4j {
/**
* String set conversion string (separated by commas)
* @ Author wyh
* @ Param stringSet
* @ Return
*/
Public static String makeStringByStringSet (Set <String> stringSet ){
StringBuilder str = new StringBuilder ();
Int I = 0;
For (String s: stringSet ){
If (I = stringSet. size ()-1 ){
Str. append (s );
} Else {
Str. append (s + ",");
}
I ++;
}
Return str. toString (). toLowerCase ();
}
/**
* Obtain the pinyin set
* @ Author wyh
* @ Param src
* @ Return Set <String>
*/
Public static Set <String> getPinyin (String src ){
If (src! = Null &&! Src. trim (). inclusignorecase ("")){
Char [] srcChar;
SrcChar = src. toCharArray ();
// Output class in Chinese pinyin format
HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat ();
// Output settings, Case sensitivity, and phonetic notation
HanYuPinOutputFormat. setCaseType (HanyuPinyinCaseType. LOWERCASE );
HanYuPinOutputFormat. setToneType (HanyuPinyinToneType. WITHOUT_TONE)
HanYuPinOutputFormat. setVCharType (HanyuPinyinVCharType. WITH_V );
String [] [] temp = new String [src. length ()] [];
For (int I = 0; I <srcChar. length; I ++ ){
Char c = srcChar [I];
// Convert Chinese or a-z or A-Z to PinYin (my requirement is to retain Chinese or a-z or A-Z)
If (String. valueOf (c). matches ("[\ u4E00-\ u9FA5] + ")){
Try {
Temp [I] = PinyinHelper. toHanyuPinyinStringArray (srcChar [I], hanYuPinOutputFormat );
} Catch (BadHanyuPinyinOutputFormatCombination e ){
E. printStackTrace ();
}
} Else if (int) c >=65 & (int) c <= 90) | (int) c >=97 & (int) c <= 122 )){
Temp [I] = new String [] {String. valueOf (srcChar [I])};
} Else {
Temp [I] = new String [] {""};
}
}
String [] pingyinArray = Exchange (temp );
Set <String> pinyinSet = new HashSet <String> ();
For (int I = 0; I <pingyinArray. length; I ++ ){
PinyinSet. add (pingyinArray [I]);
}
Return pinyinSet;
}
Return null;
}
/**
* Recursion
* @ Author wyh
* @ Param strJaggedArray
* @ Return
*/
Public static String [] Exchange (String [] [] strJaggedArray ){
String [] [] temp = DoExchange (strJaggedArray );
Return temp [0];
}
/**
* Recursion
* @ Author wyh
* @ Param strJaggedArray
* @ Return
*/
Private static String [] [] DoExchange (String [] [] strJaggedArray ){
Int len = strJaggedArray. length;
If (len> = 2 ){
Int len1 = strJaggedArray [0]. length;
Int len2 = strJaggedArray [1]. length;
Int newlen = len1 * len2;
String [] temp = new String [newlen];
Int Index = 0;
For (int I = 0; I <len1; I ++ ){
For (int j = 0; j <len2; j ++ ){
Temp [Index] = strJaggedArray [0] [I] + strJaggedArray [1] [j];
Index ++;
}
}
String [] [] newArray = new String [len-1] [];
For (int I = 2; I <len; I ++ ){
NewArray [I-1] = strJaggedArray [I];
}
NewArray [0] = temp;
Return DoExchange (newArray );
} Else {
Return strJaggedArray;
}
}
/**
* @ Param args
*/
Public static void main (String [] args ){
String str = "Shan tianfang ";
System. out. println (makeStringByStringSet (getPinyin (str )));
}
}