Use pinyin4j.jar to convert Chinese characters to pinyin

Source: Internet
Author: User

You can use pinyin4j.jar to convert Chinese characters to pinyin. The code is as follows:

[Java]
View plaincopyprint?
  1. Package mainapp;
  2. Import net. SourceForge. pinyin4j. pinyinhelper;
  3. Import net. SourceForge. pinyin4j. format. hanyupinyincasetype;
  4. Import net. SourceForge. pinyin4j. format. hanyupinyinoutputformat;
  5. Import net. SourceForge. pinyin4j. format. hanyupinyintonetype;
  6. Import net. SourceForge. pinyin4j. format. hanyupinyinvchartype;
  7. Import net. SourceForge. pinyin4j. format. Exception. badhanyupinyinoutputformatcombination;
  8. Public class testpinyin4j
  9. {
  10. /**
  11. * Convert Chinese characters to full spelling
  12. *
  13. * @ Param SRC
  14. * @ Return string
  15. */
  16. Public static string getpinyin (string SRC)
  17. {
  18. Char [] T1 = NULL;
  19. T1 = SRC. tochararray ();
  20. String [] T2 = new string [t1.length];
  21. // Set the output format of Chinese characters and Pinyin
  22. Hanyupinyinoutputformat T3 = new hanyupinyinoutputformat ();
  23. T3.setcasetype (hanyupinyincasetype. lowercase );
  24. T3.settonetype (hanyupinyintonetype. without_tone );
  25. T3.setvchartype (hanyupinyinvchartype. with_v );
  26. String t4 = "";
  27. Int T0 = t1.length;
  28. Try
  29. {
  30. For (INT I = 0; I <t0; I ++)
  31. {
  32. // Determine whether it can be a Chinese character
  33. // System. Out. println (T1 [I]);
  34. If (character. tostring (T1 [I]). Matches ("[// u4e00-// u9fa5] + "))
  35. {
  36. T2 = pinyinhelper. tohanyupinyinstringarray (T1 [I], T3); // save all the Chinese characters to the T2 array.
  37. T4 + = t2 [0]; // obtain the first pronunciation of the Chinese character and connect it to the string T4.
  38. }
  39. Else
  40. {
  41. // If it is not a Chinese character, indirectly retrieve the character and connect it to the string T4
  42. T4 + = character. tostring (T1 [I]);
  43. }
  44. }
  45. }
  46. Catch (badhanyupinyinoutputformatcombination E)
  47. {
  48. E. printstacktrace ();
  49. }
  50. Return T4;
  51. }
  52. /**
  53. * Extract the first letter of each Chinese Character
  54. *
  55. * @ Param Str
  56. * @ Return string
  57. */
  58. Public static string getpinyinheadchar (string Str)
  59. {
  60. String convert = "";
  61. For (Int J = 0; j <Str. Length (); j ++)
  62. {
  63. Char word = Str. charat (j );
  64. // Extract the first letter of a Chinese character
  65. String [] pinyinarray = pinyinhelper. tohanyupinyinstringarray (Word );
  66. If (pinyinarray! = NULL)
  67. {
  68. Convert + = pinyinarray [0]. charat (0 );
  69. }
  70. Else
  71. {
  72. Convert + = word;
  73. }
  74. }
  75. Return convert;
  76. }
  77. /**
  78. * Convert a string to an ascii code.
  79. *
  80. * @ Param cnstr
  81. * @ Return string
  82. */
  83. Public static string getcnascii (string cnstr)
  84. {
  85. Stringbuffer strbuf = new stringbuffer ();
  86. // Convert a string to a byte sequence
  87. Byte [] bgbk = cnstr. getbytes ();
  88. For (INT I = 0; I <bgbk. length; I ++)
  89. {
  90. // Convert each character to an ascii code
  91. Strbuf. append (integer. tohexstring (bgbk [I] & 0xff ));
  92. }
  93. Return strbuf. tostring ();
  94. }
  95. Public static void main (string [] ARGs)
  96. {
  97. String cnstr = "Chen ";
  98. System. Out. println (getpinyin (cnstr ));
  99. System. Out. println (getpinyinheadchar (cnstr ));
  100. System. Out. println (getcnascii (cnstr ));
  101. }
  102. }

You can search for "pinyin4j.jar" in the CSDN resource channel to find the library and add it to your project.

Pinyin4j can obtain pinyin corresponding to Chinese characters, and its format can be customized.

Features:

1. Support for multiple romanization systems:

Hanyu Pinyin
Tongyong Pinyin
Wade-Giles
MPS2 (Mandarin Phonetic Symbols II)
Yale
Gwoyeu Romatzyh
2. Support for polyphonic characters (characters with more than one pronunciation)

The correct pinyin can be obtained based on the context.

3. Support for multiple pinyin formats

Upper-case or lower-case output
Supports writing ü as Unicode ü, as v, or as u:
Supports tone numbers (lü3), tone marks (lǚ), or no tone indication (lü)

From: http://blog.csdn.net/ziwen00/article/details/5436123

You can use pinyin4j.jar to convert Chinese characters to pinyin. The code is as follows:

[Java]
View plaincopyprint?
  1. Package mainapp;
  2. Import net. SourceForge. pinyin4j. pinyinhelper;
  3. Import net. SourceForge. pinyin4j. format. hanyupinyincasetype;
  4. Import net. SourceForge. pinyin4j. format. hanyupinyinoutputformat;
  5. Import net. SourceForge. pinyin4j. format. hanyupinyintonetype;
  6. Import net. SourceForge. pinyin4j. format. hanyupinyinvchartype;
  7. Import net. SourceForge. pinyin4j. format. Exception. badhanyupinyinoutputformatcombination;
  8. Public class testpinyin4j
  9. {
  10. /**
  11. * Convert Chinese characters to full spelling
  12. *
  13. * @ Param SRC
  14. * @ Return string
  15. */
  16. Public static string getpinyin (string SRC)
  17. {
  18. Char [] T1 = NULL;
  19. T1 = SRC. tochararray ();
  20. String [] T2 = new string [t1.length];
  21. // Set the output format of Chinese characters and Pinyin
  22. Hanyupinyinoutputformat T3 = new hanyupinyinoutputformat ();
  23. T3.setcasetype (hanyupinyincasetype. lowercase );
  24. T3.settonetype (hanyupinyintonetype. without_tone );
  25. T3.setvchartype (hanyupinyinvchartype. with_v );
  26. String t4 = "";
  27. Int T0 = t1.length;
  28. Try
  29. {
  30. For (INT I = 0; I <t0; I ++)
  31. {
  32. // Determine whether it can be a Chinese character
  33. // System. Out. println (T1 [I]);
  34. If (character. tostring (T1 [I]). Matches ("[// u4e00-// u9fa5] + "))
  35. {
  36. T2 = pinyinhelper. tohanyupinyinstringarray (T1 [I], T3); // save all the Chinese characters to the T2 array.
  37. T4 + = t2 [0]; // obtain the first pronunciation of the Chinese character and connect it to the string T4.
  38. }
  39. Else
  40. {
  41. // If it is not a Chinese character, indirectly retrieve the character and connect it to the string T4
  42. T4 + = character. tostring (T1 [I]);
  43. }
  44. }
  45. }
  46. Catch (badhanyupinyinoutputformatcombination E)
  47. {
  48. E. printstacktrace ();
  49. }
  50. Return T4;
  51. }
  52. /**
  53. * Extract the first letter of each Chinese Character
  54. *
  55. * @ Param Str
  56. * @ Return string
  57. */
  58. Public static string getpinyinheadchar (string Str)
  59. {
  60. String convert = "";
  61. For (Int J = 0; j <Str. Length (); j ++)
  62. {
  63. Char word = Str. charat (j );
  64. // Extract the first letter of a Chinese character
  65. String [] pinyinarray = pinyinhelper. tohanyupinyinstringarray (Word );
  66. If (pinyinarray! = NULL)
  67. {
  68. Convert + = pinyinarray [0]. charat (0 );
  69. }
  70. Else
  71. {
  72. Convert + = word;
  73. }
  74. }
  75. Return convert;
  76. }
  77. /**
  78. * Convert a string to an ascii code.
  79. *
  80. * @ Param cnstr
  81. * @ Return string
  82. */
  83. Public static string getcnascii (string cnstr)
  84. {
  85. Stringbuffer strbuf = new stringbuffer ();
  86. // Convert a string to a byte sequence
  87. Byte [] bgbk = cnstr. getbytes ();
  88. For (INT I = 0; I <bgbk. length; I ++)
  89. {
  90. // Convert each character to an ascii code
  91. Strbuf. append (integer. tohexstring (bgbk [I] & 0xff ));
  92. }
  93. Return strbuf. tostring ();
  94. }
  95. Public static void main (string [] ARGs)
  96. {
  97. String cnstr = "Chen ";
  98. System. Out. println (getpinyin (cnstr ));
  99. System. Out. println (getpinyinheadchar (cnstr ));
  100. System. Out. println (getcnascii (cnstr ));
  101. }
  102. }

You can search for "pinyin4j.jar" in the CSDN resource channel to find the library and add it to your project.

Pinyin4j can obtain pinyin corresponding to Chinese characters, and its format can be customized.

Features:

1. Support for multiple romanization systems:

Hanyu Pinyin
Tongyong Pinyin
Wade-Giles
MPS2 (Mandarin Phonetic Symbols II)
Yale
Gwoyeu Romatzyh
2. Support for polyphonic characters (characters with more than one pronunciation)

The correct pinyin can be obtained based on the context.

3. Support for multiple pinyin formats

Upper-case or lower-case output
Supports writing ü as Unicode ü, as v, or as u:
Supports tone numbers (lü3), tone marks (lǚ), or no tone indication (lü)

From: http://blog.csdn.net/ziwen00/article/details/5436123

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.