A thorough method for detecting Chinese characters in Java
To check whether a string contains Chinese characters in Java, the usual approach is a regular expression over the Unicode range of the CJK Unified Ideographs (0x4e00-0x9fbb). In practice this range is not accurate enough, because it does not cover Chinese punctuation marks such as the full-width colon, comma, and full stop.
The following is a more comprehensive detection method, implemented in CharUtil.java:
The code is as follows:
import java.util.regex.Pattern;
/**
 * Helpers for deciding whether text contains Chinese (CJK) characters.
 *
 * <p>Three strategies are offered:
 * <ul>
 *   <li>{@link #isChinese(String)} - Unicode-block based; recognises Han ideographs as well as
 *       CJK / full-width / general punctuation.</li>
 *   <li>{@link #isChineseByREG(String)} - regular expression over the fixed range
 *       U+4E00..U+9FBF; ideographs only, no punctuation.</li>
 *   <li>{@link #isChineseByName(String)} - regular expression using the named Unicode block
 *       "CJK Unified Ideographs", restricted to assigned code points.</li>
 * </ul>
 */
public class CharUtil {

    /** One or more characters in the range U+4E00..U+9FBF. Compiled once and reused. */
    private static final Pattern CJK_RANGE_PATTERN = Pattern.compile("[\\u4E00-\\u9FBF]+");

    /**
     * A single assigned character of the "CJK Unified Ideographs" block.
     *
     * <p>{@code \p{...}} matches characters that have the property, {@code \P{...}} those that
     * do not. {@code Cn} is the "unassigned" category, so {@code \P{Cn}} keeps assigned code
     * points only. The {@code &&} intersection operator is only meaningful inside a character
     * class, hence the surrounding brackets.
     */
    private static final Pattern CJK_BLOCK_PATTERN =
            Pattern.compile("[\\p{InCJK_Unified_Ideographs}&&\\P{Cn}]");

    /**
     * Demo entry point: prints the result of every detection strategy for a few sample strings,
     * followed by a per-character breakdown of the Unicode-block check.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Non-ASCII samples are written as Unicode escapes so the file compiles under any
        // source encoding.
        String[] strArr = new String[] {
            "www.micmiu.com ",
            "! @ # $ % ^ & * () _ + {} [] | \"'? /:; <> ,.",
            // full-width and CJK punctuation
            "\uFF01\uFFE5\u2026\u2026\uFF08\uFF09\u2014\u2014\uFF1A\uFF1B\u201C\u201D\u2018\u2019"
                    + "\u300A\u300B\uFF0C\u3002\uFF1F\u3001",
            // Han ideographs
            "\u4E0D\u8981\u554A",
            "\u97E9\u4F73\u4EBA",
            "??? "
        };
        for (String str : strArr) {
            System.out.println("============> test string:" + str);
            System.out.println("Regular Expression judgment result:" + isChineseByREG(str)
                    + "--" + isChineseByName(str));
            System.out.println("Unicode judgment result:" + isChinese(str));
            System.out.println("detailed judgment list :");
            char[] ch = str.toCharArray();
            for (int i = 0; i < ch.length; i++) {
                char c = ch[i];
                System.out.println(c + "-->" + (isChinese(c) ? "Yes" : "no "));
            }
        }
    }

    /**
     * Tells whether a single UTF-16 code unit is a Chinese character or punctuation mark,
     * judged by its Unicode block.
     *
     * <p>A lone surrogate is never reported as Chinese; supplementary characters must be checked
     * as whole code points, which {@link #isChinese(String)} does.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} belongs to one of the accepted blocks
     */
    private static boolean isChinese(char c) {
        return isChineseCodePoint(c);
    }

    /**
     * Tells whether a code point lies in one of the Unicode blocks treated as "Chinese":
     * the CJK ideograph blocks plus CJK symbols/punctuation, half/full-width forms and
     * general punctuation.
     *
     * <p>Note that GENERAL_PUNCTUATION also holds marks used in non-Chinese text (for example
     * the ellipsis and typographic quotes), so this check is deliberately broad.
     *
     * @param codePoint a valid Unicode code point
     * @return {@code true} if the code point is in an accepted block
     */
    private static boolean isChineseCodePoint(int codePoint) {
        // UnicodeBlock.of returns null for code points outside every block; the reference
        // comparisons below then simply yield false.
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(codePoint);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION;
    }

    /**
     * Tells whether a string contains at least one Chinese character or punctuation mark,
     * judged by Unicode block.
     *
     * <p>The string is walked code point by code point so that supplementary-plane ideographs
     * (Extension B), which occupy two {@code char}s, are recognised.
     *
     * @param strName the text to inspect; may be {@code null}
     * @return {@code true} if any code point is in an accepted block; {@code false} for
     *         {@code null} or empty input
     */
    public static boolean isChinese(String strName) {
        if (strName == null) {
            return false;
        }
        int index = 0;
        while (index < strName.length()) {
            int codePoint = strName.codePointAt(index);
            if (isChineseCodePoint(codePoint)) {
                return true;
            }
            index += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Tells whether a string contains at least one character in the range U+4E00..U+9FBF.
     * Only part of the CJK repertoire is covered; punctuation is not detected.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if a character in the range is found; {@code false} for {@code null}
     */
    public static boolean isChineseByREG(String str) {
        if (str == null) {
            return false;
        }
        return CJK_RANGE_PATTERN.matcher(str).find();
    }

    /**
     * Tells whether a string contains at least one assigned character of the
     * "CJK Unified Ideographs" Unicode block. Only part of the CJK repertoire is covered;
     * punctuation is not detected.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if such a character is found; {@code false} for {@code null}
     */
    public static boolean isChineseByName(String str) {
        if (str == null) {
            return false;
        }
        return CJK_BLOCK_PATTERN.matcher(str).find();
    }
}