C#: Ways to Determine Whether a Character Is Chinese
Method 1: In a Unicode string, Chinese characters fall in the range U+4E00..U+9FFF (CJK Unified Ideographs). To decide whether a character is Chinese, check whether its Unicode code point lies inside that range. Program code:
View plaincopy to clipboardprint?
// <PRE class = CSHARP name = "code">
/// <summary>
/// Reports whether the character at <paramref name="index"/> in
/// <paramref name="input"/> lies in the CJK Unified Ideographs block
/// (U+4E00..U+9FFF, inclusive).
/// </summary>
/// <param name="input">String to inspect. Null or empty yields false.</param>
/// <param name="index">Zero-based position of the character to test.</param>
/// <returns>True if the character is inside U+4E00..U+9FFF; otherwise false.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="index"/> is not a position inside a non-empty <paramref name="input"/>.
/// </exception>
protected bool ischineseletter(string input, int index)
{
    // Inclusive bounds of the range being tested.
    const int chfrom = 0x4E00;
    const int chend = 0x9FFF;

    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    // A surrogate half can never be inside U+4E00..U+9FFF, so answer false
    // instead of letting a lone/low surrogate raise ArgumentException.
    // char.IsSurrogate also validates index and throws
    // ArgumentOutOfRangeException for a bad position.
    if (char.IsSurrogate(input, index))
    {
        return false;
    }

    int code = input[index];
    return code >= chfrom && code <= chend;
}
// </PRE>
// View plaincopy to clipboardprint?
/// <summary>
/// Reports whether the character at <paramref name="index"/> in
/// <paramref name="input"/> lies in the CJK Unified Ideographs block
/// (U+4E00..U+9FFF, inclusive).
/// </summary>
/// <param name="input">String to inspect. Null or empty yields false.</param>
/// <param name="index">Zero-based position of the character to test.</param>
/// <returns>True if the character is inside U+4E00..U+9FFF; otherwise false.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="index"/> is not a position inside a non-empty <paramref name="input"/>.
/// </exception>
protected bool ischineseletter(string input, int index)
{
    // Inclusive bounds of the range being tested.
    const int chfrom = 0x4E00;
    const int chend = 0x9FFF;

    if (string.IsNullOrEmpty(input))
    {
        return false;
    }

    // A surrogate half can never be inside U+4E00..U+9FFF, so answer false
    // instead of letting a lone/low surrogate raise ArgumentException.
    // char.IsSurrogate also validates index and throws
    // ArgumentOutOfRangeException for a bad position.
    if (char.IsSurrogate(input, index))
    {
        return false;
    }

    int code = input[index];
    return code >= chfrom && code <= chend;
}
Method 2: program code
View plaincopy to clipboardprint?
/// <summary>
/// Reports whether <paramref name="cstring"/> contains at least one character
/// whose UTF-16 code unit value is 128 or greater (anything outside 7-bit ASCII).
/// </summary>
/// <remarks>
/// This is only a coarse stand-in for "contains Chinese": any non-ASCII
/// character (accented Latin letters, Cyrillic, symbols, ...) also returns true.
/// </remarks>
/// <param name="cstring">Text to scan. Null or empty yields false.</param>
/// <returns>True as soon as a character with value &gt;= 128 is found; otherwise false.</returns>
public bool ischina(string cstring)
{
    if (string.IsNullOrEmpty(cstring))
    {
        return false;
    }

    // Compare the char directly; no per-character Substring/Convert round-trip is needed.
    foreach (char c in cstring)
    {
        if (c >= 128)
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Reports whether <paramref name="cstring"/> contains at least one character
/// whose UTF-16 code unit value is 128 or greater (anything outside 7-bit ASCII).
/// </summary>
/// <remarks>
/// This is only a coarse stand-in for "contains Chinese": any non-ASCII
/// character (accented Latin letters, Cyrillic, symbols, ...) also returns true.
/// </remarks>
/// <param name="cstring">Text to scan. Null or empty yields false.</param>
/// <returns>True as soon as a character with value &gt;= 128 is found; otherwise false.</returns>
public bool ischina(string cstring)
{
    if (string.IsNullOrEmpty(cstring))
    {
        return false;
    }

    // Compare the char directly; no per-character Substring/Convert round-trip is needed.
    foreach (char c in cstring)
    {
        if (c >= 128)
        {
            return true;
        }
    }
    return false;
}
Method 3: program code
View plaincopy to clipboardprint?
/// <summary>
/// Determines whether a sentence contains at least one character that the
/// "gb2312" encoding represents with exactly two bytes.
/// </summary>
/// <remarks>
/// Two-byte output is used as the indicator of a Chinese character. Any other
/// character that the encoding also writes as two bytes is counted as well.
/// NOTE(review): on .NET Core / .NET 5+ the "gb2312" encoding is presumably only
/// available after registering CodePagesEncodingProvider - confirm for the target runtime.
/// </remarks>
/// <param name="words">Text to scan. Null or empty yields false.</param>
/// <returns>True if any single character encodes to two bytes; otherwise false.</returns>
public bool wordsiscn(string words)
{
    if (string.IsNullOrEmpty(words))
    {
        return false;
    }

    // Look the encoding up once instead of once per character.
    System.Text.Encoding gb = System.Text.Encoding.GetEncoding("gb2312");
    for (int i = 0; i < words.Length; i++)
    {
        byte[] sarr = gb.GetBytes(words.Substring(i, 1));
        if (sarr.Length == 2) // comparison; the listing had an assignment here
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Determines whether a sentence contains at least one character that the
/// "gb2312" encoding represents with exactly two bytes.
/// </summary>
/// <remarks>
/// Two-byte output is used as the indicator of a Chinese character. Any other
/// character that the encoding also writes as two bytes is counted as well.
/// NOTE(review): on .NET Core / .NET 5+ the "gb2312" encoding is presumably only
/// available after registering CodePagesEncodingProvider - confirm for the target runtime.
/// </remarks>
/// <param name="words">Text to scan. Null or empty yields false.</param>
/// <returns>True if any single character encodes to two bytes; otherwise false.</returns>
public bool wordsiscn(string words)
{
    if (string.IsNullOrEmpty(words))
    {
        return false;
    }

    // Look the encoding up once instead of once per character.
    System.Text.Encoding gb = System.Text.Encoding.GetEncoding("gb2312");
    for (int i = 0; i < words.Length; i++)
    {
        byte[] sarr = gb.GetBytes(words.Substring(i, 1));
        if (sarr.Length == 2) // comparison; the listing had an assignment here
        {
            return true;
        }
    }
    return false;
}
// View plaincopy to clipboardprint?
<Strong> Method 4: </strong> program code
Method 4: View plaincopy to clipboardprint?
// Test each element of s against the range U+4E00..U+9FA5.
// NOTE(review): s is declared outside this snippet; it is presumably a string
// (or an array of one-character strings) - confirm against the caller.
// The pattern is anchored so that exactly one character in the range matches.
// Build the Regex once, outside the loop, rather than on every iteration.
System.Text.RegularExpressions.Regex rx =
    new System.Text.RegularExpressions.Regex(@"^[\u4e00-\u9fa5]$");
for (int i = 0; i < s.Length; i++)
{
    // IsMatch needs a string; ToString() covers both a char and a string element.
    if (rx.IsMatch(s[i].ToString()))
    {
        // Yes
    }
    else
    {
        // No
    }
}
// View plaincopy to clipboardprint?
// Test each element of s against the range U+4E00..U+9FA5.
// NOTE(review): s is declared outside this snippet; it is presumably a string
// (or an array of one-character strings) - confirm against the caller.
// Build the Regex once, outside the loop, rather than on every iteration.
System.Text.RegularExpressions.Regex rx =
    new System.Text.RegularExpressions.Regex(@"^[\u4e00-\u9fa5]$");
for (int i = 0; i < s.Length; i++)
{
    // IsMatch needs a string; ToString() covers both a char and a string element.
    if (rx.IsMatch(s[i].ToString()))
    {
        // Yes
    }
    else
    {
        // No
    }
}
// \u4e00-\u9fa5 is the Chinese character range. ^[\u4e00-\u9fa5]$ is the
// regular expression that matches a single character in that range.
Similarly, you can detect Japanese: \u3040-\u309F is hiragana, and \u30A0-\u30FF is katakana.
Method 5: program code
View plaincopy to clipboardprint?
// Encode the text as UTF-16 and look at the high byte of every code unit.
// NOTE(review): inputstring is declared outside this snippet.
System.Text.UnicodeEncoding unicodeencoding = new System.Text.UnicodeEncoding();
byte[] unicodebytearray = unicodeencoding.GetBytes(inputstring);
// The parameterless UnicodeEncoding is little-endian, so each code unit is
// (low byte, high byte): the high bytes sit at the odd indexes 1, 3, 5, ...
for (int i = 1; i < unicodebytearray.Length; i += 2)
{
    // If it is a Chinese character, the high byte is not 0.
    // (The converse does not hold: every character at or above U+0100 also
    // has a non-zero high byte.)
    if (unicodebytearray[i] != 0)
    {
    }
}
......
// Encode the text as UTF-16 and look at the high byte of every code unit.
// NOTE(review): inputstring is declared outside this snippet.
System.Text.UnicodeEncoding unicodeencoding = new System.Text.UnicodeEncoding();
byte[] unicodebytearray = unicodeencoding.GetBytes(inputstring);
// The parameterless UnicodeEncoding is little-endian, so each code unit is
// (low byte, high byte): the high bytes sit at the odd indexes 1, 3, 5, ...
for (int i = 1; i < unicodebytearray.Length; i += 2)
{
    // If it is a Chinese character, the high byte is not 0.
    // (The converse does not hold: every character at or above U+0100 also
    // has a non-zero high byte.)
    if (unicodebytearray[i] != 0)
    {
    }
}
......
Method 6: program code
View plaincopy to clipboardprint?
/// <summary>
/// Determines whether a string consists only of characters accepted by
/// <c>isgbcode</c> or <c>isgbkcode</c>.
/// </summary>
/// <remarks>
/// Each UTF-16 <c>char</c> is tested on its own. An empty string has no
/// offending character and therefore yields true.
/// </remarks>
/// <param name="teststr">Text to check. Null yields false.</param>
/// <returns>
/// False as soon as one character is rejected by both checks; otherwise true.
/// </returns>
public bool isonlycontainschinese(string teststr)
{
    if (teststr == null)
    {
        return false;
    }

    foreach (char word in teststr)
    {
        string single = word.ToString();
        // Short-circuit OR: skip the second lookup when the first already accepts.
        if (!(isgbcode(single) || isgbkcode(single)))
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Determines whether a word is a GB2312-encoded Chinese character, judged by
/// the first two bytes that the "gb2312" encoding produces for it.
/// </summary>
/// <remarks>
/// Accepted byte pairs: lead byte 0xB0..0xF7 (176..247) and trail byte
/// 0xA1..0xFE (161..254). The lower trail bound is 0xA1, not 0xA0: a trail byte
/// of 0xA0 is outside the GB2312 two-byte area.
/// NOTE(review): on .NET Core / .NET 5+ the "gb2312" encoding is presumably only
/// available after registering CodePagesEncodingProvider - confirm for the target runtime.
/// </remarks>
/// <param name="word">Text whose first encoded character is tested. Null or empty yields false.</param>
/// <returns>True if the first two encoded bytes fall in the ranges above; otherwise false.</returns>
private bool isgbcode(string word)
{
    if (string.IsNullOrEmpty(word))
    {
        return false;
    }

    byte[] bytes = System.Text.Encoding.GetEncoding("gb2312").GetBytes(word);
    if (bytes.Length <= 1) // a single byte means ASCII (or an unmappable character)
    {
        return false;
    }

    byte byte1 = bytes[0];
    byte byte2 = bytes[1];
    return byte1 >= 0xB0 && byte1 <= 0xF7
        && byte2 >= 0xA1 && byte2 <= 0xFE;
}
/// <summary>
/// Determines whether a word is a GBK-encoded character, judged by the first
/// two bytes that the "GBK" encoding produces for it.
/// </summary>
/// <remarks>
/// Accepted byte pairs: lead byte 0x81..0xFE (129..254) and trail byte
/// 0x40..0xFE (64..254) except 0x7F, which is not a valid GBK trail byte.
/// These ranges span the whole two-byte area, so two-byte symbols and
/// punctuation are accepted along with Chinese characters.
/// NOTE(review): on .NET Core / .NET 5+ the "GBK" encoding is presumably only
/// available after registering CodePagesEncodingProvider - confirm for the target runtime.
/// </remarks>
/// <param name="word">Text whose first encoded character is tested. Null or empty yields false.</param>
/// <returns>True if the first two encoded bytes fall in the ranges above; otherwise false.</returns>
private bool isgbkcode(string word)
{
    if (string.IsNullOrEmpty(word))
    {
        return false;
    }

    byte[] bytes = System.Text.Encoding.GetEncoding("GBK").GetBytes(word);
    if (bytes.Length <= 1) // a single byte means ASCII (or an unmappable character)
    {
        return false;
    }

    byte byte1 = bytes[0];
    byte byte2 = bytes[1];
    return byte1 >= 0x81 && byte1 <= 0xFE
        && byte2 >= 0x40 && byte2 <= 0xFE && byte2 != 0x7F;
}
/// <summary>
/// Determines whether a word is a Big5-encoded character, judged by the first
/// two bytes that the "big5" encoding produces for it.
/// </summary>
/// <remarks>
/// Accepted byte pairs: lead byte 0x81..0xFE (129..254) AND trail byte in
/// either 0x40..0x7E (64..126) or 0xA1..0xFE (161..254). The two trail ranges
/// are grouped explicitly, so the lead-byte check applies to both of them.
/// NOTE(review): on .NET Core / .NET 5+ the "big5" encoding is presumably only
/// available after registering CodePagesEncodingProvider - confirm for the target runtime.
/// </remarks>
/// <param name="word">Text whose first encoded character is tested. Null or empty yields false.</param>
/// <returns>True if the first two encoded bytes fall in the ranges above; otherwise false.</returns>
private bool isbig5code(string word)
{
    if (string.IsNullOrEmpty(word))
    {
        return false;
    }

    byte[] bytes = System.Text.Encoding.GetEncoding("big5").GetBytes(word);
    if (bytes.Length <= 1) // a single byte means ASCII (or an unmappable character)
    {
        return false;
    }

    byte byte1 = bytes[0];
    byte byte2 = bytes[1];
    bool leadOk = byte1 >= 0x81 && byte1 <= 0xFE;
    bool trailOk = (byte2 >= 0x40 && byte2 <= 0x7E)
                || (byte2 >= 0xA1 && byte2 <= 0xFE);
    return leadOk && trailOk;
}
This article is from the CSDN blog; if you reproduce it, please credit the source: http://blog.csdn.net/yur505/archive/2008/06/17/2557211.aspx