String Truncation
Keywords: Java, truncating strings that contain Chinese characters, StringTrimUtils. The code works on a char array: Java strings are Unicode internally, so the byte encoding does not matter.
Java code
/**
 * Truncates strings by display width, where ASCII characters count as width 1
 * and every other character (Chinese, Japanese, Korean, ...) counts as width 2.
 *
 * <p>The work is done on the string's UTF-16 {@code char} array, so no byte
 * encoding is involved.
 *
 * @author sunrie
 */
public class StringTrimUtils {

    /** Characters below this value are ASCII and have width 1. */
    private static final int ASCII_LIMIT = 0x80;

    /**
     * Returns the longest prefix of {@code str} whose display width does not
     * exceed {@code specialCharsLength}. If a wide character would straddle the
     * limit, it is left out, so the result may be one unit narrower than
     * requested.
     *
     * @param str the original string; may be {@code null}
     * @param specialCharsLength the maximum width, counting non-ASCII characters as 2
     * @return the truncated prefix, or {@code ""} when {@code str} is
     *         {@code null} or empty, or when {@code specialCharsLength < 1}
     */
    public static String trim(String str, int specialCharsLength) {
        if (str == null || str.isEmpty() || specialCharsLength < 1) {
            return "";
        }
        char[] chars = str.toCharArray();
        int charsLength = getCharsLength(chars, specialCharsLength);
        return new String(chars, 0, charsLength);
    }

    /**
     * Converts a width limit (non-ASCII characters count as 2) into a number of
     * {@code char}s (every character counts as 1).
     *
     * @param chars the characters to measure
     * @param specialCharsLength the width limit, counting non-ASCII characters as 2
     * @return how many leading elements of {@code chars} fit within the limit
     */
    private static int getCharsLength(char[] chars, int specialCharsLength) {
        int count = 0;
        int normalCharsLength = 0;
        for (int i = 0; i < chars.length; i++) {
            int specialCharLength = getSpecialCharLength(chars[i]);
            if (count > specialCharsLength - specialCharLength) {
                break;
            }
            count += specialCharLength;
            normalCharsLength++;
        }
        // Never cut between the two halves of a surrogate pair: a dangling high
        // surrogate is not a valid character, so drop it as well.
        if (normalCharsLength > 0
                && normalCharsLength < chars.length
                && Character.isHighSurrogate(chars[normalCharsLength - 1])
                && Character.isLowSurrogate(chars[normalCharsLength])) {
            normalCharsLength--;
        }
        return normalCharsLength;
    }

    /**
     * Returns the display width of a single character.
     *
     * @param c the character to measure
     * @return 1 for ASCII characters, 2 for everything else
     */
    private static int getSpecialCharLength(char c) {
        return isLetter(c) ? 1 : 2;
    }

    /**
     * Tells whether a character is an ASCII character, as opposed to any other
     * character such as Chinese, Japanese or Korean.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is below 0x80
     */
    private static boolean isLetter(char c) {
        return c < ASCII_LIMIT;
    }
}
SubString: code found on the Internet, with GBK handling added. It also works without problems when the environment uses UTF-8 or another encoding.
Java code
import java.io.UnsupportedEncodingException;
/**
 * Measures and truncates strings by their GBK byte width: ASCII characters
 * occupy 1 byte, Chinese (and other double-byte) characters occupy 2.
 */
public class SubString {

    /** Name of the charset used to measure byte width. */
    private static final String GBK = "GBK";

    /**
     * Tells whether a character is an ASCII character, as opposed to any other
     * character such as Chinese, Japanese or Korean.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is below 0x80
     */
    public static boolean isLetter(char c) {
        return c < 0x80;
    }

    /**
     * Returns the width of a string, counting ASCII characters as 1 and every
     * other character as 2.
     *
     * @param s the string to measure; may be {@code null}
     * @return the width, or 0 when {@code s} is {@code null}
     */
    public static int length(String s) {
        if (s == null) {
            return 0;
        }
        int len = 0;
        for (int i = 0; i < s.length(); i++) {
            len += isLetter(s.charAt(i)) ? 1 : 2;
        }
        return len;
    }

    /**
     * Truncates {@code origin} to at most {@code len} GBK bytes and appends the
     * suffix {@code c}. A double-byte character that would straddle the limit is
     * left out. The one exception is {@code len == 1} with a double-byte first
     * character: that character is returned whole rather than returning only
     * the suffix.
     *
     * <p>The suffix is appended even when nothing had to be cut. Text that is
     * actually truncated is round-tripped through GBK, so characters GBK cannot
     * represent come back as {@code ?} in that case.
     *
     * @param origin the original string; may be {@code null}
     * @param len the maximum width in GBK bytes (a Chinese character counts as 2)
     * @param c the suffix to append
     * @return the truncated string followed by {@code c}, or {@code ""} when
     *         {@code origin} is {@code null} or empty, or when {@code len < 1}
     * @throws RuntimeException if the runtime does not provide the GBK charset
     */
    public static String substring(String origin, int len, String c) {
        if (origin == null || origin.isEmpty() || len < 1) {
            return "";
        }
        try {
            byte[] bytes = origin.getBytes(GBK);
            // Compare against the real encoded length: characters GBK cannot
            // encode shrink to a single '?' byte, so a per-char width estimate
            // may exceed what is actually available to copy.
            if (len >= bytes.length) {
                return origin + c;
            }
            // Walk whole characters from the start. Counting high-bit bytes is
            // not enough because a GBK trail byte may be in 0x40-0x7E.
            int end = 0;
            while (end < len) {
                int width = isGbkLeadByte(bytes[end]) ? 2 : 1;
                if (end + width > len) {
                    break;
                }
                end += width;
            }
            if (end == 0) {
                // len == 1 and the first character is double-byte: keep it whole.
                end = 2;
            }
            return new String(bytes, 0, end, GBK) + c;
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /** Tells whether a byte is the first byte of a two-byte GBK character (0x81-0xFE). */
    private static boolean isGbkLeadByte(byte b) {
        int unsigned = b & 0xFF;
        return unsigned >= 0x81 && unsigned <= 0xFE;
    }
}
I found it online today at http://sunrie.javaeye.com/blog/115608.