Citation: http://songjianyong.iteye.com/blog/1552973
// Originally published in package cn.com.songjy.test; the package statement is
// left out here so the class compiles as a single stand-alone file.
import java.io.UnsupportedEncodingException;
import java.util.regex.Pattern;

/**
 * Demonstrates ways to measure the length of a string in bytes instead of in
 * UTF-16 {@code char} units.
 *
 * <p>{@link String#length()} counts {@code char} units, so a Chinese character
 * contributes 1 rather than 2. For example {@code "123abc\u957f\u57ce"} has a
 * {@code length()} of 8, while its double-byte length is 10.
 */
public class App {

    /** Matches every character outside the single-byte range U+0000..U+00FF. */
    private static final Pattern NON_SINGLE_BYTE = Pattern.compile("[^\\x00-\\xff]");

    /**
     * Prints the byte length of a sample string using each technique.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException if the runtime lacks the GBK or UTF-8 charset
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        // NOTE(review): this sample is pure ASCII, so all four calls print 4. Use a
        // string with CJK characters (e.g. "123abc\u957f\u57ce") to see the results diverge.
        String str = "Only";
        System.out.println(getWordCount(str));
        System.out.println(getWordCountRegex(str));
        System.out.println(getWordCountCode(str, "GBK"));
        System.out.println(getWordCountCode(str, "UTF-8"));
    }

    /**
     * Estimates the byte length of {@code s} by scanning it one {@code char} at a time:
     * a {@code char} in the range U+0000..U+00FF (ASCII plus Latin-1) counts as one
     * byte, every other {@code char} (CJK and other full-width characters) counts as two.
     *
     * <p>This is a heuristic modelled on double-byte encodings; it is not the exact size
     * in any particular charset (use {@link #getWordCountCode(String, String)} for that).
     * A character outside the Basic Multilingual Plane occupies two {@code char}s and
     * therefore counts as 4 here.
     *
     * @param s the string to measure; must not be {@code null}
     * @return the estimated length in bytes
     */
    public static int getWordCount(String s) {
        int length = 0;
        for (int i = 0; i < s.length(); i++) {
            length += (s.charAt(i) <= 0xFF) ? 1 : 2;
        }
        return length;
    }

    /**
     * Estimates the byte length of {@code s} by replacing every character outside
     * U+0000..U+00FF with two placeholder characters and taking the length of the result.
     *
     * <p>The regex engine matches by code point, so a character outside the Basic
     * Multilingual Plane is replaced once and counts as 2 here.
     *
     * @param s the string to measure; must not be {@code null}
     * @return the estimated length in bytes
     */
    public static int getWordCountRegex(String s) {
        // The replacement must be exactly two characters so each wide character counts as 2.
        return NON_SINGLE_BYTE.matcher(s).replaceAll("**").length();
    }

    /**
     * Returns the exact number of bytes {@code str} occupies when encoded with the
     * named charset.
     *
     * @param str  the string to measure; must not be {@code null}
     * @param code the charset name, e.g. {@code "GBK"} or {@code "UTF-8"}
     * @return the encoded length in bytes
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    public static int getWordCountCode(String str, String code)
            throws UnsupportedEncodingException {
        return str.getBytes(code).length;
    }
}
Getting the byte length of a (Chinese) string in Java