How can JavaScript count the length of a string so that the result matches PHP's strlen()? The function below applies only to text that is GBK-encoded on the PHP side, where one Chinese character counts as two characters (bytes). The code is as follows:
code is as follows |
copy code |
/**
 * Counts the length of a string the way a double-byte encoding such as GBK
 * would: every character in the range U+0391..U+FFE5 (which covers Chinese
 * characters and full-width forms) counts as 2, every other character as 1.
 *
 * @param {string} str - The string to measure.
 * @returns {number} The weighted length; 0 for an empty, null or undefined input.
 */
function strlen(str) {
  // Treat a missing value like an empty string instead of throwing.
  if (str == null) {
    return 0;
  }

  // The \u escapes are essential: without them the class would match the
  // literal ASCII characters "u", "0", "3", ... instead of a code-point range.
  var wideChar = /[\u0391-\uFFE5]/;
  var s = 0;
  for (var i = 0; i < str.length; i++) {
    if (wideChar.test(str.charAt(i))) {
      s += 2;
    } else {
      s += 1;
    }
  }
  return s;
}
The function takes each character in turn and tests it against the pattern: full-width characters and Chinese characters count as 2, and all other characters count as 1.
code is as follows |
copy code |
<script>
// Demo: two Chinese characters (2 each) plus one ASCII letter (1) gives 5.
// The function declaration below is hoisted, so it may be called here.
alert(fucchecklength("中国a"));

/**
 * Counts the length of a string, giving 1 to every character whose code unit
 * is in the range 0..255 and 2 to every other character.
 *
 * @param {string} strtemp - The string to measure.
 * @returns {number} The weighted length.
 */
function fucchecklength(strtemp) {
  var sum = 0;
  for (var i = 0; i < strtemp.length; i++) {
    var code = strtemp.charCodeAt(i);
    if (code >= 0 && code <= 255) {
      sum = sum + 1;
    } else {
      sum = sum + 2;
    }
  }
  return sum;
}
</script>
This gives the result 5 (for the input "中国a": 2 + 2 + 1).
How do we get the length in bytes? Note the difference between bytes and characters. The byte length depends on the encoding: for example, "中国a" is 5 bytes in GBK/GB2312 but 7 bytes in UTF-8 (in UTF-8 a Chinese character usually takes 3 bytes).
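We can convert all the characters to one known encoding before counting. For example, the following function decodes a UTF-8 byte string (one byte per character code) into a Unicode string: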
The code is as follows |
Copy Code |
/**
 * Decodes a UTF-8 "byte string" into a regular JavaScript (UTF-16) string.
 *
 * Each character code of the input is treated as one UTF-8 byte. Sequences of
 * one, two and three bytes are supported; decoding stops at the first
 * problem and the whole call then yields an empty string.
 *
 * @param {string} strUtf8 - String whose character codes are UTF-8 bytes.
 * @returns {string} The decoded text, or "" if the input is truncated,
 *   contains an invalid continuation byte, or uses a sequence of four or
 *   more bytes (unsupported).
 */
function Utf8tounicode(strUtf8) {
  var bstr = "";
  var nTotalChars = strUtf8.length; // Total bytes to be processed.
  var nOffset = 0; // Current read position in strUtf8.
  var iCode; // Lead byte of the current sequence.
  var iCode1; // First continuation byte.
  var iCode2; // Second continuation byte.

  while (nOffset < nTotalChars) {
    iCode = strUtf8.charCodeAt(nOffset);

    if ((iCode & 0x80) === 0) {
      // 1 byte: 0xxxxxxx
      bstr += String.fromCharCode(iCode & 0x7F);
      nOffset += 1;
    } else if ((iCode & 0xE0) === 0xC0) {
      // 2 bytes: 110xxxxx 10xxxxxx
      if (nTotalChars - nOffset < 2) {
        return ""; // Not enough data.
      }
      iCode1 = strUtf8.charCodeAt(nOffset + 1);
      if ((iCode1 & 0xC0) !== 0x80) {
        return ""; // Invalid continuation byte.
      }
      bstr += String.fromCharCode(((iCode & 0x1F) << 6) | (iCode1 & 0x3F));
      nOffset += 2;
    } else if ((iCode & 0xF0) === 0xE0) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      if (nTotalChars - nOffset < 3) {
        return ""; // Not enough data.
      }
      iCode1 = strUtf8.charCodeAt(nOffset + 1);
      iCode2 = strUtf8.charCodeAt(nOffset + 2);
      if ((iCode1 & 0xC0) !== 0x80 || (iCode2 & 0xC0) !== 0x80) {
        return ""; // Invalid continuation byte.
      }
      bstr += String.fromCharCode(
        ((iCode & 0x0F) << 12) | ((iCode1 & 0x3F) << 6) | (iCode2 & 0x3F)
      );
      nOffset += 3;
    } else {
      // 4 or more bytes, or a stray continuation byte: unsupported.
      return "";
    }
  }

  return bstr;
}