JavaScript strings have a length property that returns the number of characters in the string. Every character counts as 1, whether it is a Chinese character, a full-width symbol, or an English letter, which is different from PHP's strlen(), which counts bytes.
The code is as follows:
/**
 * Returns the weighted length of a string: every character in the range
 * U+0391..U+FFE5 (which covers Chinese characters and full-width symbols)
 * counts as 2, and every other character counts as 1.
 *
 * @param {string} str - The string to measure.
 * @returns {number} The weighted length.
 */
function strlen (str) {
    var s = 0;
    for (var i = 0; i < str.length; i++) {
        // The \u escapes are required; without them the class would match
        // the literal letters "u", "0", "3", ... instead of a code range.
        if (/[\u0391-\uffe5]/.test(str.charAt(i))) {
            s += 2;
        } else {
            s++;
        }
    }
    return s;
}
Take each character in turn: a character that matches the full-width/Chinese range counts as 2, and every other character counts as 1.
The code is as follows:
<script>
// Demo call: two Chinese characters plus one ASCII letter give 2 + 2 + 1 = 5.
// (The literal had been machine-translated to "China a", which would give 7.)
alert(Fucchecklength("中国a"));
/**
 * Returns the weighted length of a string: characters whose code unit is in
 * 0..255 count as 1, and all other characters count as 2.
 *
 * @param {string} strtemp - The string to measure.
 * @returns {number} The weighted length.
 */
function Fucchecklength (strtemp)
{
    var i, sum, code;
    sum = 0;
    for (i = 0; i < strtemp.length; i++)
    {
        code = strtemp.charCodeAt(i);
        if ((code >= 0) && (code <= 255))
            sum = sum + 1;
        else
            sum = sum + 2;
    }
    return sum;
}
</script>
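The result is 5. But what if you want the byte length? Note the difference between bytes and characters. The byte length depends on the encoding: a string such as "中国a" ("China a": two Chinese characters plus one ASCII letter) is 5 bytes in GBK/GB2312, but 7 bytes in UTF-8 (a Chinese character usually takes 3 bytes in UTF-8).
We can convert the characters to a single encoding before counting. For example, the following function decodes a UTF-8 byte string (one byte per character) into a Unicode string:
The code is as follows: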
/**
 * Decodes a UTF-8 byte string into a JavaScript (UTF-16) string.
 *
 * Each character of the input is read with charCodeAt and treated as one
 * UTF-8 byte. Only 1-, 2- and 3-byte sequences are supported.
 *
 * @param {string} strUtf8 - The byte string to decode (assumed to hold one
 *     byte value, 0..255, per character).
 * @returns {string} The decoded string, or "" if the input is truncated,
 *     contains an invalid continuation byte, or uses a sequence of 4 or
 *     more bytes.
 */
function Utf8tounicode (strUtf8)
{
    var bstr = "";
    var nTotalChars = strUtf8.length; // Total chars to be processed.
    var nOffset = 0; // Processing point on strUtf8.
    var nRemainingBytes = nTotalChars; // How many bytes left to be converted.
    var iCode, iCode1, iCode2; // The values of the current byte(s).
    while (nOffset < nTotalChars)
    {
        iCode = strUtf8.charCodeAt(nOffset);
        if ((iCode & 0x80) === 0) // 1 byte: 0xxxxxxx
        {
            bstr += String.fromCharCode(iCode & 0x7F);
            nOffset += 1;
            nRemainingBytes -= 1;
        }
        else if ((iCode & 0xE0) === 0xC0) // 2 bytes: 110xxxxx 10xxxxxx
        {
            // Past the end charCodeAt yields NaN; NaN & 0xC0 is 0, so the
            // pattern check below also rejects a missing byte.
            iCode1 = strUtf8.charCodeAt(nOffset + 1);
            if (nRemainingBytes < 2 || // Not enough data
                (iCode1 & 0xC0) !== 0x80) // Invalid pattern
            {
                break;
            }
            bstr += String.fromCharCode(((iCode & 0x1F) << 6) | (iCode1 & 0x3F));
            nOffset += 2;
            nRemainingBytes -= 2;
        }
        else if ((iCode & 0xF0) === 0xE0) // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        {
            iCode1 = strUtf8.charCodeAt(nOffset + 1);
            iCode2 = strUtf8.charCodeAt(nOffset + 2);
            if (nRemainingBytes < 3 || // Not enough data
                (iCode1 & 0xC0) !== 0x80 || // Invalid pattern
                (iCode2 & 0xC0) !== 0x80)
            {
                break;
            }
            bstr += String.fromCharCode(((iCode & 0x0F) << 12) |
                ((iCode1 & 0x3F) << 6) |
                (iCode2 & 0x3F));
            nOffset += 3;
            nRemainingBytes -= 3;
        }
        else // 4 or more bytes (or a stray continuation byte) - unsupported
        {
            break;
        }
    }
    if (nRemainingBytes !== 0)
    {
        // Bad UTF-8 string: the loop stopped before consuming everything.
        return "";
    }
    return bstr;
}
How can this problem be solved? This article introduces ways of using JS to get the length of a string that contains Chinese characters.
First, we define a new function getBytes() to get the number of bytes in the string. It is not a standard JavaScript function, so we add it to String.prototype.
The code is as follows:
/**
 * Returns the length of the string with every character outside the range
 * \x00..\xff counted twice (so a Chinese character counts as 2).
 *
 * Note: this extends the built-in String prototype, as the original did.
 *
 * @returns {number} this.length plus the number of characters above \xff.
 */
String.prototype.getBytes = function () {
    // match() with the g flag returns every match, or null when none exist.
    var cArr = this.match(/[^\x00-\xff]/g);
    return this.length + (cArr === null ? 0 : cArr.length);
};
/**
 * Checks that the value of a form field does not exceed 64 bytes, as
 * measured by String.prototype.getBytes, and alerts the user when it does.
 *
 * @param {{value: string}} cur - Object with a string `value` property
 *     (presumably a form input element).
 * @returns {boolean} true when the value is within the limit, else false.
 */
function Paramcheck (cur) {
    if (cur.value.getBytes() > 64) {
        alert("characters more than 64 characters");
        return false;
    }
    return true;
}
getBytes uses a regular expression to collect every character outside the \x00-\xff range (which includes Chinese characters) into the array cArr, so the length of cArr is the number of such characters. The getBytes method returns length plus that number, which is the total number of bytes.
Using just [^\x00-\xff] is a bit crude: some special characters can also be matched, such as "}" [sic; the example character appears to have been garbled].
But [^\u4E00-\u9FA5] is a negated class, so it matches everything except Chinese characters ...
Here are a few other approaches that you can test:
Method one:
The code is as follows:
/**
 * Returns the weighted length of a string: every character that sorts
 * after '~' (code unit above 0x7E) counts as 2, all others count as 1.
 *
 * @param {string} str - The string to measure.
 * @returns {number} The weighted length.
 */
function _length (str) {
    var len = 0;
    for (var i = 0; i < str.length; i++) {
        // '~' is the last printable ASCII character. The literal must be a
        // single character; with surrounding spaces almost everything
        // would compare greater and count as 2.
        if (str.charAt(i) > '~') { len += 2; } else { len++; }
    }
    return len;
}
Method two:
The code is as follows:
/**
 * Returns the weighted length of the string: characters with a code unit
 * above 127, and the character with code 94 ('^'), count as 2; every other
 * character counts as 1.
 *
 * Note: this extends the built-in String prototype, as the original did.
 *
 * @returns {number} The weighted length.
 */
String.prototype.gblen = function () {
    var len = 0;
    for (var i = 0; i < this.length; i++) {
        var code = this.charCodeAt(i);
        if (code > 127 || code === 94) {
            len += 2;
        } else {
            len++;
        }
    }
    return len;
};
/**
 * Truncates the string to a weighted length, optionally appending a suffix.
 *
 * Width is measured as follows: characters with a code unit above 127, and
 * the character with code 94 ('^'), count as 2; all others count as 1.
 * If the whole string fits within `len`, it is returned unchanged.
 * Otherwise the suffix's character count is subtracted from `len` (only
 * when `len` is larger than it), characters are kept while the running
 * width stays within that budget, and the suffix is appended.
 *
 * Note: this extends the built-in String prototype, as the original did.
 *
 * @param {number} len - Maximum weighted length of the result.
 * @param {string} [s] - Suffix appended when truncation happens, e.g.
 *     "..."; defaults to the empty string.
 * @returns {string} The original or truncated string.
 */
String.prototype.gbtrim = function (len, s) {
    var str = '';
    var sp = s || '';
    var len2 = 0;
    var i;
    var code;
    // First pass: total weighted width of the whole string.
    for (i = 0; i < this.length; i++) {
        code = this.charCodeAt(i);
        if (code > 127 || code === 94) {
            len2 += 2;
        } else {
            len2++;
        }
    }
    if (len2 <= len) {
        // String(this) yields a primitive string rather than a String
        // wrapper object when called in non-strict code.
        return String(this);
    }
    len2 = 0;
    // Reserve room for the suffix, measured by its character count.
    len = (len > sp.length) ? len - sp.length : len;
    // Second pass: copy characters until the budget would be exceeded.
    for (i = 0; i < this.length; i++) {
        code = this.charCodeAt(i);
        if (code > 127 || code === 94) {
            len2 += 2;
        } else {
            len2++;
        }
        if (len2 > len) {
            str += sp;
            break;
        }
        str += this.charAt(i);
    }
    return str;
};
// Demo: write the sample string, its weighted length, and several truncated
// forms into the page. Requires a browser (document.write) and the
// String.prototype.gblen / gbtrim extensions.
// NOTE(review): the line-break strings were lost in extraction and are
// presumed to have been '<br />'; the labels are reconstructed from the
// calls they describe - confirm against the original article.
var str1 = ' The most cattle @#%& in the world are the most cattle @#%& ';
document.write('str1 = ' + str1 + '<br />');
document.write('length = ' + str1.gblen() + '<br />');
document.write('gbtrim(10) = ' + str1.gbtrim(10) + '<br />');
document.write('gbtrim(10, \'...\') = ' + str1.gbtrim(10, '...') + '<br />');
document.write('gbtrim(12, \'-\') = ' + str1.gbtrim(12, '-') + '<br />');
gbtrim(len, s): len is the length to truncate to, measured in English (single-byte) character widths; s is the string appended after truncation, such as "...".
Note: Chinese characters are counted as two units each, so when len is 10, gbtrim displays at most 5 Chinese characters.
When the string has more than 5 Chinese characters and a suffix is given, the suffix's length is subtracted from len before truncating, so fewer Chinese characters are displayed: only 4 with a one-character ellipsis ("…").