String truncation is needed in many situations, but a plain byte-based substr() is not enough: because GB2312 and UTF-8 encode Chinese characters as multiple bytes, a naive cut can split a character in half. The following are concrete implementations, including code extracted from several mature systems.
1. A Chinese-aware truncation function that supports both UTF-8 and GB2312
The code is as follows:
/**
 * Truncate a string without splitting multi-byte characters (UTF-8 or GB2312/GBK).
 *
 * Usage: cut_str(string, truncation length, start offset, encoding)
 *
 * In the UTF-8 path $sublen and $start count characters. In the double-byte
 * path both are doubled and used as byte offsets, so an ASCII character
 * counts as half a unit there.
 *
 * @param string $string Text to truncate.
 * @param int    $sublen Number of units to keep.
 * @param int    $start  Number of units to skip first. Defaults to 0.
 * @param string $code   'utf-8' (default; compared case-insensitively and
 *                       ignoring surrounding whitespace) selects the UTF-8
 *                       path; any other value selects the double-byte path.
 * @return string The selected slice, followed by "..." when text remains after it.
 */
function cut_str($string, $sublen, $start = 0, $code = 'utf-8')
{
    $string = (string) $string;

    if (strtolower(trim($code)) === 'utf-8') {
        // One alternative per well-formed UTF-8 sequence length (1 to 4 bytes).
        // Single-quoted so PCRE, not PHP, interprets the \x escapes.
        $pa = '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]'
            . '|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]'
            . '|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/';
        preg_match_all($pa, $string, $t_string);

        $slice = join('', array_slice($t_string[0], $start, $sublen));
        if (count($t_string[0]) - $start > $sublen) {
            return $slice . '...';
        }
        return $slice;
    }

    // Double-byte path: treat every unit as two bytes.
    $start  = $start * 2;
    $end    = $start + $sublen * 2;
    $strlen = strlen($string);
    $tmpstr = '';
    $truncated = false;

    for ($i = 0; $i < $strlen; $i++) {
        if ($i >= $end) {
            // Something is left beyond the window, so the result is a real truncation.
            $truncated = true;
            break;
        }

        // Bytes above 0x80 start a two-byte character (covers GBK lead byte 0x81 too).
        $isLead = ord($string[$i]) > 0x80;

        if ($i >= $start) {
            $tmpstr .= substr($string, $i, $isLead ? 2 : 1);
        }
        if ($isLead) {
            $i++; // step over the trail byte so it is never read as a lead byte
        }
    }

    // Only mark the result when text was cut off at the end; skipping a prefix
    // via $start does not by itself add an ellipsis (same rule as the UTF-8 path).
    if ($truncated) {
        $tmpstr .= '...';
    }
    return $tmpstr;
}
// Demo: keep the first 8 double-byte units (16 bytes) of a GB2312 string.
$str = "the string to be intercepted by abcd";
echo cut_str($str, 8, 0, 'gb2312');
?>
2. Truncating UTF-8 encoded multi-byte strings
The code is as follows:
/**
 * Truncate a UTF-8 string by character count.
 *
 * Skips the first $from characters and returns up to $len characters after
 * them. A "character" is one ASCII byte, or a lead byte (0xC0-0xFF) followed
 * by its continuation bytes (0x80-0xBF).
 *
 * @param string $str  UTF-8 text.
 * @param int    $from Number of characters to skip; negative values are treated as 0.
 * @param int    $len  Maximum number of characters to return; negative values are treated as 0.
 * @return string The selected characters, or '' if the pattern could not be
 *                applied (for example when a count exceeds PCRE's repetition limit).
 */
function utf8Substr($str, $from, $len)
{
    // Both counts are spliced into the pattern, so force them to non-negative
    // integers: anything else would yield an invalid or attacker-shaped regex.
    $from = max(0, (int) $from);
    $len  = max(0, (int) $len);

    $char    = '(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)';
    $pattern = '#^' . $char . '{0,' . $from . '}'
             . '(' . $char . '{0,' . $len . '}).*#s';

    $result = preg_replace($pattern, '$1', (string) $str);

    // preg_replace() returns null on a PCRE error; keep the return type a string.
    return $result === null ? '' : $result;
}
?>
3. Truncating GB2312 Chinese strings
The code is as follows:
/**
 * Truncate a GB2312 string by byte range without splitting a double-byte character.
 *
 * Walks the string from the beginning so that lead/trail bytes stay paired,
 * and emits every character that begins inside [$start, $start + $len).
 * A double-byte character that begins inside the range is kept whole, so the
 * result may be one byte longer than $len.
 *
 * @param string $str   GB2312-encoded text.
 * @param int    $start Byte offset at which output begins; negative values are treated as 0.
 * @param int    $len   Number of bytes to cover from $start.
 * @return string The selected characters.
 */
function mysubstr($str, $start, $len)
{
    $str    = (string) $str;
    $start  = max(0, (int) $start);
    // Never scan past the end of the string.
    $end    = min(strlen($str), $start + (int) $len);
    $tmpstr = '';

    for ($i = 0; $i < $end; $i++) {
        // A byte above 0xA0 starts a two-byte GB2312 character.
        $width = ord($str[$i]) > 0xa0 ? 2 : 1;

        // Characters before $start are scanned only to stay in sync; they are not emitted.
        if ($i >= $start) {
            $tmpstr .= substr($str, $i, $width);
        }
        $i += $width - 1;
    }
    return $tmpstr;
}
?>
4. BugFree character truncation function
The code is as follows:
/**
 * @package BugFree
 * @version $Id: FunctionsMain.inc.php,v 1.32 11:38:37 wwccss Exp $
 *
 * Return part of a string (enhances substr() for UTF-8 text).
 *
 * Characters are copied whole from the start of $String until at least
 * $Length bytes have been consumed, so the result can exceed $Length by up to
 * three bytes when the last character is multi-byte.
 *
 * @param string $String The string to cut.
 * @param int    $Length The byte budget for the returned string.
 * @param bool   $Append Whether to append "..." to a truncated result.
 * @return string The cut string; $String itself when it already fits.
 */
function sysSubStr($String, $Length, $Append = false)
{
    $String = (string) $String;
    $Length = (int) $Length;

    if (strlen($String) <= $Length) {
        return $String;
    }

    // Start from an empty list so a zero or negative $Length yields '' instead
    // of handing implode() an undefined variable.
    $Pieces = array();
    $I = 0;
    while ($I < $Length) {
        $Lead = ord($String[$I]);
        if ($Lead >= 240) {
            $Width = 4; // 4-byte sequence (U+10000 and above, e.g. emoji)
        } elseif ($Lead >= 224) {
            $Width = 3;
        } elseif ($Lead >= 192) {
            $Width = 2;
        } else {
            $Width = 1;
        }
        $Pieces[] = substr($String, $I, $Width);
        $I += $Width;
    }

    $StringLast = implode('', $Pieces);
    if ($Append) {
        $StringLast .= '...';
    }
    return $StringLast;
}
// Demo: cut a mixed string to an 18-byte budget without an ellipsis.
$String = "http://www.jb51.net-simple, wonderful, General";
$Length = 18;
$Append = false;
echo sysSubStr($String, $Length, $Append);
?>
String truncation code in DedeCMS
This code is taken directly from DedeCMS; you can modify it to suit your needs.
The code is as follows:
/**
 * Chinese-aware truncation for slash-escaped request content.
 *
 * Strips the slashes, truncates with cn_substr(), then re-escapes the result,
 * so the cut is computed on the unescaped text.
 *
 * @param string $str     Slash-escaped input.
 * @param int    $slen    Length passed through to cn_substr().
 * @param int    $startdd Start offset passed through to cn_substr().
 * @return string The truncated text, escaped again with addslashes().
 */
function cn_substrR($str, $slen, $startdd = 0)
{
    $str = cn_substr(stripslashes($str), $slen, $startdd);
    return addslashes($str);
}
/**
 * Chinese-aware truncation measured in bytes.
 *
 * When the global $cfg_soft_lang is 'utf-8' the work is delegated to
 * cn_substr_utf8(); otherwise the string is treated as double-byte text in
 * which any byte above 0x80 starts a two-byte character.
 *
 * NOTE(review): the published listing declared the global as $cmd_soft_lang
 * but tested a differently spelled name; both are restored here to
 * $cfg_soft_lang, the name DedeCMS uses. Confirm against your configuration.
 *
 * @param string $str     Text to truncate.
 * @param int    $slen    Maximum length in bytes; 0 means "to the end of the string".
 * @param int    $startdd Byte offset to start from. Defaults to 0.
 * @return string The truncated text; '' when $startdd is at or beyond the end.
 */
function cn_substr($str, $slen, $startdd = 0)
{
    global $cfg_soft_lang;
    if ($cfg_soft_lang == 'utf-8') {
        return cn_substr_utf8($str, $slen, $startdd);
    }

    $str     = (string) $str;
    $restr   = '';
    $c       = '';
    $str_len = strlen($str);

    if ($str_len < $startdd + 1) {
        return '';
    }
    if ($str_len < $startdd + $slen || $slen == 0) {
        $slen = $str_len - $startdd;
    }
    $enddd = $startdd + $slen - 1;

    for ($i = 0; $i < $str_len; $i++) {
        // $c holds the character read on the previous pass; it is committed
        // one iteration late so the final character can be size-checked first.
        if ($startdd == 0 || $i > $startdd) {
            $restr .= $c;
        }

        if (ord($str[$i]) > 0x80) {
            if ($str_len > $i + 1) {
                $c = $str[$i] . $str[$i + 1];
            } else {
                // Dangling lead byte at the end of the string: drop it rather
                // than leave the previous character in $c to be emitted twice.
                $c = '';
            }
            $i++;
        } else {
            $c = $str[$i];
        }

        if ($i >= $enddd) {
            // Append the last character only if it still fits in the byte budget.
            if (strlen($restr) + strlen($c) <= $slen) {
                $restr .= $c;
            }
            break;
        }
    }
    return $restr;
}
/**
 * UTF-8 truncation measured in bytes, never splitting a character.
 *
 * Whole characters are skipped until at least $start bytes have been passed,
 * then whole characters are collected for as long as the result stays within
 * $length bytes.
 *
 * @param string $str    UTF-8 text.
 * @param int    $length Maximum length of the result in bytes.
 * @param int    $start  Number of bytes to skip first. Defaults to 0.
 * @return string The truncated text; '' when $start is at or beyond the end,
 *                or when $str cannot be split as UTF-8.
 */
function cn_substr_utf8($str, $length, $start = 0)
{
    if (strlen($str) < $start + 1) {
        return '';
    }

    // Split into characters; the /u modifier makes "." match one code point.
    preg_match_all("/./su", $str, $ar);
    $str  = '';
    $tstr = '';

    // To stay compatible with MySQL versions before 4.1 and consistent with
    // the database varchar size, the limit is applied in bytes.
    for ($i = 0; isset($ar[0][$i]); $i++) {
        $ch = $ar[0][$i];
        if (strlen($tstr) < $start) {
            $tstr .= $ch; // still inside the skipped prefix
            continue;
        }
        // Stop before the first character that would push the result past
        // $length bytes, so the result always fits the column.
        if (strlen($str) + strlen($ch) > $length) {
            break;
        }
        $str .= $ch;
    }
    return $str;
}
String truncation code in PHPCMS:
The code is as follows:
/**
 * Truncate HTML-escaped text to a display width and append a marker.
 *
 * Common entities are decoded first so each counts as one character, the
 * text is cut, and the five HTML-special characters are escaped again.
 *
 * In the UTF-8 path $length is a display width: tab, newline and printable
 * ASCII count 1, every multi-byte character counts 2. In the double-byte path
 * $length is a byte budget that also has to hold $dot.
 *
 * The charset is read from the CHARSET constant; when it is not defined,
 * UTF-8 is assumed.
 *
 * NOTE(review): the entity table below is reconstructed. The published
 * listing had its entities decoded by HTML rendering, which left the two
 * arrays unusable. Confirm the list against your PHPCMS copy.
 *
 * @param string $string HTML-escaped text.
 * @param int    $length Maximum width (see above).
 * @param string $dot    Marker appended to a truncated result. Defaults to '...'.
 * @return string $string unchanged when its byte length is within $length,
 *                otherwise the truncated, re-escaped text followed by $dot.
 */
function str_cut($string, $length, $dot = '...')
{
    $string = (string) $string;
    if (strlen($string) <= $length) {
        return $string;
    }

    $entities = array('&nbsp;', '&amp;', '&quot;', '&#039;', '&ldquo;', '&rdquo;', '&mdash;', '&lt;', '&gt;', '&middot;', '&hellip;');
    $plain    = array(' ', '&', '"', "'", '“', '”', '—', '<', '>', '·', '…');
    $string   = str_replace($entities, $plain, $string);

    // Decoding shortens the string, so measure it again; scanning with the
    // pre-decoding length would read past the end.
    $strlen = strlen($string);
    $strcut = '';

    $charset = defined('CHARSET') ? CHARSET : 'utf-8';
    if (strtolower($charset) == 'utf-8') {
        $n = $tn = $noc = 0; // byte offset, width of last char in bytes, display width so far
        while ($n < $strlen) {
            $t = ord($string[$n]);
            if ($t == 9 || $t == 10 || (32 <= $t && $t <= 126)) {
                $tn = 1; $n++; $noc++;
            } elseif (194 <= $t && $t <= 223) {
                $tn = 2; $n += 2; $noc += 2;
            } elseif (224 <= $t && $t <= 239) {
                // 0xEF is a valid 3-byte lead (full-width punctuation lives there).
                $tn = 3; $n += 3; $noc += 2;
            } elseif (240 <= $t && $t <= 247) {
                $tn = 4; $n += 4; $noc += 2;
            } elseif (248 <= $t && $t <= 251) {
                $tn = 5; $n += 5; $noc += 2;
            } elseif ($t == 252 || $t == 253) {
                $tn = 6; $n += 6; $noc += 2;
            } else {
                $n++; // other control or stray bytes take no display width
            }
            if ($noc >= $length) {
                break;
            }
        }
        // The last character overshot the width: back up so it is left out.
        if ($noc > $length) {
            $n -= $tn;
        }
        $strcut = substr($string, 0, $n);
    } else {
        // Reserve room for the marker, and never read past the decoded string.
        $maxi = min($length - strlen($dot) - 1, $strlen);
        for ($i = 0; $i < $maxi; $i++) {
            if (ord($string[$i]) > 127 && $i + 1 < $strlen) {
                $strcut .= $string[$i] . $string[++$i];
            } else {
                $strcut .= $string[$i];
            }
        }
    }

    $strcut = str_replace(
        array('&', '"', "'", '<', '>'),
        array('&amp;', '&quot;', '&#039;', '&lt;', '&gt;'),
        $strcut
    );
    return $strcut . $dot;
}