This tag-closing function, found online, works quite well: it truncates an HTML fragment and automatically closes every tag that is still open, so the resulting markup stays valid. Here is a brief explanation of the regular expressions it relies on:
First: ~(<[^>]+?>)~si
This regular expression matches <...>; simply put, it matches every <tag>.
Second: ~<([a-z0-9]+)[^/>]*?/>~si
This regular expression matches <... />, i.e. a self-closing tag such as <br /> or <img ... />.
Third: ~</([a-z0-9]+)[^/>]*?>~si
This regular expression matches </...>, i.e. a closing tag such as </p>.
Fourth: ~<([a-z0-9]+)[^/>]*?>~si
This matches the content of <...>. Unlike the first expression, it matches only real HTML tags, because an HTML tag name contains only letters and digits, such as <p> or <h1>.
Fifth: ~<!--.*?-->~si
No explanation needed: this matches HTML comments.
For the rest, see the source code. The PHP source follows:
The code is as follows:
/**
 * Truncate an HTML fragment to a limited amount of text and close every tag
 * that is still open at the cut point.
 *
 * Markup (tags and comments) is copied through and is not counted; only text
 * between tags counts toward $length. Whitespace-only text between tags is
 * dropped. Text is measured with mstrlen() and cut with msubstr().
 *
 * Known limitation: the input is tokenised on "<...>" runs that contain no
 * ">", so comments or attribute values containing ">" are not recognised.
 *
 * @param string $html   HTML fragment to shorten.
 * @param int    $length Maximum number of text characters to keep.
 * @return string Shortened fragment with all open tags closed.
 */
function subHtml($html, $length = 50)
{
    // Void elements never take a closing tag, so they must not be stacked;
    // otherwise "<br>" would block the enclosing "</p>" and yield "</br>".
    $voidTags = array(
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    );

    $result = '';
    $tagStack = array();
    $len = 0;

    // Split into alternating markup tokens ("<...>") and text tokens.
    $contents = preg_split(
        '~(<[^>]+?>)~si',
        $html,
        -1,
        PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
    );

    foreach ($contents as $tag) {
        if (trim($tag) == '') {
            continue;
        }

        // Attribute values may contain "/" (URLs), so the patterns exclude
        // only ">" inside a tag; excluding "/" as well would make every
        // <a href="http://..."> fall through to the text branch.
        if (preg_match('~^<[a-z0-9]+\b[^>]*/\s*>$~si', $tag)) {
            // Self-closing tag, e.g. <br />: nothing to close later.
            $result .= $tag;
        } elseif (preg_match('~^</([a-z0-9]+)[^>]*>$~si', $tag, $match)) {
            // Closing tag: keep it only when it closes the innermost open
            // element; stray closing tags are dropped.
            $depth = count($tagStack);
            if ($depth > 0 && strcasecmp($tagStack[$depth - 1], $match[1]) === 0) {
                array_pop($tagStack);
                $result .= $tag;
            }
        } elseif (preg_match('~^<([a-z0-9]+)\b[^>]*>$~si', $tag, $match)) {
            // Opening tag: remember it unless it is a void element.
            if (!in_array(strtolower($match[1]), $voidTags, true)) {
                array_push($tagStack, $match[1]);
            }
            $result .= $tag;
        } elseif (preg_match('~<!--.*?-->~si', $tag)) {
            // HTML comment.
            $result .= $tag;
        } else {
            // Text token.
            $tagLen = mstrlen($tag);
            if ($len + $tagLen < $length) {
                $result .= $tag;
                $len += $tagLen;
            } else {
                // Keep exactly the remaining budget (the former "+ 1" kept
                // one character too many).
                $str = msubstr($tag, 0, $length - $len);
                // msubstr() may return entity-decoded text; escape it again
                // so a decoded "<" or "&" cannot corrupt the markup.
                // Entities that are still intact are not double-encoded.
                $result .= htmlspecialchars($str, ENT_NOQUOTES, 'UTF-8', false);
                break;
            }
        }
    }

    // Close whatever is still open, innermost element first.
    while (!empty($tagStack)) {
        $result .= '</' . array_pop($tagStack) . '>';
    }

    return $result;
}
/**
 * Byte length of the UTF-8 sequence introduced by the given lead byte.
 *
 * Bytes that do not start a multi-byte sequence (ASCII, continuation bytes,
 * invalid values) count as a single byte. The obsolete 5- and 6-byte forms
 * are still recognised.
 *
 * @param int $byte Value of the lead byte (0-255).
 * @return int
 */
function _msubstr_utf8_char_bytes($byte)
{
    if (194 <= $byte && $byte <= 223) {
        return 2;
    }
    if (224 <= $byte && $byte <= 239) {
        return 3;
    }
    if (240 <= $byte && $byte <= 247) {
        return 4;
    }
    if (248 <= $byte && $byte <= 251) {
        return 5;
    }
    if ($byte == 252 || $byte == 253) {
        return 6;
    }
    return 1;
}

/**
 * Cut a string (Chinese or other multi-byte text included) to at most
 * $length characters without splitting a multi-byte character.
 *
 * A few HTML entities are decoded first so that each counts as a single
 * character. The returned text is NOT re-encoded; callers that emit HTML
 * must escape it again.
 * NOTE(review): the entity list was damaged in the original listing;
 * &amp; &quot; &lt; &gt; are reconstructed from their replacements, and the
 * fifth pair is assumed to be '&nbsp;' => ' ' -- confirm.
 *
 * @param string $string  Text to cut.
 * @param int    $start   Number of leading characters to skip (a byte offset
 *                        when $charset is not UTF-8). Negative values are
 *                        treated as 0.
 * @param int    $length  Maximum number of characters to return.
 * @param string $dot     Suffix appended only when text was actually cut off
 *                        at the end.
 * @param string $charset 'utf-8' (case-insensitive) selects UTF-8 handling;
 *                        any other value treats each byte above 127 as the
 *                        first half of a two-byte character.
 * @return string
 */
function msubstr($string, $start, $length, $dot = '', $charset = 'utf-8')
{
    $string = str_replace(
        array('&amp;', '&quot;', '&lt;', '&gt;', '&nbsp;'),
        array('&', '"', '<', '>', ' '),
        $string
    );
    $start = max(0, (int) $start);
    $bytes = strlen($string);

    // Fast path: a string of at most $length bytes has at most $length
    // characters, so nothing needs cutting.
    if ($start === 0 && $bytes <= $length) {
        return $string;
    }

    if (strtolower($charset) == 'utf-8') {
        // Skip $start whole characters to find the first byte to keep.
        $offset = 0;
        for ($skipped = 0; $skipped < $start && $offset < $bytes; $skipped++) {
            $offset += _msubstr_utf8_char_bytes(ord($string[$offset]));
        }
        $offset = min($offset, $bytes);

        // $n: byte position after the last kept character,
        // $tn: byte width of the last character read,
        // $noc: number of characters read so far.
        $n = $offset;
        $tn = 0;
        $noc = 0;
        while ($n < $bytes) {
            $tn = _msubstr_utf8_char_bytes(ord($string[$n]));
            $n += $tn;
            $noc++;
            if ($noc >= $length) {
                break;
            }
        }
        // Only reachable when $length <= 0: give back the character that
        // was read before the limit could be checked.
        if ($noc > $length) {
            $n -= $tn;
        }
        // A truncated trailing sequence can push $n past the end.
        $n = min($n, $bytes);
        $strcut = (string) substr($string, $offset, $n - $offset);
    } else {
        $strcut = '';
        $stop = min($start + $length, $bytes);
        for ($n = $start; $n < $stop; $n++) {
            $strcut .= $string[$n];
            // Keep the trail byte together with its lead byte.
            if (ord($string[$n]) > 127 && $n + 1 < $bytes) {
                $strcut .= $string[++$n];
            }
        }
    }

    // Append the marker only when something was really cut off.
    return $n < $bytes ? $strcut . $dot : $strcut;
}
/**
 * Length of a string in characters rather than bytes, so Chinese and English
 * text are measured alike.
 *
 * Uses mb_strlen() when available, then iconv_strlen(); otherwise counts
 * UTF-8 sequences with a regular expression (that fallback ignores $charset).
 *
 * @param string $str     Text to measure.
 * @param string $charset Character encoding passed to mb_strlen()/iconv_strlen().
 * @return int Number of characters.
 */
function mstrlen($str, $charset = 'utf-8')
{
    // Test for the functions that are actually called.
    if (function_exists('mb_strlen')) {
        return mb_strlen($str, $charset);
    }
    if (function_exists('iconv_strlen')) {
        return iconv_strlen($str, $charset);
    }

    // One alternative per UTF-8 sequence length (1 to 4 bytes); the number
    // of matches is the number of characters.
    preg_match_all(
        '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]'
        . '|[\xe1-\xef][\x80-\xbf][\x80-\xbf]'
        . '|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]'
        . '|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/',
        $str,
        $ar
    );
    return count($ar[0]);
}
// Usage example: shorten a sample fragment and print the result.
// NOTE(review): the sample's HTML tags were lost when this listing was
// extracted; only its text survives below. Restore the original markup if
// it is available.
$str = "
X
1 s
Test |
124 ";
echo subHtml($str);
?>