Run the following code:
// rtrim() works on bytes: the mask "、" is the three bytes e3 80 81, so the
// trailing 0x81 of "品" (e5 93 81) is stripped as well.
$tag = "互联网产品、";
$text = rtrim($tag, "、");
print_r($text);
We might expect the result to be 互联网产品, but the actual result is 互联网产� — the last character comes out garbled. Why is that?
Background
PHP's multibyte string functions are the ones prefixed with mb_ (see http://php.net/manual/zh/ref.mbstring.php).
For example:
// strlen() counts bytes; mb_strlen() counts characters.
$str = "abcd";
print_r(strlen($str) . "\n");    // 4
print_r(mb_strlen($str) . "\n"); // 4

// Three Chinese characters, each encoded as three bytes in UTF-8.
$str = "周梦康";
print_r(strlen($str) . "\n");    // 9
print_r(mb_strlen($str) . "\n"); // 3
The mb_ family of functions operates at the granularity of one character, which may consist of several bytes; functions without the mb_ prefix operate on the actual number of bytes.
Principle
The documented signature of the trim function is:
string trim ( string $str [, string $character_mask = " \t\n\r\0\x0B" ] )
This function is not a multibyte function. For a multibyte character such as a Chinese character, it takes the single byte at the head or tail of the string and looks it up in the byte table built from $character_mask; if the byte is in the table it is removed, and matching continues with the next byte. For example:
// "b" and "c" are in the mask and are stripped; "d" is not, so trimming stops.
echo ltrim("bcdf", "abc"); // df
As the string_print_char function in the demo below shows, 、 is made up of the three bytes 0xe3 0x80 0x81, and 品 consists of the three bytes 0xe5 0x93 0x81.
So when rtrim executes, it compares byte by byte, and the trailing 0x81 of 品 is removed as well, which leaves a garbled character at the end of the result.
Exploring the source
I read through the PHP 7 source and distilled it into the small demo below so that we can study it together. Learning from the PHP source is really not that hard — just make a little progress every day.
//
//  main.c
//  trim
//
//  Created by Zhou Mengkang on 2017/10/18.
//  Copyright 2017 Zhou Mengkang. All rights reserved.
//

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void string_print_char(char *str);
void php_charmask(unsigned char *input, size_t len, char *mask);
char *ltrim(char *str, char *character_mask);
char *rtrim(char *str, char *character_mask);

// Copies the first len bytes of src into a fresh, NUL-terminated heap buffer.
// Returns NULL if the allocation fails; the caller owns (and frees) the result.
static char *copy_bytes(const char *src, size_t len)
{
    char *res = malloc(len + 1);

    if (res == NULL) {
        return NULL;
    }
    memcpy(res, src, len);
    res[len] = '\0'; // without this, printing the result with %s reads past the buffer
    return res;
}

int main(int argc, char const *argv[])
{
    (void)argc;
    (void)argv;

    char *left = ltrim("bcdf", "abc");
    char *right = rtrim("互联网产品、", "、");

    if (left == NULL || right == NULL) {
        free(left);
        free(right);
        fprintf(stderr, "out of memory\n");
        return 1;
    }

    printf("%s\n", left);       // df
    string_print_char("品");    // e5 93 81
    string_print_char("、");    // e3 80 81
    printf("%s\n", right);      // "互联网产" followed by the dangling bytes e5 93

    free(left);
    free(right);
    return 0;
}

// Returns a newly allocated copy of str with every leading byte that occurs in
// character_mask removed. Matching is done byte by byte, not per character.
// The loop shape deliberately mirrors php_trim() in the PHP source.
char *ltrim(char *str, char *character_mask)
{
    char mask[256];
    size_t i;
    size_t trimmed = 0;
    size_t len = strlen(str);

    php_charmask((unsigned char *)character_mask, strlen(character_mask), mask);

    for (i = 0; i < len; i++) {
        if (mask[(unsigned char)str[i]]) {
            trimmed++;
        } else {
            break;
        }
    }

    return copy_bytes(str + trimmed, len - trimmed);
}

// Returns a newly allocated copy of str with every trailing byte that occurs in
// character_mask removed. Because the comparison is per byte, a multibyte
// character whose last byte appears in the mask gets cut in half.
char *rtrim(char *str, char *character_mask)
{
    char mask[256];
    size_t i;
    size_t len = strlen(str);

    php_charmask((unsigned char *)character_mask, strlen(character_mask), mask);

    if (len > 0) {
        i = len - 1;
        do {
            if (mask[(unsigned char)str[i]]) {
                len--;
            } else {
                break;
            }
        } while (i-- != 0); // i is unsigned: test before decrementing past zero
    }

    return copy_bytes(str, len);
}

// Prints every byte of str as two hex digits, tab separated, then a newline.
void string_print_char(char *str)
{
    size_t len = strlen(str);

    for (size_t i = 0; i < len; i++) {
        printf("%02hhx\t", (unsigned char)str[i]);
    }
    printf("\n");
}

// Fills the 256-entry table mask: mask[b] is 1 for every byte b that occurs in
// the first len bytes of input, and 0 otherwise.
void php_charmask(unsigned char *input, size_t len, char *mask)
{
    unsigned char *end;
    unsigned char c;

    memset(mask, 0, 256);
    for (end = input + len; input < end; input++) {
        c = *input;
        mask[c] = 1;
    }
}
If the demo is not clear enough, copy it and run it yourself once.
Readers whose C fundamentals are weak need not worry either: I plan to write a short introductory series on learning C for PHP beginners.
Solution
Following the same pattern, we can implement the equivalent logic with PHP's own multibyte functions:
/**
 * Multibyte-safe rtrim: strips trailing characters (not bytes) from $string.
 *
 * @param string $string   The input string.
 * @param string $trim     The characters to strip; each character is one mask entry.
 * @param string $encoding Character encoding used to split both strings.
 *
 * @return string $string without the trailing characters that occur in $trim.
 */
function mb_rtrim($string, $trim, $encoding)
{
    // Build the mask one character at a time instead of one byte at a time.
    $mask = [];
    $trimLength = mb_strlen($trim, $encoding);
    for ($i = 0; $i < $trimLength; $i++) {
        $item = mb_substr($trim, $i, 1, $encoding);
        $mask[] = $item;
    }

    // Walk backwards from the last character while it is in the mask.
    $len = mb_strlen($string, $encoding);
    if ($len > 0) {
        $i = $len - 1;
        do {
            $item = mb_substr($string, $i, 1, $encoding);
            if (in_array($item, $mask, true)) {
                $len--;
            } else {
                break;
            }
        } while ($i-- != 0);
    }

    return mb_substr($string, 0, $len, $encoding);
}

mb_internal_encoding("UTF-8");
$tag = "互联网产品、";
$encoding = mb_internal_encoding();
print_r(mb_rtrim($tag, "、", $encoding)); // 互联网产品
Of course, you can also do this with a regular expression. Having worked through the functions above, do you now understand the difference between single-byte and multibyte functions?
PHP7 related source code
Php_function (Trim) { Php_do_trim (Internal_function_param_passthru, 3);} Php_function (RTrim) { Php_do_trim (internal_function_param_passthru, 2);} Php_function (LTrim) { Php_do_trim (internal_function_param_passthru, 1);}
/*
 * Shared implementation behind trim(), rtrim() and ltrim().
 * Parses one required string and one optional mask string, then stores the
 * result of php_trim() in return_value. When no mask is given, NULL and a
 * length of 0 are passed through. `mode` is forwarded to php_trim() unchanged.
 */
static void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	zend_string *str;
	zend_string *what = NULL;

	ZEND_PARSE_PARAMETERS_START(1, 2)
		Z_PARAM_STR(str)
		Z_PARAM_OPTIONAL
		Z_PARAM_STR(what)
	ZEND_PARSE_PARAMETERS_END();

	ZVAL_STR(return_value, php_trim(str, (what ? ZSTR_VAL(what) : NULL), (what ? ZSTR_LEN(what) : 0), mode));
}
Phpapi zend_string *php_trim (zend_string *str, Char *what, size_t what_len, int mode) {const char *c = zstr_val (str); size_t len = Zstr_len (str); Register size_t i; size_t trimmed = 0; Char mask[256]; if (what) {if (What_len = = 1) {char p = *what; if (mode & 1) {for (i = 0; i < len; i++) {if (c[i] = = p) { trimmed++; } else {break; }} len-= trimmed; c + = trimmed; } if (Mode & 2) {if (len > 0) {i = len-1; do {if (c[i] = = p) {len--; } else {break; }} while (I--! = 0); }}} or else {Php_charmask (unsigned char*) What, What_len, mask); if (mode & 1) {for (i = 0; i < len; i++) {if (mask[(unsigned char) c[i]) { trimmed++; } else {break; }} len-= trimmed; c + = trimmed; } if (Mode & 2) {if (len > 0) {i = len-1; do {if (mask[(unsigned char) c[i]) {len--; } else {break; }} while (I--! = 0); }}}} else {if (Mode & 1) {for (i = 0; i < len; i++) {if ((unsigned char) c[i] <= ' && (c[i] = = ' | | c[i] = = ' \ n ' | | c[i] = = ' \ r ' | | c[i] = = ' \ t ' || C[i] = = ' \v ' | | C[i] = = ' + ')) {trimmed++; } else {break; }} len-= trimmed; c + = trimmed; } if (Mode & 2) {if (len > 0) {i = len-1; Do {if (unsigned char) c[i] <= ' && (c[i] = = ' | | c[i] = = ' \ n ' | | C[i] = = ' \ r ' | | C[i] = = ' \ t ' | | C[i] = = ' \v ' | | C[i] = = ' + ') {len--; } else {break; }} while (I--! = 0); }}} if (Zstr_len (str) = = LEN) {return zend_string_copy (str); } else {return Zend_string_init (c, Len, 0); }}
/* {{Php_charmask * Fills a 256-byte bytemask with input. You can specify a range like ' a. Z ', * it needs to be incrementing. * Returns:failure/success whether the input was correct (i.e. no range errors) */static inline int php_charmask (unsigned Char *input, size_t len, char *mask) {unsigned char *end; unsigned char c; int result = SUCCESS; memset (Mask, 0, 256); for (end = Input+len; input < end; input++) {c=*input; if ((Input+3 < end) && input[1] = = '. ' && input[2] = = '. ' && input[3] >= c) {memset (mask+c, 1, input[3]-C + 1); input+=3; } else if ((Input+1 < end) && input[0] = = '. ' && input[1] = = '. ') {/* Error, try to is as helpful as possible: (a range ending/starting with '. ' won ' t be captured here) */if (End-len >= input) {/* There is no ' left ' char */php_error_docref (NULL, E_war NING, "Invalid'..' -range, no character to the left of ' ... '); result = FAILURE; Continue if (input+2 >= end) {/* There is no ' right ' char */php_error_docref (NULL, e_warning, "Inv Alid ' ... ' -range, no character to the right of ' ... '); result = FAILURE; Continue } if (Input[-1] > Input[2]) {/* Wrong order */Php_error_docref (NULL, e_warning, "Invalid"). .' -range, ' ... ' -range needs to be incrementing "); result = FAILURE; Continue }/* Fixme:better error (A.. B.. C is the only left possibility?) */Php_error_docref (NULL, e_warning, "Invalid"... -range "); result = FAILURE; Continue } else {mask[c]=1; }} return result; /* }}} */