/**
 * Remove every byte sequence that is not a well-formed UTF-8 character of at
 * most three bytes.
 *
 * UTF-8 encoding table for the ranges that are kept:
 *
 *   Unicode range            | UTF-8 encoding
 *   U+0000 0000 - U+0000 007F | 0xxxxxxx
 *   U+0000 0080 - U+0000 07FF | 110xxxxx 10xxxxxx
 *   U+0000 0800 - U+0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
 *
 * Everything else is dropped: sequences of four bytes and above, stray
 * continuation bytes, sequences truncated by the end of the input, overlong
 * encodings and UTF-16 surrogate code points.
 *
 * @param string $STR Raw byte string to filter.
 *
 * @return string The input with all rejected bytes removed.
 */
function FilterUtf8($STR)
{
    $bytes = (string) $STR;
    $length = strlen($bytes);
    $result = '';
    $i = 0;

    while ($i < $length) {
        $lead = ord($bytes[$i]);

        // One byte: 0xxxxxxx (plain ASCII).
        if ($lead < 0x80) {
            $result .= $bytes[$i];
            $i++;
            continue;
        }

        if ($lead >= 0xC2 && $lead <= 0xDF) {
            // Two bytes: 110xxxxx 10xxxxxx. Leads 0xC0 and 0xC1 are excluded
            // because they can only produce overlong encodings of ASCII.
            if ($i + 1 < $length && (ord($bytes[$i + 1]) & 0xC0) === 0x80) {
                $result .= substr($bytes, $i, 2);
                $i += 2;
                continue;
            }
        } elseif (($lead & 0xF0) === 0xE0) {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
            if ($i + 2 < $length) {
                $second = ord($bytes[$i + 1]);
                $third = ord($bytes[$i + 2]);

                // 0xE0 needs a second byte >= 0xA0 (otherwise overlong) and
                // 0xED needs one <= 0x9F (otherwise a UTF-16 surrogate).
                $secondMin = $lead === 0xE0 ? 0xA0 : 0x80;
                $secondMax = $lead === 0xED ? 0x9F : 0xBF;

                if (
                    $second >= $secondMin
                    && $second <= $secondMax
                    && ($third & 0xC0) === 0x80
                ) {
                    $result .= substr($bytes, $i, 3);
                    $i += 3;
                    continue;
                }
            }
        }

        // Anything that reaches this point is rejected: a lead byte for four
        // bytes and above, a stray continuation byte, or a malformed or
        // truncated sequence. Only this single byte is skipped so that valid
        // characters following it are still examined; the continuation bytes
        // of a rejected sequence are then dropped one by one on later passes.
        $i++;
    }

    return $result;
}
The above describes how to filter out UTF-8 characters longer than three bytes, as well as non-UTF-8 characters. I hope it is helpful to readers who are interested in PHP tutorials.