/**
 * Keep only the UTF-8 characters that are encoded in one, two or three bytes.
 *
 * UTF-8 encoding table:
 *   Unicode code point range | UTF-8 encoding
 *   U+0000 - U+007F          | 0xxxxxxx
 *   U+0080 - U+07FF          | 110xxxxx 10xxxxxx
 *   U+0800 - U+FFFF          | 1110xxxx 10xxxxxx 10xxxxxx
 *
 * Everything else is removed from the result:
 *   - sequences announced by a 4-, 5- or 6-byte lead byte (the lead byte and
 *     the following 3, 4 or 5 bytes are skipped without being inspected);
 *   - 2- and 3-byte sequences whose trailing bytes are not 10xxxxxx
 *     continuation bytes, or that are cut off by the end of the input
 *     (the nominal length of the sequence is still consumed);
 *   - continuation bytes that appear without a lead byte.
 *
 * @param string $str Raw byte string to filter.
 * @return string The input with all rejected bytes removed; '' for empty input.
 */
function FilterUtf8($str)
{
    $bytes = (string) $str;
    $length = strlen($bytes);
    $result = '';

    for ($i = 0; $i < $length; $i++) {
        $lead = ord($bytes[$i]);

        // 0xxxxxxx: plain ASCII, always kept.
        if (($lead & 0x80) === 0) {
            $result .= $bytes[$i];
            continue;
        }

        // Lead bytes of 6-, 5- and 4-byte sequences: drop the whole sequence.
        // The loop's own $i++ accounts for the lead byte itself.
        if (($lead & 0xFC) === 0xFC) {
            $i += 5;
            continue;
        }
        if (($lead & 0xF8) === 0xF8) {
            $i += 4;
            continue;
        }
        if (($lead & 0xF0) === 0xF0) {
            $i += 3;
            continue;
        }

        if (($lead & 0xE0) === 0xE0) {
            $size = 3; // 1110xxxx
        } elseif (($lead & 0xC0) === 0xC0) {
            $size = 2; // 110xxxxx
        } else {
            // 10xxxxxx without a preceding lead byte: not valid on its own.
            continue;
        }

        // substr() returns fewer than $size bytes when the input ends early,
        // which marks the sequence as truncated instead of reading past the end.
        $sequence = substr($bytes, $i, $size);
        $valid = strlen($sequence) === $size;

        // Every trailing byte must be exactly 10xxxxxx. Testing only the top
        // bit would also accept lead bytes and let malformed input through.
        for ($k = 1; $valid && $k < $size; $k++) {
            $valid = (ord($sequence[$k]) & 0xC0) === 0x80;
        }

        if ($valid) {
            $result .= $sequence;
        }

        // Consume the nominal sequence length whether or not it was kept.
        $i += $size - 1;
    }

    return $result;
}
Filters out characters that take more than three bytes in UTF-8, as well as bytes that are not valid UTF-8.