/**
 * Translates a sequence of UTF-8 bytes into their equivalent Unicode code points.
 *
 * Each code point is written as a backslash, the letter "u" and the code point
 * in upper-case hexadecimal, with nothing between consecutive code points.
 *
 * Hex width: values of up to four hex digits are left-padded with "0" to an
 * even number of digits (so "A" yields "\u41" and a line feed yields "\u0A");
 * values needing more than four digits are emitted unpadded ("\u1F600").
 *
 * Besides the 1-4 byte forms, the legacy 5- and 6-byte lead bytes (111110xx,
 * 1111110x) are decoded as well. No check is made for overlong encodings or
 * surrogate code points.
 *
 * @param string $utf8 Raw UTF-8 encoded bytes.
 *
 * @return string The "\u"-prefixed hexadecimal code points, concatenated.
 *
 * @throws \Exception If a lead byte is invalid, the input ends in the middle of
 *                    a multi-byte sequence, or a continuation byte is malformed.
 */
function utf8_to_unicode($utf8): string
{
    $utf8 = (string) $utf8;
    $length = strlen($utf8);
    $out = '';
    $i = 0;

    while ($i < $length) {
        $lead = ord($utf8[$i++]);

        // The lead byte determines how many continuation bytes follow and
        // which of its own low bits belong to the code point.
        if (($lead & 0x80) === 0x00) {
            // 0xxxxxxx
            $n = $lead;
            $continuations = 0;
        } elseif (($lead & 0xE0) === 0xC0) {
            // 110xxxxx 10xxxxxx
            $n = $lead & 0x1F;
            $continuations = 1;
        } elseif (($lead & 0xF0) === 0xE0) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            $n = $lead & 0x0F;
            $continuations = 2;
        } elseif (($lead & 0xF8) === 0xF0) {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $n = $lead & 0x07;
            $continuations = 3;
        } elseif (($lead & 0xFC) === 0xF8) {
            // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
            $n = $lead & 0x03;
            $continuations = 4;
        } elseif (($lead & 0xFE) === 0xFC) {
            // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
            $n = $lead & 0x01;
            $continuations = 5;
        } else {
            throw new \Exception('Invalid utf-8 code point');
        }

        for (; $continuations > 0; $continuations--) {
            // Reading past the end of the string would silently decode as zero bits.
            if ($i >= $length) {
                throw new \Exception('Invalid utf-8 code point');
            }

            $byte = ord($utf8[$i++]);

            // Every continuation byte must have the form 10xxxxxx.
            if (($byte & 0xC0) !== 0x80) {
                throw new \Exception('Invalid utf-8 code point');
            }

            $n = ($n << 6) | ($byte & 0x3F);
        }

        $hex = strtoupper(dechex($n));
        $digits = strlen($hex);

        // Up to four digits: pad to an even width. Longer values stay as they are.
        if ($digits <= 4 && $digits % 2 === 1) {
            $hex = '0' . $hex;
        }

        $out .= '\\u' . $hex;
    }

    return $out;
}
This article comes from the "7804265" blog; please retain this source attribution when reposting: http://7814265.blog.51cto.com/7804265/1862517
PHP: converting emoji from UTF-8 to Unicode code points