Two collected functions for checking whether a string is UTF-8
The first one: in my tests I found it was not good enough, although it was accurate in most cases.
/**
 * Heuristic check for UTF-8 encoded text. This is NOT a full UTF-8 validator.
 *
 * It only looks for three-byte sequences made of a lead byte 0xE4-0xE9
 * (chr(228)-chr(233)) followed by two bytes 0x80-0xBF (chr(128)-chr(191)).
 * In UTF-8 those sequences encode U+4000-U+9FFF, which contains the common
 * CJK ideographs. Pure-ASCII input and text in other scripts therefore
 * return false even when it is perfectly valid UTF-8.
 *
 * @param string $Word Byte string to inspect.
 * @return bool True if such a sequence appears at the start of the string,
 *              at the end of the string, or at least twice in a row
 *              anywhere in it; false otherwise.
 */
function Is_utf8_bak($Word) {
    // Single-quoted so PCRE (not PHP) interprets the \xNN escapes; no /u
    // modifier, so the pattern is matched byte by byte.
    $seq = '[\xE4-\xE9][\x80-\xBF]{2}';

    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // only a real match counts as a hit.
    return preg_match('/^' . $seq . '/', $Word) === 1
        || preg_match('/' . $seq . '$/', $Word) === 1
        || preg_match('/(?:' . $seq . '){2,}/', $Word) === 1;
}
The second one: in my tests it worked better than the first.
/**
 * Returns true if $String is valid UTF-8 and false otherwise.
 *
 * Pattern taken from http://w3.org/International/questions/qa-forms-utf-8.html
 * It accepts tab, LF, CR and printable ASCII plus every well-formed
 * multi-byte sequence; overlong encodings, surrogates and code points above
 * plane 16 are rejected. Other ASCII control bytes (e.g. NUL) are rejected
 * as well, because the ASCII alternative does not list them. The empty
 * string is accepted.
 *
 * @param string $String Byte string to validate.
 * @return bool True when the whole string matches; false on a mismatch or
 *              if preg_match() itself reports an error.
 */
function Is_utf8($String) {
    // Modifiers: x = extended syntax (allows the whitespace and # comments
    // inside the pattern), s = dot-all (kept from the published pattern).
    // The repetition is possessive (*+): the alternatives are told apart by
    // their lead byte, so giving back characters could never produce a match
    // and there is no need to keep backtracking state for long inputs.
    return preg_match('%^(?:
          [\x09\x0A\x0D\x20-\x7E]            # ASCII
        | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
        | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
        | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
        | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
        | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
        | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
    )*+$%xs', $String) === 1;
} // function Is_utf8