Today I ran into a string character-encoding problem. I was calling a TTS (text-to-speech) API that requires Chinese text to be UTF-8 encoded, but I was passing it a Unicode (non-UTF-8) encoded Chinese string, so the text could not be read aloud correctly; I later found that the encoding was the cause. I found two functions online: one converts a string to a UTF-8 encoded string, and the other checks whether a buffer looks like UTF-8.
I am recording them here for future reference:
std::string String_to_utf8 (const std::string & str) {int nwlen =:: MultiByteToWideChar (CP_ACP, 0, Str.c_str (),-1, NULL, 0); wchar_t * Pwbuf = new Wchar_t[nwlen + 1];//must be added 1, otherwise the tail will appear ZeroMemory (Pwbuf, Nwlen * 2 + 2); :: MultiByteToWideChar (CP_ACP, 0, Str.c_str (), Str.length (), Pwbuf, Nwlen); int nlen =:: WideCharToMultiByte (Cp_utf8, 0, Pwbuf,-1, NULL, NULL, NULL, NULL); char * PBuf = new Char[nlen + 1]; ZeroMemory (PBuf, Nlen + 1); :: WideCharToMultiByte (Cp_utf8, 0, Pwbuf, Nwlen, PBuf, nlen, NULL, NULL); std::string retstr (PBUF); delete []pwbuf; delete []pbuf; Pwbuf = NULL; PBuf = NULL; return RETSTR;} BOOL IsTextUTF8 (char* str,ulonglong length) {DWORD nbytes=0;//uft8 can be encoded in 1-6 bytes, ASCII with one byte UCHAR chr; BOOL ballascii=true; If all are ASCII, the description is not UTF-8 for (int i=0; i<length; ++i) {chr= * (str+i); if ((chr&0x80)! = 0)//Determine if ASCII encoding, if not, indicates that it is possible for UTF-8,ASCII to be encoded with 7 bits, but with one byte, the highest bit is marked as 0,o0xxxxxxx ballascii= FALSE ; if (nbytes==0)//If it is not an ASCII code, it should be a multibyte character, the count of bytes {if (chr>=0x80) {if (chr> =0XFC&&CHR<=0XFD) nbytes=6; else if (Chr>=0xf8) nbytes=5; else if (chr>=0xf0) nbytes=4; else if (CHR>=0XE0) nbytes=3; else if (chr>=0xc0) nbytes=2; else return FALSE; nbytes--; }} else//multibyte character non-first byte, should be 10xxxxxx {if ((chr&0xc0)! = 0x80) return Fals E nbytes--; }} if (Nbytes > 0)//Counter-return rule return FALSE; if (BALLASCII)//If all are ASCII, the description is not UTF-8 return FALSE; return TRUE;}
After converting the string to UTF-8, the TTS engine recognized the Chinese text correctly, which solved the problem.