VC++ development inevitably involves converting between multi-byte (ANSI), Unicode, and UTF-8 strings. Below are a few character-conversion functions I wrote.
1. Conversion between TCHAR and char. TCHAR is WCHAR in a Unicode build and char in a multi-byte build. The function is as follows:
Note that *dest is allocated with new[] and must be released by the caller.
// Convert the char string to the tchar string void c2t (tchar ** DEST, const char * SRC) {# ifdef _ Unicode if (src = NULL) {return ;} size_t Alen = strlen (SRC) + 1; size_t Ulen = (size_t) multibytetowidechar (cp_acp, 0, SRC, Alen, null, 0) + 1; * DEST = new wchar [Ulen];: multibytetowidechar (cp_acp, 0, SRC, Alen, * DEST, Ulen ); # else // multi-byte tchar is Char int Len = strlen (SRC) + 1; * DEST = new char [Len]; strcpy (* DEST, Src); # endif}
2. Converting TCHAR to multi-byte. Remember to release the *dest pointer.
// Convert a NUL-terminated TCHAR string to a char (CP_ACP) string.
//
// dest: receives a buffer allocated with new[]; the caller must release it
//       with delete[].
// src:  NUL-terminated source string.
//
// If dest or src is NULL, or the required size cannot be determined, the
// function returns without touching *dest, so callers should initialise
// their pointer to NULL before the call.
void T2C(char** dest, const TCHAR* src)
{
    if (dest == NULL || src == NULL) {
        return;
    }

#ifdef _UNICODE
    // With a source length of -1 the returned size includes the terminator.
    const int needed = ::WideCharToMultiByte(CP_ACP, 0, src, -1, NULL, 0, NULL, NULL);
    if (needed <= 0) {
        return;
    }

    *dest = new char[needed];

    // Never hand back an uninitialised buffer if the second pass fails.
    if (::WideCharToMultiByte(CP_ACP, 0, src, -1, *dest, needed, NULL, NULL) == 0) {
        (*dest)[0] = '\0';
    }
#else
    // In a multi-byte build TCHAR is char, so this is a plain copy.
    const size_t len = strlen(src) + 1;
    *dest = new char[len];
    memcpy(*dest, src, len);
#endif
}
3. The following two functions overlap somewhat with the ones above, but they are used later.
// Convert multiple bytes to the wide byte void c2w (wchar ** DEST, const char * SRC) {If (src = NULL) {return;} size_t Alen = strlen (SRC) + 1; size_t Ulen = (size_t) multibytetowidechar (cp_acp, 0, SRC, Alen, null, 0) + 1; * DEST = new wchar [Ulen];: multibytetowidechar (cp_acp, 0, SRC, Alen, * DEST, Ulen);} // The width bytes are converted to multiple bytes void W2C (char ** DEST, const wchar * SRC) {If (src = NULL) return; size_t Len = widechartomultibyte (cp_acp, 0, SRC,-1, null, 0, null, null); If (LEN = 0) {return;} * DEST = new char [Len]; widechartomultibyte (cp_acp, 0, SRC,-1, * DEST, Len, null, null );}
4. Conversion between UTF-8 and multi-byte or wide-character strings
// Unicode can be directly converted to UTF-8 void unicodetoutf8 (char ** DEST, const wchar * SRC) {assert (DEST! = NULL | SRC! = NULL); int Len =-1; Len = widechartomultibyte (cp_utf8, 0, SRC,-1, 0, 0, 0, 0) + 1; * DEST = new char [Len + 1];: widechartomultibyte (cp_utf8, 0, SRC,-1, * DEST, Len, 0, 0 );} // The multibyte must first be converted to the wide byte before being converted to the UTF-8void ansitoutf8 (char ** DEST, const char * SRC) {assert (DEST! = NULL | SRC! = NULL); wchar * pwszstr = NULL; c2w (& pwszstr, Src); unicodetoutf8 (DEST, pwszstr); safe_arrydelete (pwszstr );}
Converting UTF-8 to multi-byte or Unicode:
void Utf8ToAnsi(char** dest, const char* src) { ASSERT(dest!= NULL || src != NULL); WCHAR* str = NULL; Utf8ToUnicode(&str, src); W2C(dest, str); SAFE_ARRYDELETE(str); }void Utf8ToUnicode(WCHAR** dest,const char* src){ ASSERT(dest!= NULL || src != NULL); int unicodeLen = ::MultiByteToWideChar( CP_UTF8, 0, src, -1, NULL, 0 ) + 1; *dest = new WCHAR[unicodeLen]; //memset(*dest, 0x0, (unicodeLen + 1)*sizeof(WCHAR)); MultiByteToWideChar(CP_UTF8, 0, src, -1, *dest, unicodeLen);}
SAFE_ARRYDELETE is a macro used to release memory.
// Release an array allocated with new[] and reset the pointer to NULL.
// The do { ... } while (0) wrapper makes the macro behave as one statement,
// so it is safe inside an unbraced if/else; it must be followed by a
// semicolon. The argument is evaluated more than once, so pass a plain
// pointer variable.
#define SAFE_ARRYDELETE(x) do { if (NULL != (x)) { delete[] (x); (x) = NULL; } } while (0)

// Release a single object allocated with new and reset the pointer to NULL.
// The same usage rules as SAFE_ARRYDELETE apply.
#define SAFE_DELETE(x) do { if (NULL != (x)) { delete (x); (x) = NULL; } } while (0)
With the above functions, we can convert freely between the different encodings. You can also extend these functions as needed. Note that the buffer that dest points to in each of the above functions must be released by the caller.
Several functions for converting between multi-byte, Unicode, and UTF-8