Author: Zhu Jinchan
Source: http://blog.csdn.net/clever101
Open-source code turns out to be a very rich resource. I extracted a few character-set conversion functions from the sqlite3 source code, and after a little adaptation they proved quite handy. Here is the implementation code:
* * * * * Convert a UTF-8 string to Microsoft Unicode (UTF-16?).
* * * * * * Hold the returned string is obtained from malloc.
*/Static WCHAR *utf8tounicode (const char *zfilename) {int nChar;
WCHAR *zwidefilename;
NChar = MultiByteToWideChar (Cp_utf8, 0, Zfilename,-1, NULL, 0);
Zwidefilename = Static_cast<wchar *> (malloc (nchar*sizeof (zwidefilename[0)));
if (zwidefilename==0) {return 0;
} NChar = MultiByteToWideChar (Cp_utf8, 0, Zfilename,-1, Zwidefilename, NChar);
if (nchar==0) {free (zwidefilename);
Zwidefilename = 0;
return zwidefilename; /* * * * Convert Microsoft Unicode to UTF-8.
Spaces to hold the returned string is * * obtained from malloc ().
*/Static char *unicodetoutf8 (const WCHAR *zwidefilename) {int nbyte;
Char *zfilename;
Nbyte = WideCharToMultiByte (Cp_utf8, 0, Zwidefilename,-1, 0, 0, 0, 0);
Zfilename = static_cast<char*> (malloc (nbyte));
if (zfilename==0) {return 0; } nbyte = WideCharToMultiByte (Cp_utf8, 0, ZwidefilenAme,-1, Zfilename, nbyte, 0, 0);
if (Nbyte = = 0) {free (zfilename);
Zfilename = 0;
return zfilename;
/* * * * Convert an ANSI string to Microsoft Unicode, based on the * * Current codepage settings for file APIs.
* * * * * * Hold the returned string is obtained * * from malloc.
*/Static WCHAR *mbcstounicode (const char *zfilename) {int nbyte;
WCHAR *zmbcsfilename; int codepage = Arefileapisansi ()?
CP_ACP:CP_OEMCP;
Nbyte = MultiByteToWideChar (codepage, 0, Zfilename,-1, null,0) *sizeof (WCHAR);
Zmbcsfilename = static_cast<wchar*> (malloc (nbyte*sizeof (zmbcsfilename[0)));
if (zmbcsfilename==0) {return 0;
} Nbyte = MultiByteToWideChar (codepage, 0, Zfilename,-1, Zmbcsfilename, nbyte);
if (nbyte==0) {free (zmbcsfilename);
Zmbcsfilename = 0;
return zmbcsfilename;
}/* * * * Convert Microsoft Unicode to multibyte character string, based on the * * user's Ansi codepage.
* * * * * * Hold the returned string is obtained from * * malloc (). * * STATIC char* Unicodetombcs (const wchar* zwidefilename) {int nbyte;
Char *zfilename; int codepage = Arefileapisansi ()?
CP_ACP:CP_OEMCP;
Nbyte = WideCharToMultiByte (codepage, 0, Zwidefilename,-1, 0, 0, 0, 0);
Zfilename = static_cast<char*> (malloc (nbyte));
if (zfilename==0) {return 0;
Nbyte = WideCharToMultiByte (codepage, 0, Zwidefilename,-1, Zfilename, nbyte, 0, 0);
if (Nbyte = = 0) {free (zfilename);
Zfilename = 0;
return zfilename; } * * * * Convert multibyte character string to UTF-8.
Spaces to hold the * * returned string is obtained from malloc ().
*/Static char* MbcsToUtf8 (const char *zfilename) {char *zfilenameutf8;
WCHAR *ztmpwide;
Ztmpwide = Mbcstounicode (zfilename);
if (ztmpwide==0) {return 0;
} ZFilenameUtf8 = UnicodeToUtf8 (ztmpwide);
Free (ztmpwide);
return ZFilenameUtf8; } * * * * Convert UTF-8 to multibyte character string.
Spaces to hold the * * returned string is obtained from malloc (). */Static char* Utf8tombcs (const char *zfilename) {char *zfilenamembcs;
wchar* Ztmpwide;
Ztmpwide = Utf8tounicode (zfilename);
if (ztmpwide==0) {return 0;
} Zfilenamembcs = Unicodetombcs (ztmpwide);
Free (ztmpwide);
return ZFILENAMEMBCS;
} std::string MbcsToUtf8 (const char* PSZMBCS) {std::string str;
WCHAR *pwchar=0;
CHAR *pchar=0;
int len=0; int codepage = Arefileapisansi ()?
CP_ACP:CP_OEMCP;
Len=multibytetowidechar (codepage, 0, Pszmbcs,-1, null,0);
Pwchar=new Wchar[len];
if (pwchar!=0) {len = MultiByteToWideChar (codepage, 0, Pszmbcs,-1, Pwchar, Len);
if (len!=0) {len = WideCharToMultiByte (Cp_utf8, 0, Pwchar,-1, 0, 0, 0, 0);
Pchar=new Char[len];
if (pchar!=0) {len = WideCharToMultiByte (Cp_utf8, 0, Pwchar,-1, Pchar, len,0, 0);
if (len!=0) {str = Pchar;
} Delete Pchar;
} Delete Pwchar;
} return str; }
To test these interfaces, I wrote a test project that reads an XML file and converts the characters in it. The code for the test project can be downloaded from the following address:
Conversion interfaces between Unicode and multibyte characters, together with the test project