用到兩個函數:MultiByteToWideChar, WideCharToMultiByte。 使用這兩個API實現任意兩種字元編碼的轉換非常簡單:將源字串使用MultiByteToWideChar轉換成unicode編碼,再將轉換後的unicode編碼使用WideCharToMultiByte轉換成指定的字元編碼。 下面是一段示範程式:
#include <windows.h>
#include <tchar.h>
#include <ctype.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
////////////////////////////////////////////////////////////////////
/*
 * Print one row of a dump: first the bytes as hex, each left-aligned in a
 * 4-column cell, then on the next line the same bytes as characters
 * (non-printable bytes are shown as '.').
 *
 * row   - first byte of the row
 * count - number of bytes in the row (1..16 when called from hexdump)
 */
static void hexdump_row(const unsigned char *row, int count)
{
    int k;

    for (k = 0; k < count; k++)
        printf("%-4x", row[k]);
    printf("\n");

    for (k = 0; k < count; k++) {
        /*
         * Keep the byte unsigned: isprint() is only defined for values
         * representable as unsigned char (or EOF), so a plain, possibly
         * negative char must not be passed for bytes >= 0x80.
         */
        unsigned char ch = row[k];
        printf("%-4c", isprint(ch) ? ch : '.');
    }
    printf("\n");
}

/*
 * Dump a memory region to stdout, 16 bytes per row, as hex values followed
 * by their character representation.
 *
 * addr  - start of the region; a NULL pointer prints nothing
 * bytes - number of bytes to dump; zero or negative prints nothing
 *
 * Side effect: switches the C locale to the system ANSI code page
 * (".ACP") so that isprint() classifies bytes according to that code page.
 */
void hexdump(const void* addr, int bytes)
{
    const unsigned char *cursor = (const unsigned char *)addr;
    int remaining = bytes;

    if (cursor == NULL || remaining <= 0)
        return;

    setlocale(LC_ALL, ".ACP");

    /* Full rows of 16 bytes, then one shorter row for whatever is left. */
    while (remaining > 0) {
        int count = remaining < 16 ? remaining : 16;

        hexdump_row(cursor, count);
        cursor += count;
        remaining -= count;
    }
}
- ///////////////////////////////////////////////////////////////
- BOOL CALLBACK EnumCodePagesProc(
- LPTSTR lpCodePageString // code page identifier string
- ) {
- UINT uCodePage = atoi(lpCodePageString);
- printf("uCodePage: %d/n", uCodePage);
- CPINFOEX cpinfo;
- GetCPInfoEx(uCodePage, 0, &cpinfo);
- printf("MaxCharSize: %d/n"
- "CodePage: %d/n"
- "CodePageName: %s/n",
- cpinfo.MaxCharSize,
- cpinfo.CodePage,
- cpinfo.CodePageName);
- printf("DefaultChar:");
- for(int i = 0;i < MAX_DEFAULTCHAR; i++)
- printf("%3x", cpinfo.DefaultChar[i]);
- printf("/nLeadByte:");
- for(i = 0; i < MAX_LEADBYTES; i++)
- printf("%3x", cpinfo.LeadByte[i]);
- printf("/nUnicodeDefaultChar:");
- wprintf(L"%x", cpinfo.UnicodeDefaultChar);
- puts("/n");
- return TRUE;
- }
- ////////////////////////////////////////////////////////////////
- int main() {
- char szTest[] = "this²âÊÔtest";
- printf("local settings: %s/n", setlocale(LC_CTYPE, ""));
- puts("original string");
- hexdump(szTest, sizeof(szTest));
- puts("To Unicode");
- wchar_t out[MAX_PATH] = {0};
- MultiByteToWideChar(
- CP_ACP,
- 0,
- szTest,
- sizeof(szTest),
- out,
- MAX_PATH);
- hexdump(out, wcslen(out) * sizeof(wchar_t));
- puts("To UTF-8");
- char utf8[MAX_PATH] = {0};
- WideCharToMultiByte(
- //54936, // GB18030--54936 CP_UTF8,
- //950, // BIG5
- 65001,//CP_UTF8, // 65001, UTF-7:65000
- 0,
- out,
- wcslen(out),
- utf8,
- MAX_PATH,
- NULL,
- NULL
- );
- hexdump(utf8, lstrlen(utf8));
-
- // EnumSystemCodePages(EnumCodePagesProc, CP_INSTALLED);
- return 0;
- }
輸出結果如下:
local settings: Chinese_People's Republic of China.936
original string
74 68 69 73 b2 e2 ca d4 74 65 73 74 0
t h i s . . . . t e s t .
To Unicode
74 0 68 0 69 0 73 0 4b 6d d5 8b 74 0 65 0
t . h . i . s . K m . . t . e .
73 0 74 0
s . t .
To UTF-8
74 68 69 73 e6 b5 8b e8 af 95 74 65 73 74
t h i s . . . . . . t e s t
可以看到“测试”兩個漢字(U+6D4B、U+8BD5)在GBK編碼(字碼頁為936)中是十六進位的b2 e2 ca d4,在UNICODE(UTF-16,little-endian位元組順序)中,則是十六進位的:4b 6d d5 8b,在UTF8中,則是十六進位的:e6 b5 8b e8 af 95。