標籤:
使用 C++ 在 Windows 與 Linux 平台上實現字串由 GBK 到 UTF-8 的轉換。
原理是先將 GBK 字串轉為 Unicode 寬字元(wchar_t)字串,然後再將其轉換為 UTF-8 編碼。
代碼如下:
#ifndef __CODE_CONVERT_H__
#define __CODE_CONVERT_H__

#include <cassert>
#include <cstdio>
#include <cstring>
#include <stdlib.h>
#include <locale.h>
#include <string>
#include <vector>
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <wtypes.h>
#endif

// NOTE(review): a using-directive at header scope leaks into every includer.
// It is retained only because existing includers may depend on it.
using namespace std;

#if !defined(_WIN32) && !defined(_WIN64)
namespace code_convert_detail {

// Remembers the locale that is active at construction and reinstates it on
// destruction, so a temporary setlocale() inside a function does not leak out
// to the caller.
class ScopedLocale {
public:
    ScopedLocale() : saved_(), valid_(false)
    {
        // The pointer returned by setlocale() may be invalidated by the next
        // setlocale() call, so the name is copied immediately.
        const char *current = setlocale(LC_ALL, NULL);
        if (current) {
            saved_ = current;
            valid_ = true;
        }
    }

    ~ScopedLocale()
    {
        if (valid_) {
            setlocale(LC_ALL, saved_.c_str());
        }
    }

private:
    // Non-copyable (declared, never defined).
    ScopedLocale(const ScopedLocale &);
    ScopedLocale &operator=(const ScopedLocale &);

    std::string saved_;
    bool valid_;
};

} // namespace code_convert_detail
#endif

// Converts the NUL-terminated GBK string srcStr into UTF-8 and stores the
// NUL-terminated result in utfStr.
//
// The conversion goes GBK -> wide-character Unicode -> UTF-8.
//
// utfStr        destination buffer.
// maxUtfStrlen  capacity of utfStr in bytes, including room for the
//               terminating NUL. A capacity of at least twice the source
//               length plus one is recommended.
// srcStr        source string in GBK.
//
// Returns -1 on failure. On success returns a positive byte count: on Windows
// the count includes the terminating NUL, on other platforms it does not.
// On non-Windows platforms an empty source string is reported as a failure.
//
// Non-Windows platforms need the "zh_CN.gbk" and "zh_CN.utf8" locales to be
// installed. The process locale is switched temporarily and restored before
// returning; setlocale() acts on process-wide state, so do not call this
// concurrently with other locale-dependent code.
inline int gbk2utf8(char *utfStr, size_t maxUtfStrlen, const char *srcStr)
{
    if (!srcStr || !utfStr) {
        printf("Bad Parameter\n");
        return -1;
    }

#if defined(_WIN32) || defined(_WIN64)
    // Code page 936 is GBK. CP_ACP would only mean GBK on systems whose ANSI
    // code page happens to be Simplified Chinese.
    const UINT gbkCodePage = 936;

    // First call measures the wide string (terminating NUL included because
    // the source length is given as -1).
    const int wideLen = MultiByteToWideChar(gbkCodePage, 0, srcStr, -1, NULL, 0);
    if (wideLen <= 0) {
        printf("Convert failed\n");
        return -1;
    }

    std::vector<wchar_t> wide(static_cast<size_t>(wideLen), L'\0');
    if (MultiByteToWideChar(gbkCodePage, 0, srcStr, -1, &wide[0], wideLen) <= 0) {
        printf("Convert failed\n");
        return -1;
    }

    const int utfLen = WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (utfLen <= 0) {
        printf("Convert failed\n");
        return -1;
    }
    if (static_cast<size_t>(utfLen) > maxUtfStrlen) {
        printf("Dst Str memory not enough\n");
        return -1;
    }

    if (WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, utfStr, utfLen, NULL, NULL) <= 0) {
        printf("Convert failed\n");
        return -1;
    }
    return utfLen;
#else
    // Puts the caller's locale back on every return path below.
    code_convert_detail::ScopedLocale restoreLocale;
    const size_t conversionError = static_cast<size_t>(-1);

    // Step 1: decode GBK into wide characters under a GBK locale.
    if (NULL == setlocale(LC_ALL, "zh_CN.gbk")) {
        printf("參數有錯誤\n");
        return -1;
    }

    const size_t wideLen = mbstowcs(NULL, srcStr, 0); // length without the NUL
    if (wideLen == conversionError || wideLen == 0) {
        printf("不能轉換!!!unicodeLen:(%d)\n", static_cast<int>(wideLen));
        return -1;
    }

    std::vector<wchar_t> wide(wideLen + 1, L'\0');
    mbstowcs(&wide[0], srcStr, wideLen + 1);

    // Step 2: encode the wide characters as UTF-8 under a UTF-8 locale.
    if (NULL == setlocale(LC_ALL, "zh_CN.utf8")) {
        printf("參數有錯誤\n");
        return -1;
    }

    const size_t utfLen = wcstombs(NULL, &wide[0], 0); // length without the NUL
    if (utfLen == conversionError || utfLen == 0) {
        printf("不能轉換!!!utfLen:(%d)\n", static_cast<int>(utfLen));
        return -1;
    }
    if (utfLen >= maxUtfStrlen) { // need utfLen bytes plus the terminator
        printf("Dst Str memory not enough\n");
        return -1;
    }

    wcstombs(utfStr, &wide[0], utfLen);
    utfStr[utfLen] = 0; // terminate explicitly
    return static_cast<int>(utfLen);
#endif
}

// Converts the NUL-terminated GBK string srcStr into UTF-8 and stores the
// result in target.
//
// Returns -1 on failure, in which case target is left empty. On success
// returns strlen(srcStr) * 2 + 1, the size of the internal working buffer;
// this value is kept for compatibility and is NOT the length of target,
// so use target.size() for that.
//
// A null srcStr trips an assertion in debug builds and returns -1 otherwise.
// An empty srcStr succeeds and yields an empty target.
inline int gbk2utf8(std::string &target, const char *srcStr)
{
    if (!srcStr) {
        assert(false && "string is empty");
        target.clear();
        return -1;
    }

    const size_t srcLen = strlen(srcStr);
    const int tarLen = static_cast<int>(srcLen) * 2 + 1;

    if (srcLen == 0) {
        // Nothing to convert; the buffer-based overload treats an empty
        // source as an error on some platforms, so it is handled here.
        target.clear();
        return tarLen;
    }

    // Zero-filled so the buffer is always a valid C string.
    std::vector<char> buffer(static_cast<size_t>(tarLen), '\0');
    if (gbk2utf8(&buffer[0], buffer.size() - 1, srcStr) < 0) {
        target.clear();
        return -1;
    }

    target.assign(&buffer[0]);
    return tarLen;
}

#endif
C++ 字串編碼:GBK 到 UTF-8 的轉換