From: http://www.cppblog.com/fdsajhg/archive/2010/09/03/125770.aspx
Byte to Unicode
// Naive byte-to-wide-character widening: each of the 200 BYTE elements of dwByte is
// assigned to the element of dwcharw with the same index (implicit integer conversion).
// No code-page or UTF-8 decoding takes place.
// NOTE(review): dwByte is not initialised in this snippet; presumably the caller fills it first.
// NOTE(review): presumably only meaningful for byte values that already equal their Unicode
// code point (e.g. ASCII); confirm before using it for multi-byte text.
//////////////BYTE dwByte[200];wchar_t dwcharw[200];for(int i=0;i<200;i++){dwcharw[i]=dwByte[i];}
(Char -- for -- Unicode)
Windows API gb2312/Unicode/UTF-8 Conversion
/** Gb2312/Unicode/UTF-8 conversion * wlcims workstation 2008-09-08 * http://wlcims.cn */# ifndef _ ocugx_h _ # DEFINE _ ocugx_h _ class ocugx {public: // --------- UTF-8 to Unicode, single word ---------------- int cu8xu (wchar * pout, char * ptext) {int ret = 0; char * uchar = (char *) pout; unsigned CIN = (unsigned char) ptext [0]; If (CIN <0x80) {// ASCII 0x00 ~ 0x7f pout [0] = ptext [0];} else if (CIN <0xdf) {uchar [0] = (ptext [0] <6) | (ptext [1] & 0x3f); uchar [1] = (ptext [0]> 2) & 0x0f; ret = 1;} else if (CIN <0xef) {uchar [0] = (ptext [1] <6) | (ptext [2] & 0x3f); uchar [1] = (ptext [0] <4) | (ptext [1]> 2) & 0x0f); ret = 2;} else if (CIN <0xf7) {uchar [0] = (ptext [2] <6) | (ptext [3] & 0x3f); uchar [1] = (ptext [1] <4) | (ptext [2]> 2) & 0x0f); uchar [2] = (ptext [0] <2) & 0x1c) | (ptext [1]> 4) & 0x03); ret = 3;} return ret;} // ---------- Unicode to UTF-8, single word ------------------ int cuxu8 (char * pout, wchar * ptext) {int ret = 0; unsigned char * pchar = (unsigned char *) ptext; If (ptext [0] <= 0x7f) {// ASCII 0x00 ~ 0x7f pout [0] = (char) pchar [0];} else if (ptext [0] <= 0x7ff) {// 0x080 ~ 0x7ff pout [0] = 0xc0 | (pchar [1] <2) | (pchar [0]> 6 ); pout [1] = 0x80 | (pchar [0] & 0x3f); ret = 1;} else {// 0x0800 ~ 0 xFFFF pout [0] = 0xe0 | (pchar [1]> 4); pout [1] = 0x80 | (pchar [1] & 0x0f) <2) | (pchar [0]> 6); pout [2] = 0x80 | (pchar [0] & 0x3f); ret = 2;} return ret ;} // ----------- convert Unicode to gb2312, single word ---------------- int cuxg (char * pout, wchar * ptext) {int ret = 0; If (ptext [0] <0x80) {// ASCII 0x00 ~ 0x7f pout [0] = (char) ptext [0];} else {: widechartomultibyte (cp_acp, 0, ptext, 1, pout, sizeof (wchar), null, null); ret = 1;} return ret;} // ----------- convert gb2312 to Unicode, single word ---------------- int cgxu (wchar * pout, char * ptext) {int ret = 0; if (unsigned) ptext [0] <0x80) {// ASCII 0x00 ~ 0x7f pout [0] = (wchar) ptext [0];} else {: multibytetowidechar (cp_acp, mb_precomposed, ptext, 2, pout, 
1); ret = 1 ;} return ret;} // ------------ UTF-8 to Unicode, string ---------------- int su8xu (wchar * pout, char * ptext, int Len) {int I, j; for (I = 0, j = 0; I <Len; I ++, J ++) {I + = cu8xu (& pout [J], & ptext [I]);} return J ;} // ------------ Unicode to UTF-8, string ---------------- int suxu8 (char * pout, wchar * ptext, int Len) {int I, J; for (I = 0, j = 0; I <Len; I ++, J ++) {J + = cuxu8 (& pout [J], & ptext [I]);} return J;} // ------------ convert Unicode to gb2312, string ---------------- int suxg (char * pout, wchar * ptext, int Len) {int I, j; for (I = 0, j = 0; I <Len; I ++, J ++) {J + = cuxg (& pout [J], & ptext [I]);} return J;} // ------------ convert gb2312 to Unicode, string ---------------- int sgxu (wchar * pout, char * ptext, int Len) {int I, j; for (I = 0, j = 0; I <Len; I ++, J ++) {I + = cgxu (& pout [J], & Ptext [I]);} return J;} // ------------ gb2312 to UTF-8, string ------------------ int sgxu8 (char * pout, char * ptext, int Len) {int I, j; wchar Buf; for (I = 0, j = 0; I <Len; I ++, J ++) {If (unsigned) ptext [0] <0x80) {// ASCII 0x00 ~ 0x7f pout [J] = ptext [I];} else {I + = cgxu (& Buf, & ptext [I]); j + = cuxu8 (& pout [J], & BUF) ;}} return J ;}// ------------ UTF-8 to gb2312, string ------------------ int su8xg (char * pout, char * ptext, int Len) {int I, J; wchar Buf; for (I = 0, j = 0; I <Len; I ++, J ++) {If (unsigned) ptext [0] <0x80) {// ASCII 0x00 ~ 0x7f pout [J] = ptext [I];} else {I + = cu8xu (& Buf, & ptext [I]); j + = cuxg (& pout [J], & BUF) ;}} return J ;};# endif
Thanks for your post; it helped me solve a problem that had plagued me for a whole afternoon. However, I found a mistake. In the function below, the `if` condition tests `ptext[0]`, but it should test the byte at the current loop index, i.e. `ptext[i]`:
// ------------ UTF-8 to gb2312, string ------------------
Int su8xg (char * pout, char * ptext, int Len ){
Int I, J;
Wchar Buf;
For (I = 0, j = 0; I <Len; I ++, J ++ ){
If (unsigned) ptext [0] <0x80) {// ASCII 0x00 ~ 0x7f
Pout [J] = ptext [I];
} Else {
I + = cu8xu (& Buf, & ptext [I]);
J + = cuxg (& pout [J], & BUF );
}
}
Return J;
}
Feedback
# Re: Windows API gb2312/Unicode/UTF-8 conversion reply more comments
By Michael genn
I also fixed a bug: the cu8xu function (UTF-8 to Unicode, single character) converts the full-width comma ("，", U+FF0C, UTF-8 bytes EF BC 8C) incorrectly.
The cause is the range check on the lead byte `cIn`: some of the `<` comparisons need to be `<=`, otherwise a lead byte that sits exactly on a boundary (such as 0xEF) falls through to the wrong branch.
Below is my revised version:
/**
 * Decode the single UTF-8 sequence that starts at ptext into pout[0].
 *
 * @param pout  Receives the decoded character (exactly one wchar_t is written).
 * @param ptext Points at the lead byte. The continuation bytes implied by
 *              the lead byte are read without any bounds check, so the
 *              caller must make sure they are present.
 * @return      Number of continuation bytes consumed (0..3), i.e. the
 *              sequence length minus one.
 *
 * A stray continuation byte (0x80..0xBF) or an invalid lead byte (>= 0xF8)
 * yields U+FFFD and a return value of 0. A 4-byte sequence also yields
 * U+FFFD when wchar_t is too narrow to hold a code point above U+FFFF.
 */
int cu8xu(wchar_t *pout, char *ptext)
{
    /* Work on unsigned bytes: plain char may be signed. */
    const unsigned char *src = (const unsigned char *)ptext;
    unsigned lead = src[0];
    unsigned long cp;
    int ret = 0;

    if (lead < 0x80) {                          /* ASCII 0x00 ~ 0x7f */
        cp = lead;
    } else if (lead >= 0xc0 && lead <= 0xdf) {  /* 110xxxxx 10xxxxxx */
        cp = ((unsigned long)(lead & 0x1f) << 6)
           | (unsigned long)(src[1] & 0x3f);
        ret = 1;
    } else if (lead >= 0xe0 && lead <= 0xef) {  /* 1110xxxx + 2 bytes */
        /* Inclusive upper bound: the full-width comma U+FF0C is EF BC 8C. */
        cp = ((unsigned long)(lead & 0x0f) << 12)
           | ((unsigned long)(src[1] & 0x3f) << 6)
           | (unsigned long)(src[2] & 0x3f);
        ret = 2;
    } else if (lead >= 0xf0 && lead <= 0xf7) {  /* 11110xxx + 3 bytes */
        cp = ((unsigned long)(lead & 0x07) << 18)
           | ((unsigned long)(src[1] & 0x3f) << 12)
           | ((unsigned long)(src[2] & 0x3f) << 6)
           | (unsigned long)(src[3] & 0x3f);
        ret = 3;
    } else {
        cp = 0xFFFD;                            /* not a valid lead byte */
    }

    /* With a 16-bit wchar_t a code point above U+FFFF does not fit. */
    if (cp > 0xFFFF && sizeof(wchar_t) < 4) {
        cp = 0xFFFD;
    }

    /* Assign the whole element instead of poking individual bytes, so the
     * result does not depend on byte order or on sizeof(wchar_t). */
    pout[0] = (wchar_t)cp;
    return ret;
}