Converting between UTF-8, UTF-16 (Unicode), and ANSI strings
Author: Floating white clouds
UTF-8 to UTF-16:
Cstringw convertutf8toutf16 (const cstringa & utf8) <br/>{< br/> int WLEN = multibytetowidechar (cp_utf8, 0, utf8,-1, 0, 0 ); </P> <p> cstringw Buf; <br/> wchar * dd = Buf. getbuffer (WLEN); </P> <p> WLEN = multibytetowidechar (cp_utf8, 0, utf8,-1, DD, WLEN); </P> <p> Buf. releasebuffer (WLEN); </P> <p> return Buf; <br/>}
Unicode (UTF-16) to UTF-8:
Cstringa convertunicodetoutf8 (const cstringw & Unicode) <br/>{< br/> // wide Char to multi char <br/> int WLEN = widechartomultibyte (cp_utf8, 0, Unicode, -1, null, 0, null, null); </P> <p> cstringa Buf; <br/> char * dd = Buf. getbuffer (WLEN); </P> <p> widechartomultibyte (cp_utf8, 0, Unicode,-1, DD, WLEN, null, null ); </P> <p> Buf. releasebuffer (WLEN); </P> <p> return Buf; <br/>}
Unicode (UTF-16) to ANSI (the system's active code page, CP_ACP):
Cstringa convertunicodetoansi (const cstringw & Unicode) <br/>{< br/> // wide Char to multi char <br/> int WLEN = widechartomultibyte (cp_acp, 0, Unicode, -1, null, 0, null, null); </P> <p> cstringa Buf; <br/> char * dd = Buf. getbuffer (WLEN); </P> <p> widechartomultibyte (cp_acp, 0, Unicode,-1, DD, WLEN, null, null ); </P> <p> Buf. releasebuffer (WLEN); </P> <p> return Buf; <br/>}
Another UTF-16 to UTF-8 conversion, for a version that does not call WideCharToMultiByte:
/*
 * Convert a UTF-16 buffer to NUL-terminated UTF-8 without calling
 * WideCharToMultiByte.
 *
 * src      UTF-16 code units to convert (need not be NUL-terminated).
 * srclen   number of code units in src.
 * dest     output buffer for the UTF-8 bytes.
 * destlen  size of dest in bytes, including room for the terminating NUL.
 *
 * Returns the number of bytes written to dest, not counting the terminating
 * NUL. Returns 0 without touching dest when src or dest is null or destlen
 * is not positive.
 *
 * Conversion stops early when the next character would not fit together
 * with the terminator; a multi-byte sequence is never written partially.
 * Surrogate pairs are combined into one 4-byte sequence. Unpaired surrogates
 * and values above U+10FFFF are emitted as U+FFFD (REPLACEMENT CHARACTER).
 */
int convertunicodetoutf8(const wchar_t *src, int srclen,
                         unsigned char *dest, int destlen)
{
    int i;
    int outputlen = 0; /* bytes written so far, terminator excluded */

    if (!src || !dest || destlen <= 0) {
        return 0;
    }

    for (i = 0; i < srclen; i++) {
        /* Widen first so the arithmetic is independent of wchar_t's size
         * and signedness. */
        unsigned long cp = (unsigned long)src[i];
        int need;

        if (cp >= 0xD800UL && cp <= 0xDBFFUL) {
            /* High surrogate: valid only when a low surrogate follows. */
            unsigned long lo = 0;

            if (i + 1 < srclen) {
                lo = (unsigned long)src[i + 1];
            }
            if (lo >= 0xDC00UL && lo <= 0xDFFFUL) {
                cp = 0x10000UL + ((cp - 0xD800UL) << 10) + (lo - 0xDC00UL);
                i++; /* the low surrogate has been consumed */
            } else {
                cp = 0xFFFDUL;
            }
        } else if ((cp >= 0xDC00UL && cp <= 0xDFFFUL) || cp > 0x10FFFFUL) {
            /* Stray low surrogate or value outside the Unicode range. */
            cp = 0xFFFDUL;
        }

        if (cp < 0x80UL) {
            need = 1;
        } else if (cp < 0x800UL) {
            need = 2;
        } else if (cp < 0x10000UL) {
            need = 3;
        } else {
            need = 4;
        }

        /* Keep one byte in reserve for the terminator. */
        if (need > destlen - 1 - outputlen) {
            break;
        }

        switch (need) {
        case 1:
            /* 0xxxxxxx */
            dest[outputlen++] = (unsigned char)cp;
            break;
        case 2:
            /* 110xxxxx 10xxxxxx */
            dest[outputlen++] = (unsigned char)(0xC0UL | (cp >> 6));
            dest[outputlen++] = (unsigned char)(0x80UL | (cp & 0x3FUL));
            break;
        case 3:
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            dest[outputlen++] = (unsigned char)(0xE0UL | (cp >> 12));
            dest[outputlen++] = (unsigned char)(0x80UL | ((cp >> 6) & 0x3FUL));
            dest[outputlen++] = (unsigned char)(0x80UL | (cp & 0x3FUL));
            break;
        default:
            /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            dest[outputlen++] = (unsigned char)(0xF0UL | (cp >> 18));
            dest[outputlen++] = (unsigned char)(0x80UL | ((cp >> 12) & 0x3FUL));
            dest[outputlen++] = (unsigned char)(0x80UL | ((cp >> 6) & 0x3FUL));
            dest[outputlen++] = (unsigned char)(0x80UL | (cp & 0x3FUL));
            break;
        }
    }

    dest[outputlen] = '\0';
    return outputlen;
}