This method performs the conversion using the Windows character set (code page) tables.
The resulting Unicode code table may differ for some symbols, though not for most characters.
There should be no problem with Chinese characters. If your requirements are stricter, you can buy Sybase's
Unicode Development Kit. :P
[Code]
# Include <stdio. h>
# Include <stdlib. h>
# Include <string. h>
# Include <locale. h>
# Include <ctype. h>
# Include <mbstring. h>
/*
 * Encode one code point as UTF-8.
 *
 * Writes the bytes to `out` when `out` is non-NULL; otherwise only measures.
 * Returns the encoded length (1..4), or 0 when `cp` is above U+10FFFF and
 * therefore has no UTF-8 form.
 */
static size_t utf8_put(unsigned long cp, unsigned char *out)
{
    if (cp <= 0x7FUL) {
        if (out) {
            out[0] = (unsigned char)cp;
        }
        return 1;
    }
    if (cp <= 0x7FFUL) {
        if (out) {
            out[0] = (unsigned char)(0xC0 | (cp >> 6));
            out[1] = (unsigned char)(0x80 | (cp & 0x3F));
        }
        return 2;
    }
    if (cp <= 0xFFFFUL) {
        if (out) {
            out[0] = (unsigned char)(0xE0 | (cp >> 12));
            out[1] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
            out[2] = (unsigned char)(0x80 | (cp & 0x3F));
        }
        return 3;
    }
    if (cp <= 0x10FFFFUL) {
        /* Only reachable where wchar_t is wider than 16 bits. */
        if (out) {
            out[0] = (unsigned char)(0xF0 | (cp >> 18));
            out[1] = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
            out[2] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
            out[3] = (unsigned char)(0x80 | (cp & 0x3F));
        }
        return 4;
    }
    return 0;
}

/**
 * Convert a multibyte string in the current LC_CTYPE locale to UTF-8.
 *
 * The input is widened with mbstowcs() and every resulting wchar_t value is
 * then encoded as one UTF-8 sequence, so the output is only meaningful when
 * the platform's wchar_t values are Unicode code points. UTF-16 surrogate
 * units are not combined; each one is encoded on its own as three bytes.
 *
 * @param utf  Destination buffer, or NULL to only compute the required size.
 *             When non-NULL it must have room for the returned byte count
 *             plus a terminating NUL; no bound is checked here.
 * @param mbs  NUL-terminated multibyte input string.
 * @return Number of UTF-8 bytes required/written, excluding the terminating
 *         NUL, or (size_t)-1 when `mbs` is NULL, the input is not valid in
 *         the current locale, a wide value is above U+10FFFF, or memory
 *         allocation fails. On failure the contents of `utf` are unspecified.
 */
size_t mbstoutf8(unsigned char *utf, const unsigned char *mbs)
{
    const char *src = (const char *)mbs;
    wchar_t *wide;
    size_t wide_len;
    size_t u8need = 0;
    size_t i;

    if (!mbs) {
        return (size_t)-1;
    }

    /* A NULL destination asks mbstowcs() for the wide-character count. */
    wide_len = mbstowcs(NULL, src, 0);
    if (wide_len == (size_t)-1) {
        return (size_t)-1;
    }

    wide = calloc(wide_len + 1, sizeof *wide);
    if (!wide) {
        return (size_t)-1;
    }

    /* wide_len + 1 leaves room for the terminator mbstowcs() appends. */
    if (mbstowcs(wide, src, wide_len + 1) != wide_len) {
        free(wide);
        return (size_t)-1;
    }

    for (i = 0; i < wide_len; i++) {
        size_t n = utf8_put((unsigned long)wide[i],
                            utf ? utf + u8need : NULL);
        if (n == 0) {
            u8need = (size_t)-1;
            break;
        }
        u8need += n;
    }

    if (utf && u8need != (size_t)-1) {
        utf[u8need] = 0;
    }

    free(wide);
    return u8need;
}
/**
 * Read lines from stdin as multibyte text in the selected locale and echo
 * each one to stdout as UTF-8.
 *
 * The loop stops at end of input, on a read error, or at the first empty
 * line. Lines that cannot be converted are skipped. Input longer than the
 * buffer is processed in pieces, so a multibyte character that straddles a
 * piece boundary will fail to convert.
 */
int main(void)
{
    char mbs[81];

    /* ".936" is the Windows CRT spelling for code page 936 (GBK); other
     * platforms will normally reject it and stay in the "C" locale. */
    if (!setlocale(LC_CTYPE, ".936")) {
        fputs("warning: locale \".936\" is not available\n", stderr);
    }

    while (fgets(mbs, sizeof mbs, stdin)) {
        size_t sz;
        unsigned char *u;

        /* Drop the line terminator only if one is actually present. */
        mbs[strcspn(mbs, "\r\n")] = '\0';
        if (!*mbs) {
            break;
        }

        /* First call sizes the output, second call fills it. */
        sz = mbstoutf8(NULL, (unsigned char *)mbs);
        if (sz == (size_t)-1) {
            continue;
        }

        u = malloc(sz + 1);
        if (!u) {
            continue;
        }
        if (mbstoutf8(u, (unsigned char *)mbs) != (size_t)-1) {
            puts((char *)u);
        }
        free(u);
    }
    return 0;
}
# Include <stdio. h>
# Include <stdlib. h>
# Include <string. h>
# Include <locale. h>
# Include <ctype. h>
# Include <mbstring. h>
# Include <wchar. h>
/**
 * Convert a wide-character string to a multibyte string in the current
 * LC_CTYPE locale.
 *
 * Despite the name, the signature fixes the input as wchar_t, not UTF-8
 * bytes: callers holding UTF-8 must decode it to wide characters first.
 * wcstombs() already yields the final locale bytes, so they are returned
 * unchanged with no further per-byte transformation.
 *
 * @param mbs  Destination buffer, or NULL to only compute the required size.
 *             When non-NULL it must have room for the returned byte count
 *             plus a terminating NUL; no bound is checked here.
 * @param utf  NUL-terminated wide-character input string.
 * @return Number of bytes required/written, excluding the terminating NUL,
 *         or (size_t)-1 when `utf` is NULL or contains a character that the
 *         current locale cannot represent.
 */
size_t utf8tombs(unsigned char *mbs, const wchar_t *utf)
{
    size_t need;

    if (!utf) {
        return (size_t)-1;
    }

    /* A NULL destination asks wcstombs() for the byte count. */
    need = wcstombs(NULL, utf, 0);
    if (need == (size_t)-1 || !mbs) {
        return need;
    }

    /* need + 1 lets wcstombs() store the terminating NUL as well. */
    if (wcstombs((char *)mbs, utf, need + 1) != need) {
        return (size_t)-1;
    }
    return need;
}
/*
 * Decode NUL-terminated UTF-8 text into wide characters.
 *
 * Stores at most cap - 1 characters plus a terminating L'\0' in `dst`.
 * Returns the number of characters stored, or (size_t)-1 when the input is
 * malformed, does not fit in `dst`, or contains a code point that wchar_t
 * cannot hold on this platform. Overlong encodings are not rejected.
 */
static size_t utf8_to_wcs(wchar_t *dst, size_t cap, const unsigned char *src)
{
    size_t n = 0;

    if (cap == 0) {
        return (size_t)-1;
    }

    while (*src) {
        unsigned char lead = *src++;
        unsigned long cp;
        int extra;

        if (lead < 0x80) {
            cp = lead;
            extra = 0;
        } else if ((lead & 0xE0) == 0xC0) {
            cp = lead & 0x1FUL;
            extra = 1;
        } else if ((lead & 0xF0) == 0xE0) {
            cp = lead & 0x0FUL;
            extra = 2;
        } else if ((lead & 0xF8) == 0xF0) {
            cp = lead & 0x07UL;
            extra = 3;
        } else {
            return (size_t)-1;
        }

        while (extra-- > 0) {
            /* A NUL here fails the test too, so a truncated sequence
             * never reads past the end of the string. */
            if ((*src & 0xC0) != 0x80) {
                return (size_t)-1;
            }
            cp = (cp << 6) | (*src++ & 0x3FUL);
        }

        if (cp > 0x10FFFFUL || cp > (unsigned long)WCHAR_MAX ||
            n + 1 >= cap) {
            return (size_t)-1;
        }
        dst[n++] = (wchar_t)cp;
    }

    dst[n] = L'\0';
    return n;
}

/**
 * Read UTF-8 lines from stdin and echo each one to stdout converted to the
 * multibyte encoding of the selected locale.
 *
 * The loop stops at end of input, on a read error, or at the first empty
 * line. Lines that are not valid UTF-8 or cannot be converted are skipped.
 * Input longer than the buffer is processed in pieces, so a UTF-8 sequence
 * that straddles a piece boundary will be rejected.
 */
int main(void)
{
    char utf[81];
    wchar_t wide[81]; /* one wide char per input byte is always enough */

    /* ".936" is the Windows CRT spelling for code page 936 (GBK); other
     * platforms will normally reject it and stay in the "C" locale. */
    if (!setlocale(LC_CTYPE, ".936")) {
        fputs("warning: locale \".936\" is not available\n", stderr);
    }

    while (fgets(utf, sizeof utf, stdin)) {
        size_t sz;
        unsigned char *u;

        /* Drop the line terminator only if one is actually present. */
        utf[strcspn(utf, "\r\n")] = '\0';
        if (!*utf) {
            break;
        }

        /* utf8tombs() takes wide characters, so decode the bytes first. */
        if (utf8_to_wcs(wide, sizeof wide / sizeof wide[0],
                        (const unsigned char *)utf) == (size_t)-1) {
            continue;
        }

        /* First call sizes the output, second call fills it. */
        sz = utf8tombs(NULL, wide);
        if (sz == (size_t)-1) {
            continue;
        }

        u = malloc(sz + 1);
        if (!u) {
            continue;
        }
        if (utf8tombs(u, wide) != (size_t)-1) {
            puts((char *)u);
        }
        free(u);
    }
    return 0;
}
[/Code]