In the last section, I worked on some text-encoding tasks, which often involve converting characters between different encodings. The work mainly concerned Chinese and Japanese text, covering Chinese GB2312 (GBK), Japanese JIS and SHIFT-JIS, and the conversions between each of them and Unicode.
1 GBK <=> Unicode
Unsigned short gbk2uni (unsigned short usgbk)
{
Unsigned char szeuc [2] = {usgbk> 8, usgbk & 0xff };
Unsigned short usuni;
Multibytetowidechar (
936,
0,
(Lpcstr) szeuc,
2,
& Usuni,
1
);
Return usuni;
}
Unsigned short uni2gbk (unsigned short usuni)
{
Unsigned char szgbk [3] = {0 };
Unsigned short wzuni [2] = {usuni, 0 };
Unsigned short usgbk;
Widechartomultibyte (
936,
0,
Wzuni,
2,
(Lpstr) szgbk,
2,
0,
0
);
Usgbk = (szgbk [0] <8) | szgbk [1];
Return usgbk;
}
2 SHIFT-JIS <=> Unicode
Unsigned short sjis2uni (unsigned short ussjis)
{
Unsigned char szeuc [2] = {ussjis> 8, ussjis & 0xff };
Unsigned short usuni;
Multibytetowidechar (
932,
0,
(Lpcstr) szeuc,
2,
& Usuni,
1
);
Return usuni;
}
Unsigned short uni2sjis (unsigned short usuni)
{
Unsigned char szsjis [3] = {0 };
Unsigned short wzuni [2] = {usuni, 0 };
Unsigned short ussjis;
Widechartomultibyte (
932,
0,
Wzuni,
2,
(Lpstr) szsjis,
2,
0,
0
);
Ussjis = (szsjis [0] <8) | szsjis [1];
Return ussjis;
}
3 JIS <=> Unicode
Unsigned short jis2uni (unsigned short usjis)
{
Unsigned char szeuc [2] = {(usjis | 0x8080)> 8, (usjis | 0x8080) & 0xff };
Unsigned short usuni;
Multibytetowidechar (
20932,
0,
(Lpcstr) szeuc,
2,
& Usuni,
1
);
Return usuni;
}
Unsigned short uni2jis (unsigned short usuni)
{
Unsigned char szjis [3] = {0 };
Unsigned short wzuni [2] = {usuni, 0 };
Unsigned short usjis;
Widechartomultibyte (
20932,
0,
Wzuni,
2,
(Lpstr) szjis,
2,
0,
0
);
Usjis = (szjis [0] <8) | szjis [1];
Return usjis;
}
4 JIS <=> SHIFT-JIS
/*
 * Convert a double-byte Shift-JIS code to the corresponding JIS code.
 *
 * sjis packs the character as (lead byte << 8) | trail byte.  Accepted
 * inputs are 0x8140..0x9FFC and 0xE040..0xEFFC with a trail byte that is
 * greater than 0x3F, not 0x7F, and less than 0xFD.
 *
 * Returns the JIS code as (first byte << 8) + second byte, or 0 when the
 * input is outside the accepted ranges.
 */
unsigned short sjis2jis(unsigned short sjis)
{
    unsigned short ubyte;
    unsigned short lbyte;

    if (!((sjis >= 0x8140 && sjis <= 0x9ffc) ||
          (sjis >= 0xe040 && sjis <= 0xeffc))) {
        return 0;
    }

    ubyte = (unsigned short)(sjis >> 8);
    lbyte = (unsigned short)(sjis & 0x00ff);

    if (lbyte <= 0x3f || lbyte == 0x7f || lbyte >= 0xfd) {
        return 0;
    }

    /* Fold the two lead-byte ranges (0x81.. and 0xE0..) into one sequence. */
    if (ubyte >= 0xe0) {
        ubyte -= 0xc0;
    } else {
        ubyte -= 0x80;
    }

    /* Each Shift-JIS lead byte covers two JIS rows; start at the odd one. */
    ubyte = (unsigned short)((ubyte << 1) + 0x1f);

    if (lbyte >= 0x9f) {
        /* Upper half of the trail-byte range is the following (even) row. */
        ubyte++;
        lbyte -= 0x7e;
    } else {
        /* Close the gap left by the unused trail byte 0x7F. */
        if (lbyte >= 0x80) {
            lbyte--;
        }
        lbyte -= 0x1f;
    }

    return (unsigned short)((ubyte << 8) + lbyte);
}
/*
 * Convert a JIS code to the corresponding double-byte Shift-JIS code.
 *
 * jis packs the character as (first byte << 8) | second byte.  Both bytes
 * must lie in 0x21..0x7E; anything else is rejected.
 *
 * Returns the Shift-JIS code as (lead byte << 8) | trail byte, or 0 when
 * the input is not a valid JIS code.
 */
unsigned short jis2sjis(unsigned short jis)
{
    unsigned short ubyte = (unsigned short)(jis >> 8);
    unsigned short lbyte = (unsigned short)(jis & 0x00ff);

    /*
     * Validate up front: with both bytes in range, every result below lands
     * in the lead-byte ranges 0x81..0x9F / 0xE0..0xEF and the trail-byte
     * ranges 0x40..0x7E / 0x80..0xFC, so no later check is needed.
     */
    if (ubyte < 0x21 || ubyte > 0x7e || lbyte < 0x21 || lbyte > 0x7e) {
        return 0;
    }

    if (ubyte & 0x0001) {
        /* Odd row: trail bytes 0x40..0x9E, skipping the unused 0x7F. */
        lbyte += 0x1f;
        if (lbyte >= 0x7f) {
            lbyte++;
        }
    } else {
        /* Even row: trail bytes 0x9F..0xFC, sharing the odd row's lead byte. */
        lbyte += 0x7e;
        ubyte--;
    }

    /* Two JIS rows map onto one Shift-JIS lead byte. */
    ubyte = (unsigned short)(((ubyte - 0x1f) >> 1) + 0x80);
    if (ubyte >= 0xa0) {
        ubyte += 0x40; /* skip 0xA0..0xDF, continue at 0xE0 */
    }

    return (unsigned short)((ubyte << 8) | lbyte);
}
The uni2jis function does not seem to work very well; the others have been tested and show no problems. I do not yet know the specific reason, so for now I go from Unicode to JIS in two steps: first I convert Unicode to SHIFT-JIS, and then I convert SHIFT-JIS to JIS. If any reader knows why, please leave me some advice on my blog.
Okay, that's all. I hope it will bring some convenience to friends who need it.