編碼字元之間的轉換(C/C++)

來源:互聯網
上載者:User

最近一段時間在做一些關於文字編碼方面的東西,常常涉及到各種編碼字元之間的轉換。主要是做中日文方面的,包括中文 GB2312、日文 JIS、SHIFT-JIS,以及它們和 Unicode 碼之間的轉換。

一 GBK <==> Unicode

unsigned short GBK2UNI(unsigned short usGBK)
{
    unsigned char  szEUC[2] = { usGBK >> 8, usGBK & 0xFF };
    unsigned short usUNI;
    MultiByteToWideChar(       
        936,
        0,
        (LPCSTR)szEUC, 
        2,
        &usUNI,    
        1
    );
    return usUNI;  
}

/*
 * Convert one UTF-16 code unit to its GBK (code page 936) code.
 *
 * The first output byte is returned in the high 8 bits and the second in
 * the low 8 bits.  When the conversion produces a single byte, that byte
 * sits in the high 8 bits and the low 8 bits are 0.
 *
 * Returns 0 when WideCharToMultiByte reports failure.
 */
unsigned short UNI2GBK(unsigned short usUNI)
{
    unsigned char szGBK[3] = { 0 };
    int           cbWritten;

    /*
     * Convert exactly one code unit.  Including a terminating NUL in the
     * input would require a third output byte and make the call fail with
     * an insufficient-buffer error for every double-byte character.
     */
    cbWritten = WideCharToMultiByte(936, 0, &usUNI, 1,
                                    (LPSTR)szGBK, 2, NULL, NULL);
    if (cbWritten == 0)
    {
        return 0;
    }
    return (unsigned short)((szGBK[0] << 8) | szGBK[1]);
}

二 SHIFT-JIS <==> Unicode
unsigned short SJIS2UNI(unsigned short usSJIS)
{
    unsigned char  szEUC[2] = { usSJIS >> 8, usSJIS & 0xFF };
    unsigned short usUNI;
    MultiByteToWideChar(       
        932,
        0,
        (LPCSTR)szEUC, 
        2,
        &usUNI,    
        1
    );
    return usUNI;  
}

/*
 * Convert one UTF-16 code unit to its Shift-JIS (code page 932) code.
 *
 * The first output byte is returned in the high 8 bits and the second in
 * the low 8 bits.  When the conversion produces a single byte, that byte
 * sits in the high 8 bits and the low 8 bits are 0.
 *
 * Returns 0 when WideCharToMultiByte reports failure.
 */
unsigned short UNI2SJIS(unsigned short usUNI)
{
    unsigned char szSJIS[3] = { 0 };
    int           cbWritten;

    /*
     * Convert exactly one code unit.  Including a terminating NUL in the
     * input would require a third output byte and make the call fail with
     * an insufficient-buffer error for every double-byte character.
     */
    cbWritten = WideCharToMultiByte(932, 0, &usUNI, 1,
                                    (LPSTR)szSJIS, 2, NULL, NULL);
    if (cbWritten == 0)
    {
        return 0;
    }
    return (unsigned short)((szSJIS[0] << 8) | szSJIS[1]);
}

 

三  JIS <=> Unicode

/*
 * Convert one double-byte JIS code to a UTF-16 code unit.
 *
 * usJIS carries the first byte in its high 8 bits and the second byte in
 * its low 8 bits.  Both bytes get their top bit set (| 0x8080) before being
 * handed to code page 20932.
 *
 * Returns the converted code unit, or 0 when MultiByteToWideChar reports
 * failure.
 */
unsigned short JIS2UNI(unsigned short usJIS)
{
    unsigned short usEUC = (unsigned short)(usJIS | 0x8080);
    /* Explicit casts keep the initializer valid when compiled as C++11. */
    unsigned char  szEUC[2] = { (unsigned char)(usEUC >> 8),
                                (unsigned char)(usEUC & 0xFF) };
    unsigned short usUNI = 0;

    if (MultiByteToWideChar(20932, 0, (LPCSTR)szEUC, 2, &usUNI, 1) == 0)
    {
        /* Do not hand back whatever a failed call left in the buffer. */
        return 0;
    }
    return usUNI;
}
unsigned short UNI2JIS(unsigned short usUNI)
{
    unsigned char  szJIS[3]  = { 0 };
    unsigned short wzUNI[2] = { usUNI, 0 };
    unsigned short usJIS;
    WideCharToMultiByte(
        20932,
        0,
        wzUNI, 
        2,
        (LPSTR)szJIS,    
        2,
        0,
        0
    );
    usJIS = (szJIS[0] << 8) | szJIS[1];   
    return usJIS;
}

 

四  JIS <=> SHIFT-JIS
/*
 * Convert a double-byte Shift-JIS code to the corresponding JIS code.
 *
 * sjis carries the lead byte in its high 8 bits and the trail byte in its
 * low 8 bits.  Returns 0 when sjis is outside 0x8140..0x9FFC and
 * 0xE040..0xEFFC, or when the trail byte is <= 0x3F, == 0x7F or >= 0xFD.
 */
unsigned short SJIS2JIS( unsigned short sjis )
{
    unsigned short lead  = sjis >> 8;
    unsigned short trail = sjis & 0x00ff;
    int in_first_range  = (sjis >= 0x8140) && (sjis <= 0x9ffc);
    int in_second_range = (sjis >= 0xe040) && (sjis <= 0xeffc);

    if ( !in_first_range && !in_second_range )
    {
        return 0;
    }
    if ( (trail <= 0x3f) || (trail == 0x7f) || (trail >= 0xfd) )
    {
        return 0;
    }

    /* Collapse the two lead-byte ranges, then map each lead to a row pair. */
    lead -= ( lead >= 0xe0 ) ? 0xc0 : 0x80;
    lead = (lead << 1) + 0x1f;

    if ( trail >= 0x9f )
    {
        /* Upper half of the trail range belongs to the second row of the pair. */
        lead++;
        trail -= 0x7e;
    }
    else
    {
        /* Close the gap left by the unused 0x7F trail byte. */
        if ( trail >= 0x80 )
        {
            trail--;
        }
        trail -= 0x1f;
    }

    return ( lead << 8 ) + trail;
}

/*
 * Convert a double-byte JIS code to the corresponding Shift-JIS code.
 *
 * jis carries the row byte in its high 8 bits and the cell byte in its low
 * 8 bits.  Both bytes must lie in 0x21..0x7E; any other input returns 0.
 * The result carries the Shift-JIS lead byte in its high 8 bits and the
 * trail byte in its low 8 bits.
 */
unsigned short JIS2SJIS( unsigned short jis )
{
    unsigned int row  = jis >> 8;
    unsigned int cell = jis & 0x00ff;
    unsigned int lead, trail;

    /*
     * Reject out-of-range bytes up front so that an oversized cell byte can
     * neither alias another character nor spill into the lead byte.
     */
    if ( (row < 0x21) || (row > 0x7e) || (cell < 0x21) || (cell > 0x7e) )
    {
        return 0;
    }

    if ( (row & 0x0001) != 0 )
    {
        /* Odd row: trail bytes 0x40..0x9E, skipping the unused 0x7F. */
        trail = cell + 0x1f;
        if ( trail >= 0x7f )
        {
            trail++;
        }
    }
    else
    {
        /* Even row: trail bytes 0x9F..0xFC. */
        trail = cell + 0x7e;
    }

    /* Two JIS rows share one lead byte; leads 0xA0..0xDF are skipped. */
    lead = ((row - 0x21) >> 1) + 0x81;
    if ( lead >= 0xa0 )
    {
        lead += 0x40;
    }

    return (unsigned short)((lead << 8) | trail);
}

UNI2JIS 這個函數好像不太好用,其他的都經過測試,沒有問題。目前我還不知道具體原因,所以現在從 Unicode 轉到 JIS 是分兩個步驟進行的:第一步先將 Unicode 轉到 SHIFT-JIS,第二步再由 SHIFT-JIS 轉到 JIS。如果哪位朋友知道是什麼原因,歡迎在我的部落格裡指點指點。

好了,就這些,希望能給需要的朋友們帶來一點方便。

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.