網上找到的各個編碼之間的轉換

來源:互聯網
上載者:User

package com.trs.infra.util;

import java.io.IOException;

/**
 * Hand-rolled conversions between Java strings (UTF-16), UTF-8 byte
 * sequences and raw two-byte-per-character arrays.
 */
public class CharacterConvertor {

    /** Error text used for every malformed UTF-8 input. */
    private static final String INVALID_UTF8 = "Invalid UTF-8 format";

    /**
     * Entry point kept for backward compatibility; it performs no work.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        // Intentionally empty.
    }

    /**
     * Decodes a UTF-8 byte sequence that is carried one byte per
     * {@code char} in a string.
     *
     * <p>Only the low eight bits of every input char are used. Sequences of
     * one to four bytes are supported; code points above U+FFFF are appended
     * as a surrogate pair. Overlong encodings and encoded surrogates are not
     * rejected.
     *
     * @param instr string whose chars each hold one UTF-8 byte
     * @return the decoded text
     * @throws IOException if a lead byte is not a valid UTF-8 lead byte, a
     *         continuation byte is missing or malformed, or the decoded
     *         value lies above U+10FFFF
     */
    public static String convertUTF8String2Unicode(String instr)
            throws IOException {
        int length = instr.length();
        StringBuilder decoded = new StringBuilder(length);

        for (int i = 0; i < length;) {
            int lead = instr.charAt(i++) & 0xff;
            int codePoint;
            int continuationCount;

            if ((lead & 0x80) == 0) {
                // 0xxxxxxx: plain ASCII.
                codePoint = lead;
                continuationCount = 0;
            } else if ((lead & 0xE0) == 0xC0) {
                // 110xxxxx: two-byte sequence, 5 payload bits in the lead.
                codePoint = lead & 0x1F;
                continuationCount = 1;
            } else if ((lead & 0xF0) == 0xE0) {
                // 1110xxxx: three-byte sequence, 4 payload bits in the lead.
                codePoint = lead & 0x0F;
                continuationCount = 2;
            } else if ((lead & 0xF8) == 0xF0) {
                // 11110xxx: four-byte sequence, only 3 payload bits in the lead.
                codePoint = lead & 0x07;
                continuationCount = 3;
            } else {
                // A stray continuation byte or a 0xF8..0xFF lead byte.
                throw new IOException(INVALID_UTF8);
            }

            for (int k = 0; k < continuationCount; k++) {
                if (i >= length) {
                    // Input ended in the middle of a multi-byte sequence.
                    throw new IOException(INVALID_UTF8);
                }
                int next = instr.charAt(i++) & 0xff;
                if ((next & 0xC0) != 0x80) {
                    throw new IOException(INVALID_UTF8);
                }
                codePoint = (codePoint << 6) | (next & 0x3F);
            }

            if (codePoint > Character.MAX_CODE_POINT) {
                throw new IOException(INVALID_UTF8);
            }
            // appendCodePoint emits a surrogate pair for values above U+FFFF.
            decoded.appendCodePoint(codePoint);
        }

        return decoded.toString();
    }

    /**
     * Encodes a Java string as UTF-8 bytes.
     *
     * <p>The string is walked by code point, so a valid surrogate pair is
     * written as a single four-byte sequence. An unpaired surrogate is
     * written as a three-byte sequence of its own value.
     *
     * @param instr text to encode
     * @return a newly allocated array holding exactly the encoded bytes
     */
    public static byte[] convertUnicode2UTF8Byte(String instr) {
        int len = instr.length();
        // Worst case is three bytes per UTF-16 unit: a surrogate pair takes
        // two units but only four bytes.
        byte[] buffer = new byte[len * 3];
        int j = 0;

        for (int i = 0; i < len;) {
            int cp = instr.codePointAt(i);
            i += Character.charCount(cp);

            if (cp < 0x80) {
                buffer[j++] = (byte) cp;
            } else if (cp < 0x0800) {
                buffer[j++] = (byte) (((cp >> 6) & 0x1F) | 0xC0);
                buffer[j++] = (byte) ((cp & 0x3F) | 0x80);
            } else if (cp < 0x010000) {
                buffer[j++] = (byte) (((cp >> 12) & 0x0F) | 0xE0);
                buffer[j++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
                buffer[j++] = (byte) ((cp & 0x3F) | 0x80);
            } else {
                // Four-byte form; the lead byte marker is 11110xxx (0xF0).
                buffer[j++] = (byte) (((cp >> 18) & 0x07) | 0xF0);
                buffer[j++] = (byte) (((cp >> 12) & 0x3F) | 0x80);
                buffer[j++] = (byte) (((cp >> 6) & 0x3F) | 0x80);
                buffer[j++] = (byte) ((cp & 0x3F) | 0x80);
            }
        }

        byte[] result = new byte[j];
        System.arraycopy(buffer, 0, result, 0, j);
        return result;
    }

    /**
     * Builds a string from an array of two-byte units, high-order byte
     * first: each pair of bytes becomes one {@code char}.
     *
     * <p>If the array is {@code null} or has an odd length, the method
     * prints {@code Encoding Error} to standard output and returns an empty
     * string; no partial result is returned.
     *
     * @param myByte bytes to decode, two per character, high byte first
     * @return the decoded string, or an empty string on invalid input
     */
    public static String ISO106462Unicode(byte[] myByte) {
        if (myByte == null || (myByte.length & 1) != 0) {
            System.out.println("Encoding Error");
            return "";
        }

        StringBuilder sb = new StringBuilder(myByte.length >> 1);
        for (int i = 0; i < myByte.length; i += 2) {
            int hi = myByte[i] & 0xff;     // high eight bits
            int lo = myByte[i + 1] & 0xff; // low eight bits
            sb.append((char) ((hi << 8) | lo));
        }
        return sb.toString();
    }

    /**
     * Splits every {@code char} of a string into two bytes, low-order byte
     * first.
     *
     * <p>NOTE(review): this is the opposite byte order from the one
     * {@link #ISO106462Unicode(byte[])} consumes, so the two methods do not
     * round-trip. The order is kept as-is because callers may depend on it;
     * confirm whether that is intended.
     *
     * @param s text to convert
     * @return an array of {@code 2 * s.length()} bytes
     */
    public static byte[] Unicode2Byte(String s) {
        int len = s.length();
        byte[] out = new byte[len << 1];
        int j = 0;
        for (int i = 0; i < len; i++) {
            char c = s.charAt(i);
            out[j++] = (byte) (c & 0xff);
            out[j++] = (byte) (c >> 8);
        }
        return out;
    }

}

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.