package com.TRS.infra.util;

import java.io.IOException;
/**
 * Hand-rolled conversions between Java strings, UTF-8 byte sequences and raw
 * two-byte (UCS-2 style) byte arrays.
 *
 * NOTE(review): the class and method names are spelled exactly as they appear
 * in this file so that existing references keep resolving; confirm the
 * intended casing against the callers.
 */
public class characterconvertor {

    /** Message carried by every IOException raised for malformed UTF-8 input. */
    private static final String INVALID_UTF8 = "invalid UTF-8 format ";

    /**
     * Empty entry point; performs no work.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        // Intentionally empty.
    }

    /**
     * Decodes a string whose characters each carry one UTF-8 byte into the
     * text those bytes represent.
     *
     * Only the low eight bits of every input character are used. Sequences of
     * one to four bytes are accepted; code points above U+FFFF are appended as
     * a surrogate pair. Overlong encodings and three-byte encoded surrogates
     * are not rejected.
     *
     * @param instr string holding one UTF-8 byte per character
     * @return the decoded text
     * @throws IOException if a lead byte is not a valid UTF-8 lead byte, a
     *         continuation byte is missing or malformed, or the decoded value
     *         exceeds U+10FFFF
     */
    public static String convertutf8string2unicode(String instr)
            throws IOException {
        final int length = instr.length();
        StringBuilder decoded = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            int lead = instr.charAt(index++) & 0xff;
            int codePoint;
            int continuationCount;
            if ((lead & 0x80) == 0) {
                // 0xxxxxxx: plain ASCII.
                codePoint = lead;
                continuationCount = 0;
            } else if ((lead & 0xe0) == 0xc0) {
                // 110xxxxx: two-byte sequence, 5 payload bits in the lead.
                codePoint = lead & 0x1f;
                continuationCount = 1;
            } else if ((lead & 0xf0) == 0xe0) {
                // 1110xxxx: three-byte sequence, 4 payload bits in the lead.
                codePoint = lead & 0x0f;
                continuationCount = 2;
            } else if ((lead & 0xf8) == 0xf0) {
                // 11110xxx: four-byte sequence, 3 payload bits in the lead.
                codePoint = lead & 0x07;
                continuationCount = 3;
            } else {
                // A stray continuation byte or a lead of 0xF8 and above.
                throw new IOException(INVALID_UTF8);
            }
            for (int k = 0; k < continuationCount; k++) {
                if (index >= length) {
                    // Input ends in the middle of a multi-byte sequence.
                    throw new IOException(INVALID_UTF8);
                }
                int next = instr.charAt(index++) & 0xff;
                if ((next & 0xc0) != 0x80) {
                    throw new IOException(INVALID_UTF8);
                }
                codePoint = (codePoint << 6) | (next & 0x3f);
            }
            if (codePoint > Character.MAX_CODE_POINT) {
                // Leads 0xF4..0xF7 can encode values beyond the Unicode range.
                throw new IOException(INVALID_UTF8);
            }
            // appendCodePoint emits a surrogate pair for values above U+FFFF.
            decoded.appendCodePoint(codePoint);
        }
        return decoded.toString();
    }

    /**
     * Encodes a Java string as UTF-8 bytes.
     *
     * The string is walked by code point, so a valid surrogate pair becomes a
     * single four-byte sequence. An unpaired surrogate is encoded as a
     * three-byte sequence of its own value.
     *
     * @param instr text to encode
     * @return a new array holding exactly the encoded bytes
     */
    public static byte[] convertunicode2utf8byte(String instr) {
        final int length = instr.length();
        // Worst case is 3 bytes per UTF-16 unit; a surrogate pair (2 units)
        // needs only 4 bytes in total.
        byte[] buffer = new byte[length * 3];
        int used = 0;
        int index = 0;
        while (index < length) {
            int codePoint = instr.codePointAt(index);
            index += Character.charCount(codePoint);
            if (codePoint < 0x80) {
                buffer[used++] = (byte) codePoint;
            } else if (codePoint < 0x0800) {
                buffer[used++] = (byte) (((codePoint >> 6) & 0x1f) | 0xc0);
                buffer[used++] = (byte) ((codePoint & 0x3f) | 0x80);
            } else if (codePoint < 0x010000) {
                buffer[used++] = (byte) (((codePoint >> 12) & 0x0f) | 0xe0);
                buffer[used++] = (byte) (((codePoint >> 6) & 0x3f) | 0x80);
                buffer[used++] = (byte) ((codePoint & 0x3f) | 0x80);
            } else {
                // Four-byte form; the lead marker is 11110xxx (0xF0).
                buffer[used++] = (byte) (((codePoint >> 18) & 0x07) | 0xf0);
                buffer[used++] = (byte) (((codePoint >> 12) & 0x3f) | 0x80);
                buffer[used++] = (byte) (((codePoint >> 6) & 0x3f) | 0x80);
                buffer[used++] = (byte) ((codePoint & 0x3f) | 0x80);
            }
        }
        byte[] result = new byte[used];
        System.arraycopy(buffer, 0, result, 0, used);
        return result;
    }

    /**
     * Builds a string from a byte array holding two bytes per character, high
     * byte first.
     *
     * If the array is null or has an odd length, "Encoding Error " is printed
     * to standard output and the empty string is returned.
     *
     * @param mybyte character data, two bytes per character, high byte first
     * @return the decoded string, or the empty string on malformed input
     */
    public static String iso106462unicode(byte[] mybyte) {
        if (mybyte == null || (mybyte.length & 1) != 0) {
            // Malformed input yields an empty result rather than a partial one.
            System.out.println("Encoding Error ");
            return "";
        }
        StringBuilder sb = new StringBuilder(mybyte.length / 2);
        for (int i = 0; i < mybyte.length; i += 2) {
            // Mask to undo sign extension of the signed byte values.
            int high = mybyte[i] & 0xff;
            int low = mybyte[i + 1] & 0xff;
            sb.append((char) ((high << 8) | low));
        }
        return sb.toString();
    }

    /**
     * Splits every character of a string into two bytes, low byte first.
     *
     * NOTE(review): this byte order is the opposite of the one read by
     * iso106462unicode, so the two methods are not inverses of each other;
     * confirm which order callers expect before changing either.
     *
     * @param s text to convert
     * @return a new array of length 2 * s.length()
     */
    public static byte[] unicode2byte(String s) {
        final int length = s.length();
        byte[] result = new byte[length << 1];
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            result[2 * i] = (byte) (c & 0xff);
            result[2 * i + 1] = (byte) (c >> 8);
        }
        return result;
    }
}