標籤:
轉自:一葉飄舟 http://blog.csdn.net/jdsjlzx/article/details/7058823
package lia.meetlucene;

import java.io.IOException;

/**
 * Converts text to and from "backslash-u" escape notation, i.e. a backslash,
 * the letter {@code u} and the hexadecimal value of one UTF-16 code unit
 * (written {@code "\\u7b80"} as a Java string literal).
 *
 * <p>Both conversions work on UTF-16 code units, so a supplementary character
 * is represented by the two escapes of its surrogate pair.
 */
public class Unicode {

    /** Number of hexadecimal digits in a canonical escape. */
    private static final int HEX_DIGITS = 4;

    /**
     * Small demonstration: prints the escaped form of a sample string, the text
     * decoded from a fixed escape sequence, a separator line, and the index of
     * the first backslash in the sample (which is -1).
     *
     * @param args ignored
     * @throws IOException never thrown by this implementation; kept in the
     *         signature so existing callers that catch it still compile. The
     *         previously declared Lucene {@code CorruptIndexException} is a
     *         subclass of {@code IOException} and was never thrown either, so
     *         the class no longer needs Lucene on the classpath.
     */
    public static void main(String[] args) throws IOException {
        String s = "簡介";
        String tt = gbEncoding(s);
        // Print the escaped form of the sample text.
        System.out.println("unicodeBytes is: " + tt);
        // Print the text that a fixed escape sequence stands for.
        System.out.println("對應的中文: " + decodeUnicode("\\u7b80\\u4ecb"));
        System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
        System.out.println(s.indexOf("\\"));
    }

    /**
     * Escapes every UTF-16 code unit of the input as a backslash, {@code u} and
     * exactly four lowercase hexadecimal digits.
     *
     * <p>Values are always zero-padded to four digits; the earlier
     * implementation produced three-digit output for values below 0x10 and for
     * values in the range 0x100-0xFFF.
     *
     * @param gbString text to escape; must not be {@code null}
     * @return the escaped text, or an empty string for empty input
     * @throws NullPointerException if {@code gbString} is {@code null}
     */
    public static String gbEncoding(final String gbString) {
        // Each code unit becomes six characters: backslash, 'u', four digits.
        final StringBuilder escaped = new StringBuilder(gbString.length() * (HEX_DIGITS + 2));
        for (int i = 0; i < gbString.length(); i++) {
            final String hex = Integer.toHexString(gbString.charAt(i));
            escaped.append("\\u");
            for (int pad = hex.length(); pad < HEX_DIGITS; pad++) {
                escaped.append('0');
            }
            escaped.append(hex);
        }
        return escaped.toString();
    }

    /**
     * Replaces every backslash-u escape in the input with the character it
     * denotes and copies all other text through unchanged.
     *
     * <p>An escape is a backslash, the letter {@code u} and one to four
     * hexadecimal digits. Fewer than four digits are accepted so that the
     * unpadded output of older versions of {@link #gbEncoding(String)} still
     * decodes. A backslash-u that is not followed by any hexadecimal digit is
     * kept literally.
     *
     * @param dataStr text that may contain escapes; must not be {@code null}
     * @return the decoded text, or an empty string for empty input
     * @throws NullPointerException if {@code dataStr} is {@code null}
     */
    public static String decodeUnicode(final String dataStr) {
        final int length = dataStr.length();
        final StringBuilder decoded = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            final char current = dataStr.charAt(pos);
            if (current == '\\' && pos + 1 < length && dataStr.charAt(pos + 1) == 'u') {
                final int digitsStart = pos + 2;
                int digitsEnd = digitsStart;
                int codeUnit = 0;
                // Consume at most four hex digits so trailing literal text
                // such as the "b" in "...0041b" is not swallowed.
                while (digitsEnd < length && digitsEnd - digitsStart < HEX_DIGITS) {
                    final int digit = Character.digit(dataStr.charAt(digitsEnd), 16);
                    if (digit < 0) {
                        break;
                    }
                    codeUnit = (codeUnit << 4) | digit;
                    digitsEnd++;
                }
                if (digitsEnd > digitsStart) {
                    decoded.append((char) codeUnit);
                    pos = digitsEnd;
                    continue;
                }
            }
            // Not (the start of) a well-formed escape: keep the character.
            decoded.append(current);
            pos++;
        }
        return decoded.toString();
    }
}
代碼詳解:
public static String decodeUnicode(final String dataStr) { int start = 0; int end = 0; final StringBuffer buffer = new StringBuffer(); while (start > -1) { end = dataStr.indexOf("\\u", start + 1); //使得第一個unicode在start~end之間,+1,+2,+3均可 System.out.println(start + "asdfasd~~~~~~~~~~~~~~~~~~~~~``" + end); // the index of the first occurrence of the specified substring, // starting at the specified index, // or -1 if there is no such occurrence. String charStr = ""; if (end == -1) { charStr = dataStr.substring(start + 2, dataStr.length()); } else { charStr = dataStr.substring(start + 2, end); } char letter = 0; if (charStr.length() == 4) { letter = (char) Integer.parseInt(charStr, 16); // 16進位parse整形字串。 } //防止出錯 buffer.append(new Character(letter).toString()); start = end; } return buffer.toString(); }
Java 中文轉換成 Unicode 編碼和 Unicode 編碼轉換成中文