[Author]: Kwu
This post shows how Sqoop handles CLOB and BLOB fields. In Oracle, a CLOB holds large text and a BLOB stores binary data.
Fields of these types require special handling when they are imported into Hive or HDFS.
1. Test table in Oracle
CREATE TABLE t_lob ( A INTEGER, B CLOB, C BLOB )
Test data
INSERT INTO t_lob (A, B, C) VALUES (1, 'Clob test', TO_BLOB('3456'));
2. Sqoop Script
Contents of the options file importhdfs.opt (one option or value per line):
import
--append
--connect
jdbc:oracle:thin:@localhost:1521/orcl
--username
wuke
--password
abcd1234
--table
BDC_TEST.T_LOB
--columns
"A,B,C"
--target-dir
/tmp/t_lob
-m
1
Run the script
sqoop --options-file ./importhdfs.opt
3. View the generated HDFS file
(Screenshot of the generated HDFS file; the image is not available in this copy.)
As you can see, the CLOB field is imported into HDFS as plain text, while the BLOB field (binary data) is written to HDFS as a hexadecimal string.
The hexadecimal string can be converted back to a regular string with code such as the following, which essentially relies on bit-shift operations:
// NOTE: the original listing declared "package com.ganymede.test;". It is omitted here so the
// class compiles from any directory; restore it if the file lives under com/ganymede/test/.

/**
 * Hex conversion utilities: encodes byte arrays as hexadecimal characters and decodes
 * hexadecimal characters back into bytes.
 *
 * <p>Each byte maps to exactly two hex characters (high nibble first), so an encoded
 * array is always twice as long as its input.
 *
 * @author Ganymede
 */
public class Hex {

    /** Lowercase digits used to build hexadecimal output. */
    private static final char[] DIGITS_LOWER = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
    };

    /** Uppercase digits used to build hexadecimal output. */
    private static final char[] DIGITS_UPPER = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Converts a byte array to a lowercase hexadecimal character array.
     *
     * @param data the bytes to encode
     * @return the hex characters, two per input byte
     */
    public static char[] encodeHex(byte[] data) {
        return encodeHex(data, true);
    }

    /**
     * Converts a byte array to a hexadecimal character array.
     *
     * @param data the bytes to encode
     * @param toLowerCase {@code true} for lowercase output, {@code false} for uppercase
     * @return the hex characters, two per input byte
     */
    public static char[] encodeHex(byte[] data, boolean toLowerCase) {
        return encodeHex(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER);
    }

    /**
     * Converts a byte array to a hexadecimal character array using the given digit table.
     *
     * @param data the bytes to encode
     * @param toDigits the 16-entry table that maps a nibble value to its output character
     * @return the hex characters, two per input byte
     */
    protected static char[] encodeHex(byte[] data, char[] toDigits) {
        int length = data.length;
        char[] out = new char[length << 1];
        // Two characters form one byte: high nibble first, then low nibble.
        for (int i = 0, j = 0; i < length; i++) {
            out[j++] = toDigits[(0xF0 & data[i]) >>> 4];
            out[j++] = toDigits[0x0F & data[i]];
        }
        return out;
    }

    /**
     * Converts a byte array to a lowercase hexadecimal string.
     *
     * @param data the bytes to encode
     * @return the hex string
     */
    public static String encodeHexStr(byte[] data) {
        return encodeHexStr(data, true);
    }

    /**
     * Converts a byte array to a hexadecimal string.
     *
     * @param data the bytes to encode
     * @param toLowerCase {@code true} for lowercase output, {@code false} for uppercase
     * @return the hex string
     */
    public static String encodeHexStr(byte[] data, boolean toLowerCase) {
        return encodeHexStr(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER);
    }

    /**
     * Converts a byte array to a hexadecimal string using the given digit table.
     *
     * @param data the bytes to encode
     * @param toDigits the 16-entry table that maps a nibble value to its output character
     * @return the hex string
     */
    protected static String encodeHexStr(byte[] data, char[] toDigits) {
        return new String(encodeHex(data, toDigits));
    }

    /**
     * Converts a hexadecimal character array to the bytes it represents.
     *
     * @param data the hex characters; upper- and lowercase digits are both accepted
     * @return the decoded bytes, one per pair of input characters
     * @throws IllegalArgumentException (a {@link RuntimeException}) if {@code data} has an
     *     odd length or contains a character that is not a hexadecimal digit
     */
    public static byte[] decodeHex(char[] data) {
        int len = data.length;
        if ((len & 0x01) != 0) {
            throw new IllegalArgumentException("Odd number of characters.");
        }
        byte[] out = new byte[len >> 1];
        // Two characters form one byte: high nibble first, then low nibble.
        for (int i = 0, j = 0; j < len; i++, j += 2) {
            int high = toDigit(data[j], j);
            int low = toDigit(data[j + 1], j + 1);
            out[i] = (byte) ((high << 4) | low);
        }
        return out;
    }

    /**
     * Converts one hexadecimal character to its integer value.
     *
     * @param ch the hex character
     * @param index the position of {@code ch} in the source array, used in the error message
     * @return the value of the digit, from 0 to 15
     * @throws IllegalArgumentException (a {@link RuntimeException}) if {@code ch} is not a
     *     valid hexadecimal digit
     */
    protected static int toDigit(char ch, int index) {
        int digit = Character.digit(ch, 16);
        if (digit == -1) {
            throw new IllegalArgumentException(
                    "Illegal hexadecimal character " + ch + " at index " + index);
        }
        return digit;
    }

    /**
     * Demo: round-trips a sample string through hex, then decodes a hex value as it
     * appears in the HDFS file produced by the Sqoop import.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Use an explicit charset so the demo does not depend on the platform default.
        java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

        String srcStr = "string to be converted";
        String encodeStr = encodeHexStr(srcStr.getBytes(utf8));
        String decodeStr = new String(decodeHex(encodeStr.toCharArray()), utf8);
        System.out.println("Before conversion:" + srcStr);
        System.out.println("After conversion:" + encodeStr);
        System.out.println("After Restore:" + decodeStr);
        System.out.println("---------------------------------------");
        decodeStr = new String(decodeHex("3435363738390d0a626c6f62".toCharArray()), utf8);
        System.out.println("After Restore:" + decodeStr);
    }
}
For Hive, either convert the hexadecimal value to a string before loading the data, or load it as-is and convert it afterwards with a UDF.
Sqoop processing Clob and BLOB fields