/** * @author HJX * @version 1.0,2013-01-16 * @since JDK1.7,Ubuntu-12.04-64bit * 在hadoop環境下運行 * 將一個String寫入到本地lzo檔案中(不是hadoop的hdfs上) * 再從該lzo檔案中讀取出來並與原String進行校對 */import java.io.BufferedReader;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.OutputStream;import java.util.ArrayList;import java.util.List;import org.apache.hadoop.conf.Configuration;import com.hadoop.compression.lzo.LzopCodec;public class LzoCompress { /** * @param args */ public static void main(String[] args) { //產生資料 String dataSource = "abcdefghijklmnopqrstuvwxyz0123456789~!%#^@*#*%$(\n"; dataSource = dataSource.concat(dataSource); dataSource = dataSource.concat(dataSource); dataSource = dataSource.concat(dataSource);/* System.out.println("dataSource = " + dataSource);*/ String lzoFilePath = "/home/hadoop/LzoCompressTest.lzo"; //寫入到lzo檔案,即lzo壓縮 write2LzoFile(lzoFilePath, getDefaultConf(),dataSource.getBytes()); StringBuilder sb = new StringBuilder(); //讀取lzo檔案,即lzo解壓縮 List<String> lines = readLzoFile(lzoFilePath, getDefaultConf()); for(String line : lines) { sb.append(line); //LINUX/UNIX 下添加一個分行符號 sb.append("\n"); /* //Windows 下添加一個分行符號 sb.append("\r\n");*/ } if (sb.toString().equals(dataSource)) { System.out.println(sb.toString()); } else { System.err.println("Error line : " + sb.toString()); } } private static Configuration getDefaultConf(){ Configuration conf = new Configuration(); conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); conf.set("io.compression.codecs", "com.hadoop.compression.lzo.LzoCodec"); return conf; } /** * 寫資料到lzo檔案,即lzo壓縮 * @param destLzoFilePath * @param conf * @param datas * @return void */ public static void write2LzoFile(String destLzoFilePath,Configuration conf,byte[] datas) { LzopCodec lzo = null; OutputStream out = null; try {/* System.setProperty("java.library.path", 
"/usr/local/hadoop/lib/native/Linux-amd64-64/lib");*/ lzo = new LzopCodec(); lzo.setConf(conf); out = lzo.createOutputStream(new FileOutputStream(destLzoFilePath)); out.write(datas); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if(out != null) { out.close(); } } catch (IOException e) { e.printStackTrace(); } } } /** * 從lzo檔案中讀取資料,即lzo解壓縮 * @param lzoFilePath * @param conf * @return void */ public static List<String> readLzoFile(String lzoFilePath,Configuration conf) { LzopCodec lzo = null; InputStream is = null; InputStreamReader isr = null; BufferedReader reader = null; List<String> result = null; String line = null; try {/* System.setProperty("java.library.path", "/usr/local/hadoop/lib/native/Linux-amd64-64/lib");*/ lzo = new LzopCodec(); lzo.setConf(conf); is = lzo.createInputStream(new FileInputStream(lzoFilePath)); isr = new InputStreamReader(is); reader = new BufferedReader(isr); result = new ArrayList<String>(); while((line = reader.readLine()) != null) { result.add(line); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if (reader != null) { reader.close(); } if (isr != null) { isr.close(); } if (is != null) { is.close(); } } catch (IOException e) { e.printStackTrace(); } } return result; }}
程式是沒有錯的,但是一開始啟動並執行時候總會提示無法讀取libgplcompression這個庫,其實我知道少了哪些庫的,分別是
libgplcompression.a
libgplcompression.la
libgplcompression.so
libgplcompression.so.0
libgplcompression.so.0.0.0
可問題是把這些庫放在哪裡。嘗試過把這幾個庫放在$CLASSPATH下面,但沒用(原生庫不是透過 CLASSPATH 載入的)。於是查看了錯誤提示,提示缺少的這個庫在com.hadoop.compression.lzo.GPLNativeCodeLoader這個類裡面被引用到,於是看了一下hadoop-lzo-0.4.15.jar的源檔案(當時編譯hadoop-lzo-0.4.15.jar時留下的源檔案,在kevinweil-hadoop-lzo-6bb1b7f/src/java/com/hadoop/compression/lzo/裡),GPLNativeCodeLoader.java的內容是這樣的:
package com.hadoop.compression.lzo;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

// Quoted from the hadoop-lzo sources: attempts to load the native
// "gplcompression" library exactly once (in a static initializer) and
// records whether the load succeeded.
public class GPLNativeCodeLoader {
    private static final Log LOG = LogFactory.getLog(GPLNativeCodeLoader.class);
    // Set to true only if the static initializer's loadLibrary call succeeds.
    private static boolean nativeLibraryLoaded = false;

    static {
        try {
            //try to load the lib
            // System.loadLibrary searches the directories listed in the
            // java.library.path JVM property for libgplcompression.
            System.loadLibrary("gplcompression");
            nativeLibraryLoaded = true;
            LOG.info("Loaded native gpl library");
        } catch (Throwable t) {
            // Catching Throwable: UnsatisfiedLinkError (not an Exception)
            // is thrown when the native library cannot be found.
            LOG.error("Could not load native gpl library", t);
            nativeLibraryLoaded = false;
        }
    }

    /**
     * Are the native gpl libraries loaded?
     * @return true if loaded, otherwise false
     */
    public static boolean isNativeCodeLoaded() {
        return nativeLibraryLoaded;
    }
}
這裡跟load那個libgplcompression庫有關的語句應該是try語句塊裡面的那個System.loadLibrary("gplcompression"); 於是我再查了一下這個loadLibrary的動作到底是怎樣的動作,在這篇blog裡找到了解答:http://blog.csdn.net/forandever/article/details/5983846
System.loadLibrary()load的是java.library.path這一jvm變數所指向的路徑中的庫。那我只要把那些libgplcompression庫 所在的檔案夾加入到java.library.path裡面不就行了~於是我尋找設定java.library.path的方法,
方法1:命令列
java -Djava.library.path=/path/to/libgplcompression/ 主類名(注意:java 命令接受的是類名,而不是 ***.class 檔案名)
方法2:java語句
在程式裡加入這麼一句,System.setProperty("java.library.path", "/path/to/libgplcompression/");
就找了這麼2個方法,可是方法1只對那一次執行有效;而方法2實際上根本不起作用——JVM 在啟動時就快取了java.library.path的值,之後再用System.setProperty修改它,System.loadLibrary也不會重新讀取。
除了這2個方法,我找不到別的方法了,累死了,不再找了,索性把libgplcompression這些庫給copy到java.library.path指向的檔案夾裡。
copy完後,再次執行,OK了。
要擷取java.library.path的值,可以用java語句
System.out.println(System.getProperty("java.library.path")); 我的是
/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
參考資料:
http://guoyunsky.iteye.com/blog/1266226
http://blog.csdn.net/forandever/article/details/5983846
libgplcompression庫以及hadoop-lzo-0.4.15.jar下載連結
http://pan.baidu.com/s/1mgJQ1tQ