配置Pig環境變數使它滿足MapReduce模式
vi .bashrc
PATH=$PATH:$HOME/bin:/usr/lib/jvm/java-6-sun-1.6.0.24/bin:/home/yangze/soft/pig-0.9.2/bin:/home/yangze/soft/hadoop-0.20.2/bin
export PATH
export JAVA_HOME=/usr/lib/jvm/java-6-sun-1.6.0.24
export PIG_CLASSPATH=/home/yangze/soft/pig-0.9.2/conf
export HADOOP_CONF_DIR=/home/yangze/soft/hadoop-0.20.2/conf
啟動pig：MapReduce模式直接執行 pig（預設模式，等同 pig -x mapreduce）；本地模式是 pig -x local
把檔案匯入到hdfs
grunt> copyFromLocal /home/yangze/newdisk/study/class08/access_log.txt access/access_log
--將log檔案load進表log
grunt> log = load '/user/yangze/access/access_log'
>> using PigStorage(' ')
>> as (ip,a1,a3,a4,a5,a6,a7,a8);
--對log進行過濾只保留ip欄位
grunt> b = foreach log generate ip;
--按ip做group by
grunt> c = group b by ip;
--按ip對c進行統計
grunt> d = foreach c generate group,COUNT($1);
--顯示結果:
grunt> dump d;
Output(s):
Successfully stored 476 records (14039 bytes) in: "hdfs://master:9000/tmp/temp-912504264/tmp675085582"
Counters:
Total records written : 476
Total bytes written : 14039
Spillable Memory Manager spill count : 0
Total bags proactively spilled: 0
Total records proactively spilled: 0
Job DAG:
job_201304161138_0004
2013-04-16 13:55:28,963 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!
2013-04-16 13:55:28,970 [main] INFO org.apache.hadoop.mapreduce.lib.input.FileInputFormat - Total input paths to process : 1
2013-04-16 13:55:28,971 [main] INFO org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil - Total input paths to process : 1
(127.0.0.1,2)
(1.59.65.67,2)
(112.4.2.19,9)
(112.4.2.51,80)
(60.2.99.33,42)
(69.28.58.5,1)
(69.28.58.6,9)
(69.28.58.8,5)
(1.193.3.227,3)
(1.202.221.3,6)
(117.136.9.4,6)
(121.31.62.3,26)
(182.204.8.4,59)
(183.9.112.2,25)
(221.12.37.6,25)
(223.4.16.88,2)
(27.9.110.75,122)
(61.189.63.2,24)
(69.28.58.12,3)
(111.161.72.7,1)
(117.136.12.6,61)
(117.136.19.9,4)
(117.136.2.98,1)
(117.136.20.3,1)
(117.136.20.9,1)
(117.136.3.46,5)
(117.136.4.18,5)
(117.136.4.19,1)
(117.136.5.39,9)
(117.136.5.70,1)
(117.136.5.73,17)
(117.136.7.67,5)
(117.136.8.11,32)
(117.136.8.48,1)
(117.136.8.52,1)
(117.136.8.97,2)
(117.136.9.52,2)
(117.136.9.68,7)
(117.24.22.57,2)
(121.28.95.48,1597)
....
grunt>