#!/bin/bash
# Daily Nginx access-log pipeline for Hadoop:
# upload the TW/CN access logs into HDFS, run the PV/time/location
# MapReduce jobs, then purge the HDFS directory from one week ago.

# Installation root of the Hadoop distribution.
hadoop_home=/opt/hadoop-2.4.0
# Local Nginx access logs to upload (Taiwan and China sites).
tw_nginx_log_file=/home/chiline.com.all/access_com_tw.log
cn_nginx_log_file=/home/chiline.com.all/access_com_cn.log
# Today's date as YYYYMMDD; a 4-digit year is required for the
# day-YYYYMMDD HDFS directory names and for date arithmetic below.
current_date=$(date +%Y%m%d)
# NameNode endpoint; replace the placeholder with the real host.
hdfs_url=hdfs://xx.xx.xx.xx:9100
# Directory holding the analysis job jars.
analyse_jar_path=$hadoop_home/ianc

echo "hadoop_home = $hadoop_home"
echo "tw_nginx_log_file = $tw_nginx_log_file"
echo "cn_nginx_log_file = $cn_nginx_log_file"
echo "hdfs_url = $hdfs_url"
echo "analyse_jar_path = $analyse_jar_path"
# Create a fresh HDFS directory for yesterday's logs and upload both files.
# Layout: $hdfs_url/user/day-<YYYYMMDD>/{tw-log,cn-log}
# Globals read: hadoop_home, hdfs_url, current_date,
#               tw_nginx_log_file, cn_nginx_log_file
function puttodaylogtohdfs () {
  # Logs are dated one day back: the rotated file holds the previous
  # day's traffic (same convention as analysetodaylog).
  local day_dir
  day_dir=$hdfs_url/user/day-$(adddate "$current_date" 1)
  # Remove any leftover directory from a previous (failed) run, then recreate it.
  "$hadoop_home/bin/hdfs" dfs -rm -r "$day_dir"
  "$hadoop_home/bin/hdfs" dfs -mkdir "$day_dir"
  "$hadoop_home/bin/hdfs" dfs -mkdir "$day_dir/tw-log"
  "$hadoop_home/bin/hdfs" dfs -mkdir "$day_dir/cn-log"
  "$hadoop_home/bin/hdfs" dfs -put "$tw_nginx_log_file" "$day_dir/tw-log"
  "$hadoop_home/bin/hdfs" dfs -put "$cn_nginx_log_file" "$day_dir/cn-log"
}
# adddate DATE DAYS — print DATE (YYYYMMDD) minus DAYS days, as YYYYMMDD.
# Despite the name, this function SUBTRACTS days (matching the original
# cal/expr implementation, which walked months backwards).
# GNU date handles month/year boundaries and leap years correctly and
# replaces the broken hand-rolled `expr`/`cal` arithmetic.
# Note: shell functions cannot `return` strings; the result is printed
# on stdout — capture it with $(adddate ...).
function adddate () {
  local base=$1
  local days=$2
  date -d "$base -$days days" +%Y%m%d
}
# getlogfilename PATH — print the final path component (like basename).
# The original tried to `return` a string, which is invalid in shell and
# split the path via a mangled IFS/array trick; parameter expansion does
# the same job safely. Callers capture stdout: name=$(getlogfilename /a/b/c.log)
function getlogfilename () {
  local log_file_name=$1
  # Strip everything up to and including the last '/'.
  echo "${log_file_name##*/}"
}
# Delete the HDFS log directory from 7 days ago to cap storage usage.
# Globals read: hadoop_home, hdfs_url, current_date
function removelastweeklog () {
  local remove_date
  remove_date=$(adddate "$current_date" 7)
  echo "Start remove history log file,remove_date is $remove_date"
  "$hadoop_home/bin/hdfs" dfs -rm -r "$hdfs_url/user/day-$remove_date"
}
# Run the PV, time and location MapReduce analyses over yesterday's
# uploaded logs. Results land under .../day-<YYYYMMDD>/analyse/.
# Globals read: hadoop_home, hdfs_url, current_date, analyse_jar_path,
#               tw_nginx_log_file, cn_nginx_log_file
function analysetodaylog () {
  local tw_log_file_name cn_log_file_name
  local day_dir tw_log_file cn_log_file analyse_path
  # Only the file name (not the local path) exists under the HDFS dir.
  tw_log_file_name=$(basename "$tw_nginx_log_file")
  cn_log_file_name=$(basename "$cn_nginx_log_file")
  # Same "one day back" convention as puttodaylogtohdfs.
  day_dir=$hdfs_url/user/day-$(adddate "$current_date" 1)
  tw_log_file=$day_dir/tw-log/$tw_log_file_name
  cn_log_file=$day_dir/cn-log/$cn_log_file_name
  analyse_path=$day_dir/analyse
  # pv analyse
  "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/pvanalyse-1.0.jar" "$tw_log_file" "$analyse_path/tw-pv"
  "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/pvanalyse-1.0.jar" "$cn_log_file" "$analyse_path/cn-pv"
  # time analyse
  "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/timeanalyse-1.0.jar" "$tw_log_file" "$analyse_path/tw-time"
  "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/timeanalyse-1.0.jar" "$cn_log_file" "$analyse_path/cn-time"
  # area analyse
  "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/locationanalyse-1.0.jar" "$tw_log_file" "$analyse_path/tw-location"
  "$hadoop_home/bin/hadoop" jar "$analyse_jar_path/locationanalyse-1.0.jar" "$cn_log_file" "$analyse_path/cn-location"
}
# Main sequence: upload yesterday's logs, analyse them, purge week-old data.
echo "Start put local log to HDFs"
puttodaylogtohdfs
echo "Start analyse today Log"
analysetodaylog
echo "Remove last week log"
removelastweeklog
# Nginx logs are written to HDFS on a daily schedule.