#!/bin/sh
############################
# Separate this run's log entries from the previous run's by appending
# ten empty lines, then write the begin marker.
i=0
while [ "$i" -lt 10 ]; do
  echo "" >> /u1/hadoop-stat/stat.log
  i=$((i + 1))
done
# The marker carries yesterday's date. `date -d` is a GNU date extension.
echo "begin[$(date "+%y-%m-%d" -d "-1 days")]" >> /u1/hadoop-stat/stat.log
############################
#remove file
#######################################
# Delete every entry of a directory except the one whose name equals the
# month of yesterday's date, as printed by `date "+%y-%m" -d "-1 days"`.
# Arguments: $1 - directory whose entries are pruned
# Returns:   1 if $1 is empty or not a directory, or if the month cannot
#            be computed; 0 otherwise
#######################################
Removefilepathnotcurrentmonth() {
  # Refuse to run without a real directory: with an empty $1 the rm
  # below would operate on entries directly under "/".
  if [ -z "${1:-}" ] || [ ! -d "$1" ]; then
    echo "Removefilepathnotcurrentmonth: not a directory: '${1:-}'" >&2
    return 1
  fi

  # Variable names are prefixed because POSIX sh has no function-local
  # variables and the script uses short global names such as "month".
  rfp_keep=$(date "+%y-%m" -d "-1 days") || return 1
  # An empty name would match nothing and so delete everything.
  [ -n "$rfp_keep" ] || return 1

  # Glob instead of parsing `ls`, so names with whitespace stay intact.
  for rfp_entry in "$1"/*; do
    # An unmatched glob is left as the literal pattern; skip it, but
    # still handle dangling symlinks, which fail the -e test.
    [ -e "$rfp_entry" ] || [ -L "$rfp_entry" ] || continue
    if [ "${rfp_entry##*/}" != "$rfp_keep" ]; then
      rm -rf -- "$rfp_entry"
    fi
  done
  return 0
}
# Local staging directories for the two kinds of log files.
GYLOG_PATH="/u1/hadoop-stat/gylog"
NGINXLOG_PATH="/u1/hadoop-stat/nginxlog"

# In each staging directory, remove everything except the entry named
# after the month of yesterday's date.
echo "Begin remove Gylogpath ' s files not at current month" >> /u1/hadoop-stat/stat.log
Removefilepathnotcurrentmonth "$GYLOG_PATH"
echo "Begin remove Nginxlogpath ' s files not at current month" >> /u1/hadoop-stat/stat.log
Removefilepathnotcurrentmonth "$NGINXLOG_PATH"
############################
#scp file between hosts
# Yesterday's date and its month. `date -d` is a GNU date extension.
# NOTE(review): %y prints a two-digit year; confirm that this matches the
# remote file names (gylog-<day>.log) or switch to %Y for four digits.
day=$(date "+%y-%m-%d" -d "-1 days")
month=$(date "+%y-%m" -d "-1 days")

# --- application log: copy gylog-<day>.log from gy02 if not yet present ---
gyfilename="gylog-${day}.log"
# ${VAR:?} aborts the script instead of building a path rooted at "/"
# when the staging directory variable is unset or empty.
gyfilepath="${GYLOG_PATH:?GYLOG_PATH is not set}/${month}"
mkdir -p "$gyfilepath"
if [ ! -f "$gyfilepath/$gyfilename" ]; then
  echo "Begin SCP Gylog" >> /u1/hadoop-stat/stat.log
  scp "gy02:/u1/logs/gylog/$gyfilename" "$gyfilepath/"
fi

# --- nginx log: copy gy_access.log.1 and rename it to a dated file ---
# NOTE(review): the "ngxinlog-" spelling is kept exactly as found; confirm
# nothing else relies on it before correcting it.
nginxfilename="ngxinlog-${day}.log"
nginxfilepath="${NGINXLOG_PATH:?NGINXLOG_PATH is not set}/${month}"
mkdir -p "$nginxfilepath"
if [ ! -f "$nginxfilepath/$nginxfilename" ]; then
  echo "Begin SCP Nginxlog" >> /u1/hadoop-stat/stat.log
  # NOTE(review): the host is spelled "Gy01" here but "gy02" above;
  # confirm the intended spelling.
  # Rename only when the copy succeeded, so a failed scp never leaves a
  # dated file that would suppress the retry on the next run.
  scp "Gy01:/u1/logs/lbnginx/gy_access.log.1" "$nginxfilepath/" \
    && mv "$nginxfilepath/gy_access.log.1" "$nginxfilepath/$nginxfilename"
fi
###########################
#copy file to Hadoop
# Upload the two local log files into HDFS. For each log kind the layout
# is <base>/<yy-mm-dd>/input (receives the file) and <base>/<yy-mm-dd>/output.
HADOOP_CMD="/u1/hadoop-1.0.1/bin/hadoop"
GYLOG_HADOOP_PATH="/logs/gylog"
NGINXLOG_HADOOP_PATH="/logs/nginxlog"
# Despite the name, this is yesterday's full date, not just the month.
monthhadoop=$(date "+%y-%m-%d" -d "-1 days")

# --- application log ---
gyhadoopfilepath="${GYLOG_HADOOP_PATH}/${monthhadoop}"
gyhadoopfilepathinput="${gyhadoopfilepath}/input"
gyhadoopfilepathoutput="${gyhadoopfilepath}/output"
# `dfs -test -e` exits 0 when the path exists in HDFS.
if ! "$HADOOP_CMD" dfs -test -e "$gyhadoopfilepath"; then
  echo "Begin mkdir Gyhadoopfilepath in Hadoop because of not exist:" "$gyhadoopfilepath" >> /u1/hadoop-stat/stat.log
  "$HADOOP_CMD" dfs -mkdir "$gyhadoopfilepath"
  "$HADOOP_CMD" dfs -mkdir "$gyhadoopfilepathinput"
  "$HADOOP_CMD" dfs -mkdir "$gyhadoopfilepathoutput"
fi
# Upload only if the file is not already in the input directory.
if ! "$HADOOP_CMD" dfs -test -e "$gyhadoopfilepathinput/$gyfilename"; then
  echo "Begin copy Gyhadoopfile to Hadoop" >> /u1/hadoop-stat/stat.log
  # FsShell command names are case-sensitive: -copyFromLocal.
  "$HADOOP_CMD" dfs -copyFromLocal "$gyfilepath/$gyfilename" "$gyhadoopfilepathinput/"
fi

# --- nginx log ---
nginxhadoopfilepath="${NGINXLOG_HADOOP_PATH}/${monthhadoop}"
nginxhadoopfilepathinput="${nginxhadoopfilepath}/input"
nginxhadoopfilepathoutput="${nginxhadoopfilepath}/output"
if ! "$HADOOP_CMD" dfs -test -e "$nginxhadoopfilepath"; then
  echo "Begin mkdir Nginxhadoopfilepath in Hadoop because of not exist:" "$nginxhadoopfilepath" >> /u1/hadoop-stat/stat.log
  "$HADOOP_CMD" dfs -mkdir "$nginxhadoopfilepath"
  "$HADOOP_CMD" dfs -mkdir "$nginxhadoopfilepathinput"
  "$HADOOP_CMD" dfs -mkdir "$nginxhadoopfilepathoutput"
fi
if ! "$HADOOP_CMD" dfs -test -e "$nginxhadoopfilepathinput/$nginxfilename"; then
  echo "Begin copy Nginxhadoopfile to Hadoop" >> /u1/hadoop-stat/stat.log
  "$HADOOP_CMD" dfs -copyFromLocal "$nginxfilepath/$nginxfilename" "$nginxhadoopfilepathinput/"
fi
##########################
#begin Hadoop Stat
# Run the MapReduce statistics jobs from stat.jar, passing yesterday's
# date ($day) as the only argument. The first two jobs are disabled.
# NOTE(review): Java class names are case-sensitive. The leading "Gy" in
# the package name is kept exactly as found; confirm it against the
# classes actually packaged in stat.jar.
#echo "Begin Hadoop stat Requesttimecount" >> /u1/hadoop-stat/stat.log
#/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar Gy.log.mr.requestTime.RequestTimeCount "$day"
#echo "Begin Hadoop stat RequestCount" >> /u1/hadoop-stat/stat.log
#/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar Gy.log.mr.request.RequestCount "$day"
echo "Begin Hadoop stat Nginxcount" >> /u1/hadoop-stat/stat.log
/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar Gy.log.mr.nginx.NginxCount "$day"
echo "Begin Hadoop stat Gylogcount" >> /u1/hadoop-stat/stat.log
/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar Gy.log.mr.gylog.GylogCount "$day"
##########################
#end for all
# Write the end marker, tagged with yesterday's date like the begin marker.
echo "end[$(date "+%y-%m-%d" -d "-1 days")]" >> /u1/hadoop-stat/stat.log
Note:
/u1/hadoop-stat/stat.jar Gy.log.mr.request.RequestCount
/u1/hadoop-stat/stat.jar Gy.log.mr.nginx.NginxCount
/u1/hadoop-stat/stat.jar Gy.log.mr.gylog.GylogCount
The MapReduce (MR) classes listed above implement custom statistics rules; you can develop your own according to your needs.
The rest of the script mostly uses basic Hadoop commands, so it should be easy to follow once you know what Hadoop does.
This article is from the "Zhangdh Open Space" blog; please retain this source link: http://linuxblind.blog.51cto.com/7616603/1711936
An example script for statistical log analysis with Hadoop MR