#!/usr/bin/perl
use strict;
use warnings;

# Configuration. These file-level variables are also read by find_dir().
my $path        = '/root/documents';            # Current working directory
my $dir         = "$path/images";               # The directory to traverse
my $log_file    = "$path/access_201209.log";    # nginx log 0903~0907, file size: 5.4 GB
my $result_file = 'result.f';                   # File where the results are placed

# Open the result file in append mode so that results from an earlier,
# interrupted run are kept. Three-argument open keeps the file name from
# being interpreted as part of the mode.
open my $output, '>>', $result_file
    or die "Open file failed: $!";

# Walk the image tree and record the access count of every file.
find_dir($dir);
# find_dir($base_dir)
#
# Recursively walks $base_dir. For every regular file found it:
#   1. builds the file's path relative to $path, prefixed with one space
#      (the space keeps paths such as theme_skin/blue/images/... in the
#      log from matching),
#   2. counts the lines of $log_file that contain that string,
#   3. appends "<space><relative path>: <count> " to the result file,
#   4. deletes the file, so that a restarted run continues where the
#      previous one stopped.
#
# WARNING: step 4 is destructive; every recorded file is removed from the
# scanned tree.
#
# Reads the file-level variables $path (prefix stripped from each file
# name), $log_file (the log that is searched) and $output (write handle of
# the result file).
#
# Entries whose name starts with "." are skipped; this covers ".", ".."
# and hidden files. A directory that cannot be opened is reported with
# warn and skipped. Returns nothing.
sub find_dir {
    my ($base_dir) = @_;

    # Lexical handle, so every recursion level owns its own handle.
    my $dh;
    if (!opendir $dh, $base_dir) {
        warn "Open dir failed: $base_dir: $!\n";
        return;    # nothing can be read from a handle that failed to open
    }

    # Read the whole listing first so the handle is closed before recursing.
    my @entries = readdir $dh;
    closedir $dh;

    $base_dir =~ s{/\z}{};    # Drop a trailing "/" from the directory name

    for my $entry (@entries) {
        next if $entry =~ /^\./;    # Skip ".", ".." and hidden files

        my $full_path = "$base_dir/$entry";

        if (-d $full_path) {
            find_dir($full_path);
        }
        elsif (-f $full_path) {
            # Strip the working directory (/root/documents) literally, not
            # as a regex, then add the leading space described above.
            (my $relative = $full_path) =~ s/\A\Q$path\E//;
            my $this_file = " $relative";

            # Count the log lines containing the string. The list form of
            # open bypasses the shell, so spaces or quotes in a file name
            # cannot break (or inject into) the command; -F makes grep
            # treat the name as a fixed string instead of a regex.
            my $grep;
            if (!open $grep, '-|', 'grep', '-c', '-F', '--', $this_file, $log_file) {
                warn "Cannot run grep for $full_path: $!\n";
                next;
            }
            my $result = <$grep>;
            close $grep;    # grep exits with 1 when the count is 0, so a false return alone is not an error

            # No output or an exit status above 1 means grep itself failed;
            # leave the file in place so it can be retried on a later run.
            if (!defined $result || ($? >> 8) > 1) {
                warn "grep failed for $full_path\n";
                next;
            }
            chomp $result;    # grep's output ends with a newline

            # The count is written with a space on both sides so that it
            # can be told apart from a 0 inside a file name.
            print {$output} "$this_file: $result \n";

            # Files that have been recorded are deleted so that the script
            # can be terminated at any time and resumed later.
            unlink $full_path
                or warn "Cannot delete $full_path: $!\n";
        }
    }

    return;
}
# The traversal is complete. Close the result file explicitly so that a
# buffered write error is reported instead of being lost at exit.
close $output
    or warn "Close result file failed: $!\n";
print "\nfinished\n";
# Afterwards, open the result.f file and replace "/images/" with "images/" so that the files are deleted relative to the current working directory.
# The Linux commands below look for records that were accessed 0 times within the 5 days and delete the corresponding files.
# The number 0 has a space on both sides so that records whose file name merely contains a 0 are not matched.
# Method 1:
# gawk -F ':' '$2 ~ / 0 / {print $1}' result.f | xargs rm -rf
# Method 2 (exactly equivalent to method 1):
# grep ' 0 ' result.f | gawk -F ':' '{print $1}' | xargs rm -rf