#!/usr/bin/perl
#
# fdupe tool - finding duplicate files
#
# $Id: fdupe,v 1.7 2011/10/14 20:11:21 root EXP root $
#
# Source Code Copyright (c) 1998,2011 Bernhard Schneider.
# May be used only for non-commercial purposes with
# appropriate acknowledgement of copyright.
#
# File:        fdupe
# Description: script finds duplicate files.
# Author:      Bernhard Schneider <bernhard@neaptide.org>
#              hints, corrections & ideas are welcome
#
# usage: fdupe.pl <path> <path> ...
#        find / -xdev | fdupe.pl
#
# How to select and remove duplicates:
# redirect output to >file, edit the file and mark lines you
# wish to move/delete with a preceding dash (-).
# Use the following script to delete marked files:
#   #!/usr/bin/perl -n
#   chomp; unlink if s/^-//;
#
# History:
# 12.05.99 - goto statement replaced with next
# 14.05.99 - minor changes
# 18.05.99 - removed confusing ' for $y '
#            included hash-search
# 20.05.99 - minor changes
# 02.03.00 - some functions rewritten, optimized for speed
# 10.01.01 - hint-fix by Ozzie |ozric at kyuzz.org|
# 05.03.02 - fixed hangups by reading block/char-devices
# 08.09.11 - skips checking of hard links
# 14.10.11 - accept file names from stdin
#
#use strict;    # uncomment for debugging
$| = 1;         # autoflush output so progress lines appear immediately

# Shared state:
#   F1/F2    - global read handles; GetChunk() leaves F1 open so mycmp()
#              can seek/re-read it when comparing candidates
#   %farray  - maps file size (bytes) -> array ref of file names
#   $statF1  - [nlink, inode] of the file currently open on F1
local (*F1, *F2); my %farray = (); my $statF1;
# ------------------------------
# Traverse a directory tree; record every plain file in %farray,
# keyed by its size in bytes. Symlinks, sockets, pipes and
# char/block devices are skipped (reading devices can hang).
sub Scan ($) {
    my ($dir) = $_[0];
    # Lexical dirhandle: a shared bareword handle (DIR) would be
    # clobbered by the recursive Scan() call below.
    opendir (my $dh, $dir) or die "($dir) $!:$@";
    map {
        # -l lstats $_; the bare '_' tests reuse that stat buffer
        (-d) ? Scan ($_) : push @{$farray{-s $_}}, $_
            unless (-l or -S _ or -p _ or -c _ or -b _);
    } map "$dir/$_", grep !/^\.\.?$/, readdir ($dh);
    closedir ($dh);
}
# ------------------------------
# Read the first chunk (32 bytes) of a file, used as a cheap hash key
# before doing full comparisons. Takes the expected file size and a
# REF to the file name. Leaves F1 open (mycmp() re-reads it) and
# caches [nlink, inode] in $statF1 for the hard-link check.
# Returns the chunk, or undef on open failure / short read.
sub GetChunk ($$) {
    my ($fsize, $pfname) = @_;
    my $chunksize = 32;
    my ($nread, $buff);
    # 3-arg open: a file name starting with '>' or '|' must not be
    # treated as an open mode
    return undef unless open (F1, '<', $$pfname);
    $statF1 = [(stat F1)[3,1]];    # [nlink, inode]
    binmode F1;
    $nread = read (F1, $buff, $chunksize);
    # accept a short read only when the whole file is shorter than a chunk
    ($nread == $chunksize || $nread == $fsize) ? "$buff" : undef;
}
# ------------------------------
# Compare the file named by the REF argument against the file already
# open on F1 (opened by GetChunk). Returns 0 when contents are equal
# (hard links to the same inode count as equal without reading),
# -1 on any difference or open failure.
# NOTE: name must be lowercase 'mycmp' - the call site uses 'mycmp'.
sub mycmp ($) {
    my ($fptr) = $_[0];
    my ($buffa, $buffb);
    my ($nread1, $nread2);
    my $statF2;
    my ($buffsize) = 16*1024;
    return -1 unless (open (F2, '<', $$fptr));
    $statF2 = [(stat F2)[3,1]];    # [nlink, inode]
    # hard link to the file on F1: trivially identical, skip the read
    return 0
        if ($statF2->[0] > 1 && $statF1->[1] == $statF2->[1]);
    binmode F2;
    seek (F1, 0, 0);    # rewind F1 past the chunk GetChunk consumed
    do {
        $nread1 = read (F1, $buffa, $buffsize);
        $nread2 = read (F2, $buffb, $buffsize);
        if (($nread1 != $nread2) || ($buffa cmp $buffb)) {
            return -1;
        }
    } while ($nread1);
    return 0;
}
# ------------------------------
print "collecting files and sizes ...\n";

if (-t STDIN) {
    # interactive: scan the paths given as arguments
    $ARGV[0] = '.' unless $ARGV[0];    # use wd if no arguments given
    Scan($_) for @ARGV;
} else {
    # file names piped on stdin (e.g. from find)
    while (<STDIN>) {
        s/[\r\n]+$//;    # strip trailing CR/LF
        push @{$farray{-s $_}}, $_
            unless (-l or -S _ or -p _ or -c _ or -b _);
    }
}

print "now comparing ...\n";
for my $fsize (reverse sort {$a <=> $b} keys %farray) {
    my ($i, $fptr, $fref, $pnum, %dupes, %index, $chunk);

    # skip files with unique file size - they cannot have duplicates
    next if $#{$farray{$fsize}} == 0;

    $pnum = 0;
    %dupes = %index = ();

    NX:
    for (my $nx = 0; $nx <= $#{$farray{$fsize}}; $nx++) {
        $fptr  = \$farray{$fsize}[$nx];     # REF to the candidate name
        $chunk = GetChunk($fsize, $fptr);   # first 32 bytes as quick key

        if ($pnum) {
            # compare only against one representative of each group
            # whose leading chunk matches
            for $i (@{$index{$chunk}}) {
                $fref = ${$dupes{$i}}[0];
                unless (mycmp $fref) {
                    # found duplicate, collecting
                    push @{$dupes{$i}}, $fptr;
                    next NX;
                }
            }
        }
        # nothing found, start a new group
        push @{$dupes{$pnum}}, $fptr;
        push @{$index{$chunk}}, $pnum++;
    }

    # show found dupes for the current size (groups of 2+ only)
    for $i (keys %dupes) {
        $#{$dupes{$i}} || next;
        print "\nsize: $fsize\n";
        for (@{$dupes{$i}}) {
            print $$_, "\n";
        }
    }
}

close F1;
close F2;