#!/usr/bin/perl
#
# fdupe tool - find duplicate files
#
# $Id: fdupe,v 1.7 20:11:21 root Exp root $
#
# Source code Copyright (c) 1998,2011 Bernhard Schneider.
# May be used only for non-commercial purposes with
# appropriate acknowledgement of copyright.
#
# FILE:        fdupe
# DESCRIPTION: script finds duplicate files.
# AUTHOR:      Bernhard Schneider <bernhard@neaptide.org>
#              hints, corrections & ideas are welcome
#
# usage: fdupe.pl <path> [<path> ...]
#        find / -xdev | fdupe.pl
#
# How to select and remove duplicates:
#   redirect the output to a file, edit the file and mark lines you
#   wish to move/delete with a preceding dash (-), then use the
#   following script to delete the marked files:
#     #!/usr/bin/perl -n
#     chomp; unlink if s/^-//;
#
# History:
#  12.05.99 - goto statement replaced with next
#  14.05.99 - minor changes
#  18.05.99 - removed confusing 'for $y', sorted hash-search
#  20.05.99 - minor changes
#  02.03.00 - some functions rewritten, optimized for speed
#  10.01.01 - hint-fix by Ozzie <ozric at kyuzz.org>
#  05.03.02 - fixed hangups by reading block/char-devices
#  08.09.11 - skip checking of hard links
#  14.10.11 - accept file names from stdin
#
# use strict;                  # uncomment for debugging

$| = 1;                        # unbuffer STDOUT so progress shows immediately

# F1/F2   - global filehandles shared by getchunk() and mycmp()
# %farray - maps file size -> list of file names having that size
# $statF1 - caches [nlink, inode] of the file currently open on F1
local (*F1, *F2); my %farray = (); my $statF1;
#------------------------------
# Traverse a directory tree and record every plain file's name in the
# global %farray, keyed by file size.  Symlinks, sockets, pipes and
# block/char devices are skipped (reading devices can hang the scan).
sub scan ($) {
    my ($dir) = $_[0];

    # Lexical handle instead of the shared bareword DIR: safe under
    # recursion without relying on readdir being fully drained first.
    opendir (my $dh, $dir) or die "($dir) $!:$@";
    my @entries = map "$dir/$_", grep !/^\.\.?$/, readdir ($dh);
    closedir ($dh);

    for (@entries) {
        # -l lstats the entry; the bare '_' reuses that stat buffer
        next if -l or -S _ or -p _ or -c _ or -b _;
        if (-d _) { scan ($_) }
        else      { push @{$farray{-s _}}, $_ }
    }
}
#------------------------------
# Read the first 32 bytes of a file as a quick comparison key.
# Arguments: (file size, ref-to-file-name).  Opens the file on the
# global handle F1 - deliberately left open so mycmp() can later
# rewind and full-compare it - and caches its [nlink, inode] in the
# global $statF1.  Returns the chunk string, or undef when the file
# cannot be opened or the read comes up short.
sub getchunk ($$) {
    my ($fsize, $pfname) = @_;
    my $chunksize = 32;              # bytes used as the quick-match key
    my ($nread, $buff);

    # three-arg open: a file name starting with '>' or '|' must not
    # be interpreted as an open mode
    return undef unless open (F1, '<', $$pfname);
    $statF1 = [(stat F1)[3,1]];      # [nlink, inode] for hard-link check
    binmode F1;
    $nread = read (F1, $buff, $chunksize);

    # a valid chunk is either a full 32 bytes or the whole (smaller) file
    ($nread == $chunksize || $nread == $fsize) ? "$buff" : undef;
}
#------------------------------
# Compare the file already open on the global handle F1 against the
# file named by the scalar ref $fptr, block by block.  Returns 0 when
# the contents are identical (or the candidate is a hard link to the
# same inode), -1 when they differ or the candidate cannot be opened.
sub mycmp ($) {
    my ($fptr) = $_[0];
    my ($buffa, $buffb);
    my ($nread1, $nread2);
    my $statF2;
    my ($buffsize) = 16*1024;        # full-compare block size

    # three-arg open avoids mode injection through odd file names
    return -1 unless (open (F2, '<', $$fptr));
    $statF2 = [(stat F2)[3,1]];      # [nlink, inode]

    # hard link to the file on F1 (link count > 1, same inode):
    # identical by definition, no need to read
    return 0
        if ($statF2->[0] > 1 && $statF1->[1] == $statF2->[1]);

    binmode F2;
    seek (F1, 0, 0);                 # rewind F1 past the chunk getchunk() read

    do {
        $nread1 = read (F1, $buffa, $buffsize);
        $nread2 = read (F2, $buffb, $buffsize);
        if (($nread1 != $nread2) || ($buffa cmp $buffb)) {
            return -1;
        }
    } while ($nread1);

    return 0;
}
#------------------------------
# main: collect candidate files, then compare within each size class.

print "collecting files and sizes ...\n";

if (-t STDIN) {
    # no pipe on stdin: scan the paths given on the command line,
    # defaulting to the current directory
    $ARGV[0] = '.' unless $ARGV[0];
    map scan($_), @ARGV;
} else {
    # file names supplied on stdin (e.g. piped from find)
    while (<STDIN>) {
        s/[\r\n]+$//;                # strip CR/LF line endings
        # same device/special-file filter as scan()
        push @{$farray{-s $_}}, $_
            unless (-l or -S _ or -p _ or -c _ or -b _);
    }
}

print "now comparing ...\n";

# largest size classes first
for my $fsize (reverse sort { $a <=> $b } keys %farray) {
    my ($i, $fptr, $fref, $pnum, %dupes, %index, $chunk);

    # a unique file size cannot have duplicates
    next if $#{$farray{$fsize}} == 0;

    $pnum = 0;                       # next duplicate-group number
    %dupes = %index = ();            # group -> [name refs], chunk -> [groups]

  nx:
    for (my $nx = 0; $nx <= $#{$farray{$fsize}}; $nx++) {
        $fptr = \$farray{$fsize}[$nx];      # ref to the current file name
        $chunk = getchunk ($fsize, $fptr);  # first 32 bytes as quick key

        if ($pnum) {
            # full-compare only against the first member of each group
            # whose leading chunk matches
            for $i (@{$index{$chunk}}) {
                $fref = ${$dupes{$i}}[0];
                unless (mycmp ($fref)) {
                    # confirmed duplicate - add it to the group
                    push @{$dupes{$i}}, $fptr;
                    next nx;
                }
            }
        }
        # no group matched - start a new one and index its chunk
        push @{$dupes{$pnum}}, $fptr;
        push @{$index{$chunk}}, $pnum++;
    }

    # report every group holding more than one file
    for $i (keys %dupes) {
        $#{$dupes{$i}} || next;      # singleton group - nothing to show
        print "\nsize: $fsize\n";
        for (@{$dupes{$i}}) {
            print $$_, "\n";
        }
    }
}

close F1;
close F2;