fdupe: a Perl script for finding duplicate files _ Application Tips

Source: Internet
Author: User
Tags stdin
Figure:



The code is as follows:

#!/usr/bin/perl
#
# fdupe tool - finding duplicate files
#
# $Id: fdupe,v 1.7 2011/10/14 20:11:21 root EXP root $
#
# Source Code Copyright (c) 1998,2011 Bernhard Schneider.
# May be used only for non-commercial purposes with
# appropriate acknowledgement of copyright.
#
# File: fdupe
# Description: script finds duplicate files.
# Author: Bernhard Schneider <bernhard@neaptide.org>
# hints, corrections & ideas are welcome
#
# usage: fdupe.pl <path> <path> ...
#        find / -xdev | fdupe.pl
#
# How to select and remove duplicates:
# Redirect output to >file, edit the file and mark the lines you
# wish to move/delete with a preceding dash (-).
# Use the following script to delete marked files:
# #!/usr/bin/perl -n
# chomp; unlink if s/^-//;
#
# History:
# 12.05.99 - goto statement replaced with next
# 14.05.99 - minor changes
# 18.05.99 - removed confusing 'for $y'
#            included hash-search
# 20.05.99 - minor changes
# 02.03.00 - some functions rewritten, optimized for speed
# 10.01.01 - hint-fix by Ozzie |ozric at kyuzz.org|
# 05.03.02 - fixed hangups caused by reading block/char devices
# 08.09.11 - skips checking of hard links
# 14.10.11 - accept file names from stdin
#
#use strict; # uncomment for debugging

# Unbuffer STDOUT so progress messages appear immediately.
$| = 1;

# State shared by the subs below:
#   F1, F2   - bareword filehandles; GetChunk opens F1, Mycmp opens F2
#   %farray  - maps a file size to an array ref of path names with that size
#   $statF1  - [nlink, inode] of the file currently open on F1
local (*F1, *F2);
my %farray = ();
my $statF1;

# ------------------------------
# Traverse Directories
# Scan DIR
#
# Recursively walks the directory DIR and records every entry that is not a
# symlink, socket, pipe, character device or block device.  Directories are
# descended into; everything else is appended to @{ $farray{SIZE} }, where
# SIZE is the value returned by -s for that path.
#
# Dies if DIR cannot be opened.  Returns nothing useful.
sub Scan ($) {
    my ($dir) = @_;

    # A lexical handle keeps recursive calls from clobbering each other.
    opendir(my $dh, $dir) or die "($dir) $!:$@";
    my @paths = map { "$dir/$_" } grep { !/\A\.\.?\z/ } readdir($dh);
    closedir($dh);

    for my $path (@paths) {
        # -l is tested first so symlinks (including links to directories)
        # are never followed.
        next
            if -l $path
            || -S $path
            || -p $path
            || -c $path
            || -b $path;

        if (-d $path) {
            Scan($path);
        }
        else {
            push @{ $farray{ -s $path } }, $path;
        }
    }

    return;
}

# ------------------------------
# get chunk of bytes from a file
# GetChunk FSIZE, PFNAME
#
# Opens the file whose name is referenced by PFNAME on the shared handle F1,
# stores [nlink, inode] of that handle in $statF1, and reads up to 32 bytes.
#
# Returns the bytes read when a full chunk was read or when the number of
# bytes read equals FSIZE; returns undef when the file cannot be opened, the
# read fails, or the byte count matches neither.  F1 is left open so that a
# later comparison can rewind and re-read it.
sub GetChunk ($$) {
    my ($fsize, $pfname) = @_;
    my $chunksize = 32;

    # Forget the previous file's stat data so a failed open cannot leave a
    # stale inode behind.
    undef $statF1;

    # Three-argument open: the name is used verbatim, so leading/trailing
    # whitespace or characters such as '>' and '|' are not interpreted.
    return undef unless open(F1, '<', $$pfname);

    $statF1 = [ (stat F1)[3, 1] ];
    binmode F1;

    my $buff;
    my $nread = read(F1, $buff, $chunksize);
    return undef unless defined $nread;    # read error

    return ($nread == $chunksize || $nread == $fsize) ? "$buff" : undef;
}

# ------------------------------
# compare two files
# Mycmp FPTR
#
# Compares the file currently open on the shared handle F1 with the file
# whose name is referenced by FPTR (opened here on F2).
#
# Returns 0 when the two are considered identical and -1 otherwise:
#   * -1 if the second file cannot be opened or either read fails;
#   *  0 without reading when the second file has more than one link and
#        its inode number equals the one recorded in $statF1;
#   *  otherwise both files are read in 16 KiB blocks from the start and
#        compared block by block.
sub Mycmp ($) {
    my ($fptr) = @_;
    my $buffsize = 16 * 1024;

    # Three-argument open so the file name is never parsed for a mode.
    return -1 unless open(F2, '<', $$fptr);

    my $statF2 = [ (stat F2)[3, 1] ];    # [nlink, inode]

    # Hard-link shortcut: same inode, no need to read the content.
    return 0
        if $statF2->[0] > 1
        && $statF1
        && defined $statF1->[1]
        && $statF1->[1] == $statF2->[1];

    binmode F2;
    seek(F1, 0, 0);

    my ($nread1, $nread2);
    do {
        my ($buffa, $buffb);
        $nread1 = read(F1, $buffa, $buffsize);
        $nread2 = read(F2, $buffb, $buffsize);

        # A failed read (closed handle, directory, I/O error) must never be
        # taken for "both files ended at the same point".
        return -1 unless defined $nread1 && defined $nread2;

        return -1 if $nread1 != $nread2 || $buffa ne $buffb;
    } while ($nread1);

    return 0;
}

# ------------------------------

# Main program.
#
# Phase 1 fills %farray (size => list of path names), either by scanning the
# directories given on the command line or, when STDIN is not a terminal, by
# reading one path name per line from STDIN.
# Phase 2 walks the sizes from largest to smallest; files sharing a size are
# grouped by their leading chunk (GetChunk) and then compared in full
# (Mycmp).  Every group with more than one member is printed.

print "Collecting files and sizes ...\n";

if (-t STDIN) {
    @ARGV = ('.') unless @ARGV;    # use the working directory if no arguments given
    Scan($_) for @ARGV;
}
else {
    while (my $path = <STDIN>) {
        $path =~ s/[\r\n]+\z//;    # strip the line terminator (LF or CRLF)
        next if $path eq '';

        # Same exclusions as the directory scan, plus directories themselves
        # (e.g. "find" output lists them, but they cannot be compared).
        next
            if -l $path
            || -S $path
            || -p $path
            || -c $path
            || -b $path
            || -d $path;

        push @{ $farray{ -s $path } }, $path;
    }
}

print "Now comparing ...\n";

for my $fsize (sort { $b <=> $a } keys %farray) {
    my $files = $farray{$fsize};

    # Skip files with a unique size: they cannot have duplicates.
    next if @$files < 2;

    my $pnum = 0;    # number of distinct groups found so far for this size
    my %dupes;       # group number => list of refs to identical path names
    my %index;       # leading chunk => group numbers starting with that chunk

  FILE:
    for my $nx (0 .. $#$files) {
        my $fptr  = \$files->[$nx];
        my $chunk = GetChunk($fsize, $fptr);

        # The file could not be opened/read consistently, so it cannot be
        # verified as a duplicate of anything.
        next FILE unless defined $chunk;

        for my $i (@{ $index{$chunk} || [] }) {
            my $fref = $dupes{$i}[0];
            if (Mycmp($fref) == 0) {
                # Found a duplicate: add it to the existing group.
                push @{ $dupes{$i} }, $fptr;
                next FILE;
            }
        }

        # Nothing matched: this file starts a new group.
        push @{ $dupes{$pnum} }, $fptr;
        push @{ $index{$chunk} }, $pnum++;
    }

    # Show the duplicates found for the current size, in discovery order.
    for my $i (sort { $a <=> $b } keys %dupes) {
        my $group = $dupes{$i};
        next if @$group < 2;

        print "\n size: $fsize\n\n";
        print $$_, "\n" for @$group;
    }
}

close F1;
close F2;

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.