"""This module contains code from
Think Python by Allen B. Downey
http://thinkpython.com
Copyright Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
import os

try:
    from shlex import quote  # Python 3.3+
except ImportError:  # Python 2 fallback
    from pipes import quote
def walk(dirname):
    """Finds the names of all files in dirname and its subdirectories.

    Entries that are neither regular files nor directories (for example
    dangling symbolic links, sockets or FIFOs) are skipped.

    dirname: string name of directory

    Returns: list of string paths, each one starting with dirname
    """
    names = []
    for name in os.listdir(dirname):
        path = os.path.join(dirname, name)
        if os.path.isfile(path):
            names.append(path)
        elif os.path.isdir(path):
            # Recurse only into real directories: calling os.listdir on
            # any other kind of entry would raise OSError.
            names.extend(walk(path))
    return names
def compute_checksum(filename):
    """Computes the MD5 checksum of the contents of a file.

    Runs the external md5sum command on the file through pipe.

    filename: string

    Returns (res, stat), the output of md5sum and its exit status, as
    returned by pipe.
    """
    # Quote the name so that spaces and shell metacharacters in it are
    # passed to md5sum literally instead of being interpreted by the shell.
    cmd = 'md5sum ' + quote(filename)
    return pipe(cmd)
def check_diff(name1, name2):
    """Computes the difference between the contents of two files.

    Runs the external diff command on the two files through pipe.

    name1, name2: string filenames

    Returns (res, stat), the output of diff and its exit status, as
    returned by pipe.
    """
    # Each name is quoted separately so that it reaches diff as exactly
    # one argument, whatever characters it contains.
    cmd = 'diff %s %s' % (quote(name1), quote(name2))
    return pipe(cmd)
def pipe(cmd):
    """Runs a command in a subprocess.

    cmd: string Unix command

    Returns (res, stat), the output of the subprocess and the exit status.
    stat is whatever closing the pipe reports: None when the command
    succeeded, a non-None status value otherwise.
    """
    fp = os.popen(cmd)
    try:
        res = fp.read()
    finally:
        # Close the pipe even if reading fails so it is never left open;
        # close() also yields the exit status of the command.
        stat = fp.close()
    # The status is handed back instead of being asserted on: commands
    # such as diff use a non-zero exit to report an ordinary result, and
    # callers need to see it.
    return res, stat
def compute_checksums(dirname, suffix):
    """Computes checksums for all files with the given suffix.

    Files for which the checksum command fails or produces no output
    are left out of the result.

    dirname: string name of directory to search
    suffix: string suffix to match

    Returns: map from checksum to list of files with that checksum
    """
    d = {}
    for name in walk(dirname):
        if not name.endswith(suffix):
            continue
        res, stat = compute_checksum(name)
        if stat is not None or not res.strip():
            # No usable checksum for this file; skip it rather than
            # abort the scan of the remaining files.
            continue
        # The checksum is the first whitespace-delimited field of the
        # output. Taking only that field keeps file names that contain
        # spaces from breaking the parse.
        checksum = res.split(None, 1)[0]
        d.setdefault(checksum, []).append(name)
    return d
def check_pairs(names):
    """Checks whether any in a list of files differs from the others.

    names: list of string filenames

    Returns: True if no compared pair of files differs, False as soon as
    one pair differs or cannot be compared.
    """
    for name1 in names:
        for name2 in names:
            # The ordering test visits each unordered pair once and never
            # compares a name with itself.
            if name1 < name2:
                res, stat = check_diff(name1, name2)
                # Any output means the contents differ; a non-None status
                # with no output means the comparison itself failed, which
                # must not be reported as "identical".
                if res or stat is not None:
                    return False
    return True
def print_duplicates(d):
    """Checks for duplicate files.

    Reports any files with the same checksum and checks whether they
    are, in fact, identical.

    d: map from checksum to list of files with that checksum
    """
    # Only the lists of names are needed; the checksum keys are not printed.
    for names in d.values():
        if len(names) > 1:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('The following files have the same checksum:')
            for name in names:
                print(name)
            if check_pairs(names):
                print('and they are identical.')
if __name__ == '__main__':
    # When run as a script, look for Python files under the current
    # directory that share a checksum and report them.
    d = compute_checksums(dirname='.', suffix='.py')
    print_duplicates(d)