This example shows how to use Python to find files that have identical content. It is shared here for your reference. The details are as follows:
The Python code below finds files with identical content; several directories can be scanned in a single run.
Usage: python doublesdetector.py c:\;d:\;e:\ > doubles.txt
# Hello, this script is written in Python - http://www.python.org
# doublesdetector.py 1.0p
#
# Finds files whose content is identical, whatever their name/date/time.
# Files are first grouped by size (cheap), and only files sharing a size are
# hashed (expensive).  The code runs unchanged on Python 2.7 and Python 3.
import hashlib  # replaces the long-removed ``sha`` module (same SHA-1 digest)
import os
import os.path
import string
import sys

# Raw string: the Windows example paths contain backslashes that must be
# printed literally rather than interpreted as escape sequences.
message = r"""
doublesdetector.py 1.0p

This script will search for files that are identical
(whatever their name/date/time).

  Syntax : python %s <directories>

      where <directories> is a directory or a list of directories
      separated by a semicolon (;)

Examples : python %s c:\windows
           python %s c:\;d:\;e:\ > doubles.txt
           python %s c:\program files > doubles.txt

This script is public domain. Feel free to reuse and tweak it.
The author of this script is Sebastien Sauvage
http://sebsauvage.net/python/
""" % ((sys.argv[0],) * 4)


def fileSHA(filepath):
    """Compute the SHA-1 (Secure Hash Algorithm) digest of a file.

    Input:
        filepath: full path and name of the file.

    Output:
        The hexadecimal representation of the SHA-1 of the file content,
        or the string '0' if the file could not be read (file not found,
        no read rights...).
    """
    digest = hashlib.sha1()
    try:
        # ``with`` guarantees the handle is closed even if a read fails.
        with open(filepath, 'rb') as stream:
            data = stream.read(65536)
            while data:
                digest.update(data)
                data = stream.read(65536)
    except (IOError, OSError):
        # Deliberate best effort: unreadable files are reported as '0' so the
        # caller can discard them instead of aborting the whole scan.
        return '0'
    return digest.hexdigest()


def detectDoubles(directories):
    """Find groups of files with identical content.

    Input:
        directories: one directory, or several separated by a semicolon (;).
                     Empty entries (e.g. a trailing ';') are ignored.

    Output:
        A dictionary mapping the SHA-1 hex digest of a content to the sorted
        list of paths of the files having that content.  Only contents shared
        by at least two distinct files are present.

    Progress information is written to sys.stderr.
    """
    fileslist = {}
    # Group all files by size (in the fileslist dictionary).
    for directory in directories.split(';'):
        if not directory.strip():
            # abspath('') is the current directory; never scan it by accident.
            continue
        directory = os.path.abspath(directory)
        sys.stderr.write('Scanning directory ' + directory + '...')
        for dirpath, _dirnames, filenames in os.walk(directory):
            callback(fileslist, dirpath, filenames)
        sys.stderr.write('\n')

    sys.stderr.write('Comparing files...')
    # Compute the SHA of files that have the same size, and group them by SHA
    # (in the filessha dictionary).
    filessha = {}
    for listoffiles in fileslist.values():
        # A path can be collected twice when the given directories overlap or
        # are repeated; de-duplicate so a file is never its own double.
        candidates = sorted(set(listoffiles))
        if len(candidates) < 2:
            continue  # a unique size cannot have a double: skip the hashing
        for filepath in candidates:
            sys.stderr.write('.')
            filessha.setdefault(fileSHA(filepath), []).append(filepath)

    # '0' collects the files that could not be read; they are not doubles.
    filessha.pop('0', None)

    # Keep only the digests shared by several files.  A new dictionary is
    # built because deleting entries while iterating fails on Python 3.
    doubles = dict((sha, paths) for sha, paths in filessha.items()
                   if len(paths) > 1)
    sys.stderr.write('\n')
    return doubles


def callback(fileslist, directory, files):
    """Record the regular files of one directory, grouped by size.

    Input:
        fileslist: dictionary {size in bytes: [file paths]}, updated in place.
        directory: path of the directory being visited.
        files: names of the entries found in that directory.

    Writes one '.' to sys.stderr per visited directory.
    """
    sys.stderr.write('.')
    for fileName in files:
        filepath = os.path.join(directory, fileName)
        if not os.path.isfile(filepath):
            continue
        try:
            filesize = os.path.getsize(filepath)
        except OSError:
            # The file vanished or became unreadable since it was listed.
            continue
        fileslist.setdefault(filesize, []).append(filepath)


# Guarded so that importing this module does not start a scan.
if __name__ == '__main__':
    if len(sys.argv) > 1:
        # Arguments are re-joined with a space so that an unquoted path such
        # as  c:\program files  is still seen as a single directory.
        doubles = detectDoubles(" ".join(sys.argv[1:]))
        print('The following files are identical:')
        print('\n'.join(["----\n%s" % '\n'.join(doubles[filesha])
                         for filesha in doubles]))
        print('----')
    else:
        print(message)
Hopefully this article will help you with Python programming.