This article presents a Python script that finds files with identical content. It is shared here for your reference. The details are as follows:
The script searches for files whose content is identical (regardless of name, date, or time), and several directories can be scanned in a single run.
Usage: python doublesdetector.py c:\;d:\;e:\ > doubles.txt (directories are separated by semicolons; the report is redirected to doubles.txt)
?
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26-27--28 29---30 31--32 33 34 35 36 37 38-39 40 41 42 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 5 |
# Hello, this script is written in Python - http://www.python.org
# doublesdetector.py 1.0p
#
# Finds files whose content is identical, whatever their name/date/time.
# Runs on Python 2.7 and Python 3: the removed `sha` module, `os.path.walk`,
# `dict.has_key` and the `print` statement are replaced by `hashlib`,
# `os.walk`, `dict.setdefault` and single-argument `print(...)` calls.
import hashlib
import os
import os.path
import sys

# Usage text shown when the script is started without arguments.
# The program name (sys.argv[0]) is substituted into the four %s slots.
message = """
doublesdetector.py 1.0p

This script will search for files that are identical
(whatever their name/date/time).

  Syntax : python %s <directories>

      where <directories> is a directory or a list of directories
      separated by a semicolon (;)

Examples : python %s c:\\windows
           python %s c:\\;d:\\;e:\\ > doubles.txt
           python %s c:\\program files > doubles.txt

This script is public domain. Feel free to reuse and tweak it.
The author of this script Sebastien SAUVAGE <sebsauvage at sebsauvage dot net>
http://sebsauvage.net/python/
""" % ((sys.argv[0],) * 4)


def filesha(filepath):
    """Compute the SHA-1 digest of a file.

    Input:
        filepath: full path and name of the file.
    Output:
        The hexadecimal representation of the SHA-1 of the file content,
        or the string '0' if the file could not be read (file not found,
        no read rights, ...).
    """
    digest = hashlib.sha1()
    try:
        # The context manager closes the handle even when a read fails.
        with open(filepath, 'rb') as stream:
            # Read in 64 KiB chunks so large files are never fully in memory.
            data = stream.read(65536)
            while data:
                digest.update(data)
                data = stream.read(65536)
    except (IOError, OSError):
        # '0' is the sentinel detectdoubles() uses to drop unreadable files.
        return '0'
    return digest.hexdigest()


def callback(fileslist, directory, files):
    """Record the regular files of one directory, grouped by size.

    Input:
        fileslist: dictionary mapping a size in bytes to the list of file
                   paths having that size; it is updated in place.
        directory: the directory currently being visited.
        files:     names of the entries found in that directory.
    Entries that are not regular files are ignored. One '.' is written to
    stderr per visited directory as a progress indicator.
    """
    sys.stderr.write('.')
    for filename in files:
        filepath = os.path.join(directory, filename)
        if not os.path.isfile(filepath):
            continue
        try:
            filesize = os.path.getsize(filepath)
        except OSError:
            # The file vanished or became unreadable since the listing.
            continue
        fileslist.setdefault(filesize, []).append(filepath)


def detectdoubles(directories):
    """Find groups of files with identical content.

    Input:
        directories: a directory, or several directories separated by a
                     semicolon (;). Empty segments (for example after a
                     trailing semicolon) are skipped rather than being
                     resolved to the current directory.
    Output:
        Dictionary mapping a SHA-1 hex digest to the list of absolute paths
        of the files having that digest. Only digests shared by at least
        two files are present; unreadable files are left out.
    Progress information is written to stderr so that stdout can be
    redirected to a report file.
    """
    # Group all files by size: only same-sized files can be identical,
    # which avoids hashing most of the tree.
    fileslist = {}
    for directory in directories.split(';'):
        if not directory.strip():
            continue
        directory = os.path.abspath(directory)
        sys.stderr.write('Scanning directory ' + directory + '...')
        for dirpath, _dirnames, filenames in os.walk(directory):
            callback(fileslist, dirpath, filenames)
        sys.stderr.write('\n')

    sys.stderr.write('Comparing files...')
    # Hash only the files that share their size with another file, and
    # group them by digest.
    filessha = {}
    for listoffiles in fileslist.values():
        if len(listoffiles) < 2:
            continue
        for filepath in listoffiles:
            sys.stderr.write('.')
            filessha.setdefault(filesha(filepath), []).append(filepath)

    # Unreadable files all hash to '0'; they are not duplicates of each other.
    filessha.pop('0', None)

    # Build a new dictionary instead of deleting keys while iterating,
    # which raises RuntimeError on Python 3.
    doubles = {sha: paths for sha, paths in filessha.items() if len(paths) > 1}
    sys.stderr.write('\n')
    return doubles


def main(argv=None):
    """Command-line entry point: print the report, or the usage text."""
    if argv is None:
        argv = sys.argv
    if len(argv) > 1:
        # Arguments are re-joined with a space so that an unquoted path
        # containing spaces (e.g. c:\program files) is still one directory.
        doubles = detectdoubles(" ".join(argv[1:]))
        print('The following files are identical:')
        print('\n'.join(["----\n%s" % '\n'.join(paths)
                         for paths in doubles.values()]))
        print('----')
    else:
        print(message)


if __name__ == '__main__':
    main()
I hope this article will help you with your Python programming.