Finding files with identical content in Python

Source: Internet
Author: User
Tags dot net
This example shows how to find files with identical content using Python. It is shared here for your reference. The details are as follows:

The Python script below finds files that have identical content; several directories can be scanned at the same time.
Usage: python doublesdetector.py c:\;d:\;e:\ > doubles.txt

# Hello, this script was written in Python - http://www.python.org
# doublesdetector.py 1.0p
#
# Finds files whose content is identical, whatever their name, date or time.
# Files are first grouped by size (cheap), and only files sharing a size are
# hashed with SHA-1 (expensive) to confirm that they really are identical.
# Progress information goes to stderr so that stdout can be redirected to a
# report file.
import hashlib
import os
import os.path
import string  # not used below; kept from the script's original import list
import sys

# Usage text shown when the script is started without arguments.  It is a raw
# string because the Windows example paths contain backslashes.
message = r"""
doublesdetector.py 1.0p

This script will search for files that are identical
(whatever their name/date/time).

  Syntax : python %s <directories>

      where <directories> is a directory or a list of directories
      separated by a semicolon (;)

Examples : python %s c:\windows
           python %s c:\;d:\;e:\ > doubles.txt
           python %s c:\program files > doubles.txt

This script is public domain. Feel free to reuse and tweak it.
The author of this script Sebastien Sauvage
http://sebsauvage.net/python/
""" % ((sys.argv[0],) * 4)


def fileSHA(filepath):
    r"""Compute the SHA-1 (Secure Hash Algorithm) digest of a file.

    Input:
        filepath: full path and name of the file
                  (e.g. 'c:\windows\emm386.exe').

    Output:
        string: the hexadecimal representation of the SHA-1 of the file
                content.  Returns '0' if the file could not be read
                (file not found, no read rights...).
    """
    digest = hashlib.sha1()
    try:
        # The context manager closes the file even when a read fails.
        with open(filepath, 'rb') as stream:
            # Read in 64 KiB chunks so large files are never fully in memory.
            chunk = stream.read(65536)
            while chunk:
                digest.update(chunk)
                chunk = stream.read(65536)
    except (IOError, OSError):
        # Deliberate best effort: an unreadable file is reported as '0' and
        # later discarded by detectDoubles().
        return '0'
    return digest.hexdigest()


def callback(fileslist, directory, files):
    """Record the regular files of one directory, grouped by size.

    Input:
        fileslist: dictionary mapping a file size to the list of file paths
                   having that size; it is updated in place.
        directory: path of the directory being visited.
        files:     names of the entries found in that directory.

    Entries that are not regular files are ignored.  One '.' is written to
    stderr per visited directory as a progress indicator.
    """
    sys.stderr.write('.')
    for fileName in files:
        filepath = os.path.join(directory, fileName)
        if not os.path.isfile(filepath):
            continue
        try:
            filesize = os.stat(filepath).st_size
        except OSError:
            # The file vanished or became unreadable since it was listed.
            continue
        fileslist.setdefault(filesize, []).append(filepath)


def _uniquePaths(paths):
    """Return paths without repeated entries, keeping first-seen order."""
    seen = set()
    unique = []
    for path in paths:
        if path not in seen:
            seen.add(path)
            unique.append(path)
    return unique


def detectDoubles(directories):
    """Find the files with identical content under the given directories.

    Input:
        directories: a directory, or several directories separated by a
                     semicolon (;).  Empty entries (e.g. produced by a
                     trailing semicolon) are ignored.

    Output:
        dictionary mapping a SHA-1 hexadecimal digest to the list of paths
        of the files having that content.  Only digests shared by at least
        two distinct files are present; files that could not be read are
        left out.
    """
    fileslist = {}
    # Group all files by size (in the fileslist dictionary).
    for directory in directories.split(';'):
        if not directory.strip():
            # abspath('') is the current directory: do not scan it by accident.
            continue
        directory = os.path.abspath(directory)
        sys.stderr.write('Scanning directory ' + directory + '...')
        for dirpath, _dirnames, filenames in os.walk(directory):
            callback(fileslist, dirpath, filenames)
        sys.stderr.write('\n')

    sys.stderr.write('Comparing files...')
    # Compute the SHA of the files that share their size with another file,
    # and group them by SHA (in the filessha dictionary).
    filessha = {}
    for listoffiles in fileslist.values():
        # Overlapping directories list the same path more than once; a file
        # must not be reported as a double of itself.
        candidates = _uniquePaths(listoffiles)
        if len(candidates) < 2:
            # A size owned by a single file cannot have a double.
            continue
        for filepath in candidates:
            sys.stderr.write('.')
            filessha.setdefault(fileSHA(filepath), []).append(filepath)

    # '0' is the marker fileSHA() returns for unreadable files.
    filessha.pop('0', None)

    # Keep only the digests shared by several files.  A new dictionary is
    # built because deleting keys while iterating fails on Python 3.
    doubles = dict(
        (sha, paths) for (sha, paths) in filessha.items() if len(paths) > 1
    )
    sys.stderr.write('\n')
    return doubles


if __name__ == '__main__':
    if len(sys.argv) > 1:
        # Arguments are re-joined with spaces so that an unquoted path such
        # as "c:\program files" still works.
        doubles = detectDoubles(" ".join(sys.argv[1:]))
        print('The following files are identical:')
        print('\n'.join(["----\n%s" % '\n'.join(doubles[filesha])
                         for filesha in doubles.keys()]))
        print('----')
    else:
        print(message)
   
  
 

Hopefully this article will help you with Python programming.

  • Contact Us

    The content of this page is sourced from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

    If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

    A Free Trial That Lets You Build Big!

    Start building with 50+ products and up to 12 months usage for Elastic Compute Service

    • Sales Support

      1 on 1 presale consultation

    • After-Sales Support

      24/7 Technical Support 6 Free Tickets per Quarter Faster Response

    • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.