Deleting duplicate files with Python (source code included)
Without further ado, here is the source code.
#!/usr/bin/env python
# coding=utf-8
"""Report duplicate files under the current working directory.

Files are considered duplicates when they have the same size and the same
MD5 digest.  The actual ``os.remove`` call is left commented out in
``main``, so running the script only lists what would be deleted.
"""
import collections
import hashlib
import os
import time


def getmd5(filename, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of a file's full contents.

    Parameters:
        filename: path of the file to hash.
        chunk_size: number of bytes read per iteration; only affects memory
            use, not the resulting digest.

    Returns:
        The MD5 digest as a hex string.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as handle:
        # Stream the whole file: hashing only a leading slice would report
        # same-size files that differ further in as duplicates.
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def delfile(flist_temp):
    """Select the redundant copies among the given paths.

    Parameters:
        flist_temp: iterable of paths; entries that are not regular files
            are ignored.

    Returns:
        A list, in input order, of every file for which a later entry has
        the same size and the same MD5 digest.  The last entry of each
        group of duplicates is therefore never returned (it is the copy
        that is kept).
    """
    files = []
    sizes = []
    for path in flist_temp:
        if os.path.isfile(path):
            sizes.append(os.stat(path).st_size)
            files.append(path)

    size_counts = collections.Counter(sizes)

    keys = []       # (size, md5) per file, or None when the size is unique
    last_seen = {}  # (size, md5) -> index of the last file with that key
    for index, (path, size) in enumerate(zip(files, sizes)):
        if size_counts[size] < 2:
            # A file with a unique size cannot have a duplicate; skip the
            # hash entirely.
            keys.append(None)
            continue
        key = (size, getmd5(path))
        keys.append(key)
        last_seen[key] = index

    return [
        path
        for index, (path, key) in enumerate(zip(files, keys))
        if key is not None and last_seen[key] != index
    ]


def main():
    """List duplicate files in the current directory and its subfolders.

    Walks ``os.getcwd()``, prints each redundant copy together with summary
    counts and the elapsed time, then sleeps for 30 seconds before
    returning 0.
    """
    print('Deletes duplicate files in the current directory '
          '(including duplicate files in subfolders)\n')
    start = time.time()
    path = os.getcwd()

    list_fn = []
    for root, _dirs, names in os.walk(path):
        for fn in names:
            list_fn.append(os.path.join(root, fn))
    # delfile() keeps the LAST entry of each duplicate group, so reversing
    # makes the copy that os.walk reported first the one that is kept.
    list_fn.reverse()

    print('total number of files: \t%d\n' % len(list_fn))
    delf = delfile(list_fn)
    for f in delf:
        print('delete \t%s' % f)
        # Deletion is disabled; uncomment to actually remove the file.
        # os.remove(f)
    end = time.time()

    print('\ntotal number of files: \t%d\n' % len(list_fn))
    print('delete file: \t%d\n' % len(delf))
    print('total time: \t%s\n' % (end - start))
    # os.remove('delrefile.py')
    # Presumably keeps a console window open long enough to read the report.
    time.sleep(30)
    return 0


if __name__ == '__main__':
    main()
Result:
Thank you!
Reference: "Deleting identical files under a directory: step-by-step optimization (Python implementation)", http://www.cnblogs.com/ma6174/archive/2012/05/05/2484415.html