Duplicate backup copies of a file usually have (1), (2), and so on in their names. The Python script below finds such files so they can be deleted; along the way it also detects files that share the same name in different directories and treats those as deletion candidates too.
#!/usr/bin/env python3
# coding: utf-8
"""Find (and optionally delete) duplicate backup files under a directory tree.

Two kinds of files are reported:

* files whose name contains a marker substring -- by default ``(``, as in
  ``photo (1).jpg``, the usual suffix of a duplicated backup copy;
* files whose base name was already seen elsewhere in the tree.

Detection is based purely on file names, not on file content.

Run the script once in its default dry-run mode and check the printed
candidates before calling ``list_dir(..., delete=True)``.

__author__ = 'hadoop'
__mtime__ = '2016/6/18'
"""
import os
from time import perf_counter as now  # time.clock() was removed in Python 3.8


def list_dir(rootDir, *, marker='(', delete=False):
    """Scan *rootDir* for duplicate-looking files and report them.

    Directories are visited breadth-first, so when the same file name occurs
    at several depths the shallowest occurrence counts as the "original" and
    the deeper ones are reported as duplicates. Entries of each directory are
    processed in sorted order to make the report reproducible.

    Args:
        rootDir: Directory at which the scan starts.
        marker: Substring that flags a file name as a duplicated backup.
            Change it to match whatever naming scheme the copies use.
        delete: When false (the default) nothing is removed and the function
            only prints what it found. When true, every reported file is
            removed with ``os.remove``.

    Returns:
        A ``(file_count, marker_count, same_name_count)`` tuple: the number
        of files seen, the number whose name contains *marker*, and the
        number whose base name had already been seen.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be removed.
    """
    file_count = 0
    marker_count = 0
    same_name_count = 0

    seen_names = set()  # set membership is O(1); a list would be O(n)
    # The list grows while it is being iterated; this is what makes the
    # traversal breadth-first.
    pending_dirs = [rootDir]

    for current_dir in pending_dirs:
        for entry in sorted(os.listdir(current_dir)):
            path = os.path.join(current_dir, entry)
            if os.path.isdir(path):
                pending_dirs.append(path)
                continue

            # Anything that is not a directory is treated as a file.
            file_count += 1
            should_delete = False

            if entry in seen_names:
                same_name_count += 1
                print('Delete files with the same file name:', path)
                should_delete = True
            else:
                seen_names.add(entry)

            if marker in entry:
                marker_count += 1
                print('To delete a file:', path)
                should_delete = True

            # A file may match both rules; remove it only once.
            if delete and should_delete:
                os.remove(path)

    print('The total number of files is:', file_count)
    print('The total number of deleted duplicate files is:', marker_count)
    print('The total number of deleted files is:', same_name_count)
    return file_count, marker_count, same_name_count


if __name__ == '__main__':
    rootDir = input("Enter the root directory where the files need to be processed:")
    start = now()
    list_dir(rootDir)
    end = now()
    print('Total time:', end - start, 'seconds')
Delete duplicate backup copies of pictures on your computer