#!/bin/bash
# Name: remove_one.sh
# Purpose: Find and delete duplicate files. Only one sample is retained for
#          each distinct file content.
#
# Usage: remove_one.sh [DIR]
#   DIR  directory to clean (default: the current directory). Only regular,
#        non-hidden files directly inside DIR are examined; subdirectories,
#        symbolic links and special files are never touched.
#
# Files written inside DIR (overwritten on every run):
#   duplicate_files   every file whose content also exists under another name
#   duplicate_sample  the single copy of each such content that is kept
# Every file listed in duplicate_files but not in duplicate_sample is deleted
# and its name is echoed to stderr.
#
# The script deliberately avoids bash-only syntax so that it also works when
# started as "sh remove_one.sh".

set -u

readonly DUP_LIST=duplicate_files
readonly DUP_KEEP=duplicate_sample
TAB=$(printf '\t')
NL='
'
readonly TAB NL

#######################################
# Print "SIZE<TAB>NAME" for every candidate file in the current directory.
# The shell glob is used instead of parsing `ls -l`, so names containing
# spaces survive intact. Names containing a newline cannot be stored in the
# line-oriented list files and are skipped with a warning.
# Outputs: one line per candidate on stdout, warnings on stderr.
#######################################
list_candidates() {
  # Variables carry a prefix because POSIX sh has no `local`.
  for lc_name in *; do
    # Symlinks are skipped: deleting a link target as a "duplicate" of the
    # link itself would destroy the only real copy.
    if [ ! -f "$lc_name" ] || [ -L "$lc_name" ]; then
      continue
    fi
    case $lc_name in
      # Our own output files must never be treated as duplicates.
      "$DUP_LIST" | "$DUP_KEEP") continue ;;
      *"$NL"*)
        printf 'remove_one: skipping a file whose name contains a newline\n' >&2
        continue
        ;;
    esac
    lc_size=$(wc -c <"$lc_name") || continue
    # $(( )) normalises any padding some wc implementations add.
    printf '%s\t%s\n' "$((lc_size))" "$lc_name"
  done
}

#######################################
# Read "SIZE<TAB>NAME" lines on stdin and print "MD5<TAB>NAME" for every file
# that shares its size with at least one other file. Files with a unique
# size cannot have a duplicate, so they are never hashed.
#######################################
hash_same_size() {
  awk '
    {
      tab = index($0, "\t")
      size[NR] = substr($0, 1, tab - 1)
      name[NR] = substr($0, tab + 1)
      count[size[NR]]++
    }
    END {
      for (i = 1; i <= NR; i++)
        if (count[size[i]] > 1)
          print name[i]
    }
  ' |
    while IFS= read -r hs_name; do
      # Hash via stdin: md5sum then prints "<sum>  -" and never escapes or
      # misreads unusual file names.
      hs_sum=$(md5sum <"$hs_name") || continue
      printf '%s\t%s\n' "${hs_sum%% *}" "$hs_name"
    done
}

#######################################
# Find duplicate files in a directory, record them, and delete the extras.
# Arguments: $1 - directory to process (optional, default ".")
# Outputs:   progress messages on stdout, removed names on stderr,
#            duplicate_files and duplicate_sample inside the directory
# Returns:   0 on success, non-zero if a tool is missing, the directory is
#            not usable, or a file could not be removed
#######################################
remove_duplicates() {
  rd_dir=${1:-.}

  for rd_tool in md5sum cmp awk sort wc; do
    if ! command -v "$rd_tool" >/dev/null 2>&1; then
      printf 'remove_one: required tool not found: %s\n' "$rd_tool" >&2
      return 1
    fi
  done

  # Subshell: the caller's working directory and variables stay untouched.
  (
    CDPATH='' cd -- "$rd_dir" || exit 1

    # Sorted by checksum, then by name (byte order), so identical files are
    # adjacent and the first name of each group is the one that is kept.
    hashes=$(list_candidates | hash_same_size | LC_ALL=C sort)

    keepers=''
    victims=''
    prev_sum=''
    keep=''
    listed=0
    while IFS= read -r line; do
      [ -n "$line" ] || continue
      sum=${line%%"$TAB"*}
      name=${line#*"$TAB"}
      if [ "$sum" = "$prev_sum" ]; then
        # Equal checksums are only a hint; compare the bytes before a file
        # is condemned. The ./ prefix protects names that start with "-".
        if cmp -s "./$keep" "./$name"; then
          if [ "$listed" -eq 0 ]; then
            keepers=$keepers$keep$NL
            listed=1
          fi
          victims=$victims$name$NL
        fi
      else
        prev_sum=$sum
        keep=$name
        listed=0
      fi
    done <<EOF
$hashes
EOF

    printf '%s' "$keepers$victims" | LC_ALL=C sort -u >"$DUP_LIST" || exit 1
    printf '%s' "$keepers" | LC_ALL=C sort -u >"$DUP_KEEP" || exit 1

    printf '%s\n' 'Removing...'
    # Delete all files listed in duplicate_files and not in duplicate_sample.
    status=0
    while IFS= read -r name; do
      [ -n "$name" ] || continue
      printf '%s\n' "$name" >&2
      rm -- "./$name" </dev/null || status=1
    done <<EOF
$victims
EOF

    if [ "$status" -ne 0 ]; then
      printf 'remove_one: some duplicate files could not be removed\n' >&2
      exit 1
    fi
    printf '%s\n' 'Removed duplicates files successfully'
  )
}

remove_duplicates "$@"
# --------------------------------------------------------
# Run:
#   [root@node1 tmp]# sh remove_one.sh
# Note: subdirectories of the current directory are skipped; the script does
# not process them recursively.