Added support for file pattern matching.
[audiosum.git] / src / audiodup
blob dcd8a7ef6ccc543bf0446a29f9d52ef3ed88fd64
#!/bin/bash
#
# Finds duplicate audio files by audio content (tags are ignored), using the
# external `audiosum` tool to hash the audio data.
#
# Usage: audiodup [-a algorithm] [-b percent] [-p pattern] [path] [path...]
#
# Candidates are narrowed in three passes so that the full hash is only
# computed for files that already agree on the two cheaper keys.
# Duplicate groups are written to stdout; diagnostics go to stderr.

ALGO=md5
PERCENT=2
PATTERN='*.mp3'

# Prints the given message to stderr and aborts with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Prints the help text to stdout (with a leading and a trailing blank line).
usage() {
  cat <<EOF

This script finds duplicate mp3 files by audio content (it ignores tags).

usage: $0 [-a algorithm] [-b percent] [-p pattern] [path] [path...]

  -a algorithm  Chooses the hash algorithm to use. See audiosum -l.
  -b percent    Integer >=1 and <=99, chooses the amount of the file data to
                read during the intermediate phase of processing.
  -p pattern    File pattern to match against.

This script works by processing the mp3 files according to audiosum -h. This
is made by three parts:

  1. Discards files which have different sizes.
  2. Discards files with same size but different hash for the first n% data.
  3. Discards files with same size but different hash for the whole data.

The remaining files are duplicated files.

EOF
}

# The leading ':' puts getopts in silent mode, so missing arguments (':') and
# unknown options ('?') must be reported here or they would go unnoticed.
while getopts ":a:b:p:h" OPT; do
  case $OPT in
    a) ALGO=$OPTARG ;;
    b) PERCENT=$OPTARG ;;
    p) PATTERN=$OPTARG ;;
    h)
      usage
      exit
      ;;
    :) die "Option -$OPTARG requires an argument." ;;
    \?) die "Unknown option -$OPTARG. Use -h for help." ;;
  esac
done
shift $((OPTIND - 1))

command -v audiosum >/dev/null 2>&1 || die "audiosum: command not found."

# Look the algorithm up in `audiosum -l`; the third field of the matching
# line divided by 4 is used as the hash width in characters (presumably the
# digest size in bits -> hex digits; confirm against audiosum -l output).
HASH_LEN=$(audiosum -l | grep ": .$ALGO" | (
  read -r _ _ LEN
  echo $((LEN / 4))
))

# When grep matches nothing LEN is empty and the arithmetic above yields 0,
# so a plain emptiness test is not enough: require a positive integer.
if [[ ! $HASH_LEN =~ ^[1-9][0-9]*$ ]]; then
  die "Algorithm not supported."
fi

# Accept 1..99 (leading zeros tolerated); rejects empty, 0 and non-numbers.
if [[ ! $PERCENT =~ ^0*[1-9][0-9]?$ ]]; then
  die "Percent value must be an integer between 1 and 99."
fi
PERCENT=$((10#$PERCENT)) # normalise e.g. "05" to "5"

# NOTE(review): ALGO is only used to derive HASH_LEN; it is not passed to the
# audiosum invocations below. Confirm whether audiosum needs an explicit
# algorithm option here.
#
# Pass 1: key on the first 8 characters of each `audiosum -b` line, keep only
#         repeated keys, then cut back to fields 6+ (presumably the file name).
# Pass 2: same, keyed on the first HASH_LEN+9 characters of `audiosum -b N`.
# Pass 3: full `audiosum`; print groups of repeats separated by blank lines.
# "$@" keeps paths containing whitespace or glob characters intact; with no
# paths, find falls back to its own default starting point.
find "$@" -iname "$PATTERN" |
  audiosum -b | sort | uniq -D -w 8 | cut -d ' ' -f 6- |
  audiosum -b "$PERCENT" | sort | uniq -D -w $((HASH_LEN + 9)) | cut -d ' ' -f 7- |
  audiosum | sort | uniq --all-repeated=separate -w $((HASH_LEN + 9))