1 /******************************************************************************
3 audiosum.c -- Prints the hash of an audio file excluding tag sections.
4 (C) Copyright 2001-2008 Octavio Alvarez Piza, alvarezp@alvarezp.ods.org
6 Released under GNU/GPL license. All other rights reserved.
8 ## For program operation, see the showhelp() function. ##
10 Version history before Git:
12 + Code has been restructured to avoid nested if().
13 + Code now uses MD5 instead of CRC32.
14 + Moved to Linux, so development is now focused on Linux. Sorry, Win32 folks.
15 + Added "--help" switch.
16 + 0.1.1: + Now knows how to ignore Lyrics3 sections. Since I
17 don't have MP3 files with Lyrics3v1, this feature
18 is not tested, and may not work as expected.
19 + 0.1.0: + Initial Release. Plain program, knows how to ignore
20 ID3v1.x and ID3v2.x tags. Never released.
22 ******************************************************************************/
34 /* This code will most probably go away. */
/*
 * printd -- debug trace helper.
 *
 * Writes a single line to stderr in the form
 *     <msg>:<code as at least four upper-case hex digits>:<filename>
 *
 *   msg       short description of the condition being reported
 *   filename  name of the file that was being processed
 *   code      numeric status to report
 */
void printd(char *msg, const char *filename, int code)
{
	/* %X takes an unsigned int; convert explicitly so that negative
	 * status codes are printed with well-defined behaviour. */
	fprintf(stderr, "%s:%04X:%s\n", msg, (unsigned int) code, filename);
}
41 #define printd(a,b,c) ((void)0)
/* Hash algorithm used when none is chosen on the command line: main() seeds
 * hash_algorithm from it, and the hash listing flags the matching entry
 * with '*'. */
44 hashid default_algorithm
= MHASH_MD5
;
47 const char *cmdline_name
;
50 // { "crc32", MHASH_CRC32 },
51 // { "crc32b", MHASH_CRC32B },
52 // { "adler32", MHASH_ADLER32 },
53 // { "md2", MHASH_MD2 },
54 // { "md4", MHASH_MD4 },
56 { "sha1", MHASH_SHA1
},
57 { "sha224", MHASH_SHA224
},
58 { "sha256", MHASH_SHA256
},
59 { "sha384", MHASH_SHA384
},
60 { "sha512", MHASH_SHA512
},
61 { "gost", MHASH_GOST
},
62 { "ripemd128", MHASH_RIPEMD128
},
63 { "ripemd160", MHASH_RIPEMD160
},
64 { "ripemd256", MHASH_RIPEMD256
},
65 { "ripemd320", MHASH_RIPEMD320
},
66 { "tiger128", MHASH_TIGER128
},
67 { "tiger160", MHASH_TIGER160
},
68 { "tiger192", MHASH_TIGER192
},
69 { "haval224", MHASH_HAVAL224
},
70 { "haval256", MHASH_HAVAL256
},
71 { "haval192", MHASH_HAVAL192
},
72 { "haval160", MHASH_HAVAL160
},
73 { "haval128", MHASH_HAVAL128
},
74 { "whirlpool", MHASH_WHIRLPOOL
},
75 { "snefru128", MHASH_SNEFRU128
},
76 { "snefru256", MHASH_SNEFRU256
},
79 const algorithms_n
= sizeof(algorithms
) / sizeof(algorithms
[0]);
/*
 * ProcessFileID3v1 -- look for an ID3v1 tag at the tail of the range.
 *
 * Seeks to 128 bytes before *OffsetEnd, reads 3 bytes and compares them
 * with the "TAG" signature.
 *
 *   f            stream to inspect
 *   OffsetStart  not referenced in the lines visible here
 *   OffsetEnd    upper bound of the byte range being examined (main()
 *                initialises it to the file size)
 *
 * NOTE(review): the return statements and the code that runs after a
 * successful match are not visible in this excerpt -- confirm the return
 * codes against the caller before relying on them.
 * NOTE(review): *OffsetEnd is unsigned long, so subtracting 128 wraps around
 * for ranges shorter than 128 bytes; presumably fseek() then fails and the
 * short file is treated as an error rather than as "no tag" -- TODO confirm.
 */
82 ProcessFileID3v1(FILE * f
, unsigned long *OffsetStart
,
83 unsigned long *OffsetEnd
)
85 char TagBuffer
[3] = { 0 };
87 if (fseek(f
, *OffsetEnd
- 128, SEEK_SET
) != 0) {
91 if (fread(&TagBuffer
, 3, 1, f
) == 0) {
95 if (strncmp("TAG", TagBuffer
, 3) != 0) {
96 /* TAG not found. We are done. */
/*
 * ProcessFileID3v2 -- look for an ID3v2 tag at the head of the range.
 *
 * Reads sizeof(struct ID3V2_Header) bytes at *OffsetStart and accepts them
 * as a tag header only when the signature is "ID3", version[0] is below
 * 0xFF and each of the four size bytes is below 0x80. On acceptance the
 * size is decoded and *OffsetStart is advanced by it.
 *
 *   f            stream to inspect
 *   OffsetStart  lower bound of the byte range being examined; moved
 *                forward past the tag when one is found
 *   OffsetEnd    not referenced in the lines visible here
 *
 * NOTE(review): the declarations of id3, id3size and x, the return
 * statements, and any adjustment of id3size between the loop and the final
 * addition are not visible in this excerpt -- confirm before relying on the
 * exact amount added to *OffsetStart.
 * NOTE(review): fread() returns size_t, so the "<= 0" test is equivalent
 * to "== 0".
 */
105 ProcessFileID3v2(FILE * f
, unsigned long *OffsetStart
,
106 unsigned long *OffsetEnd
)
111 struct ID3V2_Header
{
112 unsigned char sign
[3];
113 unsigned char version
[2];
115 unsigned char size
[4];
/* NOTE(review): the result of this fseek() is not checked, unlike the
 * seeks in the other tag scanners of this file. */
118 fseek(f
, *OffsetStart
, SEEK_SET
);
120 if (fread(&id3
, sizeof(struct ID3V2_Header
), 1, f
) <= 0) {
124 if (!((id3
.sign
[0] == 'I') &&
125 (id3
.sign
[1] == 'D') &&
126 (id3
.sign
[2] == '3') &&
127 (id3
.version
[0] < 0xFF) &&
128 (id3
.size
[0] < 0x80) &&
129 (id3
.size
[1] < 0x80) &&
130 (id3
.size
[2] < 0x80) && (id3
.size
[3] < 0x80)))
/* Each size byte carries 7 payload bits (top bit clear, checked above);
 * the loop reassembles them with size[0] as the most significant group.
 * NOTE(review): pow() computes in double -- an integer shift by 7 * x
 * would avoid the floating-point round trip. */
135 for (x
= 0; x
< 4; x
++)
136 id3size
+= pow(128, x
) * (unsigned long) (id3
.size
[3 - x
] & 127);
140 *OffsetStart
+= id3size
;
/*
 * ProcessFileLyrics3v1 -- look for a Lyrics3 v1 block at the tail of the
 * range.
 *
 * Checks whether the 9 bytes before *OffsetEnd are "LYRICSEND". If they
 * are, reads up to 5109 bytes starting 5109 bytes before *OffsetEnd and
 * searches them for "LYRICSBEGIN" with strstr(), then adjusts *OffsetEnd
 * by the position of the match inside the buffer.
 *
 *   f            stream to inspect
 *   OffsetStart  not referenced in the lines visible here
 *   OffsetEnd    upper bound of the byte range being examined
 *
 * NOTE(review): the declaration of L3Buffer and the return statements are
 * not visible in this excerpt.
 * NOTE(review): strstr() stops at the first NUL byte and needs a
 * terminated buffer, but the buffer is filled with raw file data by
 * fread() -- confirm L3Buffer is large enough and NUL-terminated.
 * NOTE(review): the final statement ADDS the match position to *OffsetEnd;
 * unless a line not shown here first moves *OffsetEnd back by 5109, the
 * end offset moves forward instead of back to the start of the lyrics --
 * verify.
 * NOTE(review): fread() returns size_t, so "<= 0" is equivalent to "== 0";
 * subtracting 9 or 5109 from a smaller unsigned *OffsetEnd wraps around.
 */
147 ProcessFileLyrics3v1(FILE * f
, unsigned long *OffsetStart
,
148 unsigned long *OffsetEnd
)
153 if (fseek(f
, *OffsetEnd
- 9, SEEK_SET
) != 0) {
157 if (fread(&L3Buffer
, 9, 1, f
) <= 0 ) {
161 if (strncmp("LYRICSEND", L3Buffer
, 9) != 0) {
162 /* We found no ending LYRICSEND tag. We are done. */
166 if (fseek(f
, *OffsetEnd
- 5109, SEEK_SET
) != 0) {
170 /* Since maximum lyrics length is 5100, we go back 5109 and _search_
171 * from this point on. 5109 == 5100 + strlen("LYRICSEND")
173 * This is the recommended method from http://www.id3.org/Lyrics3
175 if (fread(&L3Buffer
, 1, 5109, f
) <= 0) {
179 char *pos
=strstr(L3Buffer
, "LYRICSBEGIN");
182 /* We found LYRICSEND but not LYRICSBEGIN. Treat as unexpected. */
186 /* We found a valid LYRICSBEGIN signature, so we have found a complete
190 *OffsetEnd
+= pos
- (char *)&L3Buffer
;
/*
 * ProcessFileLyrics3v2 -- look for a Lyrics3 v2 block at the tail of the
 * range.
 *
 * Steps visible here:
 *   1. Check that the 9 bytes before *OffsetEnd are "LYRICS200".
 *   2. Seek to 15 bytes before *OffsetEnd, read the size field with
 *      fgets() (limit 7, i.e. at most 6 characters) and parse it with
 *      sscanf("%lu").
 *   3. Look for "LYRICSBEGIN" at *OffsetEnd - size - 15; on a match move
 *      *OffsetEnd back by size + 15.
 *   4. Otherwise look for "LYRICSBEGIN" at *OffsetEnd - size, covering
 *      files whose size field wrongly included the trailer.
 *
 *   f            stream to inspect
 *   OffsetStart  not referenced in the lines visible here
 *   OffsetEnd    upper bound of the byte range being examined; moved back
 *                past the lyrics block when one is found
 *
 * NOTE(review): the declarations of L3Buffer and r, the return statements
 * and the adjustment made in step 4 are not visible in this excerpt.
 * NOTE(review): fgets() returns a pointer; comparing it with "<= 0" is not
 * a meaningful test -- it should be compared against NULL.
 * NOTE(review): the sscanf() result is not checked, so size stays 0 when
 * the field holds no digits; a size larger than *OffsetEnd makes the
 * unsigned subtractions wrap around.
 */
195 ProcessFileLyrics3v2(FILE * f
, unsigned long *OffsetStart
,
196 unsigned long *OffsetEnd
)
200 unsigned long size
= 0;
202 if (fseek(f
, *OffsetEnd
- 9, SEEK_SET
) != 0) {
206 if (fread(&L3Buffer
, 9, 1, f
) <= 0) {
210 if (strncmp("LYRICS200", L3Buffer
, 9) != 0) {
211 /* We found no ending "LYRICS200" tag. We are done. */
215 if (fseek(f
, *OffsetEnd
- 15, SEEK_SET
) != 0) {
219 if (fgets((char *) &L3Buffer
, 7, f
) <= 0) {
224 sscanf(L3Buffer
, "%lu", &size
);
227 if (fseek(f
, *OffsetEnd
- size
- 15, SEEK_SET
) != 0) {
231 if ((r
= fread(&L3Buffer
, 1, 11, f
)) <= 0) {
235 if (!strncmp("LYRICSBEGIN", L3Buffer
, 11)) {
236 /* LYRICSBEGIN found where it should be. We are done. */
237 *OffsetEnd
-= size
+ 15;
241 /* LYRICSBEGIN not found where supposed to. Try as if the size mark
242 * included the ending signature.
244 if (fseek(f
, *OffsetEnd
- size
, SEEK_SET
) != 0) {
248 if (fread(&L3Buffer
, 11, 1, f
) <= 0) {
252 if (!strncmp("LYRICSBEGIN", L3Buffer
, 11)) {
253 /* Yes, the size mark was invalid: it included the last ending
256 * From the Lyrics3v2 spec: The size value includes the "LYRICSBEGIN"
257 * string, but does not include the 6 character size descriptor and
258 * the trailing "LYRICS200" string.
264 /* So the size mark is invalid. We didn't find the LYRICSBEGIN signature.
265 * Treat as an unexpected error.
/*
 * filesize -- return the total length, in bytes, of an open stream.
 *
 * The current read/write position is saved before seeking to the end and
 * restored afterwards, so the caller's position is left untouched.
 *
 *   stream  an open, seekable stream
 *
 * Returns the length in bytes, or 0 when the position cannot be queried or
 * changed (for example on a non-seekable stream). Without these checks a
 * failing ftell() would yield -1L, which converts to a huge unsigned size.
 */
unsigned long filesize(FILE * stream)
{
	long curpos;
	long length;

	curpos = ftell(stream);
	if (curpos < 0)
		return 0;

	if (fseek(stream, 0L, SEEK_END) != 0)
		return 0;

	length = ftell(stream);

	/* Put the position back even when the second ftell() failed. */
	if (fseek(stream, curpos, SEEK_SET) != 0 || length < 0)
		return 0;

	return (unsigned long) length;
}
282 const char *help
= "\r\n\
283 usage: audiosum [options]\r\n\
286 -a algo Choose a different algorithm from MD5 for hashing.\r\n\
287 -l Print the list of supported hashes.\r\n\
288 -b n (%) Brief: Only compute n percent of the size of each file.\r\n\
289 If n == 0 or ommited, only print the file size.\r\n\
290 -h Shows this help.\r\n\
292 Program operation:\r\n\
293 + It reads a sequence of file names from stdin (they should be MP3 files)\r\n\
294 sends to stdout the following information about them:\r\n\
295 : File size, in hex format (8 chars).\r\n\
296 : Hash of the file without ID3 or Lyrics tags, in hex format.\r\n\
297 : What signatures were found.\r\n\
298 : Complete file name.\r\n\
300 It tries to ignore non-audio parts. Currently ignored sections are:\r\n\
303 : Lyrics3 v1 (not tested)\r\n\
307 : Audiosum is designed to be wrapped by sort and uniq, like this:\r\n\
308 (you can copy and paste):\r\n\
310 find $HOME /mnt/music -iname \"*.mp3\" | \\\r\n\
311 audiosum -b | sort | uniq -D -w 8 | cut -d ' ' -f 6- | \\\r\n\
312 audiosum -b 2 | sort | uniq -D -w 41 | cut -d ' ' -f 7- | \\\r\n\
313 audiosum | sort | uniq --all-repeated=separate -w 41 > result.txt\r\n\
324 printf("Supported hashes:\n\n");
325 printf(" %-10s %4s\n", "Hash", "Bits");
326 for (i
= 0; i
< algorithms_n
; i
++) {
327 printf(" : %c%-10s %4d\n",
328 algorithms
[i
].libmhash_id
== default_algorithm
? '*' : ' ',
329 algorithms
[i
].cmdline_name
,
330 mhash_get_block_size(algorithms
[i
].libmhash_id
)*8);
333 printf("\n * Default algorithm if -a is ommited\n\n");
336 int main(int arg_n
, char *arg
[])
342 unsigned char buffer
[8192];
345 hashid hash_algorithm
= default_algorithm
;
354 while ((c
= getopt(arg_n
, arg
, ":lb:a:h")) != -1) {
357 for (i
= 0; i
< algorithms_n
; i
++) {
358 if (strcmp(optarg
, algorithms
[i
].cmdline_name
) == 0) {
359 hash_algorithm
= algorithms
[i
].libmhash_id
;
363 if (i
== algorithms_n
) {
364 fprintf(stderr
, "audiosum: unknown algorithm name: %s\n",
385 fprintf(stderr
, "audiosum: warning: unspecified hash algorithm."
386 " Using the default.\n");
391 fprintf(stderr
, "audiosum: unrecognized option: -%c\n", optopt
);
396 if (errflg
|| help
) {
404 while (fgets(filename
, 1024, stdin
)) {
405 if (filename
[strlen(filename
) - 1] == '\n') {
406 filename
[strlen(filename
) - 1] = '\0';
409 f
= fopen(filename
, "rb");
411 fprintf(stderr
, "ERROR:Opening file: %s\n", filename
);
415 unsigned long OffsetStart
= 0;
416 unsigned long OffsetEnd
= filesize(f
);
419 if (!(r
= ProcessFileID3v1(f
, &OffsetStart
, &OffsetEnd
)) < 0) {
420 printd("ERROR:Unexp, Analyzing ID3v1", filename
, r
);
430 if (!(r
= ProcessFileID3v2(f
, &OffsetStart
, &OffsetEnd
)) < 0) {
431 printd("ERROR:Unexp, Analyzing ID3v2", filename
, r
);
441 if (!(r
= ProcessFileLyrics3v1(f
, &OffsetStart
, &OffsetEnd
)) < 0) {
442 printd("ERROR:Unexp, Analyzing Lyrics3v1", filename
, r
);
452 if (!(r
= ProcessFileLyrics3v2(f
, &OffsetStart
, &OffsetEnd
)) < 0) {
453 printd("ERROR:Unexp, Analyzing Lyrics3v2", filename
, r
);
464 fseek(f
, OffsetStart
, SEEK_SET
);
466 printf("%08lx ", OffsetEnd
- OffsetStart
);
469 td
= mhash_init(hash_algorithm
);
471 if (td
== MHASH_FAILED
)
474 int howmany
= ((OffsetEnd
- OffsetStart
+ 1)*brief
)/100;
477 fread(&buffer
, 1, howmany
> 8192 ? 8192 : howmany
,
479 mhash(td
, &buffer
, r
);
483 hash
= mhash_end(td
);
485 for (i
= 0; i
< mhash_get_block_size(hash_algorithm
); i
++) {
486 printf("%.2x", hash
[i
]);
492 printf("[%s] [%s] [%s] [%s] %s\n", hadi3v1
, hadi3v2
, hadl3v1
, hadl3v2
, filename
);