Brief mode enhanced: allows option to process first x% of each file.
[audiosum.git] / src / audiosum.c
blob199cec89e280aee6d0c38c86a763e082374f55e7
1 /******************************************************************************
3 audiosum.c -- Prints the hash of an audio file excluding tag sections.
4 (C) Copyright 2001-2008 Octavio Alvarez Piza, alvarezp@alvarezp.ods.org
6 Released under GNU/GPL license. All other rights reserved.
8 ## For program operation, see the showhelp() function. ##
10 Version history before Git:
12 + Code has been restructured to avoid nested if().
13 + Code now uses MD5 instead of CRC32.
14 + Moved to Linux, so development is now focused on Linux. Sorry, Win32 folks.
15 + Added "--help" switch.
16 + 0.1.1: + Now knows how to ignore Lyrics3 sections. Since I
17 don't have MP3 files with Lyrics3v1, this feature
18 is not tested, and may not work as expected.
19 + 0.1.0: + Initial Release. Plain program, knows how to ignore
20 ID3v1.x and ID3v2.x tags. Never released.
22 ******************************************************************************/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <mhash.h>

#include "config.h"
32 /* #define DEBUG */
34 /* This code will most probably go away. */
#if defined(DEBUG)
/*
 * Debug-only trace: writes "<msg>:<code as 4 hex digits>:<filename>" to
 * stderr.
 */
void printd(char *msg, const char *filename, int code)
{
	fprintf(stderr, "%s:%04X:%s\n", msg, code, filename);
}
#else				/* !DEBUG */
/* Without DEBUG every printd() call expands to a no-op expression. */
#define printd(a,b,c) ((void)0)
#endif				/* DEBUG */
44 hashid default_algorithm = MHASH_MD5;
46 struct algorithms {
47 const char *cmdline_name;
48 hashid libmhash_id;
49 } algorithms[] = {
50 // { "crc32", MHASH_CRC32 },
51 // { "crc32b", MHASH_CRC32B },
52 // { "adler32", MHASH_ADLER32 },
53 // { "md2", MHASH_MD2 },
54 // { "md4", MHASH_MD4 },
55 { "md5", MHASH_MD5 },
56 { "sha1", MHASH_SHA1 },
57 { "sha224", MHASH_SHA224 },
58 { "sha256", MHASH_SHA256 },
59 { "sha384", MHASH_SHA384 },
60 { "sha512", MHASH_SHA512 },
61 { "gost", MHASH_GOST },
62 { "ripemd128", MHASH_RIPEMD128 },
63 { "ripemd160", MHASH_RIPEMD160 },
64 { "ripemd256", MHASH_RIPEMD256 },
65 { "ripemd320", MHASH_RIPEMD320 },
66 { "tiger128", MHASH_TIGER128 },
67 { "tiger160", MHASH_TIGER160 },
68 { "tiger192", MHASH_TIGER192 },
69 { "haval224", MHASH_HAVAL224 },
70 { "haval256", MHASH_HAVAL256 },
71 { "haval192", MHASH_HAVAL192 },
72 { "haval160", MHASH_HAVAL160 },
73 { "haval128", MHASH_HAVAL128 },
74 { "whirlpool", MHASH_WHIRLPOOL },
75 { "snefru128", MHASH_SNEFRU128 },
76 { "snefru256", MHASH_SNEFRU256 },
79 const algorithms_n = sizeof(algorithms) / sizeof(algorithms[0]);
/*
 * Looks for an ID3v1 tag ("TAG" signature, 128 bytes) ending at *OffsetEnd.
 *
 * f           - stream to inspect; its position is changed.
 * OffsetStart - start of the region still considered audio (read only).
 * OffsetEnd   - end of that region; moved back by 128 when a tag is found.
 *
 * Returns 1 if a tag was found, 0 if there is none, -1 on I/O failure.
 */
int
ProcessFileID3v1(FILE * f, unsigned long *OffsetStart,
		 unsigned long *OffsetEnd)
{
	enum { ID3V1_TAG_LEN = 128 };
	char TagBuffer[3] = { 0 };

	/* A region shorter than one tag cannot hold it. Checking first also
	 * keeps the unsigned subtraction below from wrapping around. */
	if (*OffsetEnd < *OffsetStart
	    || *OffsetEnd - *OffsetStart < ID3V1_TAG_LEN) {
		return 0;
	}

	if (fseek(f, (long) (*OffsetEnd - ID3V1_TAG_LEN), SEEK_SET) != 0) {
		return -1;
	}

	if (fread(TagBuffer, sizeof TagBuffer, 1, f) != 1) {
		return -1;
	}

	if (memcmp("TAG", TagBuffer, sizeof TagBuffer) != 0) {
		/* TAG not found. We are done. */
		return 0;
	}

	*OffsetEnd -= ID3V1_TAG_LEN;
	return 1;
}
/*
 * Looks for an ID3v2 tag header at *OffsetStart.
 *
 * f           - stream to inspect; its position is changed.
 * OffsetStart - start of the region still considered audio; advanced past
 *               the tag when one is found.
 * OffsetEnd   - not used by this detector.
 *
 * Returns 1 if a tag was found, 0 if there is none (including a file too
 * short to hold the 10-byte header), -1 on I/O failure.
 */
int
ProcessFileID3v2(FILE * f, unsigned long *OffsetStart,
		 unsigned long *OffsetEnd)
{
	/* Header layout: sign[3], version[2], flags, size[4]. */
	enum { SIGN = 0, VERSION = 3, FLAGS = 5, SIZE = 6, HEADER_LEN = 10 };
	unsigned char header[HEADER_LEN];
	unsigned long id3size;

	(void) OffsetEnd;

	if (fseek(f, (long) *OffsetStart, SEEK_SET) != 0) {
		return -1;
	}

	if (fread(header, sizeof header, 1, f) != 1) {
		/* A short read without a stream error just means there are
		 * fewer than 10 bytes left, so no header can be present. */
		return ferror(f) ? -1 : 0;
	}

	if (memcmp(header + SIGN, "ID3", 3) != 0
	    || header[VERSION] == 0xFF
	    || ((header[SIZE] | header[SIZE + 1] | header[SIZE + 2]
		 | header[SIZE + 3]) & 0x80) != 0) {
		return 0;
	}

	/* The size is four bytes carrying 7 bits each, most significant
	 * first. Integer shifts replace the former pow() arithmetic. */
	id3size = ((unsigned long) header[SIZE] << 21)
	    | ((unsigned long) header[SIZE + 1] << 14)
	    | ((unsigned long) header[SIZE + 2] << 7)
	    | (unsigned long) header[SIZE + 3];

	/* The stored size does not include the header itself. */
	id3size += HEADER_LEN;

	/* Flag bit 0x10 set: ten more bytes are skipped. */
	if (header[FLAGS] & 0x10) {
		id3size += 10;
	}

	*OffsetStart += id3size;

	return 1;
}
/*
 * Looks for a Lyrics3 v1 block ("LYRICSBEGIN" ... "LYRICSEND") ending at
 * *OffsetEnd.
 *
 * f           - stream to inspect; its position is changed.
 * OffsetStart - start of the region still considered audio (read only).
 * OffsetEnd   - end of that region; moved back to the start of
 *               "LYRICSBEGIN" when a complete block is found.
 *
 * Returns 1 if a complete block was found, 0 if there is no "LYRICSEND"
 * signature, -1 on I/O failure or when "LYRICSEND" has no matching
 * "LYRICSBEGIN" inside the search window.
 */
int
ProcessFileLyrics3v1(FILE * f, unsigned long *OffsetStart,
		     unsigned long *OffsetEnd)
{
	/* Since maximum lyrics length is 5100, we go back 5109 and _search_
	 * from this point on. 5109 == 5100 + strlen("LYRICSEND")
	 *
	 * This is the recommended method from http://www.id3.org/Lyrics3
	 */
	enum { END_SIG_LEN = 9, BEGIN_SIG_LEN = 11, WINDOW_MAX = 5109 };
	char L3Buffer[WINDOW_MAX];
	unsigned long region;
	size_t window;
	size_t i;

	/* A region shorter than the end signature cannot hold a block;
	 * checking first keeps the unsigned subtractions from wrapping. */
	if (*OffsetEnd < *OffsetStart) {
		return 0;
	}
	region = *OffsetEnd - *OffsetStart;
	if (region < END_SIG_LEN) {
		return 0;
	}

	if (fseek(f, (long) (*OffsetEnd - END_SIG_LEN), SEEK_SET) != 0) {
		return -1;
	}

	if (fread(L3Buffer, END_SIG_LEN, 1, f) != 1) {
		return -1;
	}

	if (memcmp("LYRICSEND", L3Buffer, END_SIG_LEN) != 0) {
		/* We found no ending LYRICSEND tag. We are done. */
		return 0;
	}

	/* Never look further back than the region itself, so files smaller
	 * than the full window are still handled. */
	window = region < WINDOW_MAX ? (size_t) region : (size_t) WINDOW_MAX;

	if (fseek(f, (long) (*OffsetEnd - window), SEEK_SET) != 0) {
		return -1;
	}

	if (fread(L3Buffer, 1, window, f) != window) {
		return -1;
	}

	/* Byte-wise search: the window is raw file data, is not
	 * NUL-terminated and may contain NUL bytes, so strstr() is unsafe. */
	for (i = 0; i + BEGIN_SIG_LEN <= window; i++) {
		if (memcmp(L3Buffer + i, "LYRICSBEGIN", BEGIN_SIG_LEN) == 0) {
			/* We found a valid LYRICSBEGIN signature, so we have
			 * found a complete record. */
			*OffsetEnd = *OffsetEnd - window + i;
			return 1;
		}
	}

	/* We found LYRICSEND but not LYRICSBEGIN. Treat as unexpected. */
	return -1;
}
/*
 * Looks for a Lyrics3 v2 block ending at *OffsetEnd. The block ends with a
 * 6-character decimal size field followed by "LYRICS200" and starts with
 * "LYRICSBEGIN".
 *
 * f           - stream to inspect; its position is changed.
 * OffsetStart - start of the region still considered audio (read only).
 * OffsetEnd   - end of that region; moved back to the start of
 *               "LYRICSBEGIN" when a block is found.
 *
 * Returns 1 if a block with a correct size field was found, 2 if the block
 * was found by treating the size field as also counting the 15 trailing
 * bytes, 0 if there is no "LYRICS200" signature, -1 on I/O failure or when
 * "LYRICSBEGIN" is not where the size field says it should be.
 */
int
ProcessFileLyrics3v2(FILE * f, unsigned long *OffsetStart,
		     unsigned long *OffsetEnd)
{
	enum {
		END_SIG_LEN = 9,
		SIZE_FIELD_LEN = 6,
		BEGIN_SIG_LEN = 11,
		TRAILER_LEN = END_SIG_LEN + SIZE_FIELD_LEN
	};
	char L3Buffer[BEGIN_SIG_LEN + 1];
	unsigned long region;
	unsigned long size = 0;

	/* A region shorter than the end signature cannot hold a block;
	 * checking first keeps the unsigned subtractions from wrapping. */
	if (*OffsetEnd < *OffsetStart) {
		return 0;
	}
	region = *OffsetEnd - *OffsetStart;
	if (region < END_SIG_LEN) {
		return 0;
	}

	if (fseek(f, (long) (*OffsetEnd - END_SIG_LEN), SEEK_SET) != 0) {
		return -1;
	}

	if (fread(L3Buffer, END_SIG_LEN, 1, f) != 1) {
		return -1;
	}

	if (memcmp("LYRICS200", L3Buffer, END_SIG_LEN) != 0) {
		/* We found no ending "LYRICS200" tag. We are done. */
		return 0;
	}

	/* The signature is present but there is no room for a size field. */
	if (region < TRAILER_LEN) {
		return -1;
	}

	if (fseek(f, (long) (*OffsetEnd - TRAILER_LEN), SEEK_SET) != 0) {
		return -1;
	}

	/* fread() instead of fgets(): the field is a fixed 6 bytes and must
	 * not be cut short by a newline byte. */
	if (fread(L3Buffer, SIZE_FIELD_LEN, 1, f) != 1) {
		return -1;
	}

	L3Buffer[SIZE_FIELD_LEN] = '\0';
	if (sscanf(L3Buffer, "%lu", &size) != 1) {
		return -1;
	}

	/* From the Lyrics3v2 spec: The size value includes the "LYRICSBEGIN"
	 * string, but does not include the 6 character size descriptor and
	 * the trailing "LYRICS200" string.
	 *
	 * Only try that reading when the block fits inside the region. */
	if (size <= region - TRAILER_LEN) {
		if (fseek(f, (long) (*OffsetEnd - size - TRAILER_LEN),
			  SEEK_SET) != 0) {
			return -1;
		}

		if (fread(L3Buffer, BEGIN_SIG_LEN, 1, f) != 1) {
			return -1;
		}

		if (memcmp("LYRICSBEGIN", L3Buffer, BEGIN_SIG_LEN) == 0) {
			/* LYRICSBEGIN found where it should be. We are done. */
			*OffsetEnd -= size + TRAILER_LEN;
			return 1;
		}
	}

	/* LYRICSBEGIN not found where supposed to. Try as if the size mark
	 * included the ending signature. A size that cannot hold the begin
	 * signature, or that exceeds the region, cannot match. */
	if (size < BEGIN_SIG_LEN || size > region) {
		return -1;
	}

	if (fseek(f, (long) (*OffsetEnd - size), SEEK_SET) != 0) {
		return -1;
	}

	if (fread(L3Buffer, BEGIN_SIG_LEN, 1, f) != 1) {
		return -1;
	}

	if (memcmp("LYRICSBEGIN", L3Buffer, BEGIN_SIG_LEN) == 0) {
		/* Yes, the size mark was invalid: it included the last ending
		 * signature. */
		*OffsetEnd -= size;
		return 2;
	}

	/* So the size mark is invalid. We didn't find the LYRICSBEGIN
	 * signature. Treat as an unexpected error. */
	return -1;
}
/*
 * Returns the size of stream in bytes and leaves the read position where it
 * was. Returns 0 if the position cannot be queried or changed, instead of
 * letting a -1 from ftell() turn into a huge unsigned value.
 */
unsigned long filesize(FILE * stream)
{
	long curpos, length;

	curpos = ftell(stream);
	if (curpos < 0) {
		return 0;
	}

	if (fseek(stream, 0L, SEEK_END) != 0) {
		return 0;
	}
	length = ftell(stream);

	/* Restore the caller's position even when ftell() failed. */
	if (fseek(stream, curpos, SEEK_SET) != 0 || length < 0) {
		return 0;
	}

	return (unsigned long) length;
}
/*
 * Prints the usage text to stdout: the command-line options and a summary
 * of what the program reads and writes.
 */
void showhelp(void)
{
	/* Adjacent literals instead of backslash continuations, so source
	 * indentation can never leak into the printed text. */
	static const char help[] =
	    "\r\n"
	    "usage: audiosum [options]\r\n"
	    "\r\n"
	    "Options:\r\n"
	    "  -a algo     Choose a different algorithm from MD5 for hashing.\r\n"
	    "  -l          Print the list of supported hashes.\r\n"
	    "  -b n (%)    Brief: Only compute n percent of the size of each file.\r\n"
	    "              If n == 0 or omitted, only print the file size.\r\n"
	    "  -h          Shows this help.\r\n"
	    "\r\n"
	    "Program operation:\r\n"
	    "  + It reads a sequence of file names from stdin (they should be MP3 files)\r\n"
	    "    and sends to stdout the following information about them:\r\n"
	    "      : File size, in hex format (8 chars).\r\n"
	    "      : Hash of the file without ID3 or Lyrics tags, in hex format.\r\n"
	    "      : What signatures were found.\r\n"
	    "      : Complete file name.\r\n"
	    "\r\n"
	    "    It tries to ignore non-audio parts. Currently ignored sections are:\r\n"
	    "      : ID3v1.x\r\n"
	    "      : ID3v2.x\r\n"
	    "      : Lyrics3 v1 (not tested)\r\n"
	    "      : Lyrics3 v2.00\r\n"
	    "\r\n";

	fputs(help, stdout);
}
311 void showhashes()
313 hashid i = 0;
315 printf("Supported hashes:\n\n");
316 printf(" %-10s %4s\n", "Hash", "Bits");
317 for (i = 0; i < algorithms_n; i++) {
318 printf(" : %c%-10s %4d\n",
319 algorithms[i].libmhash_id == default_algorithm? '*' : ' ',
320 algorithms[i].cmdline_name,
321 mhash_get_block_size(algorithms[i].libmhash_id)*8);
324 printf("\n * Default algorithm if -a is ommited\n\n");
327 int main(int arg_n, char *arg[])
329 char filename[1024];
331 int i;
332 MHASH td;
333 unsigned char buffer[8192];
334 unsigned char *hash;
336 hashid hash_algorithm = default_algorithm;
337 int brief=100;
338 int help=0;
339 char c;
340 int errflg=0;
341 unsigned long crc;
342 FILE *f;
343 int r;
345 while ((c = getopt(arg_n, arg, ":lb:a:h")) != -1) {
346 switch(c) {
347 case 'a':
348 for (i = 0; i < algorithms_n; i++) {
349 if (strcmp(optarg, algorithms[i].cmdline_name) == 0) {
350 hash_algorithm = algorithms[i].libmhash_id;
351 break;
354 if (i == algorithms_n) {
355 fprintf(stderr, "audiosum: unknown algorithm name: %s\n",
356 optarg);
357 return 1;
359 break;
360 case 'b':
361 brief=atoi(optarg);
362 break;
363 case 'h':
364 help++;
365 break;
366 case 'l':
367 showhashes();
368 return 0;
369 break;
370 case ':':
371 switch (optopt) {
372 case 'b':
373 brief = 0;
374 break;
375 case 'a':
376 fprintf(stderr, "audiosum: warning: unspecified hash algorithm."
377 " Using the default.\n");
378 break;
380 break;
381 case '?':
382 fprintf(stderr, "audiosum: unrecognized option: -%c\n", optopt);
383 exit(EXIT_FAILURE);
384 break;
387 if (errflg || help) {
388 showhelp();
389 if (help)
390 exit(EXIT_SUCCESS);
391 else
392 exit(EXIT_FAILURE);
395 while (fgets(filename, 1024, stdin)) {
396 if (filename[strlen(filename) - 1] == '\n') {
397 filename[strlen(filename) - 1] = '\0';
400 f = fopen(filename, "rb");
401 if (!f) {
402 fprintf(stderr, "ERROR:Opening file: %s\n", filename);
403 continue;
406 unsigned long OffsetStart = 0;
407 unsigned long OffsetEnd = filesize(f);
409 char *hadi3v1;
410 if (!(r = ProcessFileID3v1(f, &OffsetStart, &OffsetEnd)) < 0) {
411 printd("ERROR:Unexp, Analyzing ID3v1", filename, r);
412 fclose(f);
413 continue;
415 if (r == 0)
416 hadi3v1 = "----";
417 if (r == 1)
418 hadi3v1 = "I3v1";
420 char *hadi3v2;
421 if (!(r = ProcessFileID3v2(f, &OffsetStart, &OffsetEnd)) < 0) {
422 printd("ERROR:Unexp, Analyzing ID3v2", filename, r);
423 fclose(f);
424 continue;
426 if (r == 0)
427 hadi3v2 = "----";
428 if (r == 1)
429 hadi3v2 = "I3v2";
431 char *hadl3v1;
432 if (!(r = ProcessFileLyrics3v1(f, &OffsetStart, &OffsetEnd)) < 0) {
433 printd("ERROR:Unexp, Analyzing Lyrics3v1", filename, r);
434 fclose(f);
435 continue;
437 if (r == 0)
438 hadl3v1 = "----";
439 if (r == 1)
440 hadl3v1 = "L3v1";
442 char *hadl3v2;
443 if (!(r = ProcessFileLyrics3v2(f, &OffsetStart, &OffsetEnd)) < 0) {
444 printd("ERROR:Unexp, Analyzing Lyrics3v2", filename, r);
445 fclose(f);
446 continue;
448 if (r == 0)
449 hadl3v2 = "----";
450 if (r == 1)
451 hadl3v2 = "L3v2";
452 if (r == 2)
453 hadl3v2 = "l3v2";
455 fseek(f, OffsetStart, SEEK_SET);
457 printf("%08lx ", OffsetEnd - OffsetStart);
459 if (brief > 0) {
460 td = mhash_init(hash_algorithm);
462 if (td == MHASH_FAILED)
463 exit(1);
465 int howmany = ((OffsetEnd - OffsetStart + 1)*brief)/100;
466 while (howmany > 0
467 && (r =
468 fread(&buffer, 1, howmany > 8192 ? 8192 : howmany,
469 f)) > 0) {
470 mhash(td, &buffer, r);
471 howmany -= 8192;
474 hash = mhash_end(td);
476 for (i = 0; i < mhash_get_block_size(hash_algorithm); i++) {
477 printf("%.2x", hash[i]);
479 free(hash);
482 printf(" [%s] [%s] [%s] [%s] %s\n", hadi3v1, hadi3v2, hadl3v1, hadl3v2, filename);
484 fclose (f);
488 return 0;