Fix extra space on brief output.
[audiosum.git] / src / audiosum.c
blobbce624e0f49356662784bd3d2f2395f3376d172e
1 /******************************************************************************
3 audiosum.c -- Prints the hash of an audio file excluding tag sections.
4 (C) Copyright 2001-2008 Octavio Alvarez Piza, alvarezp@alvarezp.ods.org
6 Released under GNU/GPL license. All other rights reserved.
8 ## For program operation, see the showhelp() function. ##
10 Version history before Git:
12 + Code has been restructured to avoid nested if().
13 + Code now uses MD5 instead of CRC32.
14 + Moved to Linux, so development is now focused on Linux. Sorry, Win32 folks.
15 + Added "--help" switch.
16 + 0.1.1: + Now knows how to ignore Lyrics3 sections. Since I
17 don't have MP3 files with Lyrics3v1, this feature
18 is not tested, and may not work as expected.
19 + 0.1.0: + Initial Release. Plain program, knows how to ignore
20 ID3v1.x and ID3v2.x tags. Never released.
22 ******************************************************************************/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <mhash.h>

#include "config.h"
32 /* #define DEBUG */
/*
 * printd -- debug trace helper.  This code will most probably go away.
 *
 * When DEBUG is defined, writes "<msg>:<code as 4+ hex digits>:<filename>"
 * to stderr.  Otherwise every call expands to a no-op expression, so the
 * arguments are not evaluated.
 *
 *   msg       short description of what went wrong
 *   filename  file being processed when the message was produced
 *   code      numeric status to report (printed in hexadecimal)
 */
#ifdef DEBUG
void printd(const char *msg, const char *filename, int code)
{
	/* %X requires an unsigned int argument; convert explicitly. */
	fprintf(stderr, "%s:%04X:%s\n", msg, (unsigned int) code, filename);
}
#else /* !DEBUG */
#define printd(a,b,c) ((void)0)
#endif /* DEBUG */
44 hashid default_algorithm = MHASH_MD5;
46 struct algorithms {
47 const char *cmdline_name;
48 hashid libmhash_id;
49 } algorithms[] = {
50 // { "crc32", MHASH_CRC32 },
51 // { "crc32b", MHASH_CRC32B },
52 // { "adler32", MHASH_ADLER32 },
53 // { "md2", MHASH_MD2 },
54 // { "md4", MHASH_MD4 },
55 { "md5", MHASH_MD5 },
56 { "sha1", MHASH_SHA1 },
57 { "sha224", MHASH_SHA224 },
58 { "sha256", MHASH_SHA256 },
59 { "sha384", MHASH_SHA384 },
60 { "sha512", MHASH_SHA512 },
61 { "gost", MHASH_GOST },
62 { "ripemd128", MHASH_RIPEMD128 },
63 { "ripemd160", MHASH_RIPEMD160 },
64 { "ripemd256", MHASH_RIPEMD256 },
65 { "ripemd320", MHASH_RIPEMD320 },
66 { "tiger128", MHASH_TIGER128 },
67 { "tiger160", MHASH_TIGER160 },
68 { "tiger192", MHASH_TIGER192 },
69 { "haval224", MHASH_HAVAL224 },
70 { "haval256", MHASH_HAVAL256 },
71 { "haval192", MHASH_HAVAL192 },
72 { "haval160", MHASH_HAVAL160 },
73 { "haval128", MHASH_HAVAL128 },
74 { "whirlpool", MHASH_WHIRLPOOL },
75 { "snefru128", MHASH_SNEFRU128 },
76 { "snefru256", MHASH_SNEFRU256 },
79 const algorithms_n = sizeof(algorithms) / sizeof(algorithms[0]);
/*
 * ProcessFileID3v1 -- detect an ID3v1 tag at the end of the audio region.
 *
 * Looks for the 3-byte "TAG" signature 128 bytes before *OffsetEnd.
 *
 *   f            stream to inspect; its file position is changed
 *   OffsetStart  not used here; kept so that every ProcessFile*() function
 *                has the same signature
 *   OffsetEnd    in: offset just past the region still treated as audio
 *                out: lowered by 128 when a tag is found
 *
 * Returns 1 when a tag was found, 0 when there is none, -1 when seeking or
 * reading failed.
 */
int
ProcessFileID3v1(FILE * f, unsigned long *OffsetStart,
		 unsigned long *OffsetEnd)
{
	enum { ID3V1_TAG_SIZE = 128, ID3V1_SIGN_SIZE = 3 };
	unsigned char sign[ID3V1_SIGN_SIZE] = { 0 };

	(void) OffsetStart;

	/* A region shorter than a whole tag cannot hold one.  Checking first
	 * also avoids the unsigned wrap-around of *OffsetEnd - 128. */
	if (*OffsetEnd < ID3V1_TAG_SIZE) {
		return 0;
	}

	if (fseek(f, (long) (*OffsetEnd - ID3V1_TAG_SIZE), SEEK_SET) != 0) {
		return -1;
	}

	if (fread(sign, sizeof sign, 1, f) != 1) {
		return -1;
	}

	if (memcmp(sign, "TAG", sizeof sign) != 0) {
		/* TAG not found. We are done. */
		return 0;
	}

	*OffsetEnd -= ID3V1_TAG_SIZE;
	return 1;
}
/*
 * ProcessFileID3v2 -- detect an ID3v2 tag at the start of the audio region.
 *
 * Reads the 10-byte header at *OffsetStart: "ID3", two version bytes, one
 * flags byte and four size bytes carrying 7 bits each (most significant
 * first).
 *
 *   f            stream to inspect; its file position is changed
 *   OffsetStart  in: offset where the audio region currently starts
 *                out: advanced past the tag when one is found
 *   OffsetEnd    end of the audio region; only used to tell whether a
 *                header can fit at all
 *
 * Returns 1 when a tag was found, 0 when there is none, -1 when seeking or
 * reading failed.
 */
int
ProcessFileID3v2(FILE * f, unsigned long *OffsetStart,
		 unsigned long *OffsetEnd)
{
	enum {
		ID3V2_HEADER_SIZE = 10,
		ID3V2_FOOTER_SIZE = 10,
		ID3V2_FLAG_FOOTER = 0x10,
		ID3V2_SIZE_FIRST = 6	/* index of the first size byte */
	};
	unsigned char hdr[ID3V2_HEADER_SIZE];
	unsigned long tag_size = 0;
	int i;

	/* A region too short for a header has no tag; this is not an error. */
	if (*OffsetEnd < *OffsetStart
	    || *OffsetEnd - *OffsetStart < ID3V2_HEADER_SIZE) {
		return 0;
	}

	if (fseek(f, (long) *OffsetStart, SEEK_SET) != 0) {
		return -1;
	}

	if (fread(hdr, sizeof hdr, 1, f) != 1) {
		return -1;
	}

	/* Signature, then the major version byte which must not be 0xFF. */
	if (memcmp(hdr, "ID3", 3) != 0 || hdr[3] == 0xFF) {
		return 0;
	}

	/* Every size byte must have its top bit clear. */
	for (i = ID3V2_SIZE_FIRST; i < ID3V2_HEADER_SIZE; i++) {
		if (hdr[i] >= 0x80) {
			return 0;
		}
	}

	/* Assemble the 28-bit size with integer arithmetic (7 bits per
	 * byte) rather than floating-point pow(). */
	for (i = ID3V2_SIZE_FIRST; i < ID3V2_HEADER_SIZE; i++) {
		tag_size = (tag_size << 7) | hdr[i];
	}

	/* The stored size excludes the header itself... */
	tag_size += ID3V2_HEADER_SIZE;
	/* ...and flag bit 0x10 adds another 10 bytes (the ID3v2.4 footer). */
	if (hdr[5] & ID3V2_FLAG_FOOTER) {
		tag_size += ID3V2_FOOTER_SIZE;
	}

	*OffsetStart += tag_size;
	return 1;
}
/*
 * ProcessFileLyrics3v1 -- detect a Lyrics3 v1 block at the end of the audio
 * region.
 *
 * The block is recognised by "LYRICSEND" immediately before *OffsetEnd.
 * Its start is then located by searching for "LYRICSBEGIN" in the last
 * 5109 bytes of the region (or in the whole region when it is shorter).
 *
 *   f            stream to inspect; its file position is changed
 *   OffsetStart  not used here; kept so that every ProcessFile*() function
 *                has the same signature
 *   OffsetEnd    in: offset just past the region still treated as audio
 *                out: moved back to the start of "LYRICSBEGIN" when a
 *                complete block is found
 *
 * Returns 1 when a complete block was found, 0 when there is no
 * "LYRICSEND" signature, -1 on I/O failure or when "LYRICSEND" is present
 * without a matching "LYRICSBEGIN".
 */
int
ProcessFileLyrics3v1(FILE * f, unsigned long *OffsetStart,
		     unsigned long *OffsetEnd)
{
	/* Since maximum lyrics length is 5100, we go back 5109 and _search_
	 * from this point on. 5109 == 5100 + strlen("LYRICSEND")
	 *
	 * This is the recommended method from http://www.id3.org/Lyrics3
	 */
	enum { L3V1_WINDOW = 5109 };
	static const char begin_sign[] = "LYRICSBEGIN";
	static const char end_sign[] = "LYRICSEND";
	const size_t begin_len = sizeof begin_sign - 1;
	const size_t end_len = sizeof end_sign - 1;
	char window[L3V1_WINDOW];
	unsigned long span;
	size_t got;
	size_t i;

	(void) OffsetStart;

	/* Too short to even hold the end signature: nothing to strip. */
	if (*OffsetEnd < end_len) {
		return 0;
	}

	if (fseek(f, (long) (*OffsetEnd - end_len), SEEK_SET) != 0) {
		return -1;
	}

	if (fread(window, end_len, 1, f) != 1) {
		return -1;
	}

	if (memcmp(window, end_sign, end_len) != 0) {
		/* We found no ending LYRICSEND tag. We are done. */
		return 0;
	}

	/* Clamp the search window to the data that actually exists so the
	 * unsigned subtraction below cannot wrap around. */
	span = *OffsetEnd < L3V1_WINDOW ? *OffsetEnd : L3V1_WINDOW;

	if (fseek(f, (long) (*OffsetEnd - span), SEEK_SET) != 0) {
		return -1;
	}

	got = fread(window, 1, (size_t) span, f);
	if (got == 0) {
		return -1;
	}

	/* Length-bounded byte search: the window holds raw audio, which may
	 * contain NUL bytes and is not NUL-terminated, so strstr() is not
	 * usable here. */
	for (i = 0; i + begin_len <= got; i++) {
		if (memcmp(window + i, begin_sign, begin_len) == 0) {
			/* We found a valid LYRICSBEGIN signature, so we
			 * have found a complete record. */
			*OffsetEnd = *OffsetEnd - span + (unsigned long) i;
			return 1;
		}
	}

	/* We found LYRICSEND but not LYRICSBEGIN. Treat as unexpected. */
	return -1;
}
/*
 * ProcessFileLyrics3v2 -- detect a Lyrics3 v2 block at the end of the audio
 * region.
 *
 * Expected layout, ending at *OffsetEnd:
 *
 *     "LYRICSBEGIN" ...fields... <6 decimal digits: size> "LYRICS200"
 *
 * From the Lyrics3v2 spec: the size value includes the "LYRICSBEGIN"
 * string, but does not include the 6 character size descriptor and the
 * trailing "LYRICS200" string.  Blocks whose size value wrongly counts
 * those 15 trailing bytes as well are also accepted.
 *
 *   f            stream to inspect; its file position is changed
 *   OffsetStart  not used here; kept so that every ProcessFile*() function
 *                has the same signature
 *   OffsetEnd    in: offset just past the region still treated as audio
 *                out: moved back to the start of "LYRICSBEGIN" when a
 *                block is found
 *
 * Returns 1 for a block with a correct size value, 2 for a block whose size
 * value included the trailer, 0 when there is no "LYRICS200" signature,
 * and -1 on I/O failure or when the size value does not lead to
 * "LYRICSBEGIN".
 */
int
ProcessFileLyrics3v2(FILE * f, unsigned long *OffsetStart,
		     unsigned long *OffsetEnd)
{
	enum {
		L3V2_BEGIN_LEN = 11,	/* strlen("LYRICSBEGIN") */
		L3V2_END_LEN = 9,	/* strlen("LYRICS200") */
		L3V2_SIZE_LEN = 6,	/* decimal size descriptor */
		L3V2_TRAILER_LEN = L3V2_SIZE_LEN + L3V2_END_LEN
	};
	char buf[L3V2_BEGIN_LEN];
	unsigned long size = 0;
	int i;

	(void) OffsetStart;

	/* Too short to hold the end signature: nothing to strip. */
	if (*OffsetEnd < L3V2_END_LEN) {
		return 0;
	}

	if (fseek(f, (long) (*OffsetEnd - L3V2_END_LEN), SEEK_SET) != 0) {
		return -1;
	}

	if (fread(buf, L3V2_END_LEN, 1, f) != 1) {
		return -1;
	}

	if (memcmp(buf, "LYRICS200", L3V2_END_LEN) != 0) {
		/* We found no ending "LYRICS200" tag. We are done. */
		return 0;
	}

	/* The signature is there but there is no room for the size field. */
	if (*OffsetEnd < L3V2_TRAILER_LEN) {
		return -1;
	}

	if (fseek(f, (long) (*OffsetEnd - L3V2_TRAILER_LEN), SEEK_SET) != 0) {
		return -1;
	}

	/* fread() rather than fgets(): the field is fixed-width and must not
	 * be cut short by a newline byte. */
	if (fread(buf, L3V2_SIZE_LEN, 1, f) != 1) {
		return -1;
	}

	for (i = 0; i < L3V2_SIZE_LEN; i++) {
		if (buf[i] < '0' || buf[i] > '9') {
			/* Not a valid size descriptor. */
			return -1;
		}
		size = size * 10 + (unsigned long) (buf[i] - '0');
	}

	/* First try the size as the spec defines it.  Skip this attempt when
	 * the block would start before the beginning of the file. */
	if (size <= *OffsetEnd - L3V2_TRAILER_LEN) {
		if (fseek(f, (long) (*OffsetEnd - size - L3V2_TRAILER_LEN),
			  SEEK_SET) != 0) {
			return -1;
		}

		if (fread(buf, L3V2_BEGIN_LEN, 1, f) != 1) {
			return -1;
		}

		if (memcmp(buf, "LYRICSBEGIN", L3V2_BEGIN_LEN) == 0) {
			/* LYRICSBEGIN found where it should be. We are done. */
			*OffsetEnd -= size + L3V2_TRAILER_LEN;
			return 1;
		}
	}

	/* LYRICSBEGIN not found where supposed to. Try as if the size mark
	 * included the ending signature.
	 */
	if (size > *OffsetEnd) {
		return -1;
	}

	if (fseek(f, (long) (*OffsetEnd - size), SEEK_SET) != 0) {
		return -1;
	}

	if (fread(buf, L3V2_BEGIN_LEN, 1, f) != 1) {
		return -1;
	}

	if (memcmp(buf, "LYRICSBEGIN", L3V2_BEGIN_LEN) == 0) {
		/* Yes, the size mark was invalid: it included the last ending
		 * signature.
		 */
		*OffsetEnd -= size;
		return 2;
	}

	/* So the size mark is invalid. We didn't find the LYRICSBEGIN signature.
	 * Treat as an unexpected error.
	 */
	return -1;
}
/*
 * filesize -- return the length of an open stream in bytes.
 *
 * Seeks to the end of the stream to measure it and then restores the
 * position the stream had on entry.
 *
 *   stream  open, seekable stream
 *
 * Returns the length in bytes, or 0 when the position could not be read,
 * changed or restored (so a failed ftell() is never turned into a huge
 * unsigned value).
 */
unsigned long filesize(FILE * stream)
{
	long saved;
	long length;

	saved = ftell(stream);
	if (saved < 0) {
		return 0;
	}

	if (fseek(stream, 0L, SEEK_END) != 0) {
		return 0;
	}

	length = ftell(stream);

	/* Restore the caller's position even when measuring failed. */
	if (fseek(stream, saved, SEEK_SET) != 0 || length < 0) {
		return 0;
	}

	return (unsigned long) length;
}
/*
 * showhelp -- write the usage text to stdout.
 *
 * The text lists the command-line options and describes what the program
 * reads from stdin and prints for every file.
 */
void showhelp()
{
	/* One literal per output line; adjacent literals are concatenated by
	 * the compiler into a single string. */
	static const char usage_text[] =
		"\r\n"
		"usage: audiosum [options]\r\n"
		"\r\n"
		"Options:\r\n"
		"-a algo Choose a different algorithm from MD5 for hashing.\r\n"
		"-l Print the list of supported hashes.\r\n"
		"-b Brief: Only print size of files.\r\n"
		"-h Shows this help.\r\n"
		"\r\n"
		"Program operation:\r\n"
		"+ It reads a sequence of file names from stdin (they should be MP3 files)\r\n"
		"sends to stdout the following information about them:\r\n"
		": File size, in hex format (8 chars).\r\n"
		": Hash of the file without ID3 or Lyrics tags, in hex format.\r\n"
		": What signatures were found.\r\n"
		": Complete file name.\r\n"
		"\r\n"
		"It tries to ignore non-audio parts. Currently ignored sections are:\r\n"
		": ID3v1.x\r\n"
		": ID3v2.x\r\n"
		": Lyrics3 v1 (not tested)\r\n"
		": Lyrics3 v2.00\r\n"
		"\r\n";

	fputs(usage_text, stdout);
}
310 void showhashes()
312 hashid i = 0;
314 printf("Supported hashes:\n\n");
315 printf(" %-10s %4s\n", "Hash", "Bits");
316 for (i = 0; i < algorithms_n; i++) {
317 printf(" : %c%-10s %4d\n",
318 algorithms[i].libmhash_id == default_algorithm? '*' : ' ',
319 algorithms[i].cmdline_name,
320 mhash_get_block_size(algorithms[i].libmhash_id)*8);
323 printf("\n * Default algorithm if -a is ommited\n\n");
326 int main(int arg_n, char *arg[])
328 char filename[1024];
330 int i;
331 MHASH td;
332 unsigned char buffer[8192];
333 unsigned char *hash;
335 hashid hash_algorithm = default_algorithm;
336 int mode_brief=0;
337 int help=0;
338 char c;
339 int errflg=0;
340 unsigned long crc;
341 FILE *f;
342 int r;
344 while ((c = getopt(arg_n, arg, ":lba:h")) != -1) {
345 switch(c) {
346 case 'a':
347 for (i = 0; i < algorithms_n; i++) {
348 if (strcmp(optarg, algorithms[i].cmdline_name) == 0) {
349 hash_algorithm = algorithms[i].libmhash_id;
350 break;
353 if (i == algorithms_n) {
354 fprintf(stderr, "audiosum: unknown algorithm name: %s\n", optarg);
355 return 1;
357 break;
358 case 'b':
359 mode_brief++;
360 break;
361 case 'h':
362 help++;
363 break;
364 case 'l':
365 showhashes();
366 return 0;
367 break;
368 case '?':
369 fprintf(stderr, "audiosum: unrecognized option: -%c\n", optopt);
370 exit(EXIT_FAILURE);
371 break;
374 if (errflg || help) {
375 showhelp();
376 if (help)
377 exit(EXIT_SUCCESS);
378 else
379 exit(EXIT_FAILURE);
382 while (fgets(filename, 1024, stdin)) {
383 if (filename[strlen(filename) - 1] == '\n') {
384 filename[strlen(filename) - 1] = '\0';
387 f = fopen(filename, "rb");
388 if (!f) {
389 fprintf(stderr, "ERROR:Opening file: %s\n", filename);
390 continue;
393 unsigned long OffsetStart = 0;
394 unsigned long OffsetEnd = filesize(f);
396 char *hadi3v1;
397 if (!(r = ProcessFileID3v1(f, &OffsetStart, &OffsetEnd)) < 0) {
398 printd("ERROR:Unexp, Analyzing ID3v1", filename, r);
399 fclose(f);
400 continue;
402 if (r == 0)
403 hadi3v1 = "----";
404 if (r == 1)
405 hadi3v1 = "I3v1";
407 char *hadi3v2;
408 if (!(r = ProcessFileID3v2(f, &OffsetStart, &OffsetEnd)) < 0) {
409 printd("ERROR:Unexp, Analyzing ID3v2", filename, r);
410 fclose(f);
411 continue;
413 if (r == 0)
414 hadi3v2 = "----";
415 if (r == 1)
416 hadi3v2 = "I3v2";
418 char *hadl3v1;
419 if (!(r = ProcessFileLyrics3v1(f, &OffsetStart, &OffsetEnd)) < 0) {
420 printd("ERROR:Unexp, Analyzing Lyrics3v1", filename, r);
421 fclose(f);
422 continue;
424 if (r == 0)
425 hadl3v1 = "----";
426 if (r == 1)
427 hadl3v1 = "L3v1";
429 char *hadl3v2;
430 if (!(r = ProcessFileLyrics3v2(f, &OffsetStart, &OffsetEnd)) < 0) {
431 printd("ERROR:Unexp, Analyzing Lyrics3v2", filename, r);
432 fclose(f);
433 continue;
435 if (r == 0)
436 hadl3v2 = "----";
437 if (r == 1)
438 hadl3v2 = "L3v2";
439 if (r == 2)
440 hadl3v2 = "l3v2";
442 fseek(f, OffsetStart, SEEK_SET);
444 printf("%08lx ", OffsetEnd - OffsetStart);
446 if (!mode_brief) {
447 td = mhash_init(hash_algorithm);
449 if (td == MHASH_FAILED)
450 exit(1);
452 int howmany = OffsetEnd - OffsetStart + 1;
453 while (howmany > 0
454 && (r =
455 fread(&buffer, 1, howmany > 8192 ? 8192 : howmany,
456 f)) > 0) {
457 mhash(td, &buffer, r);
458 howmany -= 8192;
461 hash = mhash_end(td);
463 for (i = 0; i < mhash_get_block_size(hash_algorithm); i++) {
464 printf("%.2x", hash[i]);
466 free(hash);
467 printf(" ");
470 printf("[%s] [%s] [%s] [%s] %s\n", hadi3v1, hadi3v2, hadl3v1, hadl3v2, filename);
472 fclose (f);
476 return 0;