MP4 tag parser: guard against problems when the buffer is full.
[kugel-rb.git] / apps / metadata / mp4.c
blob31c9509b946fe388a374fb57dd8e3b38de69ea9e
1 /***************************************************************************
2 * __________ __ ___.
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
7 * \/ \/ \/ \/ \/
8 * $Id$
10 * Copyright (C) 2005 Magnus Holmgren
12 * All files in this archive are subject to the GNU General Public License.
13 * See the file COPYING in the source tree root for full license agreement.
15 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
16 * KIND, either express or implied.
18 ****************************************************************************/
19 #include <stdio.h>
20 #include <string.h>
21 #include <stdlib.h>
22 #include <ctype.h>
23 #include <inttypes.h>
25 #include "system.h"
26 #include "errno.h"
27 #include "id3.h"
28 #include "metadata_common.h"
29 #include "logf.h"
30 #include "debug.h"
31 #include "replaygain.h"
32 #include "atoi.h"
/* Build a 32-bit atom identifier from four character codes, most
 * significant byte first (the order in which they appear in the file).
 *
 * Each argument is widened to uint32_t before shifting. Several atoms
 * start with 0xa9, and shifting that value left by 24 as a plain int
 * would move a bit into the sign position, which is undefined behaviour.
 */
#define MP4_ID(a, b, c, d) (((uint32_t) (a) << 24) | ((uint32_t) (b) << 16) \
                            | ((uint32_t) (c) << 8) | (uint32_t) (d))

/* Atom, file type and handler identifiers used by the parser. The "c"
 * prefixed names are the atoms whose first byte is 0xa9.
 */
#define MP4_3gp6  MP4_ID('3', 'g', 'p', '6')
#define MP4_aART  MP4_ID('a', 'A', 'R', 'T')
#define MP4_alac  MP4_ID('a', 'l', 'a', 'c')
#define MP4_calb  MP4_ID(0xa9, 'a', 'l', 'b')
#define MP4_cART  MP4_ID(0xa9, 'A', 'R', 'T')
#define MP4_cgrp  MP4_ID(0xa9, 'g', 'r', 'p')
#define MP4_cgen  MP4_ID(0xa9, 'g', 'e', 'n')
#define MP4_cnam  MP4_ID(0xa9, 'n', 'a', 'm')
#define MP4_cwrt  MP4_ID(0xa9, 'w', 'r', 't')
#define MP4_ccmt  MP4_ID(0xa9, 'c', 'm', 't')
#define MP4_cday  MP4_ID(0xa9, 'd', 'a', 'y')
#define MP4_disk  MP4_ID('d', 'i', 's', 'k')
#define MP4_esds  MP4_ID('e', 's', 'd', 's')
#define MP4_ftyp  MP4_ID('f', 't', 'y', 'p')
#define MP4_gnre  MP4_ID('g', 'n', 'r', 'e')
#define MP4_hdlr  MP4_ID('h', 'd', 'l', 'r')
#define MP4_ilst  MP4_ID('i', 'l', 's', 't')
#define MP4_M4A   MP4_ID('M', '4', 'A', ' ')
#define MP4_M4B   MP4_ID('M', '4', 'B', ' ')
#define MP4_mdat  MP4_ID('m', 'd', 'a', 't')
#define MP4_mdia  MP4_ID('m', 'd', 'i', 'a')
#define MP4_mdir  MP4_ID('m', 'd', 'i', 'r')
#define MP4_meta  MP4_ID('m', 'e', 't', 'a')
#define MP4_minf  MP4_ID('m', 'i', 'n', 'f')
#define MP4_moov  MP4_ID('m', 'o', 'o', 'v')
#define MP4_mp4a  MP4_ID('m', 'p', '4', 'a')
#define MP4_mp42  MP4_ID('m', 'p', '4', '2')
#define MP4_qt    MP4_ID('q', 't', ' ', ' ')
#define MP4_soun  MP4_ID('s', 'o', 'u', 'n')
#define MP4_stbl  MP4_ID('s', 't', 'b', 'l')
#define MP4_stsd  MP4_ID('s', 't', 's', 'd')
#define MP4_stts  MP4_ID('s', 't', 't', 's')
#define MP4_trak  MP4_ID('t', 'r', 'a', 'k')
#define MP4_trkn  MP4_ID('t', 'r', 'k', 'n')
#define MP4_udta  MP4_ID('u', 'd', 't', 'a')
#define MP4_extra MP4_ID('-', '-', '-', '-')
/* Read the tag data from an MP4 file, storing up to buffer_left bytes in
 * buffer.
 *
 * fd          - file positioned at the start of the tag's data atom.
 * size_left   - number of bytes that make up the tag (16 byte data atom
 *               header included).
 * buffer      - destination for the tag payload.
 * buffer_left - capacity of buffer; 0 means "skip the tag".
 *
 * The file position is moved past the tag. Returns the number of bytes
 * actually stored in buffer (0 if the tag was skipped, is too short to
 * hold a payload, or could not be read).
 */
static unsigned long read_mp4_tag(int fd, unsigned int size_left, char* buffer,
    unsigned int buffer_left)
{
    /* Size of the data atom header that precedes the payload. */
    const unsigned int header_size = 16;
    unsigned int bytes_read = 0;

    if ((buffer_left == 0) || (size_left <= header_size))
    {
        /* Nothing can be stored, or there is no payload after the header.
         * Subtracting the header from a shorter tag would wrap around, so
         * simply skip everything.
         */
        lseek(fd, size_left, SEEK_CUR);
    }
    else
    {
        unsigned int wanted;
        long got;

        /* Skip the data tag header - maybe we should parse it properly? */
        lseek(fd, header_size, SEEK_CUR);
        size_left -= header_size;

        wanted = (size_left > buffer_left) ? buffer_left : size_left;
        got = read(fd, buffer, wanted);

        /* Report what really ended up in the buffer, not what was asked
         * for; callers terminate and measure the data based on this.
         */
        if (got > 0)
        {
            bytes_read = (unsigned int) got;
        }

        if (size_left > wanted)
        {
            /* Skip the part of the payload that did not fit. */
            lseek(fd, size_left - wanted, SEEK_CUR);
        }
    }

    return bytes_read;
}
/* Read a string tag from an MP4 file.
 *
 * The string is stored at *buffer and NUL terminated; one byte of
 * *buffer_left is always kept back for the terminator. On success *dest
 * points at the string and *buffer / *buffer_left are advanced past it.
 * If nothing was read, *dest is set to NULL and the buffer is untouched.
 *
 * Returns the number of buffer bytes consumed (terminator included), or 0.
 */
static unsigned int read_mp4_tag_string(int fd, int size_left, char** buffer,
    unsigned int* buffer_left, char** dest)
{
    char* start = *buffer;
    unsigned int capacity = 0;
    unsigned int stored;
    unsigned int used;

    if (*buffer_left > 0)
    {
        /* Leave room for the terminating NUL. */
        capacity = *buffer_left - 1;
    }

    stored = read_mp4_tag(fd, size_left, start, capacity);

    if (stored == 0)
    {
        *dest = NULL;
        return 0;
    }

    start[stored] = 0;
    *dest = start;

    /* The data may contain an embedded NUL, so measure the string rather
     * than trusting the number of bytes read.
     */
    used = strlen(start) + 1;
    *buffer_left -= used;
    *buffer = start + used;

    return used;
}
/* Read an atom header: a 32-bit big-endian size followed by a 32-bit type.
 *
 * fd        - file positioned at the start of the atom.
 * size      - receives the size of the atom contents, i.e. the size from
 *             the file minus the 8 byte header. If the file says 0 (atom
 *             extends to the end of the container) it receives size_left
 *             unchanged.
 * type      - receives the atom type, or 0 if the header is unusable.
 * size_left - number of bytes left in the enclosing container.
 *
 * Returns the number of bytes left in the enclosing container once this
 * atom has been consumed. If the header is unusable, errno is set, *type
 * is set to 0 and 0 is returned.
 */
static unsigned int read_mp4_atom(int fd, unsigned int* size,
    unsigned int* type, unsigned int size_left)
{
    /* Size and type fields together. */
    const unsigned int header_size = 8;

    read_uint32be(fd, size);
    read_uint32be(fd, type);

    if (*size == 1)
    {
        /* FAT32 doesn't support files this big, so something seems to
         * be wrong. (64-bit sizes should only be used when required.)
         */
        errno = EFBIG;
        *type = 0;
        return 0;
    }

    if (*size == 0)
    {
        /* The atom runs to the end of the container. */
        *size = size_left;
        return 0;
    }

    if (*size < header_size)
    {
        /* An atom can't be smaller than its own header. Removing the
         * header from such a size would wrap around to a huge value, so
         * report the file as broken instead.
         */
        errno = EINVAL;
        *type = 0;
        *size = 0;
        return 0;
    }

    if (*size > size_left)
    {
        size_left = 0;
    }
    else
    {
        size_left -= *size;
    }

    *size -= header_size;

    return size_left;
}
/* Read a variable length descriptor size: up to four bytes, each holding
 * seven bits of the value (most significant group first), with the top
 * bit set on every byte except the last.
 *
 * fd   - file positioned at the first length byte.
 * size - number of bytes left in the enclosing atom; reduced by one for
 *        every byte consumed.
 *
 * Reading stops at the first byte without the continuation bit, after
 * four bytes, when *size reaches 0 or when the file can't be read.
 * Returns the decoded length (0 if no byte could be read).
 */
static unsigned int read_mp4_length(int fd, unsigned int* size)
{
    unsigned int length = 0;
    int bytes = 0;
    unsigned char c = 0;

    /* Check the remaining size before reading, so that an exhausted atom
     * neither consumes data belonging to the next one nor makes *size
     * wrap around.
     */
    while ((bytes < 4) && (*size > 0))
    {
        if (read(fd, &c, 1) != 1)
        {
            break;
        }

        bytes++;
        (*size)--;
        length = (length << 7) | (c & 0x7F);

        if (!(c & 0x80))
        {
            break;
        }
    }

    return length;
}
187 static bool read_mp4_esds(int fd, struct mp3entry* id3,
188 unsigned int* size)
190 unsigned char buf[8];
191 bool sbr = false;
193 lseek(fd, 4, SEEK_CUR); /* Version and flags. */
194 read(fd, buf, 1); /* Verify ES_DescrTag. */
195 *size -= 5;
197 if (*buf == 3)
199 /* read length */
200 if (read_mp4_length(fd, size) < 20)
202 return sbr;
205 lseek(fd, 3, SEEK_CUR);
206 *size -= 3;
208 else
210 lseek(fd, 2, SEEK_CUR);
211 *size -= 2;
214 read(fd, buf, 1); /* Verify DecoderConfigDescrTab. */
215 *size -= 1;
217 if (*buf != 4)
219 return sbr;
222 if (read_mp4_length(fd, size) < 13)
224 return sbr;
227 lseek(fd, 13, SEEK_CUR); /* Skip audio type, bit rates, etc. */
228 read(fd, buf, 1);
229 *size -= 14;
231 if (*buf != 5) /* Verify DecSpecificInfoTag. */
233 return sbr;
237 static const int sample_rates[] =
239 96000, 88200, 64000, 48000, 44100, 32000,
240 24000, 22050, 16000, 12000, 11025, 8000
242 unsigned long bits;
243 unsigned int length;
244 unsigned int index;
245 unsigned int type;
247 /* Read the (leading part of the) decoder config. */
248 length = read_mp4_length(fd, size);
249 length = MIN(length, *size);
250 length = MIN(length, sizeof(buf));
251 memset(buf, 0, sizeof(buf));
252 read(fd, buf, length);
253 *size -= length;
255 /* Maybe time to write a simple read_bits function... */
257 /* Decoder config format:
258 * Object type - 5 bits
259 * Frequency index - 4 bits
260 * Channel configuration - 4 bits
262 bits = get_long_be(buf);
263 type = bits >> 27; /* Object type - 5 bits */
264 index = (bits >> 23) & 0xf; /* Frequency index - 4 bits */
266 if (index < (sizeof(sample_rates) / sizeof(*sample_rates)))
268 id3->frequency = sample_rates[index];
271 if (type == 5)
273 DEBUGF("MP4: SBR\n");
274 unsigned int old_index = index;
276 sbr = true;
277 index = (bits >> 15) & 0xf; /* Frequency index - 4 bits */
279 if (index == 15)
281 /* 17 bits read so far... */
282 bits = get_long_be(&buf[2]);
283 id3->frequency = (bits >> 7) & 0x00ffffff;
285 else if (index < (sizeof(sample_rates) / sizeof(*sample_rates)))
287 id3->frequency = sample_rates[index];
290 if (old_index == index)
292 /* Downsampled SBR */
293 id3->frequency *= 2;
296 /* Skip 13 bits from above, plus 3 bits, then read 11 bits */
297 else if ((length >= 4) && (((bits >> 5) & 0x7ff) == 0x2b7))
299 /* extensionAudioObjectType */
300 DEBUGF("MP4: extensionAudioType\n");
301 type = bits & 0x1f; /* Object type - 5 bits*/
302 bits = get_long_be(&buf[4]);
304 if (type == 5)
306 sbr = bits >> 31;
308 if (sbr)
310 unsigned int old_index = index;
312 /* 1 bit read so far */
313 index = (bits >> 27) & 0xf; /* Frequency index - 4 bits */
315 if (index == 15)
317 /* 5 bits read so far */
318 id3->frequency = (bits >> 3) & 0x00ffffff;
320 else if (index < (sizeof(sample_rates) / sizeof(*sample_rates)))
322 id3->frequency = sample_rates[index];
325 if (old_index == index)
327 /* Downsampled SBR */
328 id3->frequency *= 2;
334 if (!sbr && (id3->frequency <= 24000) && (length <= 2))
336 /* Double the frequency for low-frequency files without a "long"
337 * DecSpecificConfig header. The file may or may not contain SBR,
338 * but here we guess it does if the header is short. This can
339 * fail on some files, but it's the best we can do, short of
340 * decoding (parts of) the file.
342 id3->frequency *= 2;
346 return sbr;
349 static bool read_mp4_tags(int fd, struct mp3entry* id3,
350 unsigned int size_left)
352 unsigned int size;
353 unsigned int type;
354 unsigned int buffer_left = sizeof(id3->id3v2buf) + sizeof(id3->id3v1buf);
355 char* buffer = id3->id3v2buf;
356 bool cwrt = false;
360 size_left = read_mp4_atom(fd, &size, &type, size_left);
362 /* DEBUGF("Tag atom: '%c%c%c%c' (%d bytes left)\n", type >> 24 & 0xff,
363 type >> 16 & 0xff, type >> 8 & 0xff, type & 0xff, size); */
365 switch (type)
367 case MP4_cnam:
368 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
369 &id3->title);
370 break;
372 case MP4_cART:
373 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
374 &id3->artist);
375 break;
377 case MP4_aART:
378 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
379 &id3->albumartist);
380 break;
382 case MP4_cgrp:
383 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
384 &id3->grouping);
385 break;
387 case MP4_calb:
388 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
389 &id3->album);
390 break;
392 case MP4_cwrt:
393 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
394 &id3->composer);
395 cwrt = false;
396 break;
398 case MP4_ccmt:
399 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
400 &id3->comment);
401 break;
403 case MP4_cday:
404 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
405 &id3->year_string);
407 /* Try to parse it as a year, for the benefit of the database.
409 if(id3->year_string)
411 id3->year = atoi(id3->year_string);
412 if (id3->year < 1900)
414 id3->year = 0;
417 else
418 id3->year = 0;
420 break;
422 case MP4_gnre:
424 unsigned short genre;
426 read_mp4_tag(fd, size, (char*) &genre, sizeof(genre));
427 id3->genre_string = id3_get_num_genre(betoh16(genre) - 1);
429 break;
431 case MP4_cgen:
432 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
433 &id3->genre_string);
434 break;
436 case MP4_disk:
438 unsigned short n[2];
440 read_mp4_tag(fd, size, (char*) &n, sizeof(n));
441 id3->discnum = betoh16(n[1]);
443 break;
445 case MP4_trkn:
447 unsigned short n[2];
449 read_mp4_tag(fd, size, (char*) &n, sizeof(n));
450 id3->tracknum = betoh16(n[1]);
452 break;
454 case MP4_extra:
456 char tag_name[TAG_NAME_LENGTH];
457 unsigned int sub_size;
459 /* "mean" atom */
460 read_uint32be(fd, &sub_size);
461 size -= sub_size;
462 lseek(fd, sub_size - 4, SEEK_CUR);
463 /* "name" atom */
464 read_uint32be(fd, &sub_size);
465 size -= sub_size;
466 lseek(fd, 8, SEEK_CUR);
467 sub_size -= 12;
469 if (sub_size > sizeof(tag_name) - 1)
471 read(fd, tag_name, sizeof(tag_name) - 1);
472 lseek(fd, sub_size - (sizeof(tag_name) - 1), SEEK_CUR);
473 tag_name[sizeof(tag_name) - 1] = 0;
475 else
477 read(fd, tag_name, sub_size);
478 tag_name[sub_size] = 0;
481 if ((strcasecmp(tag_name, "composer") == 0) && !cwrt)
483 read_mp4_tag_string(fd, size, &buffer, &buffer_left,
484 &id3->composer);
486 else if (strcasecmp(tag_name, "iTunSMPB") == 0)
488 char value[TAG_VALUE_LENGTH];
489 char* value_p = value;
490 char* any;
491 unsigned int length = sizeof(value);
493 read_mp4_tag_string(fd, size, &value_p, &length, &any);
494 id3->lead_trim = get_itunes_int32(value, 1);
495 id3->tail_trim = get_itunes_int32(value, 2);
496 DEBUGF("AAC: lead_trim %d, tail_trim %d\n",
497 id3->lead_trim, id3->tail_trim);
499 else
501 char* any;
502 unsigned int length = read_mp4_tag_string(fd, size,
503 &buffer, &buffer_left, &any);
505 if (length > 0)
507 /* Re-use the read buffer as the dest buffer... */
508 buffer -= length;
509 buffer_left += length;
511 if (parse_replaygain(tag_name, buffer, id3,
512 buffer, buffer_left) > 0)
514 /* Data used, keep it. */
515 buffer += length;
516 buffer_left -= length;
521 break;
523 default:
524 lseek(fd, size, SEEK_CUR);
525 break;
528 while ((size_left > 0) && (errno == 0));
530 return true;
533 static bool read_mp4_container(int fd, struct mp3entry* id3,
534 unsigned int size_left)
536 unsigned int size;
537 unsigned int type;
538 unsigned int handler = 0;
539 bool rc = true;
543 size_left = read_mp4_atom(fd, &size, &type, size_left);
545 /* DEBUGF("Atom: '%c%c%c%c' (0x%08x, %d bytes left)\n",
546 (type >> 24) & 0xff, (type >> 16) & 0xff, (type >> 8) & 0xff,
547 type & 0xff, type, size); */
549 switch (type)
551 case MP4_ftyp:
553 unsigned int id;
555 read_uint32be(fd, &id);
556 size -= 4;
558 if ((id != MP4_M4A) && (id != MP4_M4B) && (id != MP4_mp42)
559 && (id != MP4_qt) && (id != MP4_3gp6))
561 DEBUGF("Unknown MP4 file type: '%c%c%c%c'\n",
562 id >> 24 & 0xff, id >> 16 & 0xff, id >> 8 & 0xff,
563 id & 0xff);
564 return false;
567 break;
569 case MP4_meta:
570 lseek(fd, 4, SEEK_CUR); /* Skip version */
571 size -= 4;
572 /* Fall through */
574 case MP4_moov:
575 case MP4_udta:
576 case MP4_mdia:
577 case MP4_stbl:
578 case MP4_trak:
579 rc = read_mp4_container(fd, id3, size);
580 size = 0;
581 break;
583 case MP4_ilst:
584 if (handler == MP4_mdir)
586 rc = read_mp4_tags(fd, id3, size);
587 size = 0;
589 break;
591 case MP4_minf:
592 if (handler == MP4_soun)
594 rc = read_mp4_container(fd, id3, size);
595 size = 0;
597 break;
599 case MP4_stsd:
600 lseek(fd, 8, SEEK_CUR);
601 size -= 8;
602 rc = read_mp4_container(fd, id3, size);
603 size = 0;
604 break;
606 case MP4_hdlr:
607 lseek(fd, 8, SEEK_CUR);
608 read_uint32be(fd, &handler);
609 size -= 12;
610 /* DEBUGF(" Handler '%c%c%c%c'\n", handler >> 24 & 0xff,
611 handler >> 16 & 0xff, handler >> 8 & 0xff,handler & 0xff); */
612 break;
614 case MP4_stts:
616 unsigned int entries;
617 unsigned int i;
619 lseek(fd, 4, SEEK_CUR);
620 read_uint32be(fd, &entries);
621 id3->samples = 0;
623 for (i = 0; i < entries; i++)
625 unsigned int n;
626 unsigned int l;
628 read_uint32be(fd, &n);
629 read_uint32be(fd, &l);
630 id3->samples += n * l;
633 size = 0;
635 break;
637 case MP4_mp4a:
638 case MP4_alac:
640 unsigned int frequency;
642 id3->codectype = (type == MP4_mp4a) ? AFMT_AAC : AFMT_ALAC;
643 lseek(fd, 22, SEEK_CUR);
644 read_uint32be(fd, &frequency);
645 size -= 26;
646 id3->frequency = frequency;
648 if (type == MP4_mp4a)
650 unsigned int subsize;
651 unsigned int subtype;
653 /* Get frequency from the decoder info tag, if possible. */
654 lseek(fd, 2, SEEK_CUR);
655 /* The esds atom is a part of the mp4a atom, so ignore
656 * the returned size (it's already accounted for).
658 read_mp4_atom(fd, &subsize, &subtype, size);
659 size -= 10;
661 if (subtype == MP4_esds)
663 read_mp4_esds(fd, id3, &size);
667 break;
669 case MP4_mdat:
670 id3->filesize = size;
671 break;
673 default:
674 break;
677 lseek(fd, size, SEEK_CUR);
679 while (rc && (size_left > 0) && (errno == 0) && (id3->filesize == 0));
680 /* Break on non-zero filesize, since Rockbox currently doesn't support
681 * metadata after the mdat atom (which sets the filesize field).
684 return rc;
687 bool get_mp4_metadata(int fd, struct mp3entry* id3)
689 id3->codectype = AFMT_UNKNOWN;
690 id3->filesize = 0;
691 errno = 0;
693 if (read_mp4_container(fd, id3, filesize(fd)) && (errno == 0)
694 && (id3->samples > 0) && (id3->frequency > 0)
695 && (id3->filesize > 0))
697 if (id3->codectype == AFMT_UNKNOWN)
699 logf("Not an ALAC or AAC file");
700 return false;
703 id3->length = ((int64_t) id3->samples * 1000) / id3->frequency;
705 if (id3->length <= 0)
707 logf("mp4 length invalid!");
708 return false;
711 id3->bitrate = ((int64_t) id3->filesize * 8) / id3->length;
712 DEBUGF("MP4 bitrate %d, frequency %ld Hz, length %ld ms\n",
713 id3->bitrate, id3->frequency, id3->length);
715 else
717 logf("MP4 metadata error");
718 DEBUGF("MP4 metadata error. errno %d, frequency %ld, filesize %ld\n",
719 errno, id3->frequency, id3->filesize);
720 return false;
723 return true;