sd_ass: initialize structs for external tracks properly
[mplayer.git] / libmpdemux / ebml.c
blobfba8177805b6f0fea85ff46cbd74b72505fa6b18
1 /*
2 * native ebml reader for the Matroska demuxer
3 * new parser copyright (c) 2010 Uoti Urpala
4 * copyright (c) 2004 Aurelien Jacobs <aurel@gnuage.org>
5 * based on the one written by Ronald Bultje for gstreamer
7 * This file is part of MPlayer.
9 * MPlayer is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * MPlayer is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 #include "config.h"
26 #include <stdlib.h>
27 #include <stdbool.h>
28 #include <inttypes.h>
29 #include <stddef.h>
30 #include <assert.h>
32 #include <libavutil/intfloat.h>
33 #include <libavutil/common.h>
34 #include "talloc.h"
35 #include "ebml.h"
36 #include "stream/stream.h"
37 #include "mpbswap.h"
38 #include "mp_msg.h"
40 #ifndef SIZE_MAX
41 #define SIZE_MAX ((size_t)-1)
42 #endif
45 * Read: the element content data ID.
46 * Return: the ID.
48 uint32_t ebml_read_id(stream_t *s, int *length)
50 int i, len_mask = 0x80;
51 uint32_t id;
53 for (i = 0, id = stream_read_char(s); i < 4 && !(id & len_mask); i++)
54 len_mask >>= 1;
55 if (i >= 4)
56 return EBML_ID_INVALID;
57 if (length)
58 *length = i + 1;
59 while (i--)
60 id = (id << 8) | stream_read_char(s);
61 return id;
65 * Read a variable length unsigned int.
67 uint64_t ebml_read_vlen_uint(uint8_t *buffer, int *length)
69 int i, j, num_ffs = 0, len_mask = 0x80;
70 uint64_t num;
72 for (i = 0, num = *buffer++; i < 8 && !(num & len_mask); i++)
73 len_mask >>= 1;
74 if (i >= 8)
75 return EBML_UINT_INVALID;
76 j = i + 1;
77 if (length)
78 *length = j;
79 if ((int) (num &= (len_mask - 1)) == len_mask - 1)
80 num_ffs++;
81 while (i--) {
82 num = (num << 8) | *buffer++;
83 if ((num & 0xFF) == 0xFF)
84 num_ffs++;
86 if (j == num_ffs)
87 return EBML_UINT_INVALID;
88 return num;
92 * Read a variable length signed int.
94 int64_t ebml_read_vlen_int(uint8_t *buffer, int *length)
96 uint64_t unum;
97 int l;
99 /* read as unsigned number first */
100 unum = ebml_read_vlen_uint(buffer, &l);
101 if (unum == EBML_UINT_INVALID)
102 return EBML_INT_INVALID;
103 if (length)
104 *length = l;
106 return unum - ((1 << ((7 * l) - 1)) - 1);
110 * Read: element content length.
112 uint64_t ebml_read_length(stream_t *s, int *length)
114 int i, j, num_ffs = 0, len_mask = 0x80;
115 uint64_t len;
117 for (i = 0, len = stream_read_char(s); i < 8 && !(len & len_mask); i++)
118 len_mask >>= 1;
119 if (i >= 8)
120 return EBML_UINT_INVALID;
121 j = i + 1;
122 if (length)
123 *length = j;
124 if ((int) (len &= (len_mask - 1)) == len_mask - 1)
125 num_ffs++;
126 while (i--) {
127 len = (len << 8) | stream_read_char(s);
128 if ((len & 0xFF) == 0xFF)
129 num_ffs++;
131 if (j == num_ffs)
132 return EBML_UINT_INVALID;
133 if (len >= 1ULL<<63) // Can happen if stream_read_char returns EOF
134 return EBML_UINT_INVALID;
135 return len;
139 * Read the next element as an unsigned int.
141 uint64_t ebml_read_uint(stream_t *s, uint64_t *length)
143 uint64_t len, value = 0;
144 int l;
146 len = ebml_read_length(s, &l);
147 if (len == EBML_UINT_INVALID || len < 1 || len > 8)
148 return EBML_UINT_INVALID;
149 if (length)
150 *length = len + l;
152 while (len--)
153 value = (value << 8) | stream_read_char(s);
155 return value;
159 * Read the next element as a signed int.
161 int64_t ebml_read_int(stream_t *s, uint64_t *length)
163 int64_t value = 0;
164 uint64_t len;
165 int l;
167 len = ebml_read_length(s, &l);
168 if (len == EBML_UINT_INVALID || len < 1 || len > 8)
169 return EBML_INT_INVALID;
170 if (length)
171 *length = len + l;
173 len--;
174 l = stream_read_char(s);
175 if (l & 0x80)
176 value = -1;
177 value = (value << 8) | l;
178 while (len--)
179 value = (value << 8) | stream_read_char(s);
181 return value;
185 * Read the next element as a float.
187 double ebml_read_float(stream_t *s, uint64_t *length)
189 double value;
190 uint64_t len;
191 int l;
193 len = ebml_read_length(s, &l);
194 switch (len) {
195 case 4:
196 value = av_int2float(stream_read_dword(s));
197 break;
199 case 8:
200 value = av_int2double(stream_read_qword(s));
201 break;
203 default:
204 return EBML_FLOAT_INVALID;
207 if (length)
208 *length = len + l;
210 return value;
214 * Read the next element as an ASCII string.
216 char *ebml_read_ascii(stream_t *s, uint64_t *length)
218 uint64_t len;
219 char *str;
220 int l;
222 len = ebml_read_length(s, &l);
223 if (len == EBML_UINT_INVALID)
224 return NULL;
225 if (len > SIZE_MAX - 1)
226 return NULL;
227 if (length)
228 *length = len + l;
230 str = malloc(len + 1);
231 if (stream_read(s, str, len) != (int) len) {
232 free(str);
233 return NULL;
235 str[len] = '\0';
237 return str;
241 * Read the next element as a UTF-8 string.
243 char *ebml_read_utf8(stream_t *s, uint64_t *length)
245 return ebml_read_ascii(s, length);
249 * Skip the next element.
251 int ebml_read_skip(stream_t *s, uint64_t *length)
253 uint64_t len;
254 int l;
256 len = ebml_read_length(s, &l);
257 if (len == EBML_UINT_INVALID)
258 return 1;
259 if (length)
260 *length = len + l;
262 stream_skip(s, len);
264 return 0;
268 * Read the next element, but only the header. The contents
269 * are supposed to be sub-elements which can be read separately.
271 uint32_t ebml_read_master(stream_t *s, uint64_t *length)
273 uint64_t len;
274 uint32_t id;
276 id = ebml_read_id(s, NULL);
277 if (id == EBML_ID_INVALID)
278 return id;
280 len = ebml_read_length(s, NULL);
281 if (len == EBML_UINT_INVALID)
282 return EBML_ID_INVALID;
283 if (length)
284 *length = len;
286 return id;
291 #define EVALARGS(F, ...) F(__VA_ARGS__)
292 #define E(str, N, type) const struct ebml_elem_desc ebml_ ## N ## _desc = { str, type };
293 #define E_SN(str, count, N) const struct ebml_elem_desc ebml_ ## N ## _desc = { str, EBML_TYPE_SUBELEMENTS, sizeof(struct ebml_ ## N), count, (const struct ebml_field_desc[]){
294 #define E_S(str, count) EVALARGS(E_SN, str, count, N)
295 #define FN(id, name, multiple, N) { id, multiple, offsetof(struct ebml_ ## N, name), offsetof(struct ebml_ ## N, n_ ## name), &ebml_##name##_desc},
296 #define F(id, name, multiple) EVALARGS(FN, id, name, multiple, N)
297 #include "ebml_defs.c"
298 #undef EVALARGS
299 #undef SN
300 #undef S
301 #undef FN
302 #undef F
304 // Used to read/write pointers to different struct types
305 struct generic;
306 #define generic_struct struct generic
308 static uint32_t ebml_parse_id(uint8_t *data, int *length)
310 int len = 1;
311 uint32_t id = *data++;
312 for (int len_mask = 0x80; !(id & len_mask); len_mask >>= 1) {
313 len++;
314 if (len > 4) {
315 *length = -1;
316 return EBML_ID_INVALID;
319 *length = len;
320 while (--len)
321 id = (id << 8) | *data++;
322 return id;
/*
 * Parse a variable length unsigned number from memory.
 *
 * The position of the first set bit in the first byte selects the total
 * size (1 to 8 bytes); that marker bit is stripped from the value.
 * All bytes of the number are read before returning, so the caller must
 * make sure they are readable.
 *
 * length:    receives the number of bytes used, or -1 on error.
 * is_length: when true, a number whose value bits are all ones is
 *            rejected as an error.
 * Return: the value, or (uint64_t)-1 on error.
 */
static uint64_t parse_vlen(uint8_t *data, int *length, bool is_length)
{
    const uint8_t *cur = data;
    uint64_t value = *cur++;
    int total = 1;
    int marker = 0x80;

    while (!(value & marker)) {
        if (++total > 8) {
            *length = -1;
            return -1;
        }
        marker >>= 1;
    }
    value &= marker - 1;

    // Count the bytes whose value bits are all set
    int ones = 0;
    if (value == marker - 1)
        ones++;
    for (int k = 1; k < total; k++) {
        uint8_t byte = *cur++;
        if (byte == 255)
            ones++;
        value = (value << 8) | byte;
    }
    if (is_length && ones == total) {
        // According to Matroska specs this means "unknown length"
        // Could be supported if there are any actual files using it
        *length = -1;
        return -1;
    }
    *length = total;
    return value;
}
/*
 * Parse an element content length from memory.
 *
 * Same as parse_vlen() with is_length set, i.e. a length whose value
 * bits are all ones is reported as an error.
 */
static uint64_t ebml_parse_length(uint8_t *data, int *length)
{
    const bool reject_all_ones = true;

    return parse_vlen(data, length, reject_all_ones);
}
/*
 * Decode 'length' bytes at 'data' as a big-endian unsigned integer.
 * 'length' must be in the range 1..8 (asserted).
 */
static uint64_t ebml_parse_uint(uint8_t *data, int length)
{
    assert(length >= 1 && length <= 8);

    uint64_t value = 0;
    for (int k = 0; k < length; k++)
        value = (value << 8) + data[k];
    return value;
}
/*
 * Decode 'length' bytes at 'data' as a big-endian two's complement
 * signed integer, sign-extended from the top bit of the first byte.
 * 'length' must be in the range 1..8 (asserted).
 */
static int64_t ebml_parse_sint(uint8_t *data, int length)
{
    assert(length >= 1 && length <= 8);

    // Assemble the bit pattern in an unsigned type: left-shifting a
    // negative signed value is undefined behavior.
    uint64_t bits = (data[0] & 0x80) ? UINT64_MAX : 0;
    for (int k = 0; k < length; k++)
        bits = (bits << 8) | data[k];

    // Reinterpret the pattern as two's complement without relying on an
    // implementation-defined unsigned -> signed conversion.
    if (bits <= INT64_MAX)
        return (int64_t) bits;
    return -(int64_t) ~bits - 1;
}
/*
 * Decode a big-endian float stored at 'data'.
 * 'length' must be 4 or 8 (asserted): 4 bytes are converted with
 * av_int2float(), otherwise av_int2double() is used.
 */
static double ebml_parse_float(uint8_t *data, int length)
{
    assert(length == 4 || length == 8);

    uint64_t raw = ebml_parse_uint(data, length);
    if (length != 4)
        return av_int2double(raw);
    return av_int2float(raw);
}
393 // target must be initialized to zero
394 static void ebml_parse_element(struct ebml_parse_ctx *ctx, void *target,
395 uint8_t *data, int size,
396 const struct ebml_elem_desc *type, int level)
398 assert(type->type == EBML_TYPE_SUBELEMENTS);
399 assert(level < 8);
400 mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Parsing element %s\n",
401 level, " ", type->name);
403 char *s = target;
404 int len;
405 uint8_t *end = data + size;
406 uint8_t *p = data;
407 int num_elems[MAX_EBML_SUBELEMENTS] = {};
408 while (p < end) {
409 uint8_t *startp = p;
410 uint32_t id = ebml_parse_id(p, &len);
411 if (len > end - p)
412 goto past_end_error;
413 if (len < 0) {
414 mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Error parsing subelement "
415 "id\n");
416 goto other_error;
418 p += len;
419 uint64_t length = ebml_parse_length(p, &len);
420 if (len > end - p)
421 goto past_end_error;
422 if (len < 0) {
423 mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Error parsing subelement "
424 "length\n");
425 goto other_error;
427 p += len;
429 int field_idx = -1;
430 for (int i = 0; i < type->field_count; i++)
431 if (type->fields[i].id == id) {
432 field_idx = i;
433 num_elems[i]++;
434 break;
437 if (length > end - p) {
438 if (field_idx >= 0 && type->fields[field_idx].desc->type
439 != EBML_TYPE_SUBELEMENTS) {
440 mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Subelement content goes "
441 "past end of containing element\n");
442 goto other_error;
444 // Try to parse what is possible from inside this partial element
445 ctx->has_errors = true;
446 length = end - p;
448 p += length;
450 continue;
452 past_end_error:
453 mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Subelement headers go "
454 "past end of containing element\n");
455 other_error:
456 ctx->has_errors = true;
457 end = startp;
458 break;
461 for (int i = 0; i < type->field_count; i++)
462 if (num_elems[i] && type->fields[i].multiple) {
463 char *ptr = s + type->fields[i].offset;
464 switch (type->fields[i].desc->type) {
465 case EBML_TYPE_SUBELEMENTS:
466 num_elems[i] = FFMIN(num_elems[i],
467 1000000000 / type->fields[i].desc->size);
468 int size = num_elems[i] * type->fields[i].desc->size;
469 *(generic_struct **) ptr = talloc_zero_size(ctx->talloc_ctx,
470 size);
471 break;
472 case EBML_TYPE_UINT:
473 *(uint64_t **) ptr = talloc_zero_array(ctx->talloc_ctx,
474 uint64_t, num_elems[i]);
475 break;
476 case EBML_TYPE_SINT:
477 *(int64_t **) ptr = talloc_zero_array(ctx->talloc_ctx,
478 int64_t, num_elems[i]);
479 break;
480 case EBML_TYPE_FLOAT:
481 *(double **) ptr = talloc_zero_array(ctx->talloc_ctx,
482 double, num_elems[i]);
483 break;
484 case EBML_TYPE_STR:
485 case EBML_TYPE_BINARY:
486 *(struct bstr **) ptr = talloc_zero_array(ctx->talloc_ctx,
487 struct bstr,
488 num_elems[i]);
489 break;
490 case EBML_TYPE_EBML_ID:
491 *(int32_t **) ptr = talloc_zero_array(ctx->talloc_ctx,
492 uint32_t, num_elems[i]);
493 break;
494 default:
495 abort();
499 while (data < end) {
500 int len;
501 uint32_t id = ebml_parse_id(data, &len);
502 assert(len >= 0 && len <= end - data);
503 data += len;
504 uint64_t length = ebml_parse_length(data, &len);
505 assert(len >= 0 && len <= end - data);
506 data += len;
507 if (length > end - data) {
508 // Try to parse what is possible from inside this partial element
509 length = end - data;
510 mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Next subelement content goes "
511 "past end of containing element, will be truncated\n");
513 int field_idx = -1;
514 for (int i = 0; i < type->field_count; i++)
515 if (type->fields[i].id == id) {
516 field_idx = i;
517 break;
519 if (field_idx < 0) {
520 if (id == 0xec)
521 mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Ignoring Void element "
522 "size: %"PRIu64"\n", level+1, " ", length);
523 else if (id == 0xbf)
524 mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Ignoring CRC-32 "
525 "element size: %"PRIu64"\n", level+1, " ",
526 length);
527 else
528 mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Ignoring unrecognized "
529 "subelement. ID: %x size: %"PRIu64"\n", id, length);
530 data += length;
531 continue;
533 const struct ebml_field_desc *fd = &type->fields[field_idx];
534 const struct ebml_elem_desc *ed = fd->desc;
535 bool multiple = fd->multiple;
536 int *countptr = (int *) (s + fd->count_offset);
537 if (*countptr >= num_elems[field_idx]) {
538 // Shouldn't happen with on any sane file without bugs
539 mp_msg(MSGT_DEMUX, MSGL_ERR, "[mkv] Too many subelems?\n");
540 ctx->has_errors = true;
541 data += length;
542 continue;
544 if (*countptr > 0 && !multiple) {
545 mp_msg(MSGT_DEMUX, MSGL_DBG2, "[mkv] Another subelement of type "
546 "%x %s (size: %"PRIu64"). Only one allowed. Ignoring.\n",
547 id, ed->name, length);
548 ctx->has_errors = true;
549 data += length;
550 continue;
552 mp_msg(MSGT_DEMUX, MSGL_DBG2, "%.*s[mkv] Parsing %x %s size: %"PRIu64
553 " value: ", level+1, " ", id, ed->name, length);
555 char *fieldptr = s + fd->offset;
556 switch (ed->type) {
557 case EBML_TYPE_SUBELEMENTS:
558 mp_msg(MSGT_DEMUX, MSGL_DBG2, "subelements\n");
559 char *subelptr;
560 if (multiple) {
561 char *array_start = (char *) *(generic_struct **) fieldptr;
562 subelptr = array_start + *countptr * ed->size;
563 } else
564 subelptr = fieldptr;
565 ebml_parse_element(ctx, subelptr, data, length, ed, level + 1);
566 break;
568 case EBML_TYPE_UINT:;
569 uint64_t *uintptr;
570 #define GETPTR(subelptr, fieldtype) \
571 if (multiple) \
572 subelptr = *(fieldtype **) fieldptr + *countptr; \
573 else \
574 subelptr = (fieldtype *) fieldptr
575 GETPTR(uintptr, uint64_t);
576 if (length < 1 || length > 8) {
577 mp_msg(MSGT_DEMUX, MSGL_DBG2, "uint invalid length %"PRIu64
578 "\n", length);
579 goto error;
581 *uintptr = ebml_parse_uint(data, length);
582 mp_msg(MSGT_DEMUX, MSGL_DBG2, "uint %"PRIu64"\n", *uintptr);
583 break;
585 case EBML_TYPE_SINT:;
586 int64_t *sintptr;
587 GETPTR(sintptr, int64_t);
588 if (length < 1 || length > 8) {
589 mp_msg(MSGT_DEMUX, MSGL_DBG2, "sint invalid length %"PRIu64
590 "\n", length);
591 goto error;
593 *sintptr = ebml_parse_sint(data, length);
594 mp_msg(MSGT_DEMUX, MSGL_DBG2, "sint %"PRId64"\n", *sintptr);
595 break;
597 case EBML_TYPE_FLOAT:;
598 double *floatptr;
599 GETPTR(floatptr, double);
600 if (length != 4 && length != 8) {
601 mp_msg(MSGT_DEMUX, MSGL_DBG2, "float invalid length %"PRIu64
602 "\n", length);
603 goto error;
605 *floatptr = ebml_parse_float(data, length);
606 mp_msg(MSGT_DEMUX, MSGL_DBG2, "float %f\n", *floatptr);
607 break;
609 case EBML_TYPE_STR:
610 case EBML_TYPE_BINARY:;
611 struct bstr *strptr;
612 GETPTR(strptr, struct bstr);
613 strptr->start = data;
614 strptr->len = length;
615 if (ed->type == EBML_TYPE_STR)
616 mp_msg(MSGT_DEMUX, MSGL_DBG2, "string \"%.*s\"\n",
617 BSTR_P(*strptr));
618 else
619 mp_msg(MSGT_DEMUX, MSGL_DBG2, "binary %zd bytes\n",
620 strptr->len);
621 break;
623 case EBML_TYPE_EBML_ID:;
624 uint32_t *idptr;
625 GETPTR(idptr, uint32_t);
626 *idptr = ebml_parse_id(data, &len);
627 if (len != length) {
628 mp_msg(MSGT_DEMUX, MSGL_DBG2, "ebml_id broken value\n");
629 goto error;
631 mp_msg(MSGT_DEMUX, MSGL_DBG2, "ebml_id %x\n", (unsigned)*idptr);
632 break;
633 default:
634 abort();
636 *countptr += 1;
637 error:
638 data += length;
642 // target must be initialized to zero
643 int ebml_read_element(struct stream *s, struct ebml_parse_ctx *ctx,
644 void *target, const struct ebml_elem_desc *desc)
646 ctx->has_errors = false;
647 int msglevel = ctx->no_error_messages ? MSGL_DBG2 : MSGL_WARN;
648 uint64_t length = ebml_read_length(s, &ctx->bytes_read);
649 if (s->eof) {
650 mp_msg(MSGT_DEMUX, msglevel, "[mkv] Unexpected end of file "
651 "- partial or corrupt file?\n");
652 return -1;
654 if (length > 1000000000) {
655 mp_msg(MSGT_DEMUX, msglevel, "[mkv] Refusing to read element over "
656 "100 MB in size\n");
657 return -1;
659 ctx->talloc_ctx = talloc_size(NULL, length + 8);
660 int read_len = stream_read(s, ctx->talloc_ctx, length);
661 ctx->bytes_read += read_len;
662 if (read_len < length)
663 mp_msg(MSGT_DEMUX, msglevel, "[mkv] Unexpected end of file "
664 "- partial or corrupt file?\n");
665 ebml_parse_element(ctx, target, ctx->talloc_ctx, read_len, desc, 0);
666 if (ctx->has_errors)
667 mp_msg(MSGT_DEMUX, msglevel, "[mkv] Error parsing element %s\n",
668 desc->name);
669 return 0;