/*
 * Matroska file demuxer
 * Copyright (c) 2003-2008 The FFmpeg Project
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * Matroska file demuxer
 * by Ronald Bultje <rbultje@ronald.bitfreak.net>
 * with a little help from Moritz Bunkus <moritz@bunkus.org>
 * totally reworked by Aurelien Jacobs <aurel@gnuage.org>
 * Specs available on the Matroska project page: http://www.matroska.org/.
 */
33 /* For codec_get_id(). */
37 #include "libavcodec/mpeg4audio.h"
38 #include "libavutil/intfloat_readwrite.h"
39 #include "libavutil/avstring.h"
40 #include "libavutil/lzo.h"
60 typedef const struct EbmlSyntax
{
69 const struct EbmlSyntax
*n
;
89 uint64_t doctype_version
;
95 } MatroskaTrackCompression
;
100 MatroskaTrackCompression compression
;
101 } MatroskaTrackEncoding
;
105 uint64_t display_width
;
106 uint64_t display_height
;
107 uint64_t pixel_width
;
108 uint64_t pixel_height
;
110 } MatroskaTrackVideo
;
114 double out_samplerate
;
118 /* real audio header (extracted from extradata) */
126 } MatroskaTrackAudio
;
135 uint64_t default_duration
;
136 uint64_t flag_default
;
137 MatroskaTrackVideo video
;
138 MatroskaTrackAudio audio
;
142 int64_t end_timecode
;
149 } MatroskaAttachement
;
185 AVFormatContext
*ctx
;
189 MatroskaLevel levels
[EBML_MAX_DEPTH
];
196 EbmlList attachments
;
202 /* byte position of the segment inside the stream */
203 int64_t segment_start
;
205 /* the packet queue */
213 /* What to skip before effectively reading a packet. */
214 int skip_to_keyframe
;
215 uint64_t skip_to_timecode
;
216 } MatroskaDemuxContext
;
229 static EbmlSyntax ebml_header
[] = {
230 { EBML_ID_EBMLREADVERSION
, EBML_UINT
, 0, offsetof(Ebml
,version
), {.u
=EBML_VERSION
} },
231 { EBML_ID_EBMLMAXSIZELENGTH
, EBML_UINT
, 0, offsetof(Ebml
,max_size
), {.u
=8} },
232 { EBML_ID_EBMLMAXIDLENGTH
, EBML_UINT
, 0, offsetof(Ebml
,id_length
), {.u
=4} },
233 { EBML_ID_DOCTYPE
, EBML_STR
, 0, offsetof(Ebml
,doctype
), {.s
="(none)"} },
234 { EBML_ID_DOCTYPEREADVERSION
, EBML_UINT
, 0, offsetof(Ebml
,doctype_version
), {.u
=1} },
235 { EBML_ID_EBMLVERSION
, EBML_NONE
},
236 { EBML_ID_DOCTYPEVERSION
, EBML_NONE
},
240 static EbmlSyntax ebml_syntax
[] = {
241 { EBML_ID_HEADER
, EBML_NEST
, 0, 0, {.n
=ebml_header
} },
245 static EbmlSyntax matroska_info
[] = {
246 { MATROSKA_ID_TIMECODESCALE
, EBML_UINT
, 0, offsetof(MatroskaDemuxContext
,time_scale
), {.u
=1000000} },
247 { MATROSKA_ID_DURATION
, EBML_FLOAT
, 0, offsetof(MatroskaDemuxContext
,duration
) },
248 { MATROSKA_ID_TITLE
, EBML_UTF8
, 0, offsetof(MatroskaDemuxContext
,title
) },
249 { MATROSKA_ID_WRITINGAPP
, EBML_NONE
},
250 { MATROSKA_ID_MUXINGAPP
, EBML_NONE
},
251 { MATROSKA_ID_DATEUTC
, EBML_NONE
},
252 { MATROSKA_ID_SEGMENTUID
, EBML_NONE
},
256 static EbmlSyntax matroska_track_video
[] = {
257 { MATROSKA_ID_VIDEOFRAMERATE
, EBML_FLOAT
,0, offsetof(MatroskaTrackVideo
,frame_rate
) },
258 { MATROSKA_ID_VIDEODISPLAYWIDTH
, EBML_UINT
, 0, offsetof(MatroskaTrackVideo
,display_width
) },
259 { MATROSKA_ID_VIDEODISPLAYHEIGHT
, EBML_UINT
, 0, offsetof(MatroskaTrackVideo
,display_height
) },
260 { MATROSKA_ID_VIDEOPIXELWIDTH
, EBML_UINT
, 0, offsetof(MatroskaTrackVideo
,pixel_width
) },
261 { MATROSKA_ID_VIDEOPIXELHEIGHT
, EBML_UINT
, 0, offsetof(MatroskaTrackVideo
,pixel_height
) },
262 { MATROSKA_ID_VIDEOCOLORSPACE
, EBML_UINT
, 0, offsetof(MatroskaTrackVideo
,fourcc
) },
263 { MATROSKA_ID_VIDEOPIXELCROPB
, EBML_NONE
},
264 { MATROSKA_ID_VIDEOPIXELCROPT
, EBML_NONE
},
265 { MATROSKA_ID_VIDEOPIXELCROPL
, EBML_NONE
},
266 { MATROSKA_ID_VIDEOPIXELCROPR
, EBML_NONE
},
267 { MATROSKA_ID_VIDEODISPLAYUNIT
, EBML_NONE
},
268 { MATROSKA_ID_VIDEOFLAGINTERLACED
,EBML_NONE
},
269 { MATROSKA_ID_VIDEOSTEREOMODE
, EBML_NONE
},
270 { MATROSKA_ID_VIDEOASPECTRATIO
, EBML_NONE
},
274 static EbmlSyntax matroska_track_audio
[] = {
275 { MATROSKA_ID_AUDIOSAMPLINGFREQ
, EBML_FLOAT
,0, offsetof(MatroskaTrackAudio
,samplerate
), {.f
=8000.0} },
276 { MATROSKA_ID_AUDIOOUTSAMPLINGFREQ
,EBML_FLOAT
,0,offsetof(MatroskaTrackAudio
,out_samplerate
) },
277 { MATROSKA_ID_AUDIOBITDEPTH
, EBML_UINT
, 0, offsetof(MatroskaTrackAudio
,bitdepth
) },
278 { MATROSKA_ID_AUDIOCHANNELS
, EBML_UINT
, 0, offsetof(MatroskaTrackAudio
,channels
), {.u
=1} },
282 static EbmlSyntax matroska_track_encoding_compression
[] = {
283 { MATROSKA_ID_ENCODINGCOMPALGO
, EBML_UINT
, 0, offsetof(MatroskaTrackCompression
,algo
), {.u
=0} },
284 { MATROSKA_ID_ENCODINGCOMPSETTINGS
,EBML_BIN
, 0, offsetof(MatroskaTrackCompression
,settings
) },
288 static EbmlSyntax matroska_track_encoding
[] = {
289 { MATROSKA_ID_ENCODINGSCOPE
, EBML_UINT
, 0, offsetof(MatroskaTrackEncoding
,scope
), {.u
=1} },
290 { MATROSKA_ID_ENCODINGTYPE
, EBML_UINT
, 0, offsetof(MatroskaTrackEncoding
,type
), {.u
=0} },
291 { MATROSKA_ID_ENCODINGCOMPRESSION
,EBML_NEST
, 0, offsetof(MatroskaTrackEncoding
,compression
), {.n
=matroska_track_encoding_compression
} },
292 { MATROSKA_ID_ENCODINGORDER
, EBML_NONE
},
296 static EbmlSyntax matroska_track_encodings
[] = {
297 { MATROSKA_ID_TRACKCONTENTENCODING
, EBML_NEST
, sizeof(MatroskaTrackEncoding
), offsetof(MatroskaTrack
,encodings
), {.n
=matroska_track_encoding
} },
301 static EbmlSyntax matroska_track
[] = {
302 { MATROSKA_ID_TRACKNUMBER
, EBML_UINT
, 0, offsetof(MatroskaTrack
,num
) },
303 { MATROSKA_ID_TRACKTYPE
, EBML_UINT
, 0, offsetof(MatroskaTrack
,type
) },
304 { MATROSKA_ID_CODECID
, EBML_STR
, 0, offsetof(MatroskaTrack
,codec_id
) },
305 { MATROSKA_ID_CODECPRIVATE
, EBML_BIN
, 0, offsetof(MatroskaTrack
,codec_priv
) },
306 { MATROSKA_ID_TRACKLANGUAGE
, EBML_UTF8
, 0, offsetof(MatroskaTrack
,language
), {.s
="eng"} },
307 { MATROSKA_ID_TRACKDEFAULTDURATION
, EBML_UINT
, 0, offsetof(MatroskaTrack
,default_duration
) },
308 { MATROSKA_ID_TRACKTIMECODESCALE
, EBML_FLOAT
,0, offsetof(MatroskaTrack
,time_scale
), {.f
=1.0} },
309 { MATROSKA_ID_TRACKFLAGDEFAULT
, EBML_UINT
, 0, offsetof(MatroskaTrack
,flag_default
), {.u
=1} },
310 { MATROSKA_ID_TRACKVIDEO
, EBML_NEST
, 0, offsetof(MatroskaTrack
,video
), {.n
=matroska_track_video
} },
311 { MATROSKA_ID_TRACKAUDIO
, EBML_NEST
, 0, offsetof(MatroskaTrack
,audio
), {.n
=matroska_track_audio
} },
312 { MATROSKA_ID_TRACKCONTENTENCODINGS
,EBML_NEST
, 0, 0, {.n
=matroska_track_encodings
} },
313 { MATROSKA_ID_TRACKUID
, EBML_NONE
},
314 { MATROSKA_ID_TRACKNAME
, EBML_NONE
},
315 { MATROSKA_ID_TRACKFLAGENABLED
, EBML_NONE
},
316 { MATROSKA_ID_TRACKFLAGFORCED
, EBML_NONE
},
317 { MATROSKA_ID_TRACKFLAGLACING
, EBML_NONE
},
318 { MATROSKA_ID_CODECNAME
, EBML_NONE
},
319 { MATROSKA_ID_CODECDECODEALL
, EBML_NONE
},
320 { MATROSKA_ID_CODECINFOURL
, EBML_NONE
},
321 { MATROSKA_ID_CODECDOWNLOADURL
, EBML_NONE
},
322 { MATROSKA_ID_TRACKMINCACHE
, EBML_NONE
},
323 { MATROSKA_ID_TRACKMAXCACHE
, EBML_NONE
},
324 { MATROSKA_ID_TRACKMAXBLKADDID
, EBML_NONE
},
328 static EbmlSyntax matroska_tracks
[] = {
329 { MATROSKA_ID_TRACKENTRY
, EBML_NEST
, sizeof(MatroskaTrack
), offsetof(MatroskaDemuxContext
,tracks
), {.n
=matroska_track
} },
333 static EbmlSyntax matroska_attachment
[] = {
334 { MATROSKA_ID_FILENAME
, EBML_UTF8
, 0, offsetof(MatroskaAttachement
,filename
) },
335 { MATROSKA_ID_FILEMIMETYPE
, EBML_STR
, 0, offsetof(MatroskaAttachement
,mime
) },
336 { MATROSKA_ID_FILEDATA
, EBML_BIN
, 0, offsetof(MatroskaAttachement
,bin
) },
337 { MATROSKA_ID_FILEDESC
, EBML_NONE
},
338 { MATROSKA_ID_FILEUID
, EBML_NONE
},
342 static EbmlSyntax matroska_attachments
[] = {
343 { MATROSKA_ID_ATTACHEDFILE
, EBML_NEST
, sizeof(MatroskaAttachement
), offsetof(MatroskaDemuxContext
,attachments
), {.n
=matroska_attachment
} },
347 static EbmlSyntax matroska_chapter_display
[] = {
348 { MATROSKA_ID_CHAPSTRING
, EBML_UTF8
, 0, offsetof(MatroskaChapter
,title
) },
349 { MATROSKA_ID_CHAPLANG
, EBML_NONE
},
353 static EbmlSyntax matroska_chapter_entry
[] = {
354 { MATROSKA_ID_CHAPTERTIMESTART
, EBML_UINT
, 0, offsetof(MatroskaChapter
,start
), {.u
=AV_NOPTS_VALUE
} },
355 { MATROSKA_ID_CHAPTERTIMEEND
, EBML_UINT
, 0, offsetof(MatroskaChapter
,end
), {.u
=AV_NOPTS_VALUE
} },
356 { MATROSKA_ID_CHAPTERUID
, EBML_UINT
, 0, offsetof(MatroskaChapter
,uid
) },
357 { MATROSKA_ID_CHAPTERDISPLAY
, EBML_NEST
, 0, 0, {.n
=matroska_chapter_display
} },
358 { MATROSKA_ID_CHAPTERFLAGHIDDEN
, EBML_NONE
},
359 { MATROSKA_ID_CHAPTERFLAGENABLED
, EBML_NONE
},
360 { MATROSKA_ID_CHAPTERPHYSEQUIV
, EBML_NONE
},
361 { MATROSKA_ID_CHAPTERATOM
, EBML_NONE
},
365 static EbmlSyntax matroska_chapter
[] = {
366 { MATROSKA_ID_CHAPTERATOM
, EBML_NEST
, sizeof(MatroskaChapter
), offsetof(MatroskaDemuxContext
,chapters
), {.n
=matroska_chapter_entry
} },
367 { MATROSKA_ID_EDITIONUID
, EBML_NONE
},
368 { MATROSKA_ID_EDITIONFLAGHIDDEN
, EBML_NONE
},
369 { MATROSKA_ID_EDITIONFLAGDEFAULT
, EBML_NONE
},
370 { MATROSKA_ID_EDITIONFLAGORDERED
, EBML_NONE
},
374 static EbmlSyntax matroska_chapters
[] = {
375 { MATROSKA_ID_EDITIONENTRY
, EBML_NEST
, 0, 0, {.n
=matroska_chapter
} },
379 static EbmlSyntax matroska_index_pos
[] = {
380 { MATROSKA_ID_CUETRACK
, EBML_UINT
, 0, offsetof(MatroskaIndexPos
,track
) },
381 { MATROSKA_ID_CUECLUSTERPOSITION
, EBML_UINT
, 0, offsetof(MatroskaIndexPos
,pos
) },
382 { MATROSKA_ID_CUEBLOCKNUMBER
, EBML_NONE
},
386 static EbmlSyntax matroska_index_entry
[] = {
387 { MATROSKA_ID_CUETIME
, EBML_UINT
, 0, offsetof(MatroskaIndex
,time
) },
388 { MATROSKA_ID_CUETRACKPOSITION
, EBML_NEST
, sizeof(MatroskaIndexPos
), offsetof(MatroskaIndex
,pos
), {.n
=matroska_index_pos
} },
392 static EbmlSyntax matroska_index
[] = {
393 { MATROSKA_ID_POINTENTRY
, EBML_NEST
, sizeof(MatroskaIndex
), offsetof(MatroskaDemuxContext
,index
), {.n
=matroska_index_entry
} },
397 static EbmlSyntax matroska_simpletag
[] = {
398 { MATROSKA_ID_TAGNAME
, EBML_UTF8
, 0, offsetof(MatroskaTag
,name
) },
399 { MATROSKA_ID_TAGSTRING
, EBML_UTF8
, 0, offsetof(MatroskaTag
,string
) },
400 { MATROSKA_ID_SIMPLETAG
, EBML_NEST
, sizeof(MatroskaTag
), offsetof(MatroskaTag
,sub
), {.n
=matroska_simpletag
} },
401 { MATROSKA_ID_TAGLANG
, EBML_NONE
},
402 { MATROSKA_ID_TAGDEFAULT
, EBML_NONE
},
406 static EbmlSyntax matroska_tag
[] = {
407 { MATROSKA_ID_SIMPLETAG
, EBML_NEST
, sizeof(MatroskaTag
), 0, {.n
=matroska_simpletag
} },
408 { MATROSKA_ID_TAGTARGETS
, EBML_NONE
},
412 static EbmlSyntax matroska_tags
[] = {
413 { MATROSKA_ID_TAG
, EBML_NEST
, 0, offsetof(MatroskaDemuxContext
,tags
), {.n
=matroska_tag
} },
417 static EbmlSyntax matroska_seekhead_entry
[] = {
418 { MATROSKA_ID_SEEKID
, EBML_UINT
, 0, offsetof(MatroskaSeekhead
,id
) },
419 { MATROSKA_ID_SEEKPOSITION
, EBML_UINT
, 0, offsetof(MatroskaSeekhead
,pos
), {.u
=-1} },
423 static EbmlSyntax matroska_seekhead
[] = {
424 { MATROSKA_ID_SEEKENTRY
, EBML_NEST
, sizeof(MatroskaSeekhead
), offsetof(MatroskaDemuxContext
,seekhead
), {.n
=matroska_seekhead_entry
} },
428 static EbmlSyntax matroska_segment
[] = {
429 { MATROSKA_ID_INFO
, EBML_NEST
, 0, 0, {.n
=matroska_info
} },
430 { MATROSKA_ID_TRACKS
, EBML_NEST
, 0, 0, {.n
=matroska_tracks
} },
431 { MATROSKA_ID_ATTACHMENTS
, EBML_NEST
, 0, 0, {.n
=matroska_attachments
} },
432 { MATROSKA_ID_CHAPTERS
, EBML_NEST
, 0, 0, {.n
=matroska_chapters
} },
433 { MATROSKA_ID_CUES
, EBML_NEST
, 0, 0, {.n
=matroska_index
} },
434 { MATROSKA_ID_TAGS
, EBML_NEST
, 0, 0, {.n
=matroska_tags
} },
435 { MATROSKA_ID_SEEKHEAD
, EBML_NEST
, 0, 0, {.n
=matroska_seekhead
} },
436 { MATROSKA_ID_CLUSTER
, EBML_STOP
, 0, offsetof(MatroskaDemuxContext
,has_cluster_id
) },
440 static EbmlSyntax matroska_segments
[] = {
441 { MATROSKA_ID_SEGMENT
, EBML_NEST
, 0, 0, {.n
=matroska_segment
} },
445 static EbmlSyntax matroska_blockgroup
[] = {
446 { MATROSKA_ID_BLOCK
, EBML_BIN
, 0, offsetof(MatroskaBlock
,bin
) },
447 { MATROSKA_ID_SIMPLEBLOCK
, EBML_BIN
, 0, offsetof(MatroskaBlock
,bin
) },
448 { MATROSKA_ID_BLOCKDURATION
, EBML_UINT
, 0, offsetof(MatroskaBlock
,duration
), {.u
=AV_NOPTS_VALUE
} },
449 { MATROSKA_ID_BLOCKREFERENCE
, EBML_UINT
, 0, offsetof(MatroskaBlock
,reference
) },
453 static EbmlSyntax matroska_cluster
[] = {
454 { MATROSKA_ID_CLUSTERTIMECODE
,EBML_UINT
,0, offsetof(MatroskaCluster
,timecode
) },
455 { MATROSKA_ID_BLOCKGROUP
, EBML_NEST
, sizeof(MatroskaBlock
), offsetof(MatroskaCluster
,blocks
), {.n
=matroska_blockgroup
} },
456 { MATROSKA_ID_SIMPLEBLOCK
, EBML_PASS
, sizeof(MatroskaBlock
), offsetof(MatroskaCluster
,blocks
), {.n
=matroska_blockgroup
} },
457 { MATROSKA_ID_CLUSTERPOSITION
,EBML_NONE
},
458 { MATROSKA_ID_CLUSTERPREVSIZE
,EBML_NONE
},
462 static EbmlSyntax matroska_clusters
[] = {
463 { MATROSKA_ID_CLUSTER
, EBML_NEST
, 0, 0, {.n
=matroska_cluster
} },
464 { MATROSKA_ID_INFO
, EBML_NONE
},
465 { MATROSKA_ID_CUES
, EBML_NONE
},
466 { MATROSKA_ID_TAGS
, EBML_NONE
},
467 { MATROSKA_ID_SEEKHEAD
, EBML_NONE
},
471 #define SIZE_OFF(x) sizeof(((AVFormatContext*)0)->x),offsetof(AVFormatContext,x)
477 { "TITLE", SIZE_OFF(title
) },
478 { "ARTIST", SIZE_OFF(author
) },
479 { "WRITTEN_BY", SIZE_OFF(author
) },
480 { "LEAD_PERFORMER", SIZE_OFF(author
) },
481 { "COPYRIGHT", SIZE_OFF(copyright
) },
482 { "COMMENT", SIZE_OFF(comment
) },
483 { "ALBUM", SIZE_OFF(album
) },
484 { "DATE_WRITTEN", SIZE_OFF(year
) },
485 { "DATE_RELEASED", SIZE_OFF(year
) },
486 { "PART_NUMBER", SIZE_OFF(track
) },
487 { "GENRE", SIZE_OFF(genre
) },
491 * Return: Whether we reached the end of a level in the hierarchy or not.
493 static int ebml_level_end(MatroskaDemuxContext
*matroska
)
495 ByteIOContext
*pb
= matroska
->ctx
->pb
;
496 int64_t pos
= url_ftell(pb
);
498 if (matroska
->num_levels
> 0) {
499 MatroskaLevel
*level
= &matroska
->levels
[matroska
->num_levels
- 1];
500 if (pos
- level
->start
>= level
->length
) {
501 matroska
->num_levels
--;
509 * Read: an "EBML number", which is defined as a variable-length
510 * array of bytes. The first byte indicates the length by giving a
511 * number of 0-bits followed by a one. The position of the first
512 * "one" bit inside the first byte indicates the length of this
514 * Returns: number of bytes read, < 0 on error
516 static int ebml_read_num(MatroskaDemuxContext
*matroska
, ByteIOContext
*pb
,
517 int max_size
, uint64_t *number
)
519 int len_mask
= 0x80, read
= 1, n
= 1;
522 /* The first byte tells us the length in bytes - get_byte() can normally
523 * return 0, but since that's not a valid first ebmlID byte, we can
524 * use it safely here to catch EOS. */
525 if (!(total
= get_byte(pb
))) {
526 /* we might encounter EOS here */
528 int64_t pos
= url_ftell(pb
);
529 av_log(matroska
->ctx
, AV_LOG_ERROR
,
530 "Read error at pos. %"PRIu64
" (0x%"PRIx64
")\n",
533 return AVERROR(EIO
); /* EOS or actual I/O error */
536 /* get the length of the EBML number */
537 while (read
<= max_size
&& !(total
& len_mask
)) {
541 if (read
> max_size
) {
542 int64_t pos
= url_ftell(pb
) - 1;
543 av_log(matroska
->ctx
, AV_LOG_ERROR
,
544 "Invalid EBML number size tag 0x%02x at pos %"PRIu64
" (0x%"PRIx64
")\n",
545 (uint8_t) total
, pos
, pos
);
546 return AVERROR_INVALIDDATA
;
549 /* read out length */
552 total
= (total
<< 8) | get_byte(pb
);
560 * Read the next element as an unsigned int.
561 * 0 is success, < 0 is failure.
563 static int ebml_read_uint(ByteIOContext
*pb
, int size
, uint64_t *num
)
567 if (size
< 1 || size
> 8)
568 return AVERROR_INVALIDDATA
;
570 /* big-endian ordering; build up number */
573 *num
= (*num
<< 8) | get_byte(pb
);
579 * Read the next element as a float.
580 * 0 is success, < 0 is failure.
582 static int ebml_read_float(ByteIOContext
*pb
, int size
, double *num
)
585 *num
= av_int2flt(get_be32(pb
));
587 *num
= av_int2dbl(get_be64(pb
));
589 return AVERROR_INVALIDDATA
;
595 * Read the next element as an ASCII string.
596 * 0 is success, < 0 is failure.
598 static int ebml_read_ascii(ByteIOContext
*pb
, int size
, char **str
)
601 /* EBML strings are usually not 0-terminated, so we allocate one
602 * byte more, read the string and NULL-terminate it ourselves. */
603 if (!(*str
= av_malloc(size
+ 1)))
604 return AVERROR(ENOMEM
);
605 if (get_buffer(pb
, (uint8_t *) *str
, size
) != size
) {
615 * Read the next element as binary data.
616 * 0 is success, < 0 is failure.
618 static int ebml_read_binary(ByteIOContext
*pb
, int length
, EbmlBin
*bin
)
621 if (!(bin
->data
= av_malloc(length
)))
622 return AVERROR(ENOMEM
);
625 bin
->pos
= url_ftell(pb
);
626 if (get_buffer(pb
, bin
->data
, length
) != length
)
633 * Read the next element, but only the header. The contents
634 * are supposed to be sub-elements which can be read separately.
635 * 0 is success, < 0 is failure.
637 static int ebml_read_master(MatroskaDemuxContext
*matroska
, int length
)
639 ByteIOContext
*pb
= matroska
->ctx
->pb
;
640 MatroskaLevel
*level
;
642 if (matroska
->num_levels
>= EBML_MAX_DEPTH
) {
643 av_log(matroska
->ctx
, AV_LOG_ERROR
,
644 "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH
);
645 return AVERROR(ENOSYS
);
648 level
= &matroska
->levels
[matroska
->num_levels
++];
649 level
->start
= url_ftell(pb
);
650 level
->length
= length
;
656 * Read signed/unsigned "EBML" numbers.
657 * Return: number of bytes processed, < 0 on error
659 static int matroska_ebmlnum_uint(MatroskaDemuxContext
*matroska
,
660 uint8_t *data
, uint32_t size
, uint64_t *num
)
663 init_put_byte(&pb
, data
, size
, 0, NULL
, NULL
, NULL
, NULL
);
664 return ebml_read_num(matroska
, &pb
, 8, num
);
668 * Same as above, but signed.
670 static int matroska_ebmlnum_sint(MatroskaDemuxContext
*matroska
,
671 uint8_t *data
, uint32_t size
, int64_t *num
)
676 /* read as unsigned number first */
677 if ((res
= matroska_ebmlnum_uint(matroska
, data
, size
, &unum
)) < 0)
680 /* make signed (weird way) */
681 *num
= unum
- ((1LL << (7*res
- 1)) - 1);
686 static int ebml_parse_elem(MatroskaDemuxContext
*matroska
,
687 EbmlSyntax
*syntax
, void *data
);
689 static int ebml_parse_id(MatroskaDemuxContext
*matroska
, EbmlSyntax
*syntax
,
690 uint32_t id
, void *data
)
693 for (i
=0; syntax
[i
].id
; i
++)
694 if (id
== syntax
[i
].id
)
696 if (!syntax
[i
].id
&& id
!= EBML_ID_VOID
&& id
!= EBML_ID_CRC32
)
697 av_log(matroska
->ctx
, AV_LOG_INFO
, "Unknown entry 0x%X\n", id
);
698 return ebml_parse_elem(matroska
, &syntax
[i
], data
);
701 static int ebml_parse(MatroskaDemuxContext
*matroska
, EbmlSyntax
*syntax
,
705 int res
= ebml_read_num(matroska
, matroska
->ctx
->pb
, 4, &id
);
707 return res
< 0 ? res
: ebml_parse_id(matroska
, syntax
, id
, data
);
710 static int ebml_parse_nest(MatroskaDemuxContext
*matroska
, EbmlSyntax
*syntax
,
715 for (i
=0; syntax
[i
].id
; i
++)
716 switch (syntax
[i
].type
) {
718 *(uint64_t *)((char *)data
+syntax
[i
].data_offset
) = syntax
[i
].def
.u
;
721 *(double *)((char *)data
+syntax
[i
].data_offset
) = syntax
[i
].def
.f
;
725 *(char **)((char *)data
+syntax
[i
].data_offset
) = av_strdup(syntax
[i
].def
.s
);
729 while (!res
&& !ebml_level_end(matroska
))
730 res
= ebml_parse(matroska
, syntax
, data
);
735 static int ebml_parse_elem(MatroskaDemuxContext
*matroska
,
736 EbmlSyntax
*syntax
, void *data
)
738 ByteIOContext
*pb
= matroska
->ctx
->pb
;
739 uint32_t id
= syntax
->id
;
743 data
= (char *)data
+ syntax
->data_offset
;
744 if (syntax
->list_elem_size
) {
745 EbmlList
*list
= data
;
746 list
->elem
= av_realloc(list
->elem
, (list
->nb_elem
+1)*syntax
->list_elem_size
);
747 data
= (char*)list
->elem
+ list
->nb_elem
*syntax
->list_elem_size
;
748 memset(data
, 0, syntax
->list_elem_size
);
752 if (syntax
->type
!= EBML_PASS
&& syntax
->type
!= EBML_STOP
)
753 if ((res
= ebml_read_num(matroska
, pb
, 8, &length
)) < 0)
756 switch (syntax
->type
) {
757 case EBML_UINT
: res
= ebml_read_uint (pb
, length
, data
); break;
758 case EBML_FLOAT
: res
= ebml_read_float (pb
, length
, data
); break;
760 case EBML_UTF8
: res
= ebml_read_ascii (pb
, length
, data
); break;
761 case EBML_BIN
: res
= ebml_read_binary(pb
, length
, data
); break;
762 case EBML_NEST
: if ((res
=ebml_read_master(matroska
, length
)) < 0)
764 if (id
== MATROSKA_ID_SEGMENT
)
765 matroska
->segment_start
= url_ftell(matroska
->ctx
->pb
);
766 return ebml_parse_nest(matroska
, syntax
->def
.n
, data
);
767 case EBML_PASS
: return ebml_parse_id(matroska
, syntax
->def
.n
, id
, data
);
768 case EBML_STOP
: *(int *)data
= 1; return 1;
769 default: return url_fseek(pb
,length
,SEEK_CUR
)<0 ? AVERROR(EIO
) : 0;
771 if (res
== AVERROR_INVALIDDATA
)
772 av_log(matroska
->ctx
, AV_LOG_ERROR
, "Invalid element\n");
773 else if (res
== AVERROR(EIO
))
774 av_log(matroska
->ctx
, AV_LOG_ERROR
, "Read error\n");
778 static void ebml_free(EbmlSyntax
*syntax
, void *data
)
781 for (i
=0; syntax
[i
].id
; i
++) {
782 void *data_off
= (char *)data
+ syntax
[i
].data_offset
;
783 switch (syntax
[i
].type
) {
785 case EBML_UTF8
: av_freep(data_off
); break;
786 case EBML_BIN
: av_freep(&((EbmlBin
*)data_off
)->data
); break;
788 if (syntax
[i
].list_elem_size
) {
789 EbmlList
*list
= data_off
;
790 char *ptr
= list
->elem
;
791 for (j
=0; j
<list
->nb_elem
; j
++, ptr
+=syntax
[i
].list_elem_size
)
792 ebml_free(syntax
[i
].def
.n
, ptr
);
795 ebml_free(syntax
[i
].def
.n
, data_off
);
805 static int matroska_probe(AVProbeData
*p
)
808 int len_mask
= 0x80, size
= 1, n
= 1;
809 static const char probe_data
[] = "matroska";
812 if (AV_RB32(p
->buf
) != EBML_ID_HEADER
)
815 /* length of header */
817 while (size
<= 8 && !(total
& len_mask
)) {
823 total
&= (len_mask
- 1);
825 total
= (total
<< 8) | p
->buf
[4 + n
++];
827 /* Does the probe data contain the whole header? */
828 if (p
->buf_size
< 4 + size
+ total
)
831 /* The header must contain the document type 'matroska'. For now,
832 * we don't parse the whole header but simply check for the
833 * availability of that array of characters inside the header.
834 * Not fully fool-proof, but good enough. */
835 for (n
= 4+size
; n
<= 4+size
+total
-(sizeof(probe_data
)-1); n
++)
836 if (!memcmp(p
->buf
+n
, probe_data
, sizeof(probe_data
)-1))
837 return AVPROBE_SCORE_MAX
;
842 static MatroskaTrack
*matroska_find_track_by_num(MatroskaDemuxContext
*matroska
,
845 MatroskaTrack
*tracks
= matroska
->tracks
.elem
;
848 for (i
=0; i
< matroska
->tracks
.nb_elem
; i
++)
849 if (tracks
[i
].num
== num
)
852 av_log(matroska
->ctx
, AV_LOG_ERROR
, "Invalid track number %d\n", num
);
856 static int matroska_decode_buffer(uint8_t** buf
, int* buf_size
,
857 MatroskaTrack
*track
)
859 MatroskaTrackEncoding
*encodings
= track
->encodings
.elem
;
860 uint8_t* data
= *buf
;
861 int isize
= *buf_size
;
862 uint8_t* pkt_data
= NULL
;
863 int pkt_size
= isize
;
867 switch (encodings
[0].compression
.algo
) {
868 case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP
:
869 return encodings
[0].compression
.settings
.size
;
870 case MATROSKA_TRACK_ENCODING_COMP_LZO
:
872 olen
= pkt_size
*= 3;
873 pkt_data
= av_realloc(pkt_data
,
874 pkt_size
+LZO_OUTPUT_PADDING
);
875 result
= lzo1x_decode(pkt_data
, &olen
, data
, &isize
);
876 } while (result
==LZO_OUTPUT_FULL
&& pkt_size
<10000000);
882 case MATROSKA_TRACK_ENCODING_COMP_ZLIB
: {
883 z_stream zstream
= {0};
884 if (inflateInit(&zstream
) != Z_OK
)
886 zstream
.next_in
= data
;
887 zstream
.avail_in
= isize
;
890 pkt_data
= av_realloc(pkt_data
, pkt_size
);
891 zstream
.avail_out
= pkt_size
- zstream
.total_out
;
892 zstream
.next_out
= pkt_data
+ zstream
.total_out
;
893 result
= inflate(&zstream
, Z_NO_FLUSH
);
894 } while (result
==Z_OK
&& pkt_size
<10000000);
895 pkt_size
= zstream
.total_out
;
896 inflateEnd(&zstream
);
897 if (result
!= Z_STREAM_END
)
903 case MATROSKA_TRACK_ENCODING_COMP_BZLIB
: {
904 bz_stream bzstream
= {0};
905 if (BZ2_bzDecompressInit(&bzstream
, 0, 0) != BZ_OK
)
907 bzstream
.next_in
= data
;
908 bzstream
.avail_in
= isize
;
911 pkt_data
= av_realloc(pkt_data
, pkt_size
);
912 bzstream
.avail_out
= pkt_size
- bzstream
.total_out_lo32
;
913 bzstream
.next_out
= pkt_data
+ bzstream
.total_out_lo32
;
914 result
= BZ2_bzDecompress(&bzstream
);
915 } while (result
==BZ_OK
&& pkt_size
<10000000);
916 pkt_size
= bzstream
.total_out_lo32
;
917 BZ2_bzDecompressEnd(&bzstream
);
918 if (result
!= BZ_STREAM_END
)
928 *buf_size
= pkt_size
;
935 static void matroska_fix_ass_packet(MatroskaDemuxContext
*matroska
,
936 AVPacket
*pkt
, uint64_t display_duration
)
938 char *line
, *layer
, *ptr
= pkt
->data
, *end
= ptr
+pkt
->size
;
939 for (; *ptr
!=',' && ptr
<end
-1; ptr
++);
942 for (; *ptr
!=',' && ptr
<end
-1; ptr
++);
944 int64_t end_pts
= pkt
->pts
+ display_duration
;
945 int sc
= matroska
->time_scale
* pkt
->pts
/ 10000000;
946 int ec
= matroska
->time_scale
* end_pts
/ 10000000;
947 int sh
, sm
, ss
, eh
, em
, es
, len
;
948 sh
= sc
/360000; sc
-= 360000*sh
;
949 sm
= sc
/ 6000; sc
-= 6000*sm
;
950 ss
= sc
/ 100; sc
-= 100*ss
;
951 eh
= ec
/360000; ec
-= 360000*eh
;
952 em
= ec
/ 6000; ec
-= 6000*em
;
953 es
= ec
/ 100; ec
-= 100*es
;
955 len
= 50 + end
-ptr
+ FF_INPUT_BUFFER_PADDING_SIZE
;
956 if (!(line
= av_malloc(len
)))
958 snprintf(line
,len
,"Dialogue: %s,%d:%02d:%02d.%02d,%d:%02d:%02d.%02d,%s\r\n",
959 layer
, sh
, sm
, ss
, sc
, eh
, em
, es
, ec
, ptr
);
962 pkt
->size
= strlen(line
);
966 static void matroska_merge_packets(AVPacket
*out
, AVPacket
*in
)
968 out
->data
= av_realloc(out
->data
, out
->size
+in
->size
);
969 memcpy(out
->data
+out
->size
, in
->data
, in
->size
);
970 out
->size
+= in
->size
;
971 av_destruct_packet(in
);
975 static void matroska_convert_tags(AVFormatContext
*s
, EbmlList
*list
)
977 MatroskaTag
*tags
= list
->elem
;
980 for (i
=0; i
< list
->nb_elem
; i
++) {
981 for (j
=0; j
< FF_ARRAY_ELEMS(metadata
); j
++){
982 if (!strcmp(tags
[i
].name
, metadata
[j
].name
)) {
983 int *ptr
= (int *)((char *)s
+ metadata
[j
].offset
);
985 if (metadata
[j
].size
> sizeof(int))
986 av_strlcpy((char *)ptr
, tags
[i
].string
, metadata
[j
].size
);
988 *ptr
= atoi(tags
[i
].string
);
991 if (tags
[i
].sub
.nb_elem
)
992 matroska_convert_tags(s
, &tags
[i
].sub
);
996 static void matroska_execute_seekhead(MatroskaDemuxContext
*matroska
)
998 EbmlList
*seekhead_list
= &matroska
->seekhead
;
999 MatroskaSeekhead
*seekhead
= seekhead_list
->elem
;
1000 uint32_t level_up
= matroska
->level_up
;
1001 int64_t before_pos
= url_ftell(matroska
->ctx
->pb
);
1002 MatroskaLevel level
;
1005 for (i
=0; i
<seekhead_list
->nb_elem
; i
++) {
1006 int64_t offset
= seekhead
[i
].pos
+ matroska
->segment_start
;
1008 if (seekhead
[i
].pos
<= before_pos
1009 || seekhead
[i
].id
== MATROSKA_ID_SEEKHEAD
1010 || seekhead
[i
].id
== MATROSKA_ID_CLUSTER
)
1014 if (url_fseek(matroska
->ctx
->pb
, offset
, SEEK_SET
) != offset
)
1017 /* We don't want to lose our seekhead level, so we add
1018 * a dummy. This is a crude hack. */
1019 if (matroska
->num_levels
== EBML_MAX_DEPTH
) {
1020 av_log(matroska
->ctx
, AV_LOG_INFO
,
1021 "Max EBML element depth (%d) reached, "
1022 "cannot parse further.\n", EBML_MAX_DEPTH
);
1027 level
.length
= (uint64_t)-1;
1028 matroska
->levels
[matroska
->num_levels
] = level
;
1029 matroska
->num_levels
++;
1031 ebml_parse(matroska
, matroska_segment
, matroska
);
1033 /* remove dummy level */
1034 while (matroska
->num_levels
) {
1035 uint64_t length
= matroska
->levels
[--matroska
->num_levels
].length
;
1036 if (length
== (uint64_t)-1)
1042 url_fseek(matroska
->ctx
->pb
, before_pos
, SEEK_SET
);
1043 matroska
->level_up
= level_up
;
/*
 * Derive the AAC profile number from a codec ID string by looking for
 * the profile names "MAIN", "LC" and "SSR", in that order.
 * Returns 1, 2 or 3 for the first name found in the string, or 4 when
 * none of them occurs.
 */
static int matroska_aac_profile(char *codec_id)
{
    static const char * const aac_profiles[] = { "MAIN", "LC", "SSR" };
    const int count = sizeof(aac_profiles) / sizeof(aac_profiles[0]);
    int idx = 0;

    while (idx < count && !strstr(codec_id, aac_profiles[idx]))
        idx++;

    return idx + 1;
}
1057 static int matroska_aac_sri(int samplerate
)
1061 for (sri
=0; sri
<FF_ARRAY_ELEMS(ff_mpeg4audio_sample_rates
); sri
++)
1062 if (ff_mpeg4audio_sample_rates
[sri
] == samplerate
)
1067 static int matroska_read_header(AVFormatContext
*s
, AVFormatParameters
*ap
)
1069 MatroskaDemuxContext
*matroska
= s
->priv_data
;
1070 EbmlList
*attachements_list
= &matroska
->attachments
;
1071 MatroskaAttachement
*attachements
;
1072 EbmlList
*chapters_list
= &matroska
->chapters
;
1073 MatroskaChapter
*chapters
;
1074 MatroskaTrack
*tracks
;
1075 EbmlList
*index_list
;
1076 MatroskaIndex
*index
;
1083 /* First read the EBML header. */
1084 if (ebml_parse(matroska
, ebml_syntax
, &ebml
)
1085 || ebml
.version
> EBML_VERSION
|| ebml
.max_size
> sizeof(uint64_t)
1086 || ebml
.id_length
> sizeof(uint32_t) || strcmp(ebml
.doctype
, "matroska")
1087 || ebml
.doctype_version
> 2) {
1088 av_log(matroska
->ctx
, AV_LOG_ERROR
,
1089 "EBML header using unsupported features\n"
1090 "(EBML version %"PRIu64
", doctype %s, doc version %"PRIu64
")\n",
1091 ebml
.version
, ebml
.doctype
, ebml
.doctype_version
);
1092 return AVERROR_NOFMT
;
1094 ebml_free(ebml_syntax
, &ebml
);
1096 /* The next thing is a segment. */
1097 if (ebml_parse(matroska
, matroska_segments
, matroska
) < 0)
1099 matroska_execute_seekhead(matroska
);
1101 if (matroska
->duration
)
1102 matroska
->ctx
->duration
= matroska
->duration
* matroska
->time_scale
1103 * 1000 / AV_TIME_BASE
;
1104 if (matroska
->title
)
1105 strncpy(matroska
->ctx
->title
, matroska
->title
,
1106 sizeof(matroska
->ctx
->title
)-1);
1107 matroska_convert_tags(s
, &matroska
->tags
);
1109 tracks
= matroska
->tracks
.elem
;
1110 for (i
=0; i
< matroska
->tracks
.nb_elem
; i
++) {
1111 MatroskaTrack
*track
= &tracks
[i
];
1112 enum CodecID codec_id
= CODEC_ID_NONE
;
1113 EbmlList
*encodings_list
= &tracks
->encodings
;
1114 MatroskaTrackEncoding
*encodings
= encodings_list
->elem
;
1115 uint8_t *extradata
= NULL
;
1116 int extradata_size
= 0;
1117 int extradata_offset
= 0;
1119 /* Apply some sanity checks. */
1120 if (track
->type
!= MATROSKA_TRACK_TYPE_VIDEO
&&
1121 track
->type
!= MATROSKA_TRACK_TYPE_AUDIO
&&
1122 track
->type
!= MATROSKA_TRACK_TYPE_SUBTITLE
) {
1123 av_log(matroska
->ctx
, AV_LOG_INFO
,
1124 "Unknown or unsupported track type %"PRIu64
"\n",
1128 if (track
->codec_id
== NULL
)
1131 if (track
->type
== MATROSKA_TRACK_TYPE_VIDEO
) {
1132 if (!track
->default_duration
)
1133 track
->default_duration
= 1000000000/track
->video
.frame_rate
;
1134 if (!track
->video
.display_width
)
1135 track
->video
.display_width
= track
->video
.pixel_width
;
1136 if (!track
->video
.display_height
)
1137 track
->video
.display_height
= track
->video
.pixel_height
;
1138 } else if (track
->type
== MATROSKA_TRACK_TYPE_AUDIO
) {
1139 if (!track
->audio
.out_samplerate
)
1140 track
->audio
.out_samplerate
= track
->audio
.samplerate
;
1142 if (encodings_list
->nb_elem
> 1) {
1143 av_log(matroska
->ctx
, AV_LOG_ERROR
,
1144 "Multiple combined encodings no supported");
1145 } else if (encodings_list
->nb_elem
== 1) {
1146 if (encodings
[0].type
||
1147 (encodings
[0].compression
.algo
!= MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP
&&
1149 encodings
[0].compression
.algo
!= MATROSKA_TRACK_ENCODING_COMP_ZLIB
&&
1152 encodings
[0].compression
.algo
!= MATROSKA_TRACK_ENCODING_COMP_BZLIB
&&
1154 encodings
[0].compression
.algo
!= MATROSKA_TRACK_ENCODING_COMP_LZO
)) {
1155 encodings
[0].scope
= 0;
1156 av_log(matroska
->ctx
, AV_LOG_ERROR
,
1157 "Unsupported encoding type");
1158 } else if (track
->codec_priv
.size
&& encodings
[0].scope
&2) {
1159 uint8_t *codec_priv
= track
->codec_priv
.data
;
1160 int offset
= matroska_decode_buffer(&track
->codec_priv
.data
,
1161 &track
->codec_priv
.size
,
1164 track
->codec_priv
.data
= NULL
;
1165 track
->codec_priv
.size
= 0;
1166 av_log(matroska
->ctx
, AV_LOG_ERROR
,
1167 "Failed to decode codec private data\n");
1168 } else if (offset
> 0) {
1169 track
->codec_priv
.data
= av_malloc(track
->codec_priv
.size
+ offset
);
1170 memcpy(track
->codec_priv
.data
,
1171 encodings
[0].compression
.settings
.data
, offset
);
1172 memcpy(track
->codec_priv
.data
+offset
, codec_priv
,
1173 track
->codec_priv
.size
);
1174 track
->codec_priv
.size
+= offset
;
1176 if (codec_priv
!= track
->codec_priv
.data
)
1177 av_free(codec_priv
);
1181 for(j
=0; ff_mkv_codec_tags
[j
].id
!= CODEC_ID_NONE
; j
++){
1182 if(!strncmp(ff_mkv_codec_tags
[j
].str
, track
->codec_id
,
1183 strlen(ff_mkv_codec_tags
[j
].str
))){
1184 codec_id
= ff_mkv_codec_tags
[j
].id
;
1189 st
= track
->stream
= av_new_stream(s
, 0);
1191 return AVERROR(ENOMEM
);
1193 if (!strcmp(track
->codec_id
, "V_MS/VFW/FOURCC")
1194 && track
->codec_priv
.size
>= 40
1195 && track
->codec_priv
.data
!= NULL
) {
1196 track
->video
.fourcc
= AV_RL32(track
->codec_priv
.data
+ 16);
1197 codec_id
= codec_get_id(codec_bmp_tags
, track
->video
.fourcc
);
1198 } else if (!strcmp(track
->codec_id
, "A_MS/ACM")
1199 && track
->codec_priv
.size
>= 18
1200 && track
->codec_priv
.data
!= NULL
) {
1201 uint16_t tag
= AV_RL16(track
->codec_priv
.data
);
1202 codec_id
= codec_get_id(codec_wav_tags
, tag
);
1203 } else if (!strcmp(track
->codec_id
, "V_QUICKTIME")
1204 && (track
->codec_priv
.size
>= 86)
1205 && (track
->codec_priv
.data
!= NULL
)) {
1206 track
->video
.fourcc
= AV_RL32(track
->codec_priv
.data
);
1207 codec_id
=codec_get_id(codec_movvideo_tags
, track
->video
.fourcc
);
1208 } else if (codec_id
== CODEC_ID_PCM_S16BE
) {
1209 switch (track
->audio
.bitdepth
) {
1210 case 8: codec_id
= CODEC_ID_PCM_U8
; break;
1211 case 24: codec_id
= CODEC_ID_PCM_S24BE
; break;
1212 case 32: codec_id
= CODEC_ID_PCM_S32BE
; break;
1214 } else if (codec_id
== CODEC_ID_PCM_S16LE
) {
1215 switch (track
->audio
.bitdepth
) {
1216 case 8: codec_id
= CODEC_ID_PCM_U8
; break;
1217 case 24: codec_id
= CODEC_ID_PCM_S24LE
; break;
1218 case 32: codec_id
= CODEC_ID_PCM_S32LE
; break;
1220 } else if (codec_id
==CODEC_ID_PCM_F32LE
&& track
->audio
.bitdepth
==64) {
1221 codec_id
= CODEC_ID_PCM_F64LE
;
1222 } else if (codec_id
== CODEC_ID_AAC
&& !track
->codec_priv
.size
) {
1223 int profile
= matroska_aac_profile(track
->codec_id
);
1224 int sri
= matroska_aac_sri(track
->audio
.samplerate
);
1225 extradata
= av_malloc(5);
1226 if (extradata
== NULL
)
1227 return AVERROR(ENOMEM
);
1228 extradata
[0] = (profile
<< 3) | ((sri
&0x0E) >> 1);
1229 extradata
[1] = ((sri
&0x01) << 7) | (track
->audio
.channels
<<3);
1230 if (strstr(track
->codec_id
, "SBR")) {
1231 sri
= matroska_aac_sri(track
->audio
.out_samplerate
);
1232 extradata
[2] = 0x56;
1233 extradata
[3] = 0xE5;
1234 extradata
[4] = 0x80 | (sri
<<3);
1238 } else if (codec_id
== CODEC_ID_TTA
) {
1240 extradata_size
= 30;
1241 extradata
= av_mallocz(extradata_size
);
1242 if (extradata
== NULL
)
1243 return AVERROR(ENOMEM
);
1244 init_put_byte(&b
, extradata
, extradata_size
, 1,
1245 NULL
, NULL
, NULL
, NULL
);
1246 put_buffer(&b
, "TTA1", 4);
1248 put_le16(&b
, track
->audio
.channels
);
1249 put_le16(&b
, track
->audio
.bitdepth
);
1250 put_le32(&b
, track
->audio
.out_samplerate
);
1251 put_le32(&b
, matroska
->ctx
->duration
* track
->audio
.out_samplerate
);
1252 } else if (codec_id
== CODEC_ID_RV10
|| codec_id
== CODEC_ID_RV20
||
1253 codec_id
== CODEC_ID_RV30
|| codec_id
== CODEC_ID_RV40
) {
1254 extradata_offset
= 26;
1255 track
->codec_priv
.size
-= extradata_offset
;
1256 } else if (codec_id
== CODEC_ID_RA_144
) {
1257 track
->audio
.out_samplerate
= 8000;
1258 track
->audio
.channels
= 1;
1259 } else if (codec_id
== CODEC_ID_RA_288
|| codec_id
== CODEC_ID_COOK
||
1260 codec_id
== CODEC_ID_ATRAC3
) {
1263 init_put_byte(&b
, track
->codec_priv
.data
,track
->codec_priv
.size
,
1264 0, NULL
, NULL
, NULL
, NULL
);
1266 track
->audio
.coded_framesize
= get_be32(&b
);
1268 track
->audio
.sub_packet_h
= get_be16(&b
);
1269 track
->audio
.frame_size
= get_be16(&b
);
1270 track
->audio
.sub_packet_size
= get_be16(&b
);
1271 track
->audio
.buf
= av_malloc(track
->audio
.frame_size
* track
->audio
.sub_packet_h
);
1272 if (codec_id
== CODEC_ID_RA_288
) {
1273 st
->codec
->block_align
= track
->audio
.coded_framesize
;
1274 track
->codec_priv
.size
= 0;
1276 st
->codec
->block_align
= track
->audio
.sub_packet_size
;
1277 extradata_offset
= 78;
1278 track
->codec_priv
.size
-= extradata_offset
;
1282 if (codec_id
== CODEC_ID_NONE
)
1283 av_log(matroska
->ctx
, AV_LOG_INFO
,
1284 "Unknown/unsupported CodecID %s.\n", track
->codec_id
);
1286 if (track
->time_scale
< 0.01)
1287 track
->time_scale
= 1.0;
1288 av_set_pts_info(st
, 64, matroska
->time_scale
*track
->time_scale
, 1000*1000*1000); /* 64 bit pts in ns */
1290 st
->codec
->codec_id
= codec_id
;
1292 if (strcmp(track
->language
, "und"))
1293 av_strlcpy(st
->language
, track
->language
, 4);
1295 if (track
->flag_default
)
1296 st
->disposition
|= AV_DISPOSITION_DEFAULT
;
1298 if (track
->default_duration
)
1299 av_reduce(&st
->codec
->time_base
.num
, &st
->codec
->time_base
.den
,
1300 track
->default_duration
, 1000000000, 30000);
1303 st
->codec
->extradata
= extradata
;
1304 st
->codec
->extradata_size
= extradata_size
;
1305 } else if(track
->codec_priv
.data
&& track
->codec_priv
.size
> 0){
1306 st
->codec
->extradata
= av_malloc(track
->codec_priv
.size
);
1307 if(st
->codec
->extradata
== NULL
)
1308 return AVERROR(ENOMEM
);
1309 st
->codec
->extradata_size
= track
->codec_priv
.size
;
1310 memcpy(st
->codec
->extradata
,
1311 track
->codec_priv
.data
+ extradata_offset
,
1312 track
->codec_priv
.size
);
1315 if (track
->type
== MATROSKA_TRACK_TYPE_VIDEO
) {
1316 st
->codec
->codec_type
= CODEC_TYPE_VIDEO
;
1317 st
->codec
->codec_tag
= track
->video
.fourcc
;
1318 st
->codec
->width
= track
->video
.pixel_width
;
1319 st
->codec
->height
= track
->video
.pixel_height
;
1320 av_reduce(&st
->sample_aspect_ratio
.num
,
1321 &st
->sample_aspect_ratio
.den
,
1322 st
->codec
->height
* track
->video
.display_width
,
1323 st
->codec
-> width
* track
->video
.display_height
,
1325 st
->need_parsing
= AVSTREAM_PARSE_HEADERS
;
1326 } else if (track
->type
== MATROSKA_TRACK_TYPE_AUDIO
) {
1327 st
->codec
->codec_type
= CODEC_TYPE_AUDIO
;
1328 st
->codec
->sample_rate
= track
->audio
.out_samplerate
;
1329 st
->codec
->channels
= track
->audio
.channels
;
1330 } else if (track
->type
== MATROSKA_TRACK_TYPE_SUBTITLE
) {
1331 st
->codec
->codec_type
= CODEC_TYPE_SUBTITLE
;
1335 attachements
= attachements_list
->elem
;
1336 for (j
=0; j
<attachements_list
->nb_elem
; j
++) {
1337 if (!(attachements
[j
].filename
&& attachements
[j
].mime
&&
1338 attachements
[j
].bin
.data
&& attachements
[j
].bin
.size
> 0)) {
1339 av_log(matroska
->ctx
, AV_LOG_ERROR
, "incomplete attachment\n");
1341 AVStream
*st
= av_new_stream(s
, 0);
1344 st
->filename
= av_strdup(attachements
[j
].filename
);
1345 st
->codec
->codec_id
= CODEC_ID_NONE
;
1346 st
->codec
->codec_type
= CODEC_TYPE_ATTACHMENT
;
1347 st
->codec
->extradata
= av_malloc(attachements
[j
].bin
.size
);
1348 if(st
->codec
->extradata
== NULL
)
1350 st
->codec
->extradata_size
= attachements
[j
].bin
.size
;
1351 memcpy(st
->codec
->extradata
, attachements
[j
].bin
.data
, attachements
[j
].bin
.size
);
1353 for (i
=0; ff_mkv_mime_tags
[i
].id
!= CODEC_ID_NONE
; i
++) {
1354 if (!strncmp(ff_mkv_mime_tags
[i
].str
, attachements
[j
].mime
,
1355 strlen(ff_mkv_mime_tags
[i
].str
))) {
1356 st
->codec
->codec_id
= ff_mkv_mime_tags
[i
].id
;
1363 chapters
= chapters_list
->elem
;
1364 for (i
=0; i
<chapters_list
->nb_elem
; i
++)
1365 if (chapters
[i
].start
!= AV_NOPTS_VALUE
&& chapters
[i
].uid
)
1366 ff_new_chapter(s
, chapters
[i
].uid
, (AVRational
){1, 1000000000},
1367 chapters
[i
].start
, chapters
[i
].end
,
1370 index_list
= &matroska
->index
;
1371 index
= index_list
->elem
;
1372 for (i
=0; i
<index_list
->nb_elem
; i
++) {
1373 EbmlList
*pos_list
= &index
[i
].pos
;
1374 MatroskaIndexPos
*pos
= pos_list
->elem
;
1375 for (j
=0; j
<pos_list
->nb_elem
; j
++) {
1376 MatroskaTrack
*track
= matroska_find_track_by_num(matroska
,
1378 if (track
&& track
->stream
)
1379 av_add_index_entry(track
->stream
,
1380 pos
[j
].pos
+ matroska
->segment_start
,
1381 index
[i
].time
, 0, 0, AVINDEX_KEYFRAME
);
1389 * Put one packet in an application-supplied AVPacket struct.
1390 * Returns 0 on success or -1 on failure.
1392 static int matroska_deliver_packet(MatroskaDemuxContext
*matroska
,
1395 if (matroska
->num_packets
> 0) {
1396 memcpy(pkt
, matroska
->packets
[0], sizeof(AVPacket
));
1397 av_free(matroska
->packets
[0]);
1398 if (matroska
->num_packets
> 1) {
1399 memmove(&matroska
->packets
[0], &matroska
->packets
[1],
1400 (matroska
->num_packets
- 1) * sizeof(AVPacket
*));
1402 av_realloc(matroska
->packets
, (matroska
->num_packets
- 1) *
1403 sizeof(AVPacket
*));
1405 av_freep(&matroska
->packets
);
1407 matroska
->num_packets
--;
1415 * Free all packets in our internal queue.
1417 static void matroska_clear_queue(MatroskaDemuxContext
*matroska
)
1419 if (matroska
->packets
) {
1421 for (n
= 0; n
< matroska
->num_packets
; n
++) {
1422 av_free_packet(matroska
->packets
[n
]);
1423 av_free(matroska
->packets
[n
]);
1425 av_freep(&matroska
->packets
);
1426 matroska
->num_packets
= 0;
1430 static int matroska_parse_block(MatroskaDemuxContext
*matroska
, uint8_t *data
,
1431 int size
, int64_t pos
, uint64_t cluster_time
,
1432 uint64_t duration
, int is_keyframe
,
1433 int64_t cluster_pos
)
1435 uint64_t timecode
= AV_NOPTS_VALUE
;
1436 MatroskaTrack
*track
;
1441 uint32_t *lace_size
= NULL
;
1442 int n
, flags
, laces
= 0;
1445 if ((n
= matroska_ebmlnum_uint(matroska
, data
, size
, &num
)) < 0) {
1446 av_log(matroska
->ctx
, AV_LOG_ERROR
, "EBML block data error\n");
1452 track
= matroska_find_track_by_num(matroska
, num
);
1453 if (size
<= 3 || !track
|| !track
->stream
) {
1454 av_log(matroska
->ctx
, AV_LOG_INFO
,
1455 "Invalid stream %"PRIu64
" or size %u\n", num
, size
);
1459 if (st
->discard
>= AVDISCARD_ALL
)
1461 if (duration
== AV_NOPTS_VALUE
)
1462 duration
= track
->default_duration
/ matroska
->time_scale
;
1464 block_time
= AV_RB16(data
);
1468 if (is_keyframe
== -1)
1469 is_keyframe
= flags
& 0x80 ? PKT_FLAG_KEY
: 0;
1471 if (cluster_time
!= (uint64_t)-1
1472 && (block_time
>= 0 || cluster_time
>= -block_time
)) {
1473 timecode
= cluster_time
+ block_time
;
1474 if (track
->type
== MATROSKA_TRACK_TYPE_SUBTITLE
1475 && timecode
< track
->end_timecode
)
1476 is_keyframe
= 0; /* overlapping subtitles are not key frame */
1478 av_add_index_entry(st
, cluster_pos
, timecode
, 0,0,AVINDEX_KEYFRAME
);
1479 track
->end_timecode
= FFMAX(track
->end_timecode
, timecode
+duration
);
1482 if (matroska
->skip_to_keyframe
&& track
->type
!= MATROSKA_TRACK_TYPE_SUBTITLE
) {
1483 if (!is_keyframe
|| timecode
< matroska
->skip_to_timecode
)
1485 matroska
->skip_to_keyframe
= 0;
1488 switch ((flags
& 0x06) >> 1) {
1489 case 0x0: /* no lacing */
1491 lace_size
= av_mallocz(sizeof(int));
1492 lace_size
[0] = size
;
1495 case 0x1: /* Xiph lacing */
1496 case 0x2: /* fixed-size lacing */
1497 case 0x3: /* EBML lacing */
1498 assert(size
>0); // size <=3 is checked before size-=3 above
1499 laces
= (*data
) + 1;
1502 lace_size
= av_mallocz(laces
* sizeof(int));
1504 switch ((flags
& 0x06) >> 1) {
1505 case 0x1: /* Xiph lacing */ {
1508 for (n
= 0; res
== 0 && n
< laces
- 1; n
++) {
1515 lace_size
[n
] += temp
;
1521 total
+= lace_size
[n
];
1523 lace_size
[n
] = size
- total
;
1527 case 0x2: /* fixed-size lacing */
1528 for (n
= 0; n
< laces
; n
++)
1529 lace_size
[n
] = size
/ laces
;
1532 case 0x3: /* EBML lacing */ {
1534 n
= matroska_ebmlnum_uint(matroska
, data
, size
, &num
);
1536 av_log(matroska
->ctx
, AV_LOG_INFO
,
1537 "EBML block data error\n");
1542 total
= lace_size
[0] = num
;
1543 for (n
= 1; res
== 0 && n
< laces
- 1; n
++) {
1546 r
= matroska_ebmlnum_sint(matroska
, data
, size
, &snum
);
1548 av_log(matroska
->ctx
, AV_LOG_INFO
,
1549 "EBML block data error\n");
1554 lace_size
[n
] = lace_size
[n
- 1] + snum
;
1555 total
+= lace_size
[n
];
1557 lace_size
[n
] = size
- total
;
1565 for (n
= 0; n
< laces
; n
++) {
1566 if (st
->codec
->codec_id
== CODEC_ID_RA_288
||
1567 st
->codec
->codec_id
== CODEC_ID_COOK
||
1568 st
->codec
->codec_id
== CODEC_ID_ATRAC3
) {
1569 int a
= st
->codec
->block_align
;
1570 int sps
= track
->audio
.sub_packet_size
;
1571 int cfs
= track
->audio
.coded_framesize
;
1572 int h
= track
->audio
.sub_packet_h
;
1573 int y
= track
->audio
.sub_packet_cnt
;
1574 int w
= track
->audio
.frame_size
;
1577 if (!track
->audio
.pkt_cnt
) {
1578 if (st
->codec
->codec_id
== CODEC_ID_RA_288
)
1579 for (x
=0; x
<h
/2; x
++)
1580 memcpy(track
->audio
.buf
+x
*2*w
+y
*cfs
,
1583 for (x
=0; x
<w
/sps
; x
++)
1584 memcpy(track
->audio
.buf
+sps
*(h
*x
+((h
+1)/2)*(y
&1)+(y
>>1)), data
+x
*sps
, sps
);
1586 if (++track
->audio
.sub_packet_cnt
>= h
) {
1587 track
->audio
.sub_packet_cnt
= 0;
1588 track
->audio
.pkt_cnt
= h
*w
/ a
;
1591 while (track
->audio
.pkt_cnt
) {
1592 pkt
= av_mallocz(sizeof(AVPacket
));
1593 av_new_packet(pkt
, a
);
1594 memcpy(pkt
->data
, track
->audio
.buf
1595 + a
* (h
*w
/ a
- track
->audio
.pkt_cnt
--), a
);
1597 pkt
->stream_index
= st
->index
;
1598 dynarray_add(&matroska
->packets
,&matroska
->num_packets
,pkt
);
1601 MatroskaTrackEncoding
*encodings
= track
->encodings
.elem
;
1602 int offset
= 0, pkt_size
= lace_size
[n
];
1603 uint8_t *pkt_data
= data
;
1605 if (encodings
&& encodings
->scope
& 1) {
1606 offset
= matroska_decode_buffer(&pkt_data
,&pkt_size
, track
);
1611 pkt
= av_mallocz(sizeof(AVPacket
));
1612 /* XXX: prevent data copy... */
1613 if (av_new_packet(pkt
, pkt_size
+offset
) < 0) {
1615 res
= AVERROR(ENOMEM
);
1620 memcpy (pkt
->data
, encodings
->compression
.settings
.data
, offset
);
1621 memcpy (pkt
->data
+offset
, pkt_data
, pkt_size
);
1623 if (pkt_data
!= data
)
1627 pkt
->flags
= is_keyframe
;
1628 pkt
->stream_index
= st
->index
;
1630 pkt
->pts
= timecode
;
1632 if (st
->codec
->codec_id
== CODEC_ID_TEXT
)
1633 pkt
->convergence_duration
= duration
;
1634 else if (track
->type
!= MATROSKA_TRACK_TYPE_SUBTITLE
)
1635 pkt
->duration
= duration
;
1637 if (st
->codec
->codec_id
== CODEC_ID_SSA
)
1638 matroska_fix_ass_packet(matroska
, pkt
, duration
);
1640 if (matroska
->prev_pkt
&&
1641 timecode
!= AV_NOPTS_VALUE
&&
1642 matroska
->prev_pkt
->pts
== timecode
&&
1643 matroska
->prev_pkt
->stream_index
== st
->index
)
1644 matroska_merge_packets(matroska
->prev_pkt
, pkt
);
1646 dynarray_add(&matroska
->packets
,&matroska
->num_packets
,pkt
);
1647 matroska
->prev_pkt
= pkt
;
1651 if (timecode
!= AV_NOPTS_VALUE
)
1652 timecode
= duration
? timecode
+ duration
: AV_NOPTS_VALUE
;
1653 data
+= lace_size
[n
];
1661 static int matroska_parse_cluster(MatroskaDemuxContext
*matroska
)
1663 MatroskaCluster cluster
= { 0 };
1664 EbmlList
*blocks_list
;
1665 MatroskaBlock
*blocks
;
1667 int64_t pos
= url_ftell(matroska
->ctx
->pb
);
1668 matroska
->prev_pkt
= NULL
;
1669 if (matroska
->has_cluster_id
){
1670 /* For the first cluster we parse, its ID was already read as
1671 part of matroska_read_header(), so don't read it again */
1672 res
= ebml_parse_id(matroska
, matroska_clusters
,
1673 MATROSKA_ID_CLUSTER
, &cluster
);
1674 pos
-= 4; /* sizeof the ID which was already read */
1675 matroska
->has_cluster_id
= 0;
1677 res
= ebml_parse(matroska
, matroska_clusters
, &cluster
);
1678 blocks_list
= &cluster
.blocks
;
1679 blocks
= blocks_list
->elem
;
1680 for (i
=0; i
<blocks_list
->nb_elem
; i
++)
1681 if (blocks
[i
].bin
.size
> 0)
1682 res
=matroska_parse_block(matroska
,
1683 blocks
[i
].bin
.data
, blocks
[i
].bin
.size
,
1684 blocks
[i
].bin
.pos
, cluster
.timecode
,
1685 blocks
[i
].duration
, !blocks
[i
].reference
,
1687 ebml_free(matroska_cluster
, &cluster
);
1688 if (res
< 0) matroska
->done
= 1;
1692 static int matroska_read_packet(AVFormatContext
*s
, AVPacket
*pkt
)
1694 MatroskaDemuxContext
*matroska
= s
->priv_data
;
1696 while (matroska_deliver_packet(matroska
, pkt
)) {
1698 return AVERROR(EIO
);
1699 matroska_parse_cluster(matroska
);
1705 static int matroska_read_seek(AVFormatContext
*s
, int stream_index
,
1706 int64_t timestamp
, int flags
)
1708 MatroskaDemuxContext
*matroska
= s
->priv_data
;
1709 MatroskaTrack
*tracks
= matroska
->tracks
.elem
;
1710 AVStream
*st
= s
->streams
[stream_index
];
1711 int i
, index
, index_sub
, index_min
;
1713 if (!st
->nb_index_entries
)
1715 timestamp
= FFMAX(timestamp
, st
->index_entries
[0].timestamp
);
1717 if ((index
= av_index_search_timestamp(st
, timestamp
, flags
)) < 0) {
1718 url_fseek(s
->pb
, st
->index_entries
[st
->nb_index_entries
-1].pos
, SEEK_SET
);
1719 while ((index
= av_index_search_timestamp(st
, timestamp
, flags
)) < 0) {
1720 matroska_clear_queue(matroska
);
1721 if (matroska_parse_cluster(matroska
) < 0)
1726 matroska_clear_queue(matroska
);
1731 for (i
=0; i
< matroska
->tracks
.nb_elem
; i
++) {
1732 tracks
[i
].end_timecode
= 0;
1733 if (tracks
[i
].type
== MATROSKA_TRACK_TYPE_SUBTITLE
1734 && !tracks
[i
].stream
->discard
!= AVDISCARD_ALL
) {
1735 index_sub
= av_index_search_timestamp(tracks
[i
].stream
, st
->index_entries
[index
].timestamp
, AVSEEK_FLAG_BACKWARD
);
1737 && st
->index_entries
[index_sub
].pos
< st
->index_entries
[index_min
].pos
1738 && st
->index_entries
[index
].timestamp
- st
->index_entries
[index_sub
].timestamp
< 30000000000/matroska
->time_scale
)
1739 index_min
= index_sub
;
1743 url_fseek(s
->pb
, st
->index_entries
[index_min
].pos
, SEEK_SET
);
1744 matroska
->skip_to_keyframe
= !(flags
& AVSEEK_FLAG_ANY
);
1745 matroska
->skip_to_timecode
= st
->index_entries
[index
].timestamp
;
1747 av_update_cur_dts(s
, st
, st
->index_entries
[index
].timestamp
);
1751 static int matroska_read_close(AVFormatContext
*s
)
1753 MatroskaDemuxContext
*matroska
= s
->priv_data
;
1754 MatroskaTrack
*tracks
= matroska
->tracks
.elem
;
1757 matroska_clear_queue(matroska
);
1759 for (n
=0; n
< matroska
->tracks
.nb_elem
; n
++)
1760 if (tracks
[n
].type
== MATROSKA_TRACK_TYPE_AUDIO
)
1761 av_free(tracks
[n
].audio
.buf
);
1762 ebml_free(matroska_segment
, matroska
);
1767 AVInputFormat matroska_demuxer
= {
1769 NULL_IF_CONFIG_SMALL("Matroska file format"),
1770 sizeof(MatroskaDemuxContext
),
1772 matroska_read_header
,
1773 matroska_read_packet
,
1774 matroska_read_close
,