Support remuxing MPEG-4 part2.
[L-SMASH.git] / box.h
blob1a6a86c6501fd2cfb7921340627973dd87824ca1
1 /*****************************************************************************
2 * box.h:
3 *****************************************************************************
4 * Copyright (C) 2010 L-SMASH project
6 * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
8 * Permission to use, copy, modify, and/or distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 *****************************************************************************/
21 /* This file is available under an ISC license. */
23 #ifndef LSMASH_BOX_H
24 #define LSMASH_BOX_H
26 /* For generating creation_time and modification_time.
27 * According to ISO/IEC-14496-5-2001, the difference between Unix time and Mac OS time is 2082758400.
28 * However this is wrong and 2082844800 is correct. */
29 #include <time.h>
/* Seconds from the Mac OS epoch (midnight, Jan. 1, 1904, UTC) to the Unix epoch
 * (midnight, Jan. 1, 1970, UTC): 24107 days * 86400 seconds per day. */
#define ISOM_MAC_EPOCH_OFFSET 2082844800
32 #include "utils.h"
/* Forward declaration so that box members can point at a generic box. */
typedef struct isom_box_tag isom_box_t;
/* Members shared by every box.
 * If size is 1, then largesize is the actual size.
 * If size is 0, then this box is the last one in the file.
 * usertype is for uuid.
 * The expansion has no trailing semicolon; write `ISOM_BASEBOX_COMMON;` at the use site. */
#define ISOM_BASEBOX_COMMON \
    lsmash_root_t *root;        /* pointer of root */ \
    isom_box_t    *parent;      /* pointer of the parent box of this box */ \
    uint8_t        manager;     /* flags for L-SMASH */ \
    uint64_t       pos;         /* starting position of this box in the file */ \
    uint64_t       size;        /* the number of bytes in this box */ \
    uint32_t       type;        /* four characters codes that identify box type */ \
    uint8_t       *usertype
/* Members shared by every full box: the base box members followed by version and flags.
 * The expansion has no trailing semicolon; write `ISOM_FULLBOX_COMMON;` at the use site. */
#define ISOM_FULLBOX_COMMON \
    ISOM_BASEBOX_COMMON; \
    uint8_t  version;   /* Basically, version is either 0 or 1 */ \
    uint32_t flags      /* In the actual structure of box, flags is 24 bits. */
/* Default header sizes in bytes.
 * NOTE(review): presumably size + type (8), plus version/flags for a full box (12),
 * plus an entry count for a list-carrying full box (16) -- confirm against the writers. */
#define ISOM_DEFAULT_BOX_HEADER_SIZE          8
#define ISOM_DEFAULT_FULLBOX_HEADER_SIZE      12
#define ISOM_DEFAULT_LIST_FULLBOX_HEADER_SIZE 16
/* Single-bit flag values.
 * NOTE(review): presumably stored in the `manager` member of a box -- confirm against users. */
#define LSMASH_UNKNOWN_BOX    0x01
#define LSMASH_ABSENT_IN_ROOT 0x02
60 struct isom_box_tag
62 ISOM_FULLBOX_COMMON;
65 /* File Type Box
66 * This box identifies the specifications to which this file complies.
67 * This box shall occur before any variable-length box.
68 * In the absence of this box, the file is QuickTime file format or MP4 version 1 file format.
69 * In MP4 version 1 file format, Object Descriptor Box is mandatory.
70 * In QuickTime file format, Object Descriptor Box isn't defined.
71 * Therefore, if this box and an Object Descriptor Box are absent in the file, the file shall be QuikcTime file format. */
72 typedef struct
74 ISOM_BASEBOX_COMMON;
75 uint32_t major_brand; /* brand identifier */
76 uint32_t minor_version; /* the minor version of the major brand */
77 uint32_t *compatible_brands; /* a list, to the end of the box, of brands */
79 uint32_t brand_count; /* the number of factors in compatible_brands array */
80 } isom_ftyp_t;
82 /* Track Header Box
83 * This box specifies the characteristics of a single track. */
84 typedef struct
86 /* version is either 0 or 1
87 * flags
88 * 0x000001: Indicates that the track is enabled.
89 * A disabled track is treated as if it were not present.
90 * 0x000002: Indicates that the track is used in the presentation.
91 * 0x000004: Indicates that the track is used when previewing the presentation.
92 * 0x000008: Indicates that the track is used in the movie's poster. (only defined in QuickTime file format)
93 * ISOM: If in a presentation all tracks have neither track_in_movie nor track_in_preview set,
94 * then all tracks shall be treated as if both flags were set on all tracks. */
95 ISOM_FULLBOX_COMMON;
96 /* version == 0: uint64_t -> uint32_t */
97 uint64_t creation_time; /* the creation time of this track (in seconds since midnight, Jan. 1, 1904, in UTC time) */
98 uint64_t modification_time; /* the most recent time the track was modified (in seconds since midnight, Jan. 1, 1904, in UTC time) */
99 uint32_t track_ID; /* an integer that uniquely identifies the track
100 * Track IDs are never re-used and cannot be zero. */
101 uint32_t reserved1;
102 uint64_t duration; /* the duration of this track expressed in the movie timescale units */
103 /* The following fields are treated as reserved in MP4 version 1. */
104 uint32_t reserved2[2];
105 int16_t layer; /* the front-to-back ordering of video tracks; tracks with lower numbers are closer to the viewer. */
106 int16_t alternate_group; /* an integer that specifies a group or collection of tracks
107 * If this field is not 0, it should be the same for tracks that contain alternate data for one another
108 * and different for tracks belonging to different such groups.
109 * Only one track within an alternate group should be played or streamed at any one time. */
110 int16_t volume; /* fixed point 8.8 number. 0x0100 is full volume. */
111 uint16_t reserved3;
112 int32_t matrix[9]; /* transformation matrix for the video */
113 /* track's visual presentation size
114 * All images in the sequence are scaled to this size, before any overall transformation of the track represented by the matrix.
115 * Note: these fields are treated as reserved in MP4 version 1. */
116 uint32_t width; /* fixed point 16.16 number */
117 uint32_t height; /* fixed point 16.16 number */
118 /* */
119 } isom_tkhd_t;
121 /* Track Clean Aperture Dimensions Box
122 * A presentation mode where clap and pasp are reflected. */
123 typedef struct
125 ISOM_FULLBOX_COMMON;
126 uint32_t width; /* fixed point 16.16 number */
127 uint32_t height; /* fixed point 16.16 number */
128 } isom_clef_t;
130 /* Track Production Aperture Dimensions Box
131 * A presentation mode where pasp is reflected. */
132 typedef struct
134 ISOM_FULLBOX_COMMON;
135 uint32_t width; /* fixed point 16.16 number */
136 uint32_t height; /* fixed point 16.16 number */
137 } isom_prof_t;
139 /* Track Encoded Pixels Dimensions Box
140 * A presentation mode where clap and pasp are not reflected. */
141 typedef struct
143 ISOM_FULLBOX_COMMON;
144 uint32_t width; /* fixed point 16.16 number */
145 uint32_t height; /* fixed point 16.16 number */
146 } isom_enof_t;
148 /* Track Aperture Mode Dimensions Box */
149 typedef struct
151 ISOM_BASEBOX_COMMON;
152 isom_clef_t *clef; /* Track Clean Aperture Dimensions Box */
153 isom_prof_t *prof; /* Track Production Aperture Dimensions Box */
154 isom_enof_t *enof; /* Track Encoded Pixels Dimensions Box */
155 } isom_tapt_t;
/* Edit List Box
 * This box contains an explicit timeline map.
 * Each entry defines part of the track timeline: by mapping part of the media timeline, or by indicating 'empty' time,
 * or by defining a 'dwell', where a single time-point in the media is held for a period.
 * The last edit in a track shall never be an empty edit.
 * Any difference between the duration in the Movie Header Box, and the track's duration is expressed as an implicit empty edit at the end.
 * It is recommended that any edits, explicit or implied, not select any portion of the composition timeline that doesn't map to a sample.
 * Therefore, if the first sample in the track has non-zero CTS, then this track should have at least one edit and the start time in it should
 * correspond to the value of the CTS the first sample has or more not to exceed the largest CTS in this track. */
typedef struct
{
    /* version == 0: 64bits -> 32bits */
    uint64_t segment_duration;  /* the duration of this edit expressed in the movie timescale units */
    int64_t  media_time;        /* the starting composition time within the media of this edit segment
                                 * If this field is set to -1, it is an empty edit. */
    int32_t  media_rate;        /* the relative rate at which to play the media corresponding to this edit segment
                                 * If this value is 0, then the edit is specifying a 'dwell':
                                 * the media at media_time is presented for the segment_duration.
                                 * This field is expressed as 16.16 fixed-point number. */
} isom_elst_entry_t;
178 typedef struct
180 ISOM_FULLBOX_COMMON; /* version is either 0 or 1 */
181 lsmash_entry_list_t *list;
182 } isom_elst_t;
184 /* Edit Box
185 * This optional box maps the presentation time-line to the media time-line as it is stored in the file.
186 * In the absence of this box, there is an implicit one-to-one mapping of these time-lines,
187 * and the presentation of a track starts at the beginning of the presentation. */
188 typedef struct
190 ISOM_BASEBOX_COMMON;
191 isom_elst_t *elst; /* Edit List Box */
192 } isom_edts_t;
194 /* Track Reference Box
195 * The Track Reference Box contains Track Reference Type Boxes.
196 * Track Reference Type Boxes define relationships between tracks.
197 * They allow one track to specify how it is related to other tracks. */
198 typedef struct
200 ISOM_BASEBOX_COMMON;
201 uint32_t *track_ID; /* track_IDs of reference tracks / Zero value must not be used */
203 uint32_t ref_count; /* number of reference tracks */
204 } isom_tref_type_t;
206 typedef struct
208 ISOM_BASEBOX_COMMON;
209 lsmash_entry_list_t *ref_list; /* Track Reference Type Boxes */
210 } isom_tref_t;
212 /* Media Header Box
213 * This box declares overall information that is media-independent, and relevant to characteristics of the media in a track.*/
214 typedef struct
216 ISOM_FULLBOX_COMMON; /* version is either 0 or 1 */
217 /* version == 0: uint64_t -> uint32_t */
218 uint64_t creation_time; /* the creation time of the media in this track (in seconds since midnight, Jan. 1, 1904, in UTC time) */
219 uint64_t modification_time; /* the most recent time the media in this track was modified (in seconds since midnight, Jan. 1, 1904, in UTC time) */
220 uint32_t timescale; /* media timescale: timescale for this media */
221 uint64_t duration; /* the duration of this media expressed in the timescale indicated in this box */
222 /* */
223 #define ISOM_LANG( lang ) ((((lang[0]-0x60)&0x1f)<<10) | (((lang[1]-0x60)&0x1f)<<5) | ((lang[2]-0x60)&0x1f))
224 uint16_t language; /* ISOM: ISO-639-2/T language codes. The first bit is 0.
225 * Each character is packed as the difference between its ASCII value and 0x60.
226 * QTFF: Macintosh language codes is usually used.
227 * Mac's value is less than 0x800 while ISO's value is 0x800 or greater. */
228 int16_t quality; /* ISOM: pre_defined / QTFF: the media's playback quality */
229 } isom_mdhd_t;
231 /* Handler Reference Box
232 * In Media Box, this box is mandatory and (ISOM: should/QTFF: must) come before Media Information Box.
233 * ISOM: this box might be also in Meta Box.
234 * QTFF: this box might be also in Media Information Box. If this box is present there, it must come before Data Information Box. */
235 typedef struct
237 ISOM_FULLBOX_COMMON;
238 uint32_t componentType; /* ISOM: pre_difined = 0
239 * QTFF: 'mhlr' for Media Handler Reference Box and 'dhlr' for Data Handler Reference Box */
240 uint32_t componentSubtype; /* ISOM and QT: when present in Media Handler Reference Box, this field defines the type of media data
241 * QTFF: when present in Data Handler Reference Box, this field defines the data reference type */
242 /* The following fields are defined in QTFF however these fields aren't mentioned in QuickTime SDK and are reserved in the specification.
243 * In ISOM, these fields are still defined as reserved. */
244 uint32_t componentManufacturer; /* vendor indentification / A value of 0 matches any manufacturer. */
245 uint32_t componentFlags; /* flags describing required component capabilities
246 * The high-order 8 bits should be set to 0.
247 * The low-order 24 bits are specific to each component type. */
248 uint32_t componentFlagsMask; /* This field indicates which flags in the componentFlags field are relevant to this operation. */
249 /* */
250 uint8_t *componentName; /* ISOM: a null-terminated string in UTF-8 characters
251 * QTFF: Pascal string */
253 uint32_t componentName_length;
254 } isom_hdlr_t;
257 /** Media Information Header Boxes
258 ** There is a different media information header for each track type
259 ** (corresponding to the media handler-type); the matching header shall be present. **/
260 /* Video Media Header Box
261 * This box contains general presentation information, independent of the coding, for video media. */
262 typedef struct
264 ISOM_FULLBOX_COMMON; /* flags is 1 */
265 uint16_t graphicsmode; /* template: graphicsmode = 0 */
266 uint16_t opcolor[3]; /* template: opcolor = { 0, 0, 0 } */
267 } isom_vmhd_t;
269 /* Sound Media Header Box
270 * This box contains general presentation information, independent of the coding, for audio media. */
271 typedef struct
273 ISOM_FULLBOX_COMMON;
274 int16_t balance; /* a fixed-point 8.8 number that places mono audio tracks in a stereo space. template: balance = 0 */
275 uint16_t reserved;
276 } isom_smhd_t;
278 /* Hint Media Header Box
279 * This box contains general information, independent of the protocol, for hint tracks. (A PDU is a Protocol Data Unit.) */
280 typedef struct
282 ISOM_FULLBOX_COMMON;
283 uint16_t maxPDUsize; /* the size in bytes of the largest PDU in this (hint) stream */
284 uint16_t avgPDUsize; /* the average size of a PDU over the entire presentation */
285 uint32_t maxbitrate; /* the maximum rate in bits/second over any window of one second */
286 uint32_t avgbitrate; /* the average rate in bits/second over the entire presentation */
287 uint32_t reserved;
288 } isom_hmhd_t;
290 /* Null Media Header Box
291 * This box may be used for streams other than visual and audio (e.g., timed metadata streams). */
292 typedef struct
294 /* Streams other than visual and audio may use a Null Media Header Box */
295 ISOM_FULLBOX_COMMON; /* flags is currently all zero */
296 } isom_nmhd_t;
298 /* Generic Media Information Box */
299 typedef struct
301 ISOM_FULLBOX_COMMON;
302 uint16_t graphicsmode;
303 uint16_t opcolor[3];
304 int16_t balance; /* This field is nomally set to 0. */
305 uint16_t reserved; /* Reserved for use by Apple. Set this field to 0. */
306 } isom_gmin_t;
308 /* Text Media Information Box */
309 typedef struct
311 ISOM_BASEBOX_COMMON;
312 int32_t matrix[9]; /* Unkown fields. Default values are probably:
313 * { 0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000 } */
314 } isom_text_t;
316 /* Generic Media Information Header Box */
317 typedef struct
319 ISOM_BASEBOX_COMMON;
320 isom_gmin_t *gmin; /* Generic Media Information Box */
321 isom_text_t *text; /* Text Media Information Box */
322 } isom_gmhd_t;
323 /** **/
325 /* Data Reference Box
326 * name and location fields are expressed in null-terminated string using UTF-8 characters. */
327 typedef struct
329 /* This box is DataEntryUrlBox or DataEntryUrnBox */
330 ISOM_FULLBOX_COMMON; /* flags == 0x000001 means that the media data is in the same file
331 * as the Movie Box containing this data reference. */
332 char *name; /* only for DataEntryUrnBox */
333 char *location; /* a location to find the resource with the given name */
335 uint32_t name_length;
336 uint32_t location_length;
337 } isom_dref_entry_t;
339 typedef struct
341 ISOM_FULLBOX_COMMON;
342 lsmash_entry_list_t *list;
343 } isom_dref_t;
345 /* Data Information Box */
346 typedef struct
348 /* This box is in Media Information Box or Meta Box */
349 ISOM_BASEBOX_COMMON;
350 isom_dref_t *dref; /* Data Reference Box */
351 } isom_dinf_t;
353 /** Sample Description **/
354 /* ES Descriptor Box */
355 struct mp4sys_ES_Descriptor_t; /* FIXME: I think these structs using mp4sys should be placed in isom.c */
356 typedef struct
358 ISOM_FULLBOX_COMMON;
359 struct mp4sys_ES_Descriptor_t *ES;
360 } isom_esds_t;
362 /* AVCDecoderConfigurationRecord */
363 typedef struct
365 #define ISOM_REQUIRES_AVCC_EXTENSION( x ) ((x) == 100 || (x) == 110 || (x) == 122 || (x) == 144)
366 ISOM_BASEBOX_COMMON;
367 uint8_t configurationVersion; /* 1 */
368 uint8_t AVCProfileIndication; /* profile_idc in SPS */
369 uint8_t profile_compatibility;
370 uint8_t AVCLevelIndication; /* level_idc in SPS */
371 uint8_t lengthSizeMinusOne; /* in bytes of the NALUnitLength field. upper 6-bits are reserved as 111111b */
372 uint8_t numOfSequenceParameterSets; /* upper 3-bits are reserved as 111b */
373 lsmash_entry_list_t *sequenceParameterSets; /* SPSs */
374 uint8_t numOfPictureParameterSets;
375 lsmash_entry_list_t *pictureParameterSets; /* PPSs */
376 /* if( ISOM_REQUIRES_AVCC_EXTENSION( AVCProfileIndication ) ) */
377 uint8_t chroma_format; /* chroma_format_idc in SPS / upper 6-bits are reserved as 111111b */
378 uint8_t bit_depth_luma_minus8; /* shall be in the range of 0 to 4 / upper 5-bits are reserved as 11111b */
379 uint8_t bit_depth_chroma_minus8; /* shall be in the range of 0 to 4 / upper 5-bits are reserved as 11111b */
380 uint8_t numOfSequenceParameterSetExt;
381 lsmash_entry_list_t *sequenceParameterSetExt; /* SPSExts */
382 /* */
383 } isom_avcC_t;
/* Parameter Set Entry: a length paired with a pointer to the parameter set NAL unit. */
typedef struct
{
    uint16_t parameterSetLength;
    uint8_t *parameterSetNALUnit;
} isom_avcC_ps_entry_t;
392 /* MPEG-4 Bit Rate Box
393 * This box signals the bit rate information of the AVC video stream. */
394 typedef struct
396 ISOM_BASEBOX_COMMON;
397 uint32_t bufferSizeDB; /* the size of the decoding buffer for the elementary stream in bytes */
398 uint32_t maxBitrate; /* the maximum rate in bits/second over any window of one second */
399 uint32_t avgBitrate; /* the average rate in bits/second over the entire presentation */
400 } isom_btrt_t;
402 /* Clean Aperture Box
403 * There are notionally four values in this box and these parameters are represented as a fraction N/D.
404 * Here, we refer to the pair of parameters fooN and fooD as foo.
405 * Considering the pixel dimensions as defined by the VisualSampleEntry width and height.
406 * If picture centre of the image is at pcX and pcY, then horizOff and vertOff are defined as follows:
407 * pcX = horizOff + (width - 1)/2;
408 * pcY = vertOff + (height - 1)/2;
409 * The leftmost/rightmost pixel and the topmost/bottommost line of the clean aperture fall at:
410 * pcX +/- (cleanApertureWidth - 1)/2;
411 * pcY +/- (cleanApertureHeight - 1)/2; */
412 typedef struct
414 ISOM_BASEBOX_COMMON;
415 uint32_t cleanApertureWidthN;
416 uint32_t cleanApertureWidthD;
417 uint32_t cleanApertureHeightN;
418 uint32_t cleanApertureHeightD;
419 int32_t horizOffN;
420 uint32_t horizOffD;
421 int32_t vertOffN;
422 uint32_t vertOffD;
423 } isom_clap_t;
425 /* Pixel Aspect Ratio Box
426 * This box specifies the aspect ratio of a pixel, in arbitrary units.
427 * If a pixel appears H wide and V tall, then hSpacing/vSpacing is equal to H/V.
428 * When adjusting pixel aspect ratio, normally, the horizontal dimension of the video is scaled, if needed. */
429 typedef struct
431 ISOM_BASEBOX_COMMON;
432 uint32_t hSpacing; /* horizontal spacing */
433 uint32_t vSpacing; /* vertical spacing */
434 } isom_pasp_t;
436 /* Color Parameter Box
437 * This box is used to map the numerical values of pixels in the file to a common representation of color
438 * in which images can be correctly compared, combined, and displayed.
439 * This box is defined in QuickTime file format. */
440 typedef struct
442 ISOM_BASEBOX_COMMON;
443 uint32_t color_parameter_type; /* 'nclc' or 'prof' */
444 /* for 'nclc' */
445 uint16_t primaries_index; /* CIE 1931 xy chromaticity coordinates */
446 uint16_t transfer_function_index; /* nonlinear transfer function from RGB to ErEgEb */
447 uint16_t matrix_index; /* matrix from ErEgEb to EyEcbEcr */
448 } isom_colr_t;
450 /* Sample Scale Box
451 * If this box is present and can be interpreted by the decoder,
452 * all samples shall be displayed according to the scaling behaviour that is specified in this box.
453 * Otherwise, all samples are scaled to the size that is indicated by the width and height field in the Track Header Box. */
454 typedef struct
456 ISOM_FULLBOX_COMMON;
457 uint8_t constraint_flag; /* Upper 7-bits are reserved.
458 * If this flag is set, all samples described by this sample entry shall be scaled
459 * according to the method specified by the field 'scale_method'. */
460 uint8_t scale_method; /* The semantics of the values for scale_method are as specified for the 'fit' attribute of regions in SMIL 1.0. */
461 int16_t display_center_x;
462 int16_t display_center_y;
463 } isom_stsl_t;
/* Sample Entry
 * Members shared by every sample entry: the base box members, six reserved bytes
 * and data_reference_index.
 * Like ISOM_BASEBOX_COMMON and ISOM_FULLBOX_COMMON, the expansion has no trailing semicolon;
 * write `ISOM_SAMPLE_ENTRY;` at the use site. (A trailing semicolon here would leave
 * an empty declaration inside the struct, which ISO C does not allow.) */
#define ISOM_SAMPLE_ENTRY \
    ISOM_BASEBOX_COMMON; \
    uint8_t  reserved[6]; \
    uint16_t data_reference_index
471 typedef struct
473 ISOM_SAMPLE_ENTRY;
474 } isom_sample_entry_t;
476 /* Mpeg Sample Entry */
477 typedef struct
479 ISOM_SAMPLE_ENTRY;
480 isom_esds_t *esds; /* ES Descriptor Box */
481 } isom_mp4s_entry_t;
483 /* ISOM: Visual Sample Entry / QTFF: Image Description */
484 typedef struct
486 ISOM_SAMPLE_ENTRY;
487 int16_t version; /* ISOM: pre_defined / QTFF: sample description version */
488 int16_t revision_level; /* ISOM: reserved / QTFF: version of the CODEC */
489 int32_t vendor; /* ISOM: pre_defined / QTFF: whose CODEC */
490 uint32_t temporalQuality; /* ISOM: pre_defined / QTFF: the temporal quality factor */
491 uint32_t spatialQuality; /* ISOM: pre_defined / QTFF: the spatial quality factor */
492 /* The width and height are the maximum pixel counts that the codec will deliver.
493 * Since these are counts they do not take into account pixel aspect ratio. */
494 uint16_t width;
495 uint16_t height;
496 /* */
497 uint32_t horizresolution; /* 16.16 fixed-point / template: horizresolution = 0x00480000 / 72 dpi */
498 uint32_t vertresolution; /* 16.16 fixed-point / template: vertresolution = 0x00480000 / 72 dpi */
499 uint32_t dataSize; /* ISOM: reserved / QTFF: if known, the size of data for this descriptor */
500 uint16_t frame_count; /* frame per sample / template: frame_count = 1 */
501 char compressorname[33]; /* a fixed 32-byte field, with the first byte set to the number of bytes to be displayed */
502 uint16_t depth; /* ISOM: template: depth = 0x0018
503 * AVC : 0x0018: colour with no alpha
504 * 0x0028: grayscale with no alpha
505 * 0x0020: gray or colour with alpha
506 * QTFF: depth of this data (1-32) or (33-40 grayscale) */
507 int16_t color_table_ID; /* ISOM: template: pre_defined = -1
508 * QTFF: color table ID
509 * If this field is set to 0, the default color table should be used for the specified depth
510 * If the color table ID is set to 0, a color table is contained within the sample description itself.
511 * The color table immediately follows the color table ID field. */
512 /* common extensions */
513 isom_clap_t *clap; /* Clean Aperture Box @ optional */
514 isom_pasp_t *pasp; /* Pixel Aspect Ratio Box @ optional */
515 isom_colr_t *colr; /* ISOM: null / QTFF: Color Parameter Box @ optional */
516 isom_stsl_t *stsl; /* ISOM: Sample Scale Box @ optional / QTFF: null */
517 /* MP4 specific extension */
518 isom_esds_t *esds; /* ES Descriptor Box */
519 /* AVC specific extensions */
520 isom_avcC_t *avcC; /* AVCDecoderConfigurationRecord */
521 isom_btrt_t *btrt; /* MPEG-4 Bit Rate Box @ optional */
522 } isom_visual_entry_t;
524 /* Format Box
525 * This box shows the data format of the stored sound media.
526 * ISO base media file format also defines the same four-character-code for the type field,
527 * however, that is used to indicate original sample description of the media when a protected sample entry is used. */
528 typedef struct
530 ISOM_BASEBOX_COMMON;
531 uint32_t data_format; /* copy of sample description type */
532 } isom_frma_t;
534 /* Audio Endian Box */
535 typedef struct
537 ISOM_BASEBOX_COMMON;
538 int16_t littleEndian;
539 } isom_enda_t;
541 /* MPEG-4 Audio Box */
542 typedef struct
544 ISOM_BASEBOX_COMMON;
545 uint32_t unknown; /* always 0? */
546 } isom_mp4a_t;
548 /* Terminator Box
549 * This box is present to indicate the end of the sound description. It contains no data. */
550 typedef struct
552 ISOM_BASEBOX_COMMON; /* size = 8, type = 0x00000000 */
553 } isom_terminator_t;
555 /* Sound Information Decompression Parameters Box
556 * This box provides the ability to store data specific to a given audio decompressor in the sound description.
557 * The contents of this box are dependent on the audio decompressor. */
558 typedef struct
560 ISOM_BASEBOX_COMMON;
561 isom_frma_t *frma; /* Format Box */
562 isom_enda_t *enda; /* Audio Endian Box */
563 isom_mp4a_t *mp4a; /* MPEG-4 Audio Box */
564 isom_esds_t *esds; /* ES Descriptor Box */
565 isom_terminator_t *terminator; /* Terminator Box */
567 uint32_t exdata_length;
568 void *exdata;
569 } isom_wave_t;
571 /* Channel Compositor Box */
/* One channel description: label, flags and a coordinate triple. */
typedef struct
{
    uint32_t channelLabel;          /* the channelLabel that describes the channel */
    uint32_t channelFlags;          /* flags that control the interpretation of coordinates */
    uint32_t coordinates[3];        /* an ordered triple that specifies a precise speaker location / 32-bit floating point */
} isom_channel_description_t;
579 typedef struct
581 ISOM_FULLBOX_COMMON;
582 uint32_t channelLayoutTag; /* the channelLayoutTag indicates the layout */
583 uint32_t channelBitmap; /* If channelLayoutTag is set to 0x00010000, this field is the channel usage bitmap. */
584 uint32_t numberChannelDescriptions; /* the number of items in the Channel Descriptions array */
585 /* Channel Descriptions array */
586 isom_channel_description_t *channelDescriptions;
587 } isom_chan_t;
589 /* ISOM: Audio Sample Entry / QTFF: Sound Description */
590 typedef struct
592 ISOM_SAMPLE_ENTRY;
593 int16_t version; /* ISOM: reserved
594 * QTFF: sample description version
595 * version = 0 supports only 'raw ' or 'twos' audio format.
596 * version = 1 is used to support out-of-band configuration settings for decompression.
597 * version = 2 is used to support high samplerate or 3 or more multichannel audio. */
598 int16_t revision_level; /* ISOM: reserved / QTFF: version of the CODEC */
599 int32_t vendor; /* ISOM: reserved / QTFF: whose CODEC */
600 uint16_t channelcount; /* ISOM: template: channelcount = 2
601 * QTFF: the number of audio channels
602 * Allowable values are 1 (mono) or 2 (stereo).
603 * For more than 2, set this field to 3 and use numAudioChannels instead of this field. */
604 uint16_t samplesize; /* ISOM: template: samplesize = 16
605 * QTFF: the number of bits in each uncompressed sample for a single channel
606 * Allowable values are 8 or 16.
607 * For non-mod8, set this field to 16 and use constBitsPerChannel instead of this field.
608 * For more than 16, set this field to 16 and use bytesPerPacket instead of this field. */
609 int16_t compression_ID; /* ISOM: pre_defined
610 * QTFF: version = 0 -> must be set to 0.
611 * version = 2 -> must be set to -2. */
612 uint16_t packet_size; /* ISOM: reserved / QTFF: must be set to 0. */
613 uint32_t samplerate; /* the sampling rate expressed as a 16.16 fixed-point number
614 * ISOM: template: samplerate = {default samplerate of media}<<16
615 * QTFF: the integer portion should match the media's timescale.
616 * If this field is invalid because of higher samplerate,
617 * then set this field to 0x00010000 and use audioSampleRate instead of this field. */
618 /* version 1 fields
619 * These fields are for description of the compression ratio of fixed ratio audio compression algorithms.
620 * If these fields are not used, they are set to 0. */
621 uint32_t samplesPerPacket; /* For compressed audio, be set to the number of uncompressed frames generated by a compressed frame.
622 * For uncompressed audio, shall be set to 1. */
623 uint32_t bytesPerPacket; /* the number of bytes in a sample for a single channel */
624 uint32_t bytesPerFrame; /* the number of bytes in a frame */
625 uint32_t bytesPerSample; /* 8-bit audio: 1, other audio: 2 */
626 /* version 2 fields
627 * LPCMFrame: one sample from each channel.
628 * AudioPacket: For uncompressed audio, an AudioPacket is simply one LPCMFrame.
629 * For compressed audio, an AudioPacket is the natural compressed access unit of that format. */
630 uint32_t sizeOfStructOnly; /* offset to extensions */
631 uint64_t audioSampleRate; /* 64-bit floating point */
632 uint32_t numAudioChannels; /* any channel assignment info will be in Channel Compositor Box. */
633 int32_t always7F000000; /* always 0x7F000000 */
634 uint32_t constBitsPerChannel; /* only set if constant (and only for uncompressed audio) */
635 uint32_t formatSpecificFlags;
636 uint32_t constBytesPerAudioPacket; /* only set if constant */
637 uint32_t constLPCMFramesPerAudioPacket; /* only set if constant */
638 /* extensions */
639 isom_esds_t *esds; /* ISOM: ES Descriptor Box / QTFF: null */
640 isom_wave_t *wave; /* ISOM: null / QTFF: Sound Information Decompression Parameters Box */
641 isom_chan_t *chan; /* ISOM: null / QTFF: Channel Compositor Box @ optional */
643 uint32_t exdata_length;
644 void *exdata;
645 lsmash_audio_summary_t summary;
646 } isom_audio_entry_t;
/* Hint Sample Entry
 * Members shared by hint sample entries: the sample entry members followed by a data pointer.
 * The expansion has no trailing semicolon, matching ISOM_BASEBOX_COMMON and ISOM_FULLBOX_COMMON;
 * write `ISOM_HINT_SAMPLE_ENTRY;` at the use site. */
#define ISOM_HINT_SAMPLE_ENTRY \
    ISOM_SAMPLE_ENTRY; \
    uint8_t *data
653 typedef struct
655 ISOM_HINT_SAMPLE_ENTRY;
656 uint32_t data_length;
657 } isom_hint_entry_t;
/* Metadata Sample Entry
 * Members shared by metadata sample entries: currently just the sample entry members.
 * The expansion has no trailing semicolon, matching ISOM_BASEBOX_COMMON and ISOM_FULLBOX_COMMON;
 * write `ISOM_METADATA_SAMPLE_ENTRY;` at the use site. */
#define ISOM_METADATA_SAMPLE_ENTRY \
    ISOM_SAMPLE_ENTRY
663 typedef struct
665 ISOM_METADATA_SAMPLE_ENTRY;
666 } isom_metadata_entry_t;
668 /* QuickTime Text Sample Description */
669 typedef struct
671 ISOM_SAMPLE_ENTRY;
672 int32_t displayFlags;
673 int32_t textJustification;
674 uint16_t bgColor[3]; /* background RGB color */
675 /* defaultTextBox */
676 int16_t top;
677 int16_t left;
678 int16_t bottom;
679 int16_t right;
680 /* defaultStyle */
681 int32_t scrpStartChar; /* starting character position */
682 int16_t scrpHeight;
683 int16_t scrpAscent;
684 int16_t scrpFont;
685 uint16_t scrpFace; /* only first 8-bits are used */
686 int16_t scrpSize;
687 uint16_t scrpColor[3]; /* foreground RGB color */
688 /* defaultFontName is Pascal string */
689 uint8_t font_name_length;
690 char *font_name;
691 } isom_text_entry_t;
/* FontRecord: a font ID paired with its name. */
typedef struct
{
    uint16_t font_ID;
    /* Pascal string */
    uint8_t  font_name_length;
    char    *font_name;
} isom_font_record_t;
702 /* Font Table Box */
703 typedef struct
705 ISOM_BASEBOX_COMMON;
706 /* FontRecord
707 * entry_count is uint16_t. */
708 lsmash_entry_list_t *list;
709 } isom_ftab_t;
711 /* Timed Text Sample Entry */
712 typedef struct
714 ISOM_SAMPLE_ENTRY;
715 uint32_t displayFlags;
716 int8_t horizontal_justification;
717 int8_t vertical_justification;
718 uint8_t background_color_rgba[4];
719 /* BoxRecord default_text_box */
720 int16_t top;
721 int16_t left;
722 int16_t bottom;
723 int16_t right;
724 /* StyleRecord default_style */
725 uint16_t startChar; /* always 0 */
726 uint16_t endChar; /* always 0 */
727 uint16_t font_ID;
728 uint8_t face_style_flags;
729 uint8_t font_size;
730 uint8_t text_color_rgba[4];
731 /* Font Table Box font_table */
732 isom_ftab_t *ftab;
733 } isom_tx3g_entry_t;
735 /* Sample Description Box */
736 typedef struct
738 ISOM_FULLBOX_COMMON;
739 lsmash_entry_list_t *list;
740 } isom_stsd_t;
741 /** **/
/* Decoding Time to Sample Box
 * This box contains a compact version of a table that allows indexing from decoding time to sample number.
 * Each entry in the table gives the number of consecutive samples with the same time delta, and the delta of those samples.
 * By adding the deltas a complete time-to-sample map may be built.
 * All samples must have non-zero durations except for the last one.
 * The sum of all deltas gives the media duration in the track (not mapped to the movie timescale, and not considering any edit list).
 * DTS is an abbreviation of 'decoding time stamp'.
 *
 * This structure is one entry of the table. */
typedef struct
{
    uint32_t sample_count;      /* number of consecutive samples that have the given sample_delta */
    uint32_t sample_delta;      /* DTS[0] = 0; DTS[n+1] = DTS[n] + sample_delta[n]; */
} isom_stts_entry_t;
756 typedef struct
758 ISOM_FULLBOX_COMMON;
759 lsmash_entry_list_t *list; /* the table entries; presumably isom_stts_entry_t items -- confirm */
760 } isom_stts_t;
762 /* Composition Time to Sample Box
763 * This box provides the offset between decoding time and composition time.
764 * CTS is an abbreviation of 'composition time stamp'.
765 * This box is optional and must only be present if DTS and CTS differ for any samples.
766 * ISOM: if version is set to 1, sample_offset is a signed 32-bit integer.
767 * QTFF: sample_offset is always a signed 32-bit integer. */
768 typedef struct
770 uint32_t sample_count; /* number of consecutive samples that have the given sample_offset */
771 uint32_t sample_offset; /* CTS[n] = DTS[n] + sample_offset[n]; NOTE(review): declared uint32_t although the notes above call for a signed reading in some cases -- confirm how users reinterpret it */
772 } isom_ctts_entry_t;
774 typedef struct
776 ISOM_FULLBOX_COMMON;
777 lsmash_entry_list_t *list; /* the table entries; presumably isom_ctts_entry_t items -- confirm */
778 } isom_ctts_t;
780 /* Composition to Decode Box (Composition Shift Least Greatest Box)
781 * This box may be used to relate the composition and decoding timelines,
782 * and to deal with some of the ambiguities that signed composition offsets introduce. */
783 typedef struct
785 ISOM_FULLBOX_COMMON;
786 int32_t compositionToDTSShift; /* If this value is added to the composition times (as calculated by the CTS offsets from the DTS),
787 * then for all samples, their CTS is guaranteed to be greater than or equal to their DTS,
788 * and the buffer model implied by the indicated profile/level will be honoured;
789 * if leastDecodeToDisplayDelta is positive or zero, this field can be 0;
790 * otherwise it should be at least (- leastDecodeToDisplayDelta). */
791 int32_t leastDecodeToDisplayDelta; /* the smallest sample_offset (CTS offset) in this track */
792 int32_t greatestDecodeToDisplayDelta; /* the largest sample_offset (CTS offset) in this track */
793 int32_t compositionStartTime; /* the smallest CTS for any sample */
794 int32_t compositionEndTime; /* the CTS plus the composition duration, of the sample with the largest CTS in this track */
795 } isom_cslg_t;
797 /* Sample Size Box
798 * This box contains the sample count and a table giving the size in bytes of each sample.
799 * The total number of samples in the media is always indicated in the sample_count.
800 * Note: a sample size of zero is not prohibited in general, but it must be valid and defined for the coding system,
801 * as defined by the sample entry, that the sample belongs to. */
802 typedef struct
804 uint32_t entry_size; /* the size of a sample in bytes */
805 } isom_stsz_entry_t;
807 typedef struct
809 ISOM_FULLBOX_COMMON;
810 uint32_t sample_size; /* If this field is set to 0, then the samples have different sizes and the list below is used. */
811 uint32_t sample_count; /* the number of samples in the track */
812 lsmash_entry_list_t *list; /* available if sample_size == 0; presumably isom_stsz_entry_t items -- confirm */
813 } isom_stsz_t;
815 /* Sync Sample Box
816 * If this box is not present, every sample is a random access point.
817 * In AVC streams, this box cannot point to non-IDR samples.
818 * The table is arranged in strictly increasing order of sample number. */
819 typedef struct
821 uint32_t sample_number; /* the number of a sample that is a random access point in the stream */
822 } isom_stss_entry_t;
824 typedef struct
826 ISOM_FULLBOX_COMMON;
827 lsmash_entry_list_t *list; /* the table entries; presumably isom_stss_entry_t items -- confirm */
828 } isom_stss_t;
830 /* Partial Sync Sample Box
831 * Tip from QT engineering - Open-GOP intra frames need to be marked as "partial sync samples".
832 * Partial sync frames perform a partial reset of inter-frame dependencies;
833 * decoding two partial sync frames and the non-droppable difference frames between them is
834 * sufficient to prepare a decompressor for correctly decoding the difference frames that follow. */
835 typedef struct
837 uint32_t sample_number; /* the number of a sample that is a partial sync sample in the stream */
838 } isom_stps_entry_t;
840 typedef struct
842 ISOM_FULLBOX_COMMON;
843 lsmash_entry_list_t *list; /* the table entries; presumably isom_stps_entry_t items -- confirm */
844 } isom_stps_t;
846 /* Independent and Disposable Samples Box: one entry, made of four 2-bit fields (8 bits in total) */
847 typedef struct
849 unsigned is_leading : 2; /* ISOM: leading / QTFF: samples later in decode order may have earlier display times */
850 unsigned sample_depends_on : 2; /* independency */
851 unsigned sample_is_depended_on : 2; /* disposable */
852 unsigned sample_has_redundancy : 2; /* redundancy */
853 } isom_sdtp_entry_t;
855 typedef struct
857 ISOM_FULLBOX_COMMON;
858 /* According to the specification, this box does not carry the size of its table (sample_count).
859 * Instead, it is taken from the sample_count in the stsz or the stz2 box. */
860 lsmash_entry_list_t *list;
861 } isom_sdtp_t;
863 /* Sample To Chunk Box
864 * This box can be used to find the chunk that contains a sample, its position, and the associated sample description.
865 * The table is compactly coded. Each entry gives the index of the first chunk of a run of chunks with the same characteristics.
866 * By subtracting one entry's first_chunk from that of the following entry, you can compute how many chunks are in this run.
867 * You can convert this to a sample count by multiplying by the appropriate samples_per_chunk. */
868 typedef struct
870 uint32_t first_chunk; /* the index of the first chunk in this run of chunks that share the same samples_per_chunk and sample_description_index */
871 uint32_t samples_per_chunk; /* the number of samples in each of these chunks */
872 uint32_t sample_description_index; /* the index of the sample entry that describes the samples in this chunk */
873 } isom_stsc_entry_t;
875 typedef struct
877 ISOM_FULLBOX_COMMON;
878 lsmash_entry_list_t *list; /* the table entries; presumably isom_stsc_entry_t items -- confirm */
879 } isom_stsc_t;
881 /* Chunk Offset Box
882 * chunk_offset is the offset of the start of a chunk into its containing media file.
883 * Offsets are file offsets, not offsets into any box within the file. */
884 typedef struct
886 uint32_t chunk_offset; /* 32-bit file offset of a chunk */
887 } isom_stco_entry_t;
889 typedef struct
891 /* for large presentations: the 64-bit counterpart of the 32-bit chunk_offset */
892 uint64_t chunk_offset;
893 } isom_co64_entry_t;
895 typedef struct
897 ISOM_FULLBOX_COMMON; /* type = 'stco': 32-bit chunk offsets / type = 'co64': 64-bit chunk offsets */
898 lsmash_entry_list_t *list;
900 uint8_t large_presentation; /* Set this to 1 if 64-bit chunk offsets are needed. */
901 } isom_stco_t; /* shared with the co64 box */
903 /* Sample Group Description Box
904 * This box gives information about the characteristics of sample groups. */
905 typedef struct
907 ISOM_FULLBOX_COMMON; /* Use of version 0 entries is deprecated. */
908 uint32_t grouping_type; /* an integer that identifies the sbgp that is associated with this sample group description */
909 uint32_t default_length; /* the length of every group entry (if the length is constant), or zero (if it is variable)
910 * This field is available only if version == 1. */
911 lsmash_entry_list_t *list; /* the group entries; presumably isom_rap_entry_t or isom_roll_entry_t items depending on grouping_type -- confirm */
912 } isom_sgpd_entry_t;
914 /* Random Access Entry
915 * Samples marked by this group must be random access points, and may also be sync points. */
916 typedef struct
918 /* grouping_type is 'rap ' */
919 uint32_t description_length; /* This field is available only if version == 1 and default_length == 0. */
920 unsigned num_leading_samples_known : 1; /* a value of one indicates that the number of leading samples is known for each sample in this group,
921 * and the number is specified by num_leading_samples. */
922 unsigned num_leading_samples : 7; /* the number of leading samples for each sample in this group
923 * Note: when num_leading_samples_known is equal to 0, this field should be ignored. */
924 } isom_rap_entry_t;
926 /* Roll Recovery Entry
927 * This grouping type is defined as the group of samples having the same roll distance. */
928 typedef struct
930 /* grouping_type is 'roll' */
931 uint32_t description_length; /* This field is available only if version == 1 and default_length == 0. */
932 int16_t roll_distance; /* the number of samples that must be decoded in order for a sample to be decoded correctly
933 * The value zero must not be used. */
934 } isom_roll_entry_t;
936 /* Sample to Group Box
937 * This box is used to find the group that a sample belongs to and the associated description of that sample group. */
938 typedef struct
940 ISOM_FULLBOX_COMMON;
941 uint32_t grouping_type; /* Links this box to the sample group description table that has the same value for grouping type. */
942 uint32_t grouping_type_parameter; /* an indication of the sub-type of the grouping
943 * This field is available only if version == 1. */
944 lsmash_entry_list_t *list; /* presumably isom_group_assignment_entry_t items -- confirm */
945 } isom_sbgp_entry_t;
947 typedef struct /* assigns one run of consecutive samples to a sample group description */
949 uint32_t sample_count; /* the number of consecutive samples with the same sample group descriptor */
950 uint32_t group_description_index; /* the index of the sample group entry which describes the samples in this group
951 * The index ranges from 1 to the number of sample group entries in the Sample Group Description Box,
952 * or takes the value 0 to indicate that this sample is a member of no group of this type. */
953 } isom_group_assignment_entry_t;
955 /* Sample Table Box: holds pointers to the sample-related boxes listed below */
956 typedef struct
958 ISOM_BASEBOX_COMMON;
959 isom_stsd_t *stsd; /* Sample Description Box */
960 isom_stts_t *stts; /* Decoding Time to Sample Box */
961 isom_ctts_t *ctts; /* Composition Time to Sample Box */
962 isom_cslg_t *cslg; /* ISOM: Composition to Decode Box / QTFF: Composition Shift Least Greatest Box */
963 isom_stss_t *stss; /* Sync Sample Box */
964 isom_stps_t *stps; /* ISOM: null / QTFF: Partial Sync Sample Box */
965 isom_sdtp_t *sdtp; /* Independent and Disposable Samples Box */
966 isom_stsc_t *stsc; /* Sample To Chunk Box */
967 isom_stsz_t *stsz; /* Sample Size Box */
968 isom_stco_t *stco; /* Chunk Offset Box (isom_stco_t serves both the 'stco' and 'co64' types) */
969 lsmash_entry_list_t *sgpd_list; /* ISOM: Sample Group Description Boxes / QTFF: null */
970 lsmash_entry_list_t *sbgp_list; /* ISOM: Sample To Group Boxes / QTFF: null */
971 } isom_stbl_t;
973 /* Media Information Box */
974 typedef struct
976 ISOM_BASEBOX_COMMON;
977 /* Media Information Header Boxes */
978 isom_vmhd_t *vmhd; /* Video Media Header Box */
979 isom_smhd_t *smhd; /* Sound Media Header Box */
980 isom_hmhd_t *hmhd; /* ISOM: Hint Media Header Box / QTFF: null */
981 isom_nmhd_t *nmhd; /* ISOM: Null Media Header Box / QTFF: null */
982 isom_gmhd_t *gmhd; /* ISOM: null / QTFF: Generic Media Information Header Box */
983 /* Other child boxes */
984 isom_hdlr_t *hdlr; /* ISOM: null / QTFF: Data Handler Reference Box
985 * Note: this box must come before the Data Information Box. */
986 isom_dinf_t *dinf; /* Data Information Box */
987 isom_stbl_t *stbl; /* Sample Table Box */
988 } isom_minf_t;
990 /* Media Box: holds the media header, the handler reference and the media information */
991 typedef struct
993 ISOM_BASEBOX_COMMON;
994 isom_mdhd_t *mdhd; /* Media Header Box */
995 isom_hdlr_t *hdlr; /* ISOM: Handler Reference Box / QTFF: Media Handler Reference Box
996 * Note: this box must come before the Media Information Box. */
997 isom_minf_t *minf; /* Media Information Box */
998 } isom_mdia_t;
1000 /* Movie Header Box
1001 * This box defines overall information which is media-independent, and relevant to the entire presentation considered as a whole. */
1002 typedef struct
1004 ISOM_FULLBOX_COMMON; /* version is either 0 or 1 */
1005 /* version == 0: uint64_t -> uint32_t (presumably the 64-bit fields below are 32-bit in the file when version is 0 -- confirm against the reader/writer) */
1006 uint64_t creation_time; /* the creation time of the presentation (in seconds since midnight, Jan. 1, 1904, in UTC time) */
1007 uint64_t modification_time; /* the most recent time the presentation was modified (in seconds since midnight, Jan. 1, 1904, in UTC time) */
1008 uint32_t timescale; /* movie timescale: timescale for the entire presentation */
1009 uint64_t duration; /* the duration, expressed in movie timescale, of the longest track */
1010 /* The following fields are treated as reserved in MP4 version 1. */
1011 int32_t rate; /* fixed point 16.16 number. 0x00010000 is normal forward playback. */
1012 int16_t volume; /* fixed point 8.8 number. 0x0100 is full volume. */
1013 int16_t reserved;
1014 int32_t preferredLong[2]; /* ISOM: reserved / QTFF: unknown */
1015 int32_t matrix[9]; /* transformation matrix for the video */
1016 /* The following fields are defined in QuickTime file format.
1017 * In ISO Base Media file format, these fields are treated as pre_defined. */
1018 int32_t previewTime; /* the time value in the movie at which the preview begins */
1019 int32_t previewDuration; /* the duration of the movie preview in movie timescale units */
1020 int32_t posterTime; /* the time value of the time of the movie poster */
1021 int32_t selectionTime; /* the time value for the start time of the current selection */
1022 int32_t selectionDuration; /* the duration of the current selection in movie timescale units */
1023 int32_t currentTime; /* the time value for current time position within the movie */
1024 /* End of the QuickTime-defined fields */
1025 uint32_t next_track_ID; /* larger than the largest track-ID in use */
1026 } isom_mvhd_t;
1028 /* Object Descriptor Box
1029 * Note that this box is mandatory under 14496-1:2001 (mp41) while not mandatory under 14496-14:2003 (mp42). */
1030 struct mp4sys_ObjectDescriptor_t; /* forward declaration only. FIXME: I think these structs using mp4sys should be placed in isom.c */
1031 typedef struct
1033 ISOM_FULLBOX_COMMON;
1034 struct mp4sys_ObjectDescriptor_t *OD; /* pointer to the forward-declared descriptor type; its layout is not visible in this header */
1035 } isom_iods_t;
1037 /* Media Data Box
1038 * This box contains the media data.
1039 * A presentation may contain zero or more Media Data Boxes. */
1040 typedef struct
1042 ISOM_BASEBOX_COMMON; /* If size is 0, then this box is the last box. */
1044 uint64_t placeholder_pos; /* placeholder position for largesize */
1045 } isom_mdat_t;
1047 /* Free Space Box
1048 * The contents of a free-space box are irrelevant and may be ignored without affecting the presentation. */
1049 typedef struct
1051 ISOM_BASEBOX_COMMON; /* type is 'free' or 'skip' */
1052 uint32_t length; /* presumably the number of bytes in data -- confirm */
1053 uint8_t *data;
1054 } isom_free_t;
1056 typedef isom_free_t isom_skip_t; /* the 'skip' type reuses the same structure */
1058 /* Chapter List Box
1059 * This box is NOT defined in the ISO/MPEG-4 specs. */
1060 typedef struct
1062 uint64_t start_time; /* version = 0: expressed in movie timescale units
1063 * version = 1: expressed in units of 100 nanoseconds */
1064 /* The chapter name is a Pascal string: chapter_name_length is its length byte. */
1065 uint8_t chapter_name_length;
1066 char *chapter_name;
1067 } isom_chpl_entry_t;
1069 typedef struct
1071 ISOM_FULLBOX_COMMON; /* version = 0 is defined in F4V file format. */
1072 uint8_t unknown; /* only available under version = 1 */
1073 lsmash_entry_list_t *list; /* if version is set to 0, entry_count is uint8_t; presumably isom_chpl_entry_t items -- confirm */
1074 } isom_chpl_t;
1076 typedef struct /* a chapter name paired with its start time */
1078 char *chapter_name;
1079 uint64_t start_time; /* NOTE(review): units are not stated here -- confirm against the code that fills this in */
1080 } isom_chapter_entry_t;
1082 /* User Data Box
1083 * This box is a container box for informative user-data.
1084 * This user data is formatted as a set of boxes with more specific box types, which declare their content more precisely.
1085 * QTFF: for historical reasons, this box is optionally terminated by a 32-bit integer set to 0. */
1086 typedef struct
1088 ISOM_BASEBOX_COMMON;
1089 isom_chpl_t *chpl; /* Chapter List Box */
1090 } isom_udta_t;
1092 /** Caches for handling tracks **/
1093 typedef struct /* state of a chunk together with its pool of samples */
1095 uint32_t chunk_number; /* chunk number */
1096 uint32_t sample_description_index; /* sample description index */
1097 uint64_t first_dts; /* the first DTS in the chunk */
1098 uint64_t pool_size; /* the sum of the sizes of the samples in the pool */
1099 lsmash_entry_list_t *pool; /* samples pooled to interleave */
1100 } isom_chunk_t;
1102 typedef struct /* a DTS/CTS pair */
1104 uint64_t dts; /* decoding time stamp */
1105 uint64_t cts; /* composition time stamp */
1106 } isom_timestamp_t;
1108 typedef struct /* tracking state for a random access sample group */
1110 isom_group_assignment_entry_t *assignment; /* the address of the corresponding entry in the Sample to Group Box */
1111 isom_rap_entry_t *random_access; /* the address of the corresponding random access entry in the Sample Group Description Box */
1112 uint8_t is_prev_rap; /* whether the previous sample is a random access point or not */
1113 } isom_rap_group_t;
1115 typedef struct /* tracking state for a roll recovery sample group */
1117 isom_group_assignment_entry_t *assignment; /* the address of the corresponding entry in the Sample to Group Box */
1118 uint32_t first_sample; /* the number of the first sample of the group */
1119 uint32_t recovery_point; /* the identifier necessary for the recovery from its starting point to be completed */
1120 uint8_t delimited; /* flag: whether the sample_count has been determined */
1121 uint8_t described; /* flag: whether the group description has been determined */
1122 } isom_roll_group_t;
1124 typedef struct
1126 lsmash_entry_list_t *pool; /* groupings pooled until they are delimited and described; presumably isom_roll_group_t items -- confirm */
1127 } isom_grouping_t;
1129 typedef struct /* per-track state for track fragments */
1131 uint8_t has_samples;
1132 uint32_t traf_number;
1133 uint32_t last_duration; /* the last sample duration in this track fragment */
1134 uint64_t largest_cts; /* the largest CTS in this track fragment */
1135 } isom_fragment_t;
1137 typedef struct /* aggregate of the cache structures defined above */
1139 uint8_t all_sync; /* whether all samples are sync samples */
1140 isom_chunk_t chunk; /* embedded by value */
1141 isom_timestamp_t timestamp; /* embedded by value */
1142 isom_grouping_t roll; /* embedded by value */
1143 isom_rap_group_t *rap;
1144 isom_fragment_t *fragment;
1145 } isom_cache_t;
1147 /** Movie Fragments Boxes **/
1148 /* Track Fragments Flags ('tf_flags'): bit masks; the flags field of isom_tfhd_t is used for them */
1149 typedef enum
1151 ISOM_TF_FLAGS_BASE_DATA_OFFSET_PRESENT = 0x000001, /* The base_data_offset field exists. */
1152 ISOM_TF_FLAGS_SAMPLE_DESCRIPTION_INDEX_PRESENT = 0x000002, /* The sample_description_index field exists. */
1153 ISOM_TF_FLAGS_DEFAULT_SAMPLE_DURATION_PRESENT = 0x000008, /* The default_sample_duration field exists. */
1154 ISOM_TF_FLAGS_DEFAULT_SAMPLE_SIZE_PRESENT = 0x000010, /* The default_sample_size field exists. */
1155 ISOM_TF_FLAGS_DEFAULT_SAMPLE_FLAGS_PRESENT = 0x000020, /* The default_sample_flags field exists. */
1156 ISOM_TF_FLAGS_DURATION_IS_EMPTY = 0x010000, /* There are no samples for this time interval. */
1157 } isom_tf_flags_code;
1159 /* Track Run Flags ('tr_flags'): bit masks; the flags field of isom_trun_entry_t is used for them */
1160 typedef enum
1162 ISOM_TR_FLAGS_DATA_OFFSET_PRESENT = 0x000001, /* The data_offset field exists. */
1163 ISOM_TR_FLAGS_FIRST_SAMPLE_FLAGS_PRESENT = 0x000004, /* The first_sample_flags field exists. */
1164 ISOM_TR_FLAGS_SAMPLE_DURATION_PRESENT = 0x000100, /* The sample_duration field exists. */
1165 ISOM_TR_FLAGS_SAMPLE_SIZE_PRESENT = 0x000200, /* The sample_size field exists. */
1166 ISOM_TR_FLAGS_SAMPLE_FLAGS_PRESENT = 0x000400, /* The sample_flags field exists. */
1167 ISOM_TR_FLAGS_SAMPLE_COMPOSITION_TIME_OFFSET_PRESENT = 0x000800, /* The sample_composition_time_offset field exists. */
1168 } isom_tr_flags_code;
1170 /* Sample Flags: bit-fields totalling 16 bits, followed by a 16-bit degradation priority */
1171 typedef struct
1173 unsigned reserved : 4;
1174 /* The definition of the following fields is the same as in the Independent and Disposable Samples Box. */
1175 unsigned is_leading : 2;
1176 unsigned sample_depends_on : 2;
1177 unsigned sample_is_depended_on : 2;
1178 unsigned sample_has_redundancy : 2;
1179 /* End of the fields shared with the Independent and Disposable Samples Box */
1180 unsigned sample_padding_value : 3; /* the number of bits at the end of this sample */
1181 unsigned sample_is_non_sync_sample : 1; /* A value of 0 means this sample is a sync sample. */
1182 uint16_t sample_degradation_priority;
1183 } isom_sample_flags_t;
1185 /* Movie Extends Header Box
1186 * This box is omitted when used in live streaming.
1187 * If this box is not present, the overall duration must be computed by examining each fragment. */
1188 typedef struct
1190 ISOM_FULLBOX_COMMON;
1191 /* version == 0: uint64_t -> uint32_t (presumably the field below is 32-bit in the file when version is 0 -- confirm against the reader/writer) */
1192 uint64_t fragment_duration; /* the duration of the longest track, in the timescale indicated in the Movie Header Box, including movie fragments. */
1193 } isom_mehd_t;
1195 /* Track Extends Box
1196 * This box sets up default values used by the movie fragments.
 * These defaults can be overridden by the same-named optional fields of the Track Fragment Header Box (isom_tfhd_t). */
1197 typedef struct
1199 ISOM_FULLBOX_COMMON;
1200 uint32_t track_ID; /* identifier of the track; this shall be the track ID of a track in the Movie Box */
1201 uint32_t default_sample_description_index;
1202 uint32_t default_sample_duration;
1203 uint32_t default_sample_size;
1204 isom_sample_flags_t default_sample_flags;
1205 } isom_trex_entry_t;
1207 /* Movie Extends Box
1208 * This box warns readers that there might be Movie Fragment Boxes in this file. */
1209 typedef struct
1211 ISOM_BASEBOX_COMMON;
1212 isom_mehd_t *mehd; /* Movie Extends Header Box / omitted when used in live streaming */
1213 lsmash_entry_list_t *trex_list; /* list of Track Extends Boxes */
1215 uint64_t placeholder_pos; /* placeholder position for the Movie Extends Header Box */
1216 } isom_mvex_t;
1218 /* Movie Fragment Header Box
1219 * This box contains a sequence number, as a safety check.
1220 * The sequence number 'usually' starts at 1 and must increase for each movie fragment in the file, in the order in which they occur. */
1221 typedef struct
1223 ISOM_FULLBOX_COMMON;
1224 uint32_t sequence_number; /* the ordinal number of this fragment; numbers increase in file order */
1225 } isom_mfhd_t;
1227 /* Track Fragment Header Box
1228 * Each movie fragment can contain zero or more fragments for each track;
1229 * and a track fragment can contain zero or more contiguous runs of samples.
1230 * This box sets up information and defaults used for those runs of samples. */
1231 typedef struct
1233 ISOM_FULLBOX_COMMON; /* The flags field is used for 'tf_flags'. */
1234 uint32_t track_ID;
1235 /* All of the following are optional fields. */
1236 uint64_t base_data_offset; /* an explicit anchor for the data offsets in each track run
1237 * Offsets are file offsets, just like chunk_offset in the Chunk Offset Box.
1238 * If not provided, the base_data_offset for the first track in the movie fragment is the position
1239 * of the first byte of the enclosing Movie Fragment Box, and for second and subsequent track fragments,
1240 * the default is the end of the data defined by the preceding fragment.
1241 * To avoid cases where this field might overflow, e.g. semi-permanent live streaming and broadcasting,
1242 * you shall not use this optional field. */
1243 uint32_t sample_description_index; /* overrides default_sample_description_index in the Track Extends Box */
1244 uint32_t default_sample_duration; /* overrides default_sample_duration in the Track Extends Box */
1245 uint32_t default_sample_size; /* overrides default_sample_size in the Track Extends Box */
1246 isom_sample_flags_t default_sample_flags; /* overrides default_sample_flags in the Track Extends Box */
1247 } isom_tfhd_t;
1249 /* Track Fragment Run Box
1250 * Within the Track Fragment Box, there are zero or more Track Fragment Run Boxes.
1251 * If the duration-is-empty flag is set in the tf_flags, there are no track runs.
1252 * A track run documents a contiguous set of samples for a track. */
1253 typedef struct
1255 ISOM_FULLBOX_COMMON; /* The flags field is used for 'tr_flags'. */
1256 uint32_t sample_count; /* the number of samples being added in this run; also the number of rows in the following table */
1257 /* The following are optional fields. */
1258 int32_t data_offset; /* This value is added to the implicit or explicit data_offset established in the Track Fragment Header Box.
1259 * If this field is not present, then the data for this run starts immediately after the data of the previous run,
1260 * or at the base_data_offset defined by the Track Fragment Header Box if this is the first run in a track fragment. */
1261 isom_sample_flags_t first_sample_flags; /* a set of flags for the first sample only of this run */
1262 lsmash_entry_list_t *optional; /* All fields in this table are optional; presumably isom_trun_optional_row_t items -- confirm */
1263 } isom_trun_entry_t;
1265 typedef struct
1267 /* If the following fields are present, each field overrides the default value described in the Track Fragment Header Box or the Track Extends Box. */
1268 uint32_t sample_duration; /* overrides default_sample_duration */
1269 uint32_t sample_size; /* overrides default_sample_size */
1270 isom_sample_flags_t sample_flags; /* overrides default_sample_flags */
1271 /* End of the overriding fields */
1272 uint32_t sample_composition_time_offset; /* composition time offset */
1273 } isom_trun_optional_row_t;
1275 /* Track Fragment Box */
1276 typedef struct
1278 ISOM_BASEBOX_COMMON;
1279 isom_tfhd_t *tfhd; /* Track Fragment Header Box */
1280 lsmash_entry_list_t *trun_list; /* Track Fragment Run Box List
1281 * If the duration-is-empty flag is set in the tf_flags, there are no track runs. */
1283 isom_cache_t *cache; /* cache structure (isom_cache_t) attached to this track fragment */
1284 } isom_traf_entry_t;
1286 /* Movie Fragment Box: holds a fragment header and a list of track fragments */
1287 typedef struct
1289 ISOM_BASEBOX_COMMON;
1290 isom_mfhd_t *mfhd; /* Movie Fragment Header Box */
1291 lsmash_entry_list_t *traf_list; /* Track Fragment Box List */
1292 } isom_moof_entry_t;
1294 /* Track Fragment Random Access Box
1295 * Each entry in this box contains the location and the presentation time of a random accessible sample.
1296 * Note that not every random accessible sample in the track needs to be listed in the table.
1297 * The absence of this box does not mean that all the samples are sync samples. */
1298 typedef struct
1300 ISOM_FULLBOX_COMMON;
1301 uint32_t track_ID;
1302 unsigned int reserved : 26; /* together with the three 2-bit fields below this makes 32 bits */
1303 unsigned int length_size_of_traf_num : 2; /* the length in bytes of the traf_number field minus one */
1304 unsigned int length_size_of_trun_num : 2; /* the length in bytes of the trun_number field minus one */
1305 unsigned int length_size_of_sample_num : 2; /* the length in bytes of the sample_number field minus one */
1306 uint32_t number_of_entry; /* the number of entries for this track
1307 * Value zero indicates that every sample is a random access point and no table entry follows. */
1308 lsmash_entry_list_t *list; /* entry_count corresponds to number_of_entry. */
1309 } isom_tfra_entry_t;
1311 typedef struct /* location and presentation time of one random accessible sample */
1313 /* version == 0: 64bits -> 32bits (presumably the two 64-bit fields below are 32-bit in the file when version is 0 -- confirm against the reader/writer) */
1314 uint64_t time; /* the presentation time of the random access sample in units defined in the Media Header Box of the associated track
1315 * According to 14496-12:2008/FPDAM 3, presentation times are composition times. */
1316 uint64_t moof_offset; /* the offset of the Movie Fragment Box used in this entry
1317 * Offset is the byte-offset between the beginning of the file and the beginning of the Movie Fragment Box. */
1318 /* End of the version-dependent fields */
1319 uint32_t traf_number; /* the Track Fragment Box ('traf') number that contains the random accessible sample
1320 * The number ranges from 1 in each Movie Fragment Box ('moof'). */
1321 uint32_t trun_number; /* the Track Fragment Run Box ('trun') number that contains the random accessible sample
1322 * The number ranges from 1 in each Track Fragment Box ('traf'). */
1323 uint32_t sample_number; /* the sample number that contains the random accessible sample
1324 * The number ranges from 1 in each Track Fragment Run Box ('trun'). */
1325 } isom_tfra_location_time_entry_t;
1327 /* Movie Fragment Random Access Offset Box
1328 * This box provides a copy of the length field from the enclosing Movie Fragment Random Access Box. */
1329 typedef struct
1331 ISOM_FULLBOX_COMMON;
1332 uint32_t length; /* an integer giving the number of bytes of the enclosing Movie Fragment Random Access Box
1333 * This field is placed last in the enclosing box to assist readers scanning
1334 * from the end of the file in finding the Movie Fragment Random Access Box. */
1335 } isom_mfro_t;
1337 /* Movie Fragment Random Access Box
1338 * This box provides a table which may assist readers in finding random access points in a file using movie fragments,
1339 * and is usually placed at or near the end of the file.
1340 * The last box within the Movie Fragment Random Access Box, which is called the Movie Fragment Random Access Offset Box,
1341 * provides a copy of the length field from the Movie Fragment Random Access Box. */
1342 typedef struct
1344 ISOM_BASEBOX_COMMON;
1345 lsmash_entry_list_t *tfra_list; /* list of Track Fragment Random Access Boxes */
1346 isom_mfro_t *mfro; /* Movie Fragment Random Access Offset Box */
1347 } isom_mfra_t;
1349 /* Movie fragment manager
1350 * The presence of this means we use the structure of movie fragments. */
1351 typedef struct
1353 isom_moof_entry_t *movie; /* the address of the current Movie Fragment Box */
1354 uint64_t fragment_count; /* the number of movie fragments we created */
1355 uint64_t pool_size; /* presumably the total size of the samples in the pool below -- confirm */
1356 lsmash_entry_list_t *pool; /* samples pooled to interleave for the current movie fragment */
1357 } isom_fragment_manager_t;
1359 /** **/
1361 /* Movie Box: holds the movie header, the track list and the optional boxes below */
1362 typedef struct
1364 ISOM_BASEBOX_COMMON;
1365 isom_mvhd_t *mvhd; /* Movie Header Box */
1366 isom_iods_t *iods; /* ISOM: Object Descriptor Box / QTFF: null */
1367 lsmash_entry_list_t *trak_list; /* Track Box List */
1368 isom_udta_t *udta; /* User Data Box */
1369 isom_mvex_t *mvex; /* Movie Extends Box */
1370 } isom_moov_t;
1372 /* ROOT: holds the top-level boxes together with the settings and state listed below */
1373 struct lsmash_root_tag
1375 ISOM_FULLBOX_COMMON; /* The size field expresses the total file size.
1376 * The flags field expresses the file mode. */
1377 isom_ftyp_t *ftyp; /* File Type Box */
1378 isom_moov_t *moov; /* Movie Box */
1379 lsmash_entry_list_t *moof_list; /* Movie Fragment Box List */
1380 isom_mdat_t *mdat; /* Media Data Box */
1381 isom_free_t *free; /* Free Space Box */
1382 isom_mfra_t *mfra; /* Movie Fragment Random Access Box */
1384 lsmash_bs_t *bs; /* bytestream manager */
1385 isom_fragment_manager_t *fragment; /* movie fragment manager */
1386 double max_chunk_duration; /* max duration per chunk in seconds */
1387 double max_async_tolerance; /* max tolerance, in seconds, for the amount of interleaving asynchronization between tracks */
1388 uint64_t max_chunk_size; /* max size per chunk in bytes */
1389 uint64_t max_read_size; /* max size of reading from a chunk at a time */
1390 uint8_t file_type_written; /* whether the File Type Box was written */
1391 uint8_t qt_compatible; /* compatibility with QuickTime file format */
1392 uint8_t isom_compatible; /* compatibility with ISO Base Media file format */
1393 uint8_t avc_extensions; /* compatibility with AVC extensions */
1394 uint8_t mp4_version1; /* compatibility with MP4 ver.1 file format */
1395 uint8_t mp4_version2; /* compatibility with MP4 ver.2 file format */
1396 uint8_t itunes_audio; /* compatibility with iTunes Audio */
1397 uint8_t max_3gpp_version; /* maximum 3GPP version */
1398 uint8_t max_isom_version; /* maximum ISO Base Media file format version */
1399 lsmash_entry_list_t *print; /* NOTE(review): purpose not documented in this header -- confirm against its users */
1400 lsmash_entry_list_t *timeline; /* NOTE(review): purpose not documented in this header -- confirm against its users */
1403 /* Track Box: holds the child boxes of a track plus track-level state */
1404 typedef struct
1406 ISOM_BASEBOX_COMMON;
1407 isom_tkhd_t *tkhd; /* Track Header Box */
1408 isom_tapt_t *tapt; /* ISOM: null / QTFF: Track Aperture Mode Dimensions Box */
1409 isom_edts_t *edts; /* Edit Box */
1410 isom_tref_t *tref; /* Track Reference Box */
1411 isom_mdia_t *mdia; /* Media Box */
1412 isom_udta_t *udta; /* User Data Box */
1414 isom_cache_t *cache; /* cache structure (isom_cache_t) attached to this track */
1415 uint32_t related_track_ID; /* NOTE(review): which relation this ID expresses is not stated here -- confirm */
1416 uint8_t is_chapter; /* presumably non-zero when this track is a chapter track -- confirm */
1417 } isom_trak_entry_t;
1418 /** **/
1420 typedef struct /* one row of the isom_languages[] table */
1422 uint16_t mac_value; /* the integer paired with each language in isom_languages[]; presumably the Macintosh/QuickTime language code -- confirm */
1423 uint16_t iso_name; /* set from the ISOM_LANGUAGE_CODE_* constants in isom_languages[] */
1424 } isom_language_t;
1426 static const isom_language_t isom_languages[] =
1428 { 0, ISOM_LANGUAGE_CODE_ENGLISH },
1429 { 1, ISOM_LANGUAGE_CODE_FRENCH },
1430 { 2, ISOM_LANGUAGE_CODE_GERMAN },
1431 { 3, ISOM_LANGUAGE_CODE_ITALIAN },
1432 { 4, ISOM_LANGUAGE_CODE_DUTCH_M },
1433 { 5, ISOM_LANGUAGE_CODE_SWEDISH },
1434 { 6, ISOM_LANGUAGE_CODE_SPANISH },
1435 { 7, ISOM_LANGUAGE_CODE_DANISH },
1436 { 8, ISOM_LANGUAGE_CODE_PORTUGUESE },
1437 { 9, ISOM_LANGUAGE_CODE_NORWEGIAN },
1438 { 10, ISOM_LANGUAGE_CODE_HEBREW },
1439 { 11, ISOM_LANGUAGE_CODE_JAPANESE },
1440 { 12, ISOM_LANGUAGE_CODE_ARABIC },
1441 { 13, ISOM_LANGUAGE_CODE_FINNISH },
1442 { 14, ISOM_LANGUAGE_CODE_GREEK },
1443 { 15, ISOM_LANGUAGE_CODE_ICELANDIC },
1444 { 16, ISOM_LANGUAGE_CODE_MALTESE },
1445 { 17, ISOM_LANGUAGE_CODE_TURKISH },
1446 { 18, ISOM_LANGUAGE_CODE_CROATIAN },
1447 { 19, ISOM_LANGUAGE_CODE_CHINESE },
1448 { 20, ISOM_LANGUAGE_CODE_URDU },
1449 { 21, ISOM_LANGUAGE_CODE_HINDI },
1450 { 22, ISOM_LANGUAGE_CODE_THAI },
1451 { 23, ISOM_LANGUAGE_CODE_KOREAN },
1452 { 24, ISOM_LANGUAGE_CODE_LITHUANIAN },
1453 { 25, ISOM_LANGUAGE_CODE_POLISH },
1454 { 26, ISOM_LANGUAGE_CODE_HUNGARIAN },
1455 { 27, ISOM_LANGUAGE_CODE_ESTONIAN },
1456 { 28, ISOM_LANGUAGE_CODE_LATVIAN },
1457 { 29, ISOM_LANGUAGE_CODE_SAMI },
1458 { 30, ISOM_LANGUAGE_CODE_FAROESE },
1459 { 32, ISOM_LANGUAGE_CODE_RUSSIAN },
1460 { 33, ISOM_LANGUAGE_CODE_CHINESE },
1461 { 34, ISOM_LANGUAGE_CODE_DUTCH },
1462 { 35, ISOM_LANGUAGE_CODE_IRISH },
1463 { 36, ISOM_LANGUAGE_CODE_ALBANIAN },
1464 { 37, ISOM_LANGUAGE_CODE_ROMANIAN },
1465 { 38, ISOM_LANGUAGE_CODE_CZECH },
1466 { 39, ISOM_LANGUAGE_CODE_SLOVAK },
1467 { 40, ISOM_LANGUAGE_CODE_SLOVENIA },
1468 { 41, ISOM_LANGUAGE_CODE_YIDDISH },
1469 { 42, ISOM_LANGUAGE_CODE_SERBIAN },
1470 { 43, ISOM_LANGUAGE_CODE_MACEDONIAN },
1471 { 44, ISOM_LANGUAGE_CODE_BULGARIAN },
1472 { 45, ISOM_LANGUAGE_CODE_UKRAINIAN },
1473 { 46, ISOM_LANGUAGE_CODE_BELARUSIAN },
1474 { 47, ISOM_LANGUAGE_CODE_UZBEK },
1475 { 48, ISOM_LANGUAGE_CODE_KAZAKH },
1476 { 49, ISOM_LANGUAGE_CODE_AZERBAIJANI },
1477 { 51, ISOM_LANGUAGE_CODE_ARMENIAN },
1478 { 52, ISOM_LANGUAGE_CODE_GEORGIAN },
1479 { 53, ISOM_LANGUAGE_CODE_MOLDAVIAN },
1480 { 54, ISOM_LANGUAGE_CODE_KIRGHIZ },
1481 { 55, ISOM_LANGUAGE_CODE_TAJIK },
1482 { 56, ISOM_LANGUAGE_CODE_TURKMEN },
1483 { 57, ISOM_LANGUAGE_CODE_MONGOLIAN },
1484 { 59, ISOM_LANGUAGE_CODE_PASHTO },
1485 { 60, ISOM_LANGUAGE_CODE_KURDISH },
1486 { 61, ISOM_LANGUAGE_CODE_KASHMIRI },
1487 { 62, ISOM_LANGUAGE_CODE_SINDHI },
1488 { 63, ISOM_LANGUAGE_CODE_TIBETAN },
1489 { 64, ISOM_LANGUAGE_CODE_NEPALI },
1490 { 65, ISOM_LANGUAGE_CODE_SANSKRIT },
1491 { 66, ISOM_LANGUAGE_CODE_MARATHI },
1492 { 67, ISOM_LANGUAGE_CODE_BENGALI },
1493 { 68, ISOM_LANGUAGE_CODE_ASSAMESE },
1494 { 69, ISOM_LANGUAGE_CODE_GUJARATI },
1495 { 70, ISOM_LANGUAGE_CODE_PUNJABI },
1496 { 71, ISOM_LANGUAGE_CODE_ORIYA },
1497 { 72, ISOM_LANGUAGE_CODE_MALAYALAM },
1498 { 73, ISOM_LANGUAGE_CODE_KANNADA },
1499 { 74, ISOM_LANGUAGE_CODE_TAMIL },
1500 { 75, ISOM_LANGUAGE_CODE_TELUGU },
1501 { 76, ISOM_LANGUAGE_CODE_SINHALESE },
1502 { 77, ISOM_LANGUAGE_CODE_BURMESE },
1503 { 78, ISOM_LANGUAGE_CODE_KHMER },
1504 { 79, ISOM_LANGUAGE_CODE_LAO },
1505 { 80, ISOM_LANGUAGE_CODE_VIETNAMESE },
1506 { 81, ISOM_LANGUAGE_CODE_INDONESIAN },
1507 { 82, ISOM_LANGUAGE_CODE_TAGALOG },
1508 { 83, ISOM_LANGUAGE_CODE_MALAY_ROMAN },
1509 { 84, ISOM_LANGUAGE_CODE_MAYAY_ARABIC },
1510 { 85, ISOM_LANGUAGE_CODE_AMHARIC },
1511 { 87, ISOM_LANGUAGE_CODE_OROMO },
1512 { 88, ISOM_LANGUAGE_CODE_SOMALI },
1513 { 89, ISOM_LANGUAGE_CODE_SWAHILI },
1514 { 90, ISOM_LANGUAGE_CODE_KINYARWANDA },
1515 { 91, ISOM_LANGUAGE_CODE_RUNDI },
1516 { 92, ISOM_LANGUAGE_CODE_CHEWA },
1517 { 93, ISOM_LANGUAGE_CODE_MALAGASY },
1518 { 94, ISOM_LANGUAGE_CODE_ESPERANTO },
1519 { 128, ISOM_LANGUAGE_CODE_WELSH },
1520 { 129, ISOM_LANGUAGE_CODE_BASQUE },
1521 { 130, ISOM_LANGUAGE_CODE_CATALAN },
1522 { 131, ISOM_LANGUAGE_CODE_LATIN },
1523 { 132, ISOM_LANGUAGE_CODE_QUECHUA },
1524 { 133, ISOM_LANGUAGE_CODE_GUARANI },
1525 { 134, ISOM_LANGUAGE_CODE_AYMARA },
1526 { 135, ISOM_LANGUAGE_CODE_TATAR },
1527 { 136, ISOM_LANGUAGE_CODE_UIGHUR },
1528 { 137, ISOM_LANGUAGE_CODE_DZONGKHA },
1529 { 138, ISOM_LANGUAGE_CODE_JAVANESE },
1530 { UINT16_MAX, 0 }
/* One triple of colour description indices. */
typedef struct
{
    uint16_t primaries;
    uint16_t transfer;
    uint16_t matrix;
} isom_color_parameter_t;

/* Colour parameter triples, one row per video standard named in the row comment.
 * Rows are looked up by position, so their order must not change.
 * NOTE(review): the value 2 is used wherever the standard has no dedicated
 * index here (see the "Not specified" row) -- confirm against the colr writer. */
static const isom_color_parameter_t isom_color_parameter_tbl[] =
{
    { .primaries = 2, .transfer = 2, .matrix = 2 },     /* Not specified */
    { .primaries = 2, .transfer = 2, .matrix = 2 },     /* ITU-R BT.470 System M */
    { .primaries = 5, .transfer = 2, .matrix = 6 },     /* ITU-R BT.470 System B, G */
    { .primaries = 1, .transfer = 1, .matrix = 1 },     /* ITU-R BT.709 */
    { .primaries = 6, .transfer = 1, .matrix = 6 },     /* SMPTE 170M */
    { .primaries = 6, .transfer = 7, .matrix = 7 },     /* SMPTE 240M */
    { .primaries = 1, .transfer = 1, .matrix = 1 },     /* SMPTE 274M */
    { .primaries = 5, .transfer = 1, .matrix = 6 },     /* SMPTE 293M */
    { .primaries = 1, .transfer = 1, .matrix = 1 },     /* SMPTE 296M */
};
/* Compression ID codes (QT_ prefix: presumably the QuickTime sound sample
 * description field of the same name -- confirm against its users).
 * Every enumerator has an explicit value; they are listed in ascending order. */
enum qt_compression_id_code
{
    QT_COMPRESSION_ID_VARIABLE_COMPRESSION      = -2,
    QT_COMPRESSION_ID_FIXED_COMPRESSION         = -1,
    QT_COMPRESSION_ID_NOT_COMPRESSED            =  0,
    QT_COMPRESSION_ID_TWO_TO_ONE                =  1,
    QT_COMPRESSION_ID_EIGHT_TO_THREE            =  2,
    QT_COMPRESSION_ID_THREE_TO_ONE              =  3,
    QT_COMPRESSION_ID_SIX_TO_ONE                =  4,
    QT_COMPRESSION_ID_SIX_TO_ONE_PACKET_SIZE    =  8,
    QT_COMPRESSION_ID_THREE_TO_ONE_PACKET_SIZE  = 16,
};
/* Audio format flag bits.
 * Three groups share this enum: the generic QT_AUDIO_FORMAT_FLAG_* bits, the
 * QT_LPCM_FORMAT_FLAG_* aliases (each equal to the generic bit of the same
 * name), and the QT_ALAC_FORMAT_FLAG_* source bit depth codes. */
enum qt_audio_format_flags_code
{
    QT_AUDIO_FORMAT_FLAG_FLOAT            = 1,      /* Set for floating point, clear for integer. */
    QT_AUDIO_FORMAT_FLAG_BIG_ENDIAN       = 1<<1,   /* Set for big endian, clear for little endian. */
    QT_AUDIO_FORMAT_FLAG_SIGNED_INTEGER   = 1<<2,   /* Set for signed integer, clear for unsigned integer.
                                                     * This is only valid if QT_AUDIO_FORMAT_FLAG_FLOAT is clear. */
    QT_AUDIO_FORMAT_FLAG_PACKED           = 1<<3,   /* Set if the sample bits occupy the entire available bits for the channel,
                                                     * clear if they are high or low aligned within the channel. */
    QT_AUDIO_FORMAT_FLAG_ALIGNED_HIGH     = 1<<4,   /* Set if the sample bits are placed into the high bits of the channel, clear for low bit placement.
                                                     * This is only valid if QT_AUDIO_FORMAT_FLAG_PACKED is clear. */
    QT_AUDIO_FORMAT_FLAG_NON_INTERLEAVED  = 1<<5,   /* Set if the samples for each channel are located contiguously and the channels are laid out end to end,
                                                     * clear if the samples for each frame are laid out contiguously and the frames laid out end to end. */
    QT_AUDIO_FORMAT_FLAG_NON_MIXABLE      = 1<<6,   /* Set to indicate when a format is non-mixable.
                                                     * Note that this flag is only used when interacting with the HAL's stream format information.
                                                     * It is not a valid flag for any other uses. */
    /* Only bit 31 set, i.e. 0x80000000 when stored in a 32-bit field.
     * Spelled as the most negative 32-bit int because `1<<31` overflows a signed
     * int (undefined behavior) and an enumeration constant must fit in int. */
    QT_AUDIO_FORMAT_FLAG_ALL_CLEAR        = (-2147483647 - 1),
                                                    /* Set if all the flags would be clear in order to preserve 0 as the wild card value. */

    QT_LPCM_FORMAT_FLAG_FLOAT             = QT_AUDIO_FORMAT_FLAG_FLOAT,
    QT_LPCM_FORMAT_FLAG_BIG_ENDIAN        = QT_AUDIO_FORMAT_FLAG_BIG_ENDIAN,
    QT_LPCM_FORMAT_FLAG_SIGNED_INTEGER    = QT_AUDIO_FORMAT_FLAG_SIGNED_INTEGER,
    QT_LPCM_FORMAT_FLAG_PACKED            = QT_AUDIO_FORMAT_FLAG_PACKED,
    QT_LPCM_FORMAT_FLAG_ALIGNED_HIGH      = QT_AUDIO_FORMAT_FLAG_ALIGNED_HIGH,
    QT_LPCM_FORMAT_FLAG_NON_INTERLEAVED   = QT_AUDIO_FORMAT_FLAG_NON_INTERLEAVED,
    QT_LPCM_FORMAT_FLAG_NON_MIXABLE       = QT_AUDIO_FORMAT_FLAG_NON_MIXABLE,
    QT_LPCM_FORMAT_FLAG_ALL_CLEAR         = QT_AUDIO_FORMAT_FLAG_ALL_CLEAR,

    /* These flags are set for Apple Lossless data that was sourced from N bit native endian signed integer data. */
    QT_ALAC_FORMAT_FLAG_16BIT_SOURCE_DATA = 1,
    QT_ALAC_FORMAT_FLAG_20BIT_SOURCE_DATA = 2,
    QT_ALAC_FORMAT_FLAG_24BIT_SOURCE_DATA = 3,
    QT_ALAC_FORMAT_FLAG_32BIT_SOURCE_DATA = 4,
};
1599 int isom_is_fullbox( void *box );
1600 int isom_is_lpcm_audio( uint32_t type );
1602 void isom_init_box_common( void *box, void *parent, uint32_t type );
1604 int isom_check_compatibility( lsmash_root_t *root );
1606 char *isom_4cc2str( uint32_t fourcc );
1607 char *isom_unpack_iso_language( uint16_t language );
1609 isom_trak_entry_t *isom_get_trak( lsmash_root_t *root, uint32_t track_ID );
1610 isom_sgpd_entry_t *isom_get_sample_group_description( isom_stbl_t *stbl, uint32_t grouping_type );
1611 isom_sbgp_entry_t *isom_get_sample_to_group( isom_stbl_t *stbl, uint32_t grouping_type );
1613 isom_avcC_ps_entry_t *isom_create_ps_entry( uint8_t *ps, uint32_t ps_size );
1614 void isom_remove_avcC_ps( isom_avcC_ps_entry_t *ps );
1616 int isom_add_edts( isom_trak_entry_t *trak );
1617 int isom_add_elst( isom_edts_t *edts );
1618 int isom_add_clap( isom_visual_entry_t *visual );
1619 int isom_add_pasp( isom_visual_entry_t *visual );
1620 int isom_add_colr( isom_visual_entry_t *visual );
1621 int isom_add_stsl( isom_visual_entry_t *visual );
1622 int isom_add_avcC( isom_visual_entry_t *visual );
1623 int isom_add_btrt( isom_visual_entry_t *visual );
1624 int isom_add_wave( isom_audio_entry_t *audio );
1625 int isom_add_frma( isom_wave_t *wave );
1626 int isom_add_enda( isom_wave_t *wave );
1627 int isom_add_mp4a( isom_wave_t *wave );
1628 int isom_add_terminator( isom_wave_t *wave );
1629 int isom_add_chan( isom_audio_entry_t *audio );
1630 int isom_add_ftab( isom_tx3g_entry_t *tx3g );
1632 void isom_remove_tapt( isom_tapt_t *tapt );
1633 void isom_remove_clap( isom_clap_t *clap );
1634 void isom_remove_pasp( isom_pasp_t *pasp );
1635 void isom_remove_colr( isom_colr_t *colr );
1636 void isom_remove_stsl( isom_stsl_t *stsl );
1637 void isom_remove_avcC( isom_avcC_t *avcC );
1638 void isom_remove_btrt( isom_btrt_t *btrt );
1639 void isom_remove_frma( isom_frma_t *frma );
1640 void isom_remove_enda( isom_enda_t *enda );
1641 void isom_remove_mp4a( isom_mp4a_t *mp4a );
1642 void isom_remove_terminator( isom_terminator_t *terminator );
1643 void isom_remove_wave( isom_wave_t *wave );
1644 void isom_remove_chan( isom_chan_t *chan );
1645 void isom_remove_ftab( isom_ftab_t *ftab );
1646 void isom_remove_sample_description( isom_sample_entry_t *sample );
/* Declares a local variable `isom_<box_name>_t *box_name`, allocates it with
 * malloc(), zero-fills it and passes it to isom_init_box_common() together
 * with parent_name and box_4cc.
 * The expansion is a declaration followed by several statements and contains
 * `return -1` for the allocation-failure path, so it can only be used at block
 * scope inside a function whose return type accepts -1.
 * box_name is token-pasted, so it must be a plain identifier. */
#define isom_create_box( box_name, parent_name, box_4cc ) \
    isom_##box_name##_t *(box_name) = malloc( sizeof(*(box_name)) ); \
    if( (box_name) == NULL ) \
        return -1; \
    memset( (box_name), 0, sizeof(*(box_name)) ); \
    isom_init_box_common( (box_name), (parent_name), (box_4cc) )
/* Same as isom_create_box(), then gives the new box an entry list in its
 * `list` member via lsmash_create_entry_list().
 * If the list cannot be created, the box is freed and the enclosing function
 * returns -1. */
#define isom_create_list_box( box_name, parent_name, box_4cc ) \
    isom_create_box( box_name, parent_name, box_4cc ); \
    if( ((box_name)->list = lsmash_create_entry_list()) == NULL ) \
    { \
        free( box_name ); \
        return -1; \
    }
1664 #endif