dts: Support DTS:X muxing.
[L-SMASH.git] / codecs / dts.c
blob37dd7f43e0189cb17ea126623b47d24d364fbb31
1 /*****************************************************************************
2 * dts.c:
3 *****************************************************************************
4 * Copyright (C) 2012-2015 L-SMASH project
6 * Authors: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
8 * Permission to use, copy, modify, and/or distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 *****************************************************************************/
21 /* This file is available under an ISC license. */
23 #include "common/internal.h" /* must be placed first */
25 #include <stdlib.h>
26 #include <string.h>
27 #include <inttypes.h>
29 #include "core/box.h"
31 /*****************************************************************************
32 ETSI TS 102 114 V1.2.1 (2002-12)
33 ETSI TS 102 114 V1.3.1 (2011-08)
34 ETSI TS 102 114 V1.4.1 (2012-09)
36 IMPLEMENTATION OF DTS AUDIO IN MEDIA FILES BASED ON ISO/IEC 14496
37 Document No.: 9302J81100
38 Revision: F
39 Version: 1.3
41 Common File Format & Media Formats Specification Version 2.2 31 July 2015
42 *****************************************************************************/
43 #include "dts.h"
/* Not referenced in the visible part of this file; presumably the smallest legal
 * core frame size in bytes -- TODO confirm against the core frame parser. */
45 #define DTS_MIN_CORE_SIZE 96
/* Highest StreamConstruction value that has an entry in construction_info[] and in the
 * codingname table of lsmash_dts_get_codingname(); both tables hold this value + 1 slots. */
46 #define DTS_MAX_STREAM_CONSTRUCTION 21
/* Byte length of a DTSSpecificBox without any trailing ReservedBox:
 * the fixed fields written by lsmash_create_dts_specific_info() add up to 224 bits. */
47 #define DTS_SPECIFIC_BOX_MIN_LENGTH 28
/* 32-bit sync word constants for the DTS stream components named in each identifier.
 * For example, dts_parse_exss_xxch() compares a 32-bit read against DTS_SYNCWORD_XXCH
 * before parsing an XXCH component carried in an extension substream. */
49 typedef enum
51 DTS_SYNCWORD_CORE = 0x7FFE8001,
52 DTS_SYNCWORD_XCH = 0x5A5A5A5A,
53 DTS_SYNCWORD_XXCH = 0x47004A03,
54 DTS_SYNCWORD_X96K = 0x1D95F262,
55 DTS_SYNCWORD_XBR = 0x655E315E,
56 DTS_SYNCWORD_LBR = 0x0A801921,
57 DTS_SYNCWORD_XLL = 0x41A29547,
58 DTS_SYNCWORD_SUBSTREAM = 0x64582025,
59 DTS_SYNCWORD_SUBSTREAM_CORE = 0x02b09261,
60 DTS_SYNCWORD_X = 0x02000850,
61 } dts_syncword;
63 /* Loudspeaker Masks (up to 32-bit) for
64 * - nuCoreSpkrActivityMask
65 * - nuXXChSpkrLayoutMask
66 * - DownMixChMapMask
67 * - nChMask
68 * - nSpkrMask */
/* One bit per individual loudspeaker position (bits 0 to 27).
 * dts_get_channel_layout_from_ls_mask32() folds these bits into the 16-bit
 * dts_channel_layout form; the three highest bits (CL, LL, RL) have no 16-bit
 * counterpart and are extracted by dts_get_lower_channels_from_ls_mask32(). */
69 typedef enum
71 DTS_LOUDSPEAKER_MASK32_C = 0x00000001, /* Centre in front of listener */
72 DTS_LOUDSPEAKER_MASK32_L = 0x00000002, /* Left in front */
73 DTS_LOUDSPEAKER_MASK32_R = 0x00000004, /* Right in front */
74 DTS_LOUDSPEAKER_MASK32_LS = 0x00000008, /* Left surround on side in rear */
75 DTS_LOUDSPEAKER_MASK32_RS = 0x00000010, /* Right surround on side in rear */
76 DTS_LOUDSPEAKER_MASK32_LFE1 = 0x00000020, /* Low frequency effects subwoofer */
77 DTS_LOUDSPEAKER_MASK32_CS = 0x00000040, /* Centre surround in rear */
78 DTS_LOUDSPEAKER_MASK32_LSR = 0x00000080, /* Left surround in rear */
79 DTS_LOUDSPEAKER_MASK32_RSR = 0x00000100, /* Right surround in rear */
80 DTS_LOUDSPEAKER_MASK32_LSS = 0x00000200, /* Left surround on side */
81 DTS_LOUDSPEAKER_MASK32_RSS = 0x00000400, /* Right surround on side */
82 DTS_LOUDSPEAKER_MASK32_LC = 0x00000800, /* Between left and centre in front */
83 DTS_LOUDSPEAKER_MASK32_RC = 0x00001000, /* Between right and centre in front */
84 DTS_LOUDSPEAKER_MASK32_LH = 0x00002000, /* Left height in front */
85 DTS_LOUDSPEAKER_MASK32_CH = 0x00004000, /* Centre Height in front */
86 DTS_LOUDSPEAKER_MASK32_RH = 0x00008000, /* Right Height in front */
87 DTS_LOUDSPEAKER_MASK32_LFE2 = 0x00010000, /* Second low frequency effects subwoofer */
88 DTS_LOUDSPEAKER_MASK32_LW = 0x00020000, /* Left on side in front */
89 DTS_LOUDSPEAKER_MASK32_RW = 0x00040000, /* Right on side in front */
90 DTS_LOUDSPEAKER_MASK32_OH = 0x00080000, /* Over the listener's head */
91 DTS_LOUDSPEAKER_MASK32_LHS = 0x00100000, /* Left height on side */
92 DTS_LOUDSPEAKER_MASK32_RHS = 0x00200000, /* Right height on side */
93 DTS_LOUDSPEAKER_MASK32_CHR = 0x00400000, /* Centre height in rear */
94 DTS_LOUDSPEAKER_MASK32_LHR = 0x00800000, /* Left height in rear */
95 DTS_LOUDSPEAKER_MASK32_RHR = 0x01000000, /* Right height in rear */
96 DTS_LOUDSPEAKER_MASK32_CL = 0x02000000, /* Centre in the plane lower than listener's ears */
97 DTS_LOUDSPEAKER_MASK32_LL = 0x04000000, /* Left in the plane lower than listener's ears */
98 DTS_LOUDSPEAKER_MASK32_RL = 0x08000000, /* Right in the plane lower than listener's ears */
99 } dts_loudspeaker_mask;
101 /* Loudspeaker Masks (up to 16-bit) for
102 * - nuSpkrActivityMask
103 * - nuStndrSpkrLayoutMask
104 * - nuMixOutChMask
105 * - ChannelLayout of DTSSpecificBox */
/* One bit per loudspeaker or left/right loudspeaker pair.
 * Bits whose name contains two positions (e.g. L_R) count as two channels in
 * dts_get_channel_count_from_channel_layout(); the others count as one. */
106 typedef enum
108 DTS_CHANNEL_LAYOUT_C = 0x0001, /* Centre in front of listener */
109 DTS_CHANNEL_LAYOUT_L_R = 0x0002, /* Left/Right in front */
110 DTS_CHANNEL_LAYOUT_LS_RS = 0x0004, /* Left/Right surround on side in rear */
111 DTS_CHANNEL_LAYOUT_LFE1 = 0x0008, /* Low frequency effects subwoofer */
112 DTS_CHANNEL_LAYOUT_CS = 0x0010, /* Centre surround in rear */
113 DTS_CHANNEL_LAYOUT_LH_RH = 0x0020, /* Left/Right height in front */
114 DTS_CHANNEL_LAYOUT_LSR_RSR = 0x0040, /* Left/Right surround in rear */
115 DTS_CHANNEL_LAYOUT_CH = 0x0080, /* Centre height in front */
116 DTS_CHANNEL_LAYOUT_OH = 0x0100, /* Over the listener's head */
117 DTS_CHANNEL_LAYOUT_LC_RC = 0x0200, /* Between left/right and centre in front */
118 DTS_CHANNEL_LAYOUT_LW_RW = 0x0400, /* Left/Right on side in front */
119 DTS_CHANNEL_LAYOUT_LSS_RSS = 0x0800, /* Left/Right surround on side */
120 DTS_CHANNEL_LAYOUT_LFE2 = 0x1000, /* Second low frequency effects subwoofer */
121 DTS_CHANNEL_LAYOUT_LHS_RHS = 0x2000, /* Left/Right height on side */
122 DTS_CHANNEL_LAYOUT_CHR = 0x4000, /* Centre height in rear */
123 DTS_CHANNEL_LAYOUT_LHR_RHR = 0x8000, /* Left/Right height in rear */
124 } dts_channel_layout;
/* Table of construction-flag combinations indexed by StreamConstruction.
 * lsmash_dts_get_stream_construction() searches it from index 1 upwards and
 * lsmash_dts_get_construction_flags() reads it directly by index.
 * NOTE(review): the table has DTS_MAX_STREAM_CONSTRUCTION + 1 slots and the search starts
 * at index 1, so slot 0 is presumably an explicit 0 ("undefined") entry -- confirm that
 * the initialiser list really begins with it. */
126 static const lsmash_dts_construction_flag construction_info[DTS_MAX_STREAM_CONSTRUCTION + 1] =
129 DTS_CORE_SUBSTREAM_CORE_FLAG,
130 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG,
131 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XXCH_FLAG,
132 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_X96_FLAG,
133 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG,
134 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XBR_FLAG,
135 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG | DTS_EXT_SUBSTREAM_XBR_FLAG,
136 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_XBR_FLAG,
137 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_XBR_FLAG,
138 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG,
139 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG,
140 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG,
141 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG,
142 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG,
143 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG,
144 DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_X96_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG,
145 DTS_EXT_SUBSTREAM_XLL_FLAG,
146 DTS_EXT_SUBSTREAM_LBR_FLAG,
147 DTS_EXT_SUBSTREAM_CORE_FLAG,
148 DTS_EXT_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG,
149 DTS_EXT_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG ,
152 void dts_setup_parser( dts_info_t *info )
154 dts_extension_info_t *exss = &info->exss[0];
155 /* By default the core substream data, if present, has the nuBcCoreExtSSIndex = 0 and the nuBcCoreAssetIndex = 0.
156 * Therefore, we can treat as if one extension substream is there even if no extension substreams. */
157 exss->nuNumAudioPresnt = 1;
158 exss->nuNumAssets = 1;
159 exss->bBcCorePresent [0] = 0;
160 exss->nuBcCoreExtSSIndex[0] = 0;
161 exss->nuBcCoreAssetIndex[0] = 0;
/* Opaque container for the box data appended after the fixed DTSSpecificBox fields.
 * Filled by lsmash_append_dts_reserved_box(), released by lsmash_remove_dts_reserved_box(). */
164 struct lsmash_dts_reserved_box_tag
166 uint32_t size; /* number of valid bytes in data; 0 while nothing has been appended */
167 uint8_t *data; /* allocated buffer holding the appended boxes back to back */
170 int lsmash_append_dts_reserved_box( lsmash_dts_specific_parameters_t *param, const uint8_t *box_data, uint32_t box_size )
172 if( !param || !box_data || box_size == 0 )
173 return LSMASH_ERR_FUNCTION_PARAM;
174 if( !param->box )
176 param->box = lsmash_malloc_zero( sizeof(lsmash_dts_reserved_box_t) );
177 if( !param->box )
178 return LSMASH_ERR_MEMORY_ALLOC;
180 if( param->box->size == 0 )
182 /* New the first DTSExpansionBox. */
183 param->box->data = lsmash_memdup( box_data, box_size );
184 if( !param->box->data )
186 lsmash_freep( &param->box );
187 return LSMASH_ERR_MEMORY_ALLOC;
189 param->box->size = box_size;
191 else
193 /* New a DTSExpansionBox. */
194 uint32_t size = param->box->size + box_size;
195 uint8_t *data = lsmash_realloc( param->box->data, size );
196 if( !data )
197 return LSMASH_ERR_MEMORY_ALLOC;
198 memcpy( data + param->box->size, box_data, box_size );
199 param->box->data = data;
200 param->box->size = size;
202 return 0;
205 void lsmash_remove_dts_reserved_box( lsmash_dts_specific_parameters_t *param )
207 if( !param->box )
208 return;
209 lsmash_free( param->box->data );
210 lsmash_freep( &param->box );
/* Destructor for a DTS specific parameter object: drops its reserved-box
 * container first, then frees the object itself.  NULL is ignored. */
void dts_destruct_specific_data( void *data )
{
    if( data )
    {
        lsmash_remove_dts_reserved_box( data );
        lsmash_free( data );
    }
}
221 uint8_t lsmash_dts_get_stream_construction( lsmash_dts_construction_flag flags )
223 uint8_t StreamConstruction;
224 for( StreamConstruction = 1; StreamConstruction <= DTS_MAX_STREAM_CONSTRUCTION; StreamConstruction++ )
225 if( flags == construction_info[StreamConstruction] )
226 break;
227 /* For any stream type not listed in the above table,
228 * StreamConstruction shall be set to 0 and the codingname shall default to 'dtsh'. */
229 return StreamConstruction <= DTS_MAX_STREAM_CONSTRUCTION ? StreamConstruction : 0;
232 lsmash_dts_construction_flag lsmash_dts_get_construction_flags( uint8_t stream_construction )
234 if( stream_construction <= DTS_MAX_STREAM_CONSTRUCTION )
235 return construction_info[stream_construction];
236 return 0;
239 lsmash_codec_type_t lsmash_dts_get_codingname( lsmash_dts_specific_parameters_t *param )
241 assert( param->StreamConstruction <= DTS_MAX_STREAM_CONSTRUCTION );
242 if( param->MultiAssetFlag )
243 return ISOM_CODEC_TYPE_DTSH_AUDIO; /* Multiple asset streams shall use the 'dtsh' coding_name. */
244 static lsmash_codec_type_t codingname_table[DTS_MAX_STREAM_CONSTRUCTION + 1] = { LSMASH_CODEC_TYPE_INITIALIZER };
245 if( lsmash_check_codec_type_identical( codingname_table[0], LSMASH_CODEC_TYPE_UNSPECIFIED ) )
247 int i = 0;
248 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO; /* Undefined stream types shall be set to 0 and the codingname shall default to 'dtsh'. */
249 codingname_table[i++] = ISOM_CODEC_TYPE_DTSC_AUDIO;
250 codingname_table[i++] = ISOM_CODEC_TYPE_DTSC_AUDIO;
251 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
252 codingname_table[i++] = ISOM_CODEC_TYPE_DTSC_AUDIO;
253 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
254 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
255 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
256 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
257 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
258 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
259 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
260 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
261 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
262 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
263 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
264 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
265 codingname_table[i++] = ISOM_CODEC_TYPE_DTSL_AUDIO;
266 codingname_table[i++] = ISOM_CODEC_TYPE_DTSE_AUDIO;
267 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
268 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
269 codingname_table[i++] = ISOM_CODEC_TYPE_DTSH_AUDIO;
271 lsmash_codec_type_t codingname = codingname_table[ param->StreamConstruction ];
272 /* Check the presence of DTSXParameters Box. */
273 if( !lsmash_check_codec_type_identical( codingname, ISOM_CODEC_TYPE_DTSC_AUDIO )
274 && !lsmash_check_codec_type_identical( codingname, ISOM_CODEC_TYPE_DTSE_AUDIO )
275 && param->box
276 && param->box->data
277 && param->box->size >= ISOM_FULLBOX_COMMON_SIZE )
279 uint8_t *data = param->box->data;
280 uint32_t pos = 0;
281 while( pos + ISOM_FULLBOX_COMMON_SIZE <= param->box->size )
283 uint32_t size = LSMASH_GET_BE32( &data[0] );
284 uint32_t type = LSMASH_GET_BE32( &data[4] );
285 if( type == LSMASH_4CC( 'd', 'x', 'p', 'b' ) )
286 return ISOM_CODEC_TYPE_DTSX_AUDIO;
287 pos += size;
290 return codingname;
293 uint8_t *lsmash_create_dts_specific_info( lsmash_dts_specific_parameters_t *param, uint32_t *data_length )
295 int reserved_box_present = (param->box && param->box->data && param->box->size);
296 lsmash_bits_t *bits = lsmash_bits_adhoc_create();
297 if( !bits )
298 return NULL;
299 /* Create a DTSSpecificBox. */
300 lsmash_bits_put( bits, 32, 0 ); /* box size */
301 lsmash_bits_put( bits, 32, ISOM_BOX_TYPE_DDTS.fourcc ); /* box type: 'ddts' */
302 lsmash_bits_put( bits, 32, param->DTSSamplingFrequency );
303 lsmash_bits_put( bits, 32, param->maxBitrate ); /* maxBitrate; setup by isom_update_bitrate_description */
304 lsmash_bits_put( bits, 32, param->avgBitrate ); /* avgBitrate; setup by isom_update_bitrate_description */
305 lsmash_bits_put( bits, 8, param->pcmSampleDepth );
306 lsmash_bits_put( bits, 2, param->FrameDuration );
307 lsmash_bits_put( bits, 5, param->StreamConstruction );
308 lsmash_bits_put( bits, 1, param->CoreLFEPresent );
309 lsmash_bits_put( bits, 6, param->CoreLayout );
310 lsmash_bits_put( bits, 14, param->CoreSize );
311 lsmash_bits_put( bits, 1, param->StereoDownmix );
312 lsmash_bits_put( bits, 3, param->RepresentationType );
313 lsmash_bits_put( bits, 16, param->ChannelLayout );
314 lsmash_bits_put( bits, 1, param->MultiAssetFlag );
315 lsmash_bits_put( bits, 1, param->LBRDurationMod );
316 lsmash_bits_put( bits, 1, reserved_box_present );
317 lsmash_bits_put( bits, 5, 0 ); /* Reserved */
318 /* ReservedBox */
319 if( reserved_box_present )
320 for( uint32_t i = 0; i < param->box->size; i++ )
321 lsmash_bits_put( bits, 8, param->box->data[i] );
322 /* */
323 uint8_t *data = lsmash_bits_export_data( bits, data_length );
324 lsmash_bits_adhoc_cleanup( bits );
325 /* Update box size. */
326 LSMASH_SET_BE32( data, *data_length );
327 return data;
330 int lsmash_setup_dts_specific_parameters_from_frame( lsmash_dts_specific_parameters_t *param, uint8_t *data, uint32_t data_length )
332 lsmash_bits_t bits = { 0 };
333 lsmash_bs_t bs = { 0 };
334 uint8_t buffer[DTS_MAX_EXSS_SIZE] = { 0 };
335 bs.buffer.data = buffer;
336 bs.buffer.store = data_length;
337 bs.buffer.alloc = DTS_MAX_EXSS_SIZE;
338 dts_info_t *info = &(dts_info_t){ .bits = &bits };
339 info->bits = &bits;
340 lsmash_bits_init( &bits, &bs );
341 memcpy( buffer, data, LSMASH_MIN( data_length, DTS_MAX_EXSS_SIZE ) );
342 dts_setup_parser( info );
343 uint64_t next_frame_pos = 0;
344 while( 1 )
346 int err;
347 /* Seek to the head of the next syncframe. */
348 bs.buffer.pos = LSMASH_MIN( data_length, next_frame_pos );
349 /* Check the remainder length of the buffer.
350 * If there is enough length, then continue to parse the frame in it.
351 * The length 10 is the required byte length to get frame size. */
352 uint64_t remain_size = lsmash_bs_get_remaining_buffer_size( &bs );
353 if( bs.eob || (bs.eof && remain_size < 10) )
354 goto setup_param; /* No more valid data. */
355 /* Parse substream frame. */
356 dts_substream_type prev_substream_type = info->substream_type;
357 info->substream_type = dts_get_substream_type( info );
358 int (*dts_parse_frame)( dts_info_t * ) = NULL;
359 switch( info->substream_type )
361 /* Decide substream frame parser and check if this frame and the previous frame belong to the same AU. */
362 case DTS_SUBSTREAM_TYPE_CORE :
363 if( prev_substream_type != DTS_SUBSTREAM_TYPE_NONE )
364 goto setup_param;
365 dts_parse_frame = dts_parse_core_substream;
366 break;
367 case DTS_SUBSTREAM_TYPE_EXTENSION :
369 uint8_t prev_exss_index = info->exss_index;
370 if( (err = dts_get_exss_index( info, &info->exss_index )) < 0 )
371 return err;
372 if( prev_substream_type == DTS_SUBSTREAM_TYPE_EXTENSION && info->exss_index <= prev_exss_index )
373 goto setup_param;
374 dts_parse_frame = dts_parse_extension_substream;
375 break;
377 default :
378 /* An unknown stream type is detected. */
379 return LSMASH_ERR_NAMELESS;
381 info->frame_size = 0;
382 if( (err = dts_parse_frame( info )) < 0 )
383 return err; /* Failed to parse. */
384 next_frame_pos += info->frame_size;
386 setup_param:
387 dts_update_specific_param( info );
388 *param = info->ddts_param;
389 return 0;
392 static inline uint64_t dts_bits_get( lsmash_bits_t *bits, uint32_t width, uint64_t *bits_pos )
394 *bits_pos += width;
395 return lsmash_bits_get( bits, width );
398 static inline void dts_bits_align( lsmash_bits_t *bits, uint64_t *bits_pos )
400 uint8_t remainder = 8 - (*bits_pos & 0x7);
401 (void)dts_bits_get( bits, remainder, bits_pos );
404 static inline void dts_bits_align4( lsmash_bits_t *bits, uint64_t *bits_pos )
406 uint8_t remainder = 32 - (*bits_pos & 0x1f);
407 (void)dts_bits_get( bits, remainder, bits_pos );
410 static int dts_get_channel_count_from_channel_layout( uint16_t channel_layout )
412 #define DTS_CHANNEL_PAIR_MASK \
413 (DTS_CHANNEL_LAYOUT_L_R \
414 | DTS_CHANNEL_LAYOUT_LS_RS \
415 | DTS_CHANNEL_LAYOUT_LH_RH \
416 | DTS_CHANNEL_LAYOUT_LSR_RSR \
417 | DTS_CHANNEL_LAYOUT_LC_RC \
418 | DTS_CHANNEL_LAYOUT_LW_RW \
419 | DTS_CHANNEL_LAYOUT_LSS_RSS \
420 | DTS_CHANNEL_LAYOUT_LHS_RHS \
421 | DTS_CHANNEL_LAYOUT_LHR_RHR)
422 return lsmash_count_bits( channel_layout )
423 + lsmash_count_bits( channel_layout & DTS_CHANNEL_PAIR_MASK );
424 #undef DTS_CHANNEL_PAIR_MASK
427 static uint32_t dts_get_channel_layout_from_ls_mask32( uint32_t mask )
429 uint32_t layout = 0;
430 if( mask & DTS_LOUDSPEAKER_MASK32_C )
431 layout |= DTS_CHANNEL_LAYOUT_C;
432 if( mask & (DTS_LOUDSPEAKER_MASK32_L | DTS_LOUDSPEAKER_MASK32_R) )
433 layout |= DTS_CHANNEL_LAYOUT_L_R;
434 if( mask & (DTS_LOUDSPEAKER_MASK32_LS | DTS_LOUDSPEAKER_MASK32_RS) )
435 layout |= DTS_CHANNEL_LAYOUT_LS_RS;
436 if( mask & DTS_LOUDSPEAKER_MASK32_LFE1 )
437 layout |= DTS_CHANNEL_LAYOUT_LFE1;
438 if( mask & DTS_LOUDSPEAKER_MASK32_CS )
439 layout |= DTS_CHANNEL_LAYOUT_CS;
440 if( mask & (DTS_LOUDSPEAKER_MASK32_LH | DTS_LOUDSPEAKER_MASK32_RH) )
441 layout |= DTS_CHANNEL_LAYOUT_LH_RH;
442 if( mask & (DTS_LOUDSPEAKER_MASK32_LSR | DTS_LOUDSPEAKER_MASK32_RSR) )
443 layout |= DTS_CHANNEL_LAYOUT_LSR_RSR;
444 if( mask & DTS_LOUDSPEAKER_MASK32_CH )
445 layout |= DTS_CHANNEL_LAYOUT_CH;
446 if( mask & DTS_LOUDSPEAKER_MASK32_OH )
447 layout |= DTS_CHANNEL_LAYOUT_OH;
448 if( mask & (DTS_LOUDSPEAKER_MASK32_LC | DTS_LOUDSPEAKER_MASK32_RC) )
449 layout |= DTS_CHANNEL_LAYOUT_LC_RC;
450 if( mask & (DTS_LOUDSPEAKER_MASK32_LW | DTS_LOUDSPEAKER_MASK32_RW) )
451 layout |= DTS_CHANNEL_LAYOUT_LW_RW;
452 if( mask & (DTS_LOUDSPEAKER_MASK32_LSS | DTS_LOUDSPEAKER_MASK32_RSS) )
453 layout |= DTS_CHANNEL_LAYOUT_LSS_RSS;
454 if( mask & DTS_LOUDSPEAKER_MASK32_LFE2 )
455 layout |= DTS_CHANNEL_LAYOUT_LFE2;
456 if( mask & (DTS_LOUDSPEAKER_MASK32_LHS | DTS_LOUDSPEAKER_MASK32_RHS) )
457 layout |= DTS_CHANNEL_LAYOUT_LHS_RHS;
458 if( mask & DTS_LOUDSPEAKER_MASK32_CHR )
459 layout |= DTS_CHANNEL_LAYOUT_CHR;
460 if( mask & (DTS_LOUDSPEAKER_MASK32_LHR | DTS_LOUDSPEAKER_MASK32_RHR) )
461 layout |= DTS_CHANNEL_LAYOUT_LHR_RHR;
462 return layout;
/* Extract the loudspeakers that ChannelLayout cannot express -- CL, LL and RL --
 * from a 32-bit loudspeaker mask.  They occupy bits 25 to 27 of the mask and are
 * returned in bits 0 to 2 of the result, in the same order. */
static inline uint8_t dts_get_lower_channels_from_ls_mask32( uint32_t mask )
{
    return (uint8_t)((mask & 0x0E000000) >> 25);
}
471 static void dts_parse_xll_navigation( lsmash_bits_t *bits, dts_xll_info_t *xll, int nuBits4ExSSFsize, uint64_t *bits_pos )
473 xll->size = dts_bits_get( bits, nuBits4ExSSFsize, bits_pos ) + 1; /* nuExSSXLLFsize (nuBits4ExSSFsize) */
474 if( dts_bits_get( bits, 1, bits_pos ) ) /* bExSSXLLSyncPresent (1) */
476 dts_bits_get( bits, 4, bits_pos ); /* nuPeakBRCntrlBuffSzkB (4) */
477 int nuBitsInitDecDly = dts_bits_get( bits, 5, bits_pos ) + 1; /* nuBitsInitDecDly (5) */
478 dts_bits_get( bits, nuBitsInitDecDly, bits_pos ); /* nuInitLLDecDlyFrames (nuBitsInitDecDly) */
479 dts_bits_get( bits, nuBits4ExSSFsize, bits_pos ); /* nuExSSXLLSyncOffset (nuBits4ExSSFsize) */
483 static void dts_parse_lbr_navigation( lsmash_bits_t *bits, dts_lbr_info_t *lbr, uint64_t *bits_pos )
485 lbr->size = dts_bits_get( bits, 14, bits_pos ); /* nuExSSLBRFsize (14) */
486 if( dts_bits_get( bits, 1, bits_pos ) ) /* bExSSLBRSyncPresent (1) */
487 dts_bits_get( bits, 2, bits_pos ); /* nuExSSLBRSyncDistInFrames (2) */
/* Parse one audio asset descriptor of the extension substream selected by info->exss_index.
 *
 * The descriptor is read in three parts: static metadata (only when
 * exss->bStaticFieldsPresent is set), dynamic metadata, and decoder navigation data.
 * Values needed later are stored in the asset record chosen by nuAssetIndex and in the
 * extension substream record (bit resolution, sampling frequency, stereo downmix flag);
 * all other fields are read and discarded.
 *
 * bits_pos is the caller's running bit counter; it is advanced by every bit consumed and
 * is used at the end to skip whatever remains of the descriptor's declared size.
 *
 * Returns 0 on success, or LSMASH_ERR_NAMELESS if the byte stream reported an error. */
490 static int dts_parse_asset_descriptor( dts_info_t *info, uint64_t *bits_pos )
492 lsmash_bits_t *bits = info->bits;
493 dts_extension_info_t *exss = &info->exss[ info->exss_index ];
494 /* Audio asset descriptor */
495 uint64_t asset_descriptor_pos = *bits_pos;
496 int nuAssetDescriptFsize = dts_bits_get( bits, 9, bits_pos ) + 1; /* nuAssetDescriptFsize (9) */
497 dts_audio_asset_t *asset = &exss->asset[ dts_bits_get( bits, 3, bits_pos ) ]; /* nuAssetIndex (3) */
498 /* Static metadata */
499 int bEmbeddedStereoFlag = 0;
500 int bEmbeddedSixChFlag = 0;
501 int nuTotalNumChs = 0;
502 if( exss->bStaticFieldsPresent )
504 if( dts_bits_get( bits, 1, bits_pos ) ) /* bAssetTypeDescrPresent (1)*/
505 dts_bits_get( bits, 4, bits_pos ); /* nuAssetTypeDescriptor (4) */
506 if( dts_bits_get( bits, 1, bits_pos ) ) /* bLanguageDescrPresent (1) */
507 dts_bits_get( bits, 24, bits_pos ); /* LanguageDescriptor (24) */
508 if( dts_bits_get( bits, 1, bits_pos ) ) /* presumably bInfoTextPresent (1) -- unlabeled in the original, confirm */
510 int nuInfoTextByteSize = dts_bits_get( bits, 10, bits_pos ) + 1; /* nuInfoTextByteSize (10) */
511 dts_bits_get( bits, nuInfoTextByteSize * 8, bits_pos ); /* InfoTextString (nuInfoTextByteSize) */
513 int nuBitResolution = dts_bits_get( bits, 5, bits_pos ) + 1; /* nuBitResolution (5) */
/* Keep the largest bit resolution and sampling frequency seen over all assets. */
514 exss->bit_resolution = LSMASH_MAX( exss->bit_resolution, nuBitResolution );
515 int nuMaxSampleRate = dts_bits_get( bits, 4, bits_pos ); /* nuMaxSampleRate (4) */
516 static const uint32_t source_sample_rate_table[16] =
518 8000, 16000, 32000, 64000, 128000,
519 22050, 44100, 88200, 176400, 352800,
520 12000, 24000, 48000, 96000, 192000, 384000
522 exss->sampling_frequency = LSMASH_MAX( exss->sampling_frequency, source_sample_rate_table[nuMaxSampleRate] );
523 nuTotalNumChs = dts_bits_get( bits, 8, bits_pos ) + 1; /* nuTotalNumChs (8) */
524 asset->bOne2OneMapChannels2Speakers = dts_bits_get( bits, 1, bits_pos ); /* bOne2OneMapChannels2Speakers (1) */
525 if( asset->bOne2OneMapChannels2Speakers )
527 if( nuTotalNumChs > 2 )
529 bEmbeddedStereoFlag = dts_bits_get( bits, 1, bits_pos ); /* bEmbeddedStereoFlag (1) */
530 exss->stereo_downmix |= bEmbeddedStereoFlag;
532 if( nuTotalNumChs > 6 )
533 bEmbeddedSixChFlag = dts_bits_get( bits, 1, bits_pos ); /* bEmbeddedSixChFlag (1) */
534 int nuNumBits4SAMask;
535 if( dts_bits_get( bits, 1, bits_pos ) ) /* bSpkrMaskEnabled (1) */
537 nuNumBits4SAMask = (dts_bits_get( bits, 2, bits_pos ) + 1) << 2; /* nuNumBits4SAMask (2) */
538 asset->channel_layout |= dts_bits_get( bits, nuNumBits4SAMask, bits_pos ); /* nuSpkrActivityMask (nuNumBits4SAMask) */
540 else
541 /* The specification doesn't mention the value of nuNumBits4SAMask if bSpkrMaskEnabled is set to 0. */
542 nuNumBits4SAMask = 16;
543 int nuNumSpkrRemapSets = dts_bits_get( bits, 3, bits_pos ); /* 3-bit count, so at most 7 remap sets fit the 8-entry array below */
544 int nuStndrSpkrLayoutMask[8] = { 0 };
545 for( int ns = 0; ns < nuNumSpkrRemapSets; ns++ )
546 nuStndrSpkrLayoutMask[ns] = dts_bits_get( bits, nuNumBits4SAMask, bits_pos );
/* The remap coefficients are only skipped; the layout masks are needed to know how many there are. */
547 for( int ns = 0; ns < nuNumSpkrRemapSets; ns++ )
549 int nuNumSpeakers = dts_get_channel_count_from_channel_layout( nuStndrSpkrLayoutMask[ns] );
550 int nuNumDecCh4Remap = dts_bits_get( bits, 5, bits_pos ) + 1; /* nuNumDecCh4Remap[ns] (5) */
551 for( int nCh = 0; nCh < nuNumSpeakers; nCh++ )
553 uint32_t nuRemapDecChMask = dts_bits_get( bits, nuNumDecCh4Remap, bits_pos );
554 int nCoef = lsmash_count_bits( nuRemapDecChMask );
555 for( int nc = 0; nc < nCoef; nc++ )
556 dts_bits_get( bits, 5, bits_pos ); /* nuSpkrRemapCodes[ns][nCh][nc] (5) */
560 else
562 asset->nuRepresentationType = dts_bits_get( bits, 3, bits_pos ); /* nuRepresentationType (3) */
563 if( asset->nuRepresentationType == 2
564 || asset->nuRepresentationType == 3 )
565 nuTotalNumChs = 2;
568 /* Dynamic metadata */
569 int bDRCCoefPresent = dts_bits_get( bits, 1, bits_pos ); /* bDRCCoefPresent (1) */
570 if( bDRCCoefPresent )
571 dts_bits_get( bits, 8, bits_pos ); /* nuDRCCode (8) */
572 if( dts_bits_get( bits, 1, bits_pos ) ) /* bDialNormPresent (1) */
573 dts_bits_get( bits, 5, bits_pos ); /* nuDialNormCode (5) */
574 if( bDRCCoefPresent && bEmbeddedStereoFlag )
575 dts_bits_get( bits, 8, bits_pos ); /* nuDRC2ChDmixCode (8) */
576 int bMixMetadataPresent;
577 if( exss->bMixMetadataEnbl )
578 bMixMetadataPresent = dts_bits_get( bits, 1, bits_pos ); /* bMixMetadataPresent (1) */
579 else
580 bMixMetadataPresent = 0;
581 if( bMixMetadataPresent )
/* NOTE(review): 7 bits are read in total for the two fields below, so the "(7)" width
 * noted for nuPostMixGainAdjCode looks like it should be (6) -- confirm against the spec. */
583 dts_bits_get( bits, 7, bits_pos ); /* bExternalMixFlag (1)
584 * nuPostMixGainAdjCode (7) */
585 if( dts_bits_get( bits, 2, bits_pos ) < 3 ) /* nuControlMixerDRC (2) */
586 dts_bits_get( bits, 3, bits_pos ); /* nuLimit4EmbeddedDRC (3) */
587 else
588 dts_bits_get( bits, 8, bits_pos ); /* nuCustomDRCCode (8) */
589 int bEnblPerChMainAudioScale = dts_bits_get( bits, 1, bits_pos ); /* bEnblPerChMainAudioScale (1) */
/* One scale code per mixer output channel when per-channel scaling is enabled, otherwise one per configuration. */
590 for( uint8_t ns = 0; ns < exss->nuNumMixOutConfigs; ns++ )
591 if( bEnblPerChMainAudioScale )
592 for( uint8_t nCh = 0; nCh < exss->nNumMixOutCh[ns]; nCh++ )
593 dts_bits_get( bits, 6, bits_pos ); /* nuMainAudioScaleCode[ns][nCh] (6) */
594 else
595 dts_bits_get( bits, 6, bits_pos ); /* nuMainAudioScaleCode[ns][0] (6) */
/* nDecCh[] lists the channel counts to iterate over: the full set, then the embedded
 * 6-channel and stereo downmixes when flagged; nEmDM is the number of valid entries. */
596 int nEmDM = 1;
597 int nDecCh[3] = { nuTotalNumChs, 0, 0 };
598 if( bEmbeddedSixChFlag )
600 nDecCh[nEmDM] = 6;
601 ++nEmDM;
603 if( bEmbeddedStereoFlag )
605 nDecCh[nEmDM] = 2;
606 ++nEmDM;
608 for( uint8_t ns = 0; ns < exss->nuNumMixOutConfigs; ns++ )
609 for( int nE = 0; nE < nEmDM; nE++ )
610 for( int nCh = 0; nCh < nDecCh[nE]; nCh++ )
612 int nuMixMapMask = dts_bits_get( bits, exss->nNumMixOutCh[ns], bits_pos ); /* nuMixMapMask (nNumMixOutCh[ns]) */
613 int nuNumMixCoefs = lsmash_count_bits( nuMixMapMask );
614 for( int nC = 0; nC < nuNumMixCoefs; nC++ )
615 dts_bits_get( bits, 6, bits_pos ); /* nuMixCoeffs[ns][nE][nCh][nC] (6) */
618 /* Decoder navigation data */
619 asset->nuCodingMode = dts_bits_get( bits, 2, bits_pos ); /* nuCodingMode (2) */
620 switch( asset->nuCodingMode )
622 case 0 : /* DTS-HD Coding Mode that may contain multiple coding components */
624 int nuCoreExtensionMask = dts_bits_get( bits, 12, bits_pos ); /* nuCoreExtensionMask (12) */
625 asset->nuCoreExtensionMask = nuCoreExtensionMask;
626 if( nuCoreExtensionMask & DTS_EXT_SUBSTREAM_CORE_FLAG )
628 asset->core.frame_size = dts_bits_get( bits, 14, bits_pos ) + 1; /* nuExSSCoreFsize (14) */
629 if( dts_bits_get( bits, 1, bits_pos ) ) /* bExSSCoreSyncPresent (1) */
630 dts_bits_get( bits, 2, bits_pos ); /* nuExSSCoreSyncDistInFrames (2) */
632 if( nuCoreExtensionMask & DTS_EXT_SUBSTREAM_XBR_FLAG )
633 asset->xbr_size = dts_bits_get( bits, 14, bits_pos ) + 1; /* XBR size field (14) + 1 */
634 if( nuCoreExtensionMask & DTS_EXT_SUBSTREAM_XXCH_FLAG )
635 asset->core.xxch.size = dts_bits_get( bits, 14, bits_pos ) + 1; /* XXCH size field (14) + 1 */
636 if( nuCoreExtensionMask & DTS_EXT_SUBSTREAM_X96_FLAG )
637 asset->x96_size = dts_bits_get( bits, 12, bits_pos ) + 1; /* X96 size field (12) + 1 */
638 if( nuCoreExtensionMask & DTS_EXT_SUBSTREAM_LBR_FLAG )
639 dts_parse_lbr_navigation( bits, &asset->lbr, bits_pos );
640 if( nuCoreExtensionMask & DTS_EXT_SUBSTREAM_XLL_FLAG )
641 dts_parse_xll_navigation( bits, &asset->xll, exss->nuBits4ExSSFsize, bits_pos );
642 break;
644 case 1 : /* DTS-HD Loss-less coding mode without CBR component */
645 dts_parse_xll_navigation( bits, &asset->xll, exss->nuBits4ExSSFsize, bits_pos );
646 break;
647 case 2 : /* DTS-HD Low bit-rate mode */
648 dts_parse_lbr_navigation( bits, &asset->lbr, bits_pos );
649 break;
650 case 3 : /* Auxiliary coding mode */
651 asset->aux_size = dts_bits_get( bits, 14, bits_pos ) + 1; /* nuExSSAuxFsize (14) */
652 break;
653 default :
/* Unreachable: nuCodingMode is a 2-bit value. */
654 assert( 0 );
655 break;
/* Remaining bits = declared descriptor size in bits minus the bits consumed since its start.
 * NOTE(review): if more bits were consumed than declared, this unsigned subtraction wraps. */
657 dts_bits_get( bits, nuAssetDescriptFsize * 8 - (*bits_pos - asset_descriptor_pos), bits_pos ); /* Skip remaining part of Audio asset descriptor. */
658 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
/* Parse an XXCH frame header and its channel set headers, collecting the loudspeaker
 * information into xxch->channel_layout and xxch->lower_planes.
 *
 * The caller has already consumed the 32-bit XXCH sync word; header sizes are measured
 * from that sync word, which is why 32 is subtracted from *bits_pos below.
 * Each header is skipped to its declared end after the fields of interest were read.
 * *bits_pos is advanced by every bit consumed.  Always returns 0; callers check the
 * byte stream error flag themselves. */
661 static int dts_parse_xxch( dts_info_t *info, uint64_t *bits_pos, dts_xxch_info_t *xxch )
663 lsmash_bits_t *bits = info->bits;
664 /* XXCH Frame Header */
665 uint64_t xxch_pos = *bits_pos - 32; /* SYNCXXCh (32) */
666 uint64_t nuHeaderSizeXXCh = dts_bits_get( bits, 6, bits_pos ) + 1; /* nuHeaderSizeXXCh (6) */
667 dts_bits_get( bits, 1, bits_pos ); /* bCRCPresent4ChSetHeaderXXCh (1) */
668 int nuBits4SpkrMaskXXCh = dts_bits_get( bits, 5, bits_pos ) + 1; /* nuBits4SpkrMaskXXCh (5) */
669 int nuNumChSetsInXXCh = dts_bits_get( bits, 2, bits_pos ) + 1; /* nuNumChSetsInXXCh (2) */
670 for( int nChSet = 0; nChSet < nuNumChSetsInXXCh; nChSet++ )
671 dts_bits_get( bits, 14, bits_pos ); /* pnuChSetFsizeXXCh[nChSet] - 1 (14) */
672 /* A 5.1 decoder uses this AMODE to configure its decoded outputs to C, L, R, Ls and Rs layout.
673 * On the other hand a 7.1 decoder ignores the AMODE information from the core stream and uses
674 * instead the nuCoreSpkrActivityMask (C, L, R, LFE1, Lss and Rss) and the nuXXChSpkrLayoutMask
675 * (Lsr and Rsr) from the XXCh stream to get the original 7.1 speaker layout (C, L, R, LFE1, Lss,
676 * Rss, Lsr and Rsr) and configures its outputs accordingly. */
677 uint32_t xxch_mask = dts_bits_get( bits, nuBits4SpkrMaskXXCh, bits_pos ); /* nuCoreSpkrActivityMask (nuBits4SpkrMaskXXCh) */
678 xxch->channel_layout |= dts_get_channel_layout_from_ls_mask32( xxch_mask );
679 xxch->lower_planes = dts_get_lower_channels_from_ls_mask32( xxch_mask );
680 dts_bits_get( bits, nuHeaderSizeXXCh * 8 - (*bits_pos - xxch_pos), bits_pos ); /* Skip remaining part of XXCH Frame Header. */
681 for( int nChSet = 0; nChSet < nuNumChSetsInXXCh; nChSet++ )
683 /* XXCH Channel Set Header */
684 xxch_pos = *bits_pos;
685 uint64_t nuXXChChSetHeaderSize = dts_bits_get( bits, 7, bits_pos ) + 1; /* nuXXChChSetHeaderSize (7)*/
686 dts_bits_get( bits, 3, bits_pos ); /* nuChInChSetXXCh (3) */
687 if( nuBits4SpkrMaskXXCh > 6 )
/* The channel set mask carries only the bits above the lowest six, hence the shift by 6. */
689 xxch_mask = dts_bits_get( bits, nuBits4SpkrMaskXXCh - 6, bits_pos ) << 6; /* nuXXChSpkrLayoutMask (nuBits4SpkrMaskXXCh - 6) */
690 xxch->channel_layout |= dts_get_channel_layout_from_ls_mask32( xxch_mask );
691 xxch->lower_planes |= dts_get_lower_channels_from_ls_mask32( xxch_mask );
693 #if 0 /* FIXME: Can we detect stereo downmixing from only XXCH data within the core substream? */
694 if( dts_bits_get( bits, 1, bits_pos ) ) /* bDownMixCoeffCodeEmbedded (1) */
696 int bDownMixEmbedded = dts_bits_get( bits, 1, bits_pos ); /* bDownMixEmbedded (1) */
697 dts_bits_get( bits, 6, bits_pos ); /* nDmixScaleFactor (6) */
698 uint32_t DownMixChMapMask[8];
699 for( int nCh = 0; nCh < nuChInChSetXXCh; nCh++ )
700 DownMixChMapMask[nCh] = dts_bits_get( bits, nuBits4SpkrMaskXXCh, bits_pos );
702 #endif
703 dts_bits_get( bits, nuXXChChSetHeaderSize * 8 - (*bits_pos - xxch_pos), bits_pos ); /* Skip remaining part of XXCH Channel Set Header. */
705 return 0;
708 static int dts_parse_core_xxch( dts_info_t *info, uint64_t *bits_pos, dts_core_info_t *core )
710 if( core->extension_audio_descriptor == 0
711 || core->extension_audio_descriptor == 3 )
712 return LSMASH_ERR_INVALID_DATA;
713 int err = dts_parse_xxch( info, bits_pos, &core->xxch );
714 if( err < 0 )
715 return err;
716 info->flags |= DTS_CORE_SUBSTREAM_XXCH_FLAG;
717 return info->bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
720 static int dts_parse_exss_xxch( dts_info_t *info, uint64_t *bits_pos, dts_core_info_t *core )
722 lsmash_bits_t *bits = info->bits;
723 if( DTS_SYNCWORD_XXCH != dts_bits_get( bits, 32, bits_pos ) )
724 return LSMASH_ERR_INVALID_DATA;
725 int err = dts_parse_xxch( info, bits_pos, &core->xxch );
726 if( err < 0 )
727 return err;
728 info->flags |= DTS_EXT_SUBSTREAM_XXCH_FLAG;
729 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
732 static int dts_parse_core_x96( dts_info_t *info, uint64_t *bits_pos, dts_core_info_t *core )
734 if( core->extension_audio_descriptor != 2
735 && core->extension_audio_descriptor != 3 )
736 return 0; /* Probably this is not an X96 extension. We skip this anyway. */
737 lsmash_bits_t *bits = info->bits;
738 /* DTS_BCCORE_X96 Frame Header */
739 /* SYNCX96 (32) */
740 /* To reduce the probability of false synchronization caused by the presence of pseudo sync words, it is
741 * imperative to check the distance between the detected sync word and the end of current frame. This
742 * distance in bytes shall match the value of FSIZE96. */
743 uint64_t FSIZE96 = ((lsmash_bs_show_byte( bits->bs, 0 ) << 4)
744 | ((lsmash_bs_show_byte( bits->bs, 1 ) >> 4) & 0x0F)) + 1;
745 if( core->frame_size * 8 != (*bits_pos - 32 + FSIZE96 * 8) )
746 return 0; /* Encountered four emulation bytes (pseudo sync word). */
747 dts_bits_get( bits, 16, bits_pos ); /* FSIZE96 (12)
748 * REVNO (4) */
749 core->sampling_frequency *= 2;
750 core->frame_duration *= 2;
751 info->flags |= DTS_CORE_SUBSTREAM_X96_FLAG;
752 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
755 static int dts_parse_core_xch( dts_info_t *info, uint64_t *bits_pos, dts_core_info_t *core )
757 if( core->extension_audio_descriptor != 0
758 && core->extension_audio_descriptor != 3 )
759 return 0; /* Probably this is not an XCh extension. We skip this anyway. */
760 lsmash_bits_t *bits = info->bits;
761 /* XCH Frame Header */
762 /* XChSYNC (32) */
763 /* For compatibility reasons with legacy bitstreams the estimated distance in bytes is checked against
764 * the XChFSIZE+1 as well as the XChFSIZE. The XCh synchronization is pronounced if the distance matches
765 * either of these two values. */
766 uint64_t XChFSIZE = (lsmash_bs_show_byte( bits->bs, 0 ) << 2)
767 | ((lsmash_bs_show_byte( bits->bs, 1 ) >> 6) & 0x03);
768 if( core->frame_size * 8 != (*bits_pos - 32 + (XChFSIZE + 1) * 8)
769 && core->frame_size * 8 != (*bits_pos - 32 + XChFSIZE * 8) )
770 return 0; /* Encountered four emulation bytes (pseudo sync word). */
771 if( ((lsmash_bs_show_byte( bits->bs, 1 ) >> 2) & 0xF) != 1 )
772 return 0; /* A known value of AMODE is only 1. Otherwise just skip. */
773 dts_bits_get( bits, 16, bits_pos ); /* XChFSIZE (10)
774 * AMODE (4)
775 * byte align (2) */
776 core->channel_layout |= DTS_CHANNEL_LAYOUT_CS;
777 info->flags |= DTS_CORE_SUBSTREAM_XCH_FLAG;
778 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
781 static int dts_parse_exss_xbr( dts_info_t *info, uint64_t *bits_pos )
783 lsmash_bits_t *bits = info->bits;
784 /* XBR Frame Header */
785 uint64_t xbr_pos = *bits_pos;
786 if( DTS_SYNCWORD_XBR != dts_bits_get( bits, 32, bits_pos ) ) /* SYNCXBR (32) */
787 return LSMASH_ERR_INVALID_DATA;
788 uint64_t nHeaderSizeXBR = dts_bits_get( bits, 6, bits_pos ) + 1; /* nHeaderSizeXBR (6) */
789 dts_bits_get( bits, nHeaderSizeXBR * 8 - (*bits_pos - xbr_pos), bits_pos ); /* Skip the remaining bits in XBR Frame Header. */
790 info->flags |= DTS_EXT_SUBSTREAM_XBR_FLAG;
791 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
794 static int dts_parse_exss_x96( dts_info_t *info, uint64_t *bits_pos, dts_core_info_t *core )
796 lsmash_bits_t *bits = info->bits;
797 /* DTS_EXSUB_STREAM_X96 Frame Header */
798 uint64_t x96_pos = *bits_pos;
799 if( DTS_SYNCWORD_X96K != dts_bits_get( bits, 32, bits_pos ) ) /* SYNCX96 (32) */
800 return LSMASH_ERR_INVALID_DATA;
801 uint64_t nHeaderSizeX96 = dts_bits_get( bits, 6, bits_pos ) + 1; /* nHeaderSizeXBR (6) */
802 dts_bits_get( bits, nHeaderSizeX96 * 8 - (*bits_pos - x96_pos), bits_pos ); /* Skip the remaining bits in DTS_EXSUB_STREAM_X96 Frame Header. */
803 /* What the fuck! The specification drops 'if' sentence.
804 * We assume the same behaviour for core substream. */
805 core->sampling_frequency *= 2;
806 core->frame_duration *= 2;
807 info->flags |= DTS_EXT_SUBSTREAM_X96_FLAG;
808 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
811 static int dts_parse_exss_lbr( dts_info_t *info, uint64_t *bits_pos, dts_audio_asset_t *asset )
813 lsmash_bits_t *bits = info->bits;
814 dts_lbr_info_t *lbr = &asset->lbr;
815 if( DTS_SYNCWORD_LBR != dts_bits_get( bits, 32, bits_pos ) ) /* SYNCEXTLBR (32) */
816 return LSMASH_ERR_INVALID_DATA;
817 int ucFmtInfoCode = dts_bits_get( bits, 8, bits_pos );
818 if( ucFmtInfoCode == 2 )
820 /* LBR decoder initialization data */
821 int nLBRSampleRateCode = dts_bits_get( bits, 8, bits_pos ); /* nLBRSampleRateCode (8) */
822 int usLBRSpkrMask = dts_bits_get( bits, 16, bits_pos ); /* usLBRSpkrMask (16) */
823 dts_bits_get( bits, 16, bits_pos ); /* nLBRversion (16) */
824 int nLBRCompressedFlags = dts_bits_get( bits, 8, bits_pos ); /* nLBRCompressedFlags (8) */
825 dts_bits_get( bits, 40, bits_pos ); /* nLBRBitRateMSnybbles (8)
826 * nLBROriginalBitRate_LSW (16)
827 * nLBRScaledBitRate_LSW (16) */
828 static const uint32_t source_sample_rate_table[16] =
830 8000, 16000, 32000, 0, 0,
831 11025, 22050, 44100, 0, 0,
832 12000, 24000, 48000, 0, 0, 0
834 enum LBRFlags
836 LBR_FLAG_24_BIT_SAMPLES = 0x01, /* 0b00000001 */
837 LBR_FLAG_USE_LFE = 0x02, /* 0b00000010 */
838 LBR_FLAG_BANDLMT_MASK = 0x1C, /* 0b00011100 */
839 LBR_FLAG_STEREO_DOWNMIX = 0x20, /* 0b00100000 */
840 LBR_FLAG_MULTICHANNEL_DOWNMIX = 0x40, /* 0b01000000 */
842 lbr->sampling_frequency = source_sample_rate_table[nLBRSampleRateCode];
843 lbr->frame_duration = lbr->sampling_frequency < 16000 ? 1024
844 : lbr->sampling_frequency < 32000 ? 2048
845 : 4096;
846 lbr->channel_layout = ((usLBRSpkrMask >> 8) & 0xff) | ((usLBRSpkrMask << 8) & 0xff00); /* usLBRSpkrMask is little-endian. */
847 lbr->stereo_downmix |= !!(nLBRCompressedFlags & LBR_FLAG_STEREO_DOWNMIX);
848 lbr->lfe_present |= !!(nLBRCompressedFlags & LBR_FLAG_USE_LFE);
849 lbr->duration_modifier |= ((nLBRCompressedFlags & LBR_FLAG_BANDLMT_MASK) == 0x04)
850 || ((nLBRCompressedFlags & LBR_FLAG_BANDLMT_MASK) == 0x0C);
851 lbr->sample_size = (nLBRCompressedFlags & LBR_FLAG_24_BIT_SAMPLES) ? 24 : 16;
853 else if( ucFmtInfoCode != 1 )
854 return LSMASH_ERR_NAMELESS; /* unknown */
855 info->flags |= DTS_EXT_SUBSTREAM_LBR_FLAG;
856 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
859 static int dts_parse_exss_xll( dts_info_t *info, uint64_t *bits_pos, dts_audio_asset_t *asset )
861 lsmash_bits_t *bits = info->bits;
862 dts_xll_info_t *xll = &asset->xll;
863 /* Common Header */
864 uint64_t xll_pos = *bits_pos;
865 if( DTS_SYNCWORD_XLL != dts_bits_get( bits, 32, bits_pos ) ) /* SYNCXLL (32) */
866 return LSMASH_ERR_INVALID_DATA;
867 dts_bits_get( bits, 4, bits_pos ); /* nVersion (4) */
868 uint64_t nHeaderSize = dts_bits_get( bits, 8, bits_pos ) + 1; /* nHeaderSize (8) */
869 int nBits4FrameFsize = dts_bits_get( bits, 5, bits_pos ) + 1; /* nBits4FrameFsize (5) */
870 dts_bits_get( bits, nBits4FrameFsize, bits_pos ); /* nLLFrameSize (nBits4FrameFsize) */
871 int nNumChSetsInFrame = dts_bits_get( bits, 4, bits_pos ) + 1; /* nNumChSetsInFrame (4) */
872 uint16_t nSegmentsInFrame = 1 << dts_bits_get( bits, 4, bits_pos ); /* nSegmentsInFrame (4) */
873 uint16_t nSmplInSeg = 1 << dts_bits_get( bits, 4, bits_pos ); /* nSmplInSeg (4) */
874 int nBits4SSize = dts_bits_get( bits, 5, bits_pos ) + 1; /* nBits4SSize (5) */
875 dts_bits_get( bits, 3, bits_pos ); /* nBandDataCRCEn (2)
876 * bScalableLSBs (1) */
877 int nBits4ChMask = dts_bits_get( bits, 5, bits_pos ) + 1; /* nBits4ChMask (5) */
878 dts_bits_get( bits, nHeaderSize * 8 - (*bits_pos - xll_pos), bits_pos ); /* Skip the remaining bits in Common Header. */
879 int sum_nChSetLLChannel = 0;
880 uint32_t nFs1 = 0;
881 int number_of_frequency_bands = 0; /* the number of frequency bands is determined simply by the underlying maximum sampling
882 * frequency among all of the channel sets.
883 * For sampling frequency Fs,
884 * Number of frequency bands is 1 for Fs <= Base_Fs
885 * Number of frequency bands is 2 for Base_Fs < Fs <= 2 * Base_Fs
886 * Number of frequency bands is 2 for 2 * Base_Fs < Fs <= 4 * Base_Fs
887 * where Base_Fs denotes the base sampling frequency i.e. 64 kHz, 88.2 kHz, or 96 kHz. */
888 int nNumFreqBands1 = 0;
889 int nNumFreqBands[17] = { 0 };
890 xll->channel_layout = 0;
891 for( int nChSet = 0; nChSet < nNumChSetsInFrame; nChSet++ )
893 /* Channel Set Sub-Header */
894 xll_pos = *bits_pos;
895 uint64_t nChSetHeaderSize = dts_bits_get( bits, 10, bits_pos ) + 1; /* nChSetHeaderSize (10) */
896 int nChSetLLChannel = dts_bits_get( bits, 4, bits_pos ) + 1; /* nChSetLLChannel (4) */
897 dts_bits_get( bits, nChSetLLChannel, bits_pos ); /* nResidualChEncode (nChSetLLChannel) */
898 uint8_t nBitResolution = dts_bits_get( bits, 5, bits_pos ) + 1; /* nBitResolution (5) */
899 dts_bits_get( bits, 5, bits_pos ); /* nBitWidth (5) */
900 xll->pcm_resolution = LSMASH_MAX( xll->pcm_resolution, nBitResolution );
901 static const uint32_t source_sample_rate_table[16] =
903 8000, 16000, 32000, 64000, 128000,
904 22050, 44100, 88200, 176400, 352800,
905 12000, 24000, 48000, 96000, 192000, 384000
907 int sFreqIndex = dts_bits_get( bits, 4, bits_pos ); /* sFreqIndex (4) */
908 uint32_t nFs = source_sample_rate_table[sFreqIndex];
909 dts_bits_get( bits, 2, bits_pos ); /* nFsInterpolate (2) */
910 int nReplacementSet = dts_bits_get( bits, 2, bits_pos ); /* nReplacementSet (2) */
911 if( nReplacementSet > 0 )
912 dts_bits_get( bits, 1, bits_pos ); /* bActiveReplaceSet (1) */
913 if( asset->bOne2OneMapChannels2Speakers )
915 /* Downmix is allowed only when the encoded channel represents a signal feed to a corresponding loudspeaker. */
916 int bPrimaryChSet = dts_bits_get( bits, 1, bits_pos ); /* bPrimaryChSet (1) */
917 int bDownmixCoeffCodeEmbedded = dts_bits_get( bits, 1, bits_pos ); /* bDownmixCoeffCodeEmbedded (1) */
918 int nLLDownmixType = 0x7; /* 0b111: Unused */
919 if( bDownmixCoeffCodeEmbedded )
921 dts_bits_get( bits, 1, bits_pos ); /* bDownmixEmbedded (1) */
922 if( bPrimaryChSet )
923 nLLDownmixType = dts_bits_get( bits, 3, bits_pos ); /* nLLDownmixType (3) */
925 int bHierChSet = dts_bits_get( bits, 1, bits_pos ); /* bHierChSet (1) */
926 if( bDownmixCoeffCodeEmbedded )
928 /* N: the number of channels in the current channel set
929 * for non-primary channel set, adding +1 for the down scaling coefficients that prevent overflow
930 * M: the number of channels that the current channel set is mixed into
931 * Downmix coefficients are transmitted using 9-bit codes. */
932 static const int downmix_channel_count_table[8] = { 1, 2, 2, 3, 3, 4, 4, 0 };
933 int N = nChSetLLChannel + (bPrimaryChSet ? 0 : 1);
934 int M = bPrimaryChSet ? downmix_channel_count_table[nLLDownmixType] : sum_nChSetLLChannel;
935 int nDownmixCoeffs = N * M;
936 dts_bits_get( bits, nDownmixCoeffs * 9, bits_pos ); /* DownmixCoeffs (nDownmixCoeffs * 9) */
937 if( bPrimaryChSet && downmix_channel_count_table[nLLDownmixType] == 2 )
938 xll->stereo_downmix |= 1;
940 if( bHierChSet )
941 sum_nChSetLLChannel += nChSetLLChannel;
942 if( dts_bits_get( bits, 1, bits_pos ) ) /* bChMaskEnabled (1) */
944 uint32_t nChMask = dts_bits_get( bits, nBits4ChMask, bits_pos ); /* nChMask (nBits4ChMask) */
945 xll->channel_layout |= dts_get_channel_layout_from_ls_mask32( nChMask );
946 xll->lower_planes |= dts_get_lower_channels_from_ls_mask32( nChMask );
948 else
949 dts_bits_get( bits, 25 * nChSetLLChannel, bits_pos ); /* RadiusDelta[ch] (9)
950 * Theta[ch] (9)
951 * Phi[ch] (7)
952 * per channel */
954 else
956 /* No downmixing is allowed and each channel set is the primary channel set. */
957 if( dts_bits_get( bits, 1, bits_pos ) ) /* bMappingCoeffsPresent (1) */
959 int nBitsCh2SpkrCoef = 6 + 2 * dts_bits_get( bits, 3, bits_pos ); /* nBitsCh2SpkrCoef (3) */
960 int nNumSpeakerConfigs = dts_bits_get( bits, 2, bits_pos ) + 1; /* nNumSpeakerConfigs (2) */
961 for( int nSpkrConf = 0; nSpkrConf < nNumSpeakerConfigs; nSpkrConf++ )
963 int pnActiveChannelMask = dts_bits_get( bits, nChSetLLChannel, bits_pos ); /* pnActiveChannelMask[nSpkrConf] (nChSetLLChannel) */
964 int pnNumSpeakers = dts_bits_get( bits, 6, bits_pos ) + 1; /* pnNumSpeakers[nSpkrConf] (6) */
965 int bSpkrMaskEnabled = dts_bits_get( bits, 1, bits_pos ); /* bSpkrMaskEnabled (1) */
966 if( bSpkrMaskEnabled )
968 uint32_t nSpkrMask = dts_bits_get( bits, nBits4ChMask, bits_pos ); /* nSpkrMask[nSpkrConf] (nBits4ChMask) */
969 xll->channel_layout |= dts_get_channel_layout_from_ls_mask32( nSpkrMask );
970 xll->lower_planes |= dts_get_lower_channels_from_ls_mask32( nSpkrMask );
972 for( int nSpkr = 0; nSpkr < pnNumSpeakers; nSpkr++ )
974 if( !bSpkrMaskEnabled )
975 dts_bits_get( bits, 25, bits_pos ); /* ChSetSpeakerConfiguration (25) */
976 for( int nCh = 0; nCh < nChSetLLChannel; nCh++ )
977 if( pnActiveChannelMask & (1 << nCh) )
978 dts_bits_get( bits, nBitsCh2SpkrCoef, bits_pos ); /* pnCh2SpkrMapCoeff (nBitsCh2SpkrCoef) */
983 int full_bandwidth;
984 if( nFs > 96000 )
986 /* When bXtraFreqBands is equal to 0, only one-half of the original bandwidth is preserved and, thus, the number
987 * of frequency bands is also one-half of the number in the case where full bandwidth is preserved. Apparently,
988 * nSmplInSeg is the number of samples in a segment per one frequency band when full bandwidth is preserved.
989 * Because of this, to get the correct number of samples per frame, multiply the result by 2 when bXtraFreqBands
990 * is equal to 0. */
991 full_bandwidth = dts_bits_get( bits, 1, bits_pos ); /* bXtraFreqBands (1) */
992 nNumFreqBands[nChSet] = (1 + full_bandwidth) << (nFs > 192000);
994 else
996 full_bandwidth = 1;
997 nNumFreqBands[nChSet] = 1;
999 uint32_t nSmplInSeg_nChSet;
1000 if( nChSet == 0 )
1002 nFs1 = nFs;
1003 nNumFreqBands1 = nNumFreqBands[nChSet];
1004 nSmplInSeg_nChSet = nSmplInSeg;
1006 else
1007 nSmplInSeg_nChSet = (nSmplInSeg * (nFs * nNumFreqBands1)) / (nFs1 * nNumFreqBands[nChSet]);
1008 if( xll->sampling_frequency < nFs )
1010 xll->sampling_frequency = nFs;
1011 uint32_t samples_per_band_in_frame = nSegmentsInFrame * nSmplInSeg_nChSet;
1012 xll->frame_duration = samples_per_band_in_frame * nNumFreqBands[nChSet] * (2 - full_bandwidth);
1014 if( number_of_frequency_bands < nNumFreqBands[nChSet] )
1015 number_of_frequency_bands = nNumFreqBands[nChSet];
1016 dts_bits_get( bits, nChSetHeaderSize * 8 - (*bits_pos - xll_pos), bits_pos ); /* Skip the remaining bits in Channel Set Sub-Header. */
1018 /* NAVI */
1019 uint64_t FreqBandDataSize = 0;
1020 for( int Band = 0; Band < number_of_frequency_bands; Band++ )
1021 for( int Seg = 0; Seg < nSegmentsInFrame; Seg++ )
1023 /* The spec pseudocode extracts bits and initialize SegmentSize[Band][Seg] here. This may be one of lies in the spec.
1024 * According to 8.3.2 Stream Navigation in ETSI TS 102 114 V1.4.1, sum of all band data for all channel set in a segments is
1025 * the size of that segment. In addition there are no headers associated with segment and channel set of abstraction layer.
1026 * Obviously, the extraction is meaningless and the navigation should works without it. */
1027 // SegmentSize[Band][Seg] = dts_bits_get( bits, nBits4SSize, bits_pos );
1028 for( int nChSet = 0; nChSet < nNumChSetsInFrame; nChSet++ )
1029 if( nNumFreqBands[nChSet] > Band )
1030 FreqBandDataSize += dts_bits_get( bits, nBits4SSize, bits_pos ) + 1; /* BandChSetSize[Band][Seg][nChSet] (nBits4SSize) */
1032 dts_bits_align( bits, bits_pos );
1033 dts_bits_get( bits, 16, bits_pos ); /* Checksum (16) */
1034 /* Skip band data. */
1035 dts_bits_get( bits, FreqBandDataSize * 8, bits_pos );
1036 dts_bits_align4( bits, bits_pos );
1037 if( lsmash_bs_show_be32( bits->bs, 0 ) == DTS_SYNCWORD_X )
1038 xll->dtsx_extension_present = 1;
1039 info->flags |= DTS_EXT_SUBSTREAM_XLL_FLAG;
1040 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
1043 static uint16_t dts_generate_channel_layout_from_core( int channel_arrangement )
1045 static const uint16_t channel_layout_map_table[] =
1047 DTS_CHANNEL_LAYOUT_C,
1048 DTS_CHANNEL_LAYOUT_L_R, /* dual mono */
1049 DTS_CHANNEL_LAYOUT_L_R, /* stereo */
1050 DTS_CHANNEL_LAYOUT_L_R, /* sum-difference */
1051 DTS_CHANNEL_LAYOUT_L_R, /* Lt/Rt */
1052 DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R,
1053 DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_CS,
1054 DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_CS,
1055 DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LS_RS,
1056 DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LS_RS,
1057 DTS_CHANNEL_LAYOUT_LC_RC | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LS_RS,
1058 DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LSR_RSR | DTS_CHANNEL_LAYOUT_OH,
1059 DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_CS | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LSR_RSR,
1060 DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LC_RC | DTS_CHANNEL_LAYOUT_LS_RS,
1061 DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LC_RC | DTS_CHANNEL_LAYOUT_LS_RS | DTS_CHANNEL_LAYOUT_LSR_RSR,
1062 DTS_CHANNEL_LAYOUT_C | DTS_CHANNEL_LAYOUT_CS | DTS_CHANNEL_LAYOUT_L_R | DTS_CHANNEL_LAYOUT_LC_RC | DTS_CHANNEL_LAYOUT_LS_RS
1064 return channel_arrangement < 16 ? channel_layout_map_table[channel_arrangement] : 0;
1067 static int dts_parse_core( dts_info_t *info, uint64_t *bits_pos, dts_core_info_t *core )
1069 lsmash_bits_t *bits = info->bits;
1070 memset( core, 0, sizeof(dts_core_info_t) );
1071 /* SYNC (32) */
1072 int frame_type = dts_bits_get( bits, 1, bits_pos ); /* FTYPE (1) */
1073 int deficit_sample_count = dts_bits_get( bits, 5, bits_pos ); /* SHORT (5) */
1074 if( frame_type == 1 && deficit_sample_count != 31 )
1075 return LSMASH_ERR_INVALID_DATA; /* Any normal frame (FTYPE == 1) must have SHORT == 31. */
1076 int crc_present_flag = dts_bits_get( bits, 1, bits_pos ); /* CPF (1) */
1077 int num_of_pcm_sample_blocks = dts_bits_get( bits, 7, bits_pos ) + 1; /* NBLKS (7) */
1078 if( num_of_pcm_sample_blocks <= 5 )
1079 return LSMASH_ERR_INVALID_DATA;
1080 core->frame_duration = 32 * num_of_pcm_sample_blocks;
1081 if( frame_type == 1
1082 && core->frame_duration != 256
1083 && core->frame_duration != 512 && core->frame_duration != 1024
1084 && core->frame_duration != 2048 && core->frame_duration != 4096 )
1085 return LSMASH_ERR_INVALID_DATA; /* For any normal frame, the actual number of PCM core samples per channel must be
1086 * either 4096, 2048, 1024, 512, or 256 samples per channel. */
1087 core->frame_size = dts_bits_get( bits, 14, bits_pos ) + 1; /* FSIZE (14) */
1088 if( core->frame_size < DTS_MIN_CORE_SIZE )
1089 return LSMASH_ERR_INVALID_DATA;
1090 core->channel_arrangement = dts_bits_get( bits, 6, bits_pos ); /* AMODE (6) */
1091 core->channel_layout = dts_generate_channel_layout_from_core( core->channel_arrangement );
1092 int core_audio_sampling_frequency = dts_bits_get( bits, 4, bits_pos ); /* SFREQ (4) */
1093 static const uint32_t sampling_frequency_table[16] =
1096 8000, 16000, 32000, 0, 0,
1097 11025, 22050, 44100, 0, 0,
1098 12000, 24000, 48000, 0, 0
1100 core->sampling_frequency = sampling_frequency_table[core_audio_sampling_frequency];
1101 if( core->sampling_frequency == 0 )
1102 return LSMASH_ERR_INVALID_DATA; /* invalid */
1103 dts_bits_get( bits, 10, bits_pos ); /* Skip remainder 10 bits.
1104 * RATE (5)
1105 * MIX (1)
1106 * DYNF (1)
1107 * TIMEF (1)
1108 * AUXF (1)
1109 * HDCD (1) */
1110 core->extension_audio_descriptor = dts_bits_get( bits, 3, bits_pos ); /* EXT_AUDIO_ID (3)
1111 * Note: EXT_AUDIO_ID == 3 is defined in V1.2.1.
1112 * However, its definition disappears and is reserved in V1.3.1. */
1113 int extended_coding_flag = dts_bits_get( bits, 1, bits_pos ); /* EXT_AUDIO (1) */
1114 dts_bits_get( bits, 1, bits_pos ); /* ASPF (1) */
1115 int low_frequency_effects_flag = dts_bits_get( bits, 2, bits_pos ); /* LFF (2) */
1116 if( low_frequency_effects_flag == 0x3 )
1117 return LSMASH_ERR_INVALID_DATA; /* invalid */
1118 if( low_frequency_effects_flag )
1119 core->channel_layout |= DTS_CHANNEL_LAYOUT_LFE1;
1120 dts_bits_get( bits, 8 + crc_present_flag * 16, bits_pos ); /* HFLAG (1)
1121 * HCRC (16)
1122 * FILTS (1)
1123 * VERNUM (4)
1124 * CHIST (2) */
1125 int PCMR = dts_bits_get( bits, 3, bits_pos ); /* PCMR (3) */
1126 static const uint8_t source_resolution_table[8] = { 16, 16, 20, 20, 0, 24, 24, 0 };
1127 core->pcm_resolution = source_resolution_table[PCMR];
1128 if( core->pcm_resolution == 0 )
1129 return LSMASH_ERR_INVALID_DATA; /* invalid */
1130 dts_bits_get( bits, 6, bits_pos ); /* SUMF (1)
1131 * SUMS (1)
1132 * DIALNORM/UNSPEC (4) */
1133 if( extended_coding_flag )
1135 uint32_t syncword = dts_bits_get( bits, 24, bits_pos );
1136 uint64_t frame_size_bits = core->frame_size * 8;
1137 while( (*bits_pos + 24) < frame_size_bits )
1139 int err;
1140 syncword = ((syncword << 8) & 0xffffff00) | dts_bits_get( bits, 8, bits_pos );
1141 switch( syncword )
1143 case DTS_SYNCWORD_XXCH :
1144 if( (err = dts_parse_core_xxch( info, bits_pos, core )) < 0 )
1145 return err;
1146 syncword = dts_bits_get( bits, 24, bits_pos );
1147 break;
1148 case DTS_SYNCWORD_X96K :
1149 if( (err = dts_parse_core_x96( info, bits_pos, core )) < 0 )
1150 return err;
1151 syncword = dts_bits_get( bits, 24, bits_pos );
1152 break;
1153 case DTS_SYNCWORD_XCH :
1154 if( (err = dts_parse_core_xch( info, bits_pos, core )) < 0 )
1155 return err;
1156 break;
1157 default :
1158 continue;
1162 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
1165 static int dts_parse_exss_core( dts_info_t *info, uint64_t *bits_pos, dts_audio_asset_t *asset )
1167 lsmash_bits_t *bits = info->bits;
1168 if( DTS_SYNCWORD_SUBSTREAM_CORE != dts_bits_get( bits, 32, bits_pos ) )
1169 return LSMASH_ERR_INVALID_DATA;
1170 int err = dts_parse_core( info, bits_pos, &asset->core );
1171 if( err < 0 )
1172 return err;
1173 info->flags |= DTS_EXT_SUBSTREAM_CORE_FLAG;
1174 return bits->bs->error ? LSMASH_ERR_NAMELESS : 0;
1177 int dts_parse_core_substream( dts_info_t *info )
1179 lsmash_bits_t *bits = info->bits;
1180 uint64_t bits_pos = 0;
1181 int err;
1182 if( DTS_SYNCWORD_CORE != dts_bits_get( bits, 32, &bits_pos ) )
1184 err = LSMASH_ERR_INVALID_DATA;
1185 goto parse_fail;
1187 /* By default the core substream data, if present, has the nuBcCoreExtSSIndex = 0 and the nuBcCoreAssetIndex = 0. */
1188 dts_extension_info_t *exss = &info->exss[0];
1189 if( (err = dts_parse_core( info, &bits_pos, &exss->asset[0].core )) < 0 )
1190 goto parse_fail;
1191 exss->bBcCorePresent [0] = 1;
1192 exss->nuBcCoreExtSSIndex[0] = 0;
1193 exss->nuBcCoreAssetIndex[0] = 0;
1194 info->flags |= DTS_CORE_SUBSTREAM_CORE_FLAG;
1195 info->exss_count = 0;
1196 info->core = exss->asset[0].core;
1197 info->frame_size = exss->asset[0].core.frame_size;
1198 lsmash_bits_get_align( bits );
1199 return 0;
1200 parse_fail:
1201 lsmash_bits_get_align( bits );
1202 return err;
1205 int dts_parse_extension_substream( dts_info_t *info )
1207 lsmash_bits_t *bits = info->bits;
1208 uint64_t bits_pos = 0;
1209 dts_bits_get( bits, 40, &bits_pos ); /* SYNCEXTSSH (32)
1210 * UserDefinedBits (8) */
1211 int nExtSSIndex = dts_bits_get( bits, 2, &bits_pos ); /* nExtSSIndex (2) */
1212 info->exss_index = nExtSSIndex;
1213 dts_extension_info_t *exss = &info->exss[nExtSSIndex];
1214 memset( exss, 0, sizeof(dts_extension_info_t) );
1215 int bHeaderSizeType = dts_bits_get( bits, 1, &bits_pos ); /* bHeaderSizeType (1) */
1216 int nuBits4Header = 8 + bHeaderSizeType * 4;
1217 int nuBits4ExSSFsize = 16 + bHeaderSizeType * 4;
1218 exss->nuBits4ExSSFsize = nuBits4ExSSFsize;
1219 uint32_t nuExtSSHeaderSize = dts_bits_get( bits, nuBits4Header, &bits_pos ) + 1; /* nuExtSSHeaderSize (8 or 12) */
1220 info->frame_size = dts_bits_get( bits, nuBits4ExSSFsize, &bits_pos ) + 1; /* nuExtSSFsize (16 or 20) */
1221 if( info->frame_size < 10 )
1222 return LSMASH_ERR_INVALID_DATA;
1223 exss->bStaticFieldsPresent = dts_bits_get( bits, 1, &bits_pos ); /* bStaticFieldsPresent (1) */
1224 if( exss->bStaticFieldsPresent )
1226 dts_bits_get( bits, 2, &bits_pos ); /* nuRefClockCode (2) */
1227 exss->frame_duration = 512 * (dts_bits_get( bits, 3, &bits_pos ) + 1); /* nuExSSFrameDurationCode (3) */
1228 if( dts_bits_get( bits, 1, &bits_pos ) ) /* bTimeStampFlag (1) */
1229 dts_bits_get( bits, 36, &bits_pos ); /* nuTimeStamp (32)
1230 * nLSB (4) */
1231 exss->nuNumAudioPresnt = dts_bits_get( bits, 3, &bits_pos ) + 1; /* nuNumAudioPresnt (3) */
1232 exss->nuNumAssets = dts_bits_get( bits, 3, &bits_pos ) + 1; /* nuNumAssets (3) */
1233 /* The extension substreams with indexes lower than or equal to the index of the current extension substream can
1234 * be activated in the audio presentations indicated within the current extension substream. */
1235 for( uint8_t nAuPr = 0; nAuPr < exss->nuNumAudioPresnt; nAuPr++ )
1236 exss->nuActiveExSSMask[nAuPr]
1237 = dts_bits_get( bits, nExtSSIndex + 1, &bits_pos ); /* nuActiveExSSMask[nAuPr] (nExtSSIndex + 1) */
1238 for( uint8_t nAuPr = 0; nAuPr < exss->nuNumAudioPresnt; nAuPr++ )
1239 for( uint8_t nSS = 0; nSS <= nExtSSIndex; nSS++ )
1240 exss->nuActiveAssetMask[nAuPr][nSS]
1241 = ((exss->nuActiveExSSMask[nAuPr] >> nSS) & 0x1)
1242 ? dts_bits_get( bits, 8, &bits_pos ) /* nuActiveAssetMask[nAuPr][nSS] (8) */
1243 : 0;
1244 exss->bMixMetadataEnbl = dts_bits_get( bits, 1, &bits_pos ); /* bMixMetadataEnbl (1) */
1245 if( exss->bMixMetadataEnbl )
1247 dts_bits_get( bits, 2, &bits_pos ); /* nuMixMetadataAdjLevel (2) */
1248 int nuBits4MixOutMask = (dts_bits_get( bits, 2, &bits_pos ) + 1) << 2; /* nuBits4MixOutMask (2) */
1249 exss->nuNumMixOutConfigs = dts_bits_get( bits, 2, &bits_pos ) + 1; /* nuNumMixOutConfigs (2) */
1250 for( int ns = 0; ns < exss->nuNumMixOutConfigs; ns++ )
1252 int nuMixOutChMask = dts_bits_get( bits, nuBits4MixOutMask, &bits_pos ); /* nuMixOutChMask[ns] (nuBits4MixOutMask) */
1253 exss->nNumMixOutCh[ns] = dts_get_channel_count_from_channel_layout( nuMixOutChMask );
1257 else
1259 exss->nuNumAudioPresnt = 1;
1260 exss->nuNumAssets = 1;
1261 exss->bMixMetadataEnbl = 0;
1262 exss->nuNumMixOutConfigs = 0;
1264 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1265 exss->asset[nAst].size = dts_bits_get( bits, nuBits4ExSSFsize, &bits_pos ) + 1; /* nuAssetFsize[nAst] - 1 (nuBits4ExSSFsize) */
1266 int err;
1267 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1268 if( (err = dts_parse_asset_descriptor( info, &bits_pos )) < 0 )
1269 goto parse_fail;
1270 for( uint8_t nAuPr = 0; nAuPr < exss->nuNumAudioPresnt; nAuPr++ )
1271 exss->bBcCorePresent[nAuPr] = dts_bits_get( bits, 1, &bits_pos );
1272 for( uint8_t nAuPr = 0; nAuPr < exss->nuNumAudioPresnt; nAuPr++ )
1273 if( exss->bBcCorePresent[nAuPr] )
1275 exss->nuBcCoreExtSSIndex[nAuPr] = dts_bits_get( bits, 2, &bits_pos );
1276 exss->nuBcCoreAssetIndex[nAuPr] = dts_bits_get( bits, 3, &bits_pos );
1278 dts_bits_get( bits, nuExtSSHeaderSize * 8 - bits_pos, &bits_pos );
1279 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1281 /* Asset Data */
1282 dts_audio_asset_t *asset = &exss->asset[nAst];
1283 uint32_t asset_pos = bits_pos;
1284 switch( asset->nuCodingMode )
1286 case 0 : /* DTS-HD Coding Mode that may contain multiple coding components */
1288 if( asset->nuCoreExtensionMask & DTS_EXT_SUBSTREAM_CORE_FLAG )
1290 /* Core component */
1291 uint64_t core_pos = bits_pos;
1292 if( (err = dts_parse_exss_core( info, &bits_pos, asset )) < 0 )
1293 goto parse_fail;
1294 dts_bits_get( bits, asset->core.frame_size * 8 - (bits_pos - core_pos), &bits_pos );
1296 if( asset->nuCoreExtensionMask & DTS_EXT_SUBSTREAM_XBR_FLAG )
1298 /* XBR extension */
1299 uint64_t xbr_pos = bits_pos;
1300 if( (err = dts_parse_exss_xbr( info, &bits_pos )) < 0 )
1301 goto parse_fail;
1302 dts_bits_get( bits, asset->xbr_size * 8 - (bits_pos - xbr_pos), &bits_pos );
1304 if( asset->nuCoreExtensionMask & DTS_EXT_SUBSTREAM_XXCH_FLAG )
1306 /* XXCH extension */
1307 uint64_t xxch_pos = bits_pos;
1308 if( (err = dts_parse_exss_xxch( info, &bits_pos, &asset->core )) < 0 )
1309 goto parse_fail;
1310 dts_bits_get( bits, asset->core.xxch.size * 8 - (bits_pos - xxch_pos), &bits_pos );
1312 if( asset->nuCoreExtensionMask & DTS_EXT_SUBSTREAM_X96_FLAG )
1314 /* X96 extension */
1315 uint64_t x96_pos = bits_pos;
1316 if( (err = dts_parse_exss_x96( info, &bits_pos, &asset->core )) < 0 )
1317 goto parse_fail;
1318 dts_bits_get( bits, asset->x96_size * 8 - (bits_pos - x96_pos), &bits_pos );
1320 if( asset->nuCoreExtensionMask & DTS_EXT_SUBSTREAM_LBR_FLAG )
1322 /* LBR component */
1323 uint64_t lbr_pos = bits_pos;
1324 if( (err = dts_parse_exss_lbr( info, &bits_pos, asset )) < 0 )
1325 goto parse_fail;
1326 dts_bits_get( bits, asset->lbr.size * 8 - (bits_pos - lbr_pos), &bits_pos );
1328 if( asset->nuCoreExtensionMask & DTS_EXT_SUBSTREAM_XLL_FLAG )
1330 /* Lossless extension */
1331 uint64_t xll_pos = bits_pos;
1332 if( (err = dts_parse_exss_xll( info, &bits_pos, asset )) < 0 )
1333 goto parse_fail;
1334 dts_bits_get( bits, asset->xll.size * 8 - (bits_pos - xll_pos), &bits_pos );
1336 break;
1338 case 1 : /* DTS-HD Loss-less coding mode without CBR component */
1339 if( (err = dts_parse_exss_xll( info, &bits_pos, asset )) < 0 )
1340 goto parse_fail;
1341 break;
1342 case 2 : /* DTS-HD Low bit-rate mode */
1343 if( (err = dts_parse_exss_lbr( info, &bits_pos, asset )) < 0 )
1344 goto parse_fail;
1345 break;
1346 case 3 : /* Auxiliary coding mode */
1347 dts_bits_get( bits, asset->aux_size * 8, &bits_pos );
1348 break;
1350 dts_bits_get( bits, asset->size * 8 - (bits_pos - asset_pos), &bits_pos );
1352 dts_bits_get( bits, info->frame_size * 8 - bits_pos, &bits_pos );
1353 lsmash_bits_get_align( bits );
1354 if( info->exss_count < DTS_MAX_NUM_EXSS )
1355 info->exss_count += 1;
1356 return 0;
1357 parse_fail:
1358 lsmash_bits_get_align( bits );
1359 return err;
1362 dts_substream_type dts_get_substream_type( dts_info_t *info )
1364 if( lsmash_bs_get_remaining_buffer_size( info->bits->bs ) < 4 )
1365 return DTS_SUBSTREAM_TYPE_NONE;
1366 uint8_t *buffer = lsmash_bs_get_buffer_data( info->bits->bs );
1367 uint32_t syncword = LSMASH_4CC( buffer[0], buffer[1], buffer[2], buffer[3] );
1368 switch( syncword )
1370 case DTS_SYNCWORD_CORE :
1371 return DTS_SUBSTREAM_TYPE_CORE;
1372 case DTS_SYNCWORD_SUBSTREAM :
1373 return DTS_SUBSTREAM_TYPE_EXTENSION;
1374 default :
1375 return DTS_SUBSTREAM_TYPE_NONE;
1379 int dts_get_exss_index( dts_info_t *info, uint8_t *exss_index )
1381 if( lsmash_bs_get_remaining_buffer_size( info->bits->bs ) < 6 )
1382 return LSMASH_ERR_INVALID_DATA;
1383 *exss_index = lsmash_bs_show_byte( info->bits->bs, 5 ) >> 6;
1384 return 0;
1387 int dts_get_max_channel_count( dts_info_t *info )
1389 int max_channel_count = 0;
1390 for( int nExtSSIndex = 0; nExtSSIndex < DTS_MAX_NUM_EXSS; nExtSSIndex++ )
1392 dts_extension_info_t *exss = &info->exss[nExtSSIndex];
1393 for( uint8_t nAuPr = 0; nAuPr < exss->nuNumAudioPresnt; nAuPr++ )
1395 /* Get the channel layout of an audio presentation from a core component. */
1396 uint16_t channel_layout = 0;
1397 int channel_count = 0;
1398 if( exss->bBcCorePresent [nAuPr]
1399 && exss->nuBcCoreAssetIndex[nAuPr] < exss->nuNumAssets )
1401 dts_core_info_t *core = &info->exss[ exss->nuBcCoreExtSSIndex[nAuPr] ].asset[ exss->nuBcCoreAssetIndex[nAuPr] ].core;
1402 if( core->xxch.channel_layout | core->xxch.lower_planes )
1404 channel_layout = core->xxch.channel_layout;
1405 channel_count = lsmash_count_bits( core->xxch.lower_planes ); /* FIXME: Should we count these channels? */
1407 else
1408 channel_layout = core->channel_layout;
1410 channel_count += dts_get_channel_count_from_channel_layout( channel_layout );
1411 max_channel_count = LSMASH_MAX( max_channel_count, channel_count );
1412 /* Get the channel layouts of an audio presentation from extension substreams. */
1413 uint16_t ext_channel_layout = 0;
1414 uint16_t lbr_channel_layout = 0;
1415 uint16_t xll_channel_layout = 0;
1416 uint8_t xll_lower_channels = 0;
1417 for( int nSS = 0; nSS <= nExtSSIndex; nSS++ )
1418 if( (exss->nuActiveExSSMask[nAuPr] >> nSS) & 0x1 )
1419 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1420 if( (exss->nuActiveAssetMask[nAuPr][nSS] >> nAst) & 0x1 )
1422 dts_audio_asset_t *asset = &exss->asset[nAst];
1423 ext_channel_layout |= asset->channel_layout;
1424 lbr_channel_layout |= asset->lbr.channel_layout;
1425 xll_channel_layout |= asset->xll.channel_layout;
1426 xll_lower_channels |= asset->xll.lower_planes;
1428 /* Audio asset descriptors */
1429 channel_count = dts_get_channel_count_from_channel_layout( ext_channel_layout );
1430 max_channel_count = LSMASH_MAX( max_channel_count, channel_count );
1431 /* LBR components */
1432 channel_count = dts_get_channel_count_from_channel_layout( lbr_channel_layout );
1433 max_channel_count = LSMASH_MAX( max_channel_count, channel_count );
1434 /* Lossless extensions */
1435 channel_count = dts_get_channel_count_from_channel_layout( xll_channel_layout )
1436 + lsmash_count_bits( xll_lower_channels );
1437 max_channel_count = LSMASH_MAX( max_channel_count, channel_count );
1440 return max_channel_count;
1443 void dts_update_specific_param( dts_info_t *info )
1445 lsmash_dts_specific_parameters_t *param = &info->ddts_param;
1446 int exss_index_start = 0;
1447 for( int nExtSSIndex = 0; nExtSSIndex < DTS_MAX_NUM_EXSS; nExtSSIndex++ )
1449 dts_extension_info_t *exss = &info->exss[nExtSSIndex];
1450 if( exss->nuNumAudioPresnt && exss->nuNumAssets )
1452 exss_index_start = nExtSSIndex;
1453 break;
1456 /* DTSSamplingFrequency and FrameDuration */
1457 for( int nExtSSIndex = exss_index_start; nExtSSIndex < DTS_MAX_NUM_EXSS; nExtSSIndex++ )
1459 dts_extension_info_t *exss = &info->exss[nExtSSIndex];
1460 if( exss->nuNumAudioPresnt == 0 || exss->nuNumAssets == 0 )
1461 continue;
1462 if( param->DTSSamplingFrequency <= exss->sampling_frequency )
1464 param->DTSSamplingFrequency = exss->sampling_frequency;
1465 info->frame_duration = exss->frame_duration;
1467 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1469 dts_audio_asset_t *asset = &exss->asset[nAst];
1470 if( param->DTSSamplingFrequency <= asset->core.sampling_frequency )
1472 param->DTSSamplingFrequency = asset->core.sampling_frequency;
1473 info->frame_duration = asset->core.frame_duration;
1475 if( param->DTSSamplingFrequency <= asset->lbr.sampling_frequency )
1477 param->DTSSamplingFrequency = asset->lbr.sampling_frequency;
1478 info->frame_duration = asset->lbr.frame_duration;
1480 if( param->DTSSamplingFrequency <= asset->xll.sampling_frequency )
1482 param->DTSSamplingFrequency = asset->xll.sampling_frequency;
1483 info->frame_duration = asset->xll.frame_duration;
1487 param->FrameDuration = 0;
1488 for( uint32_t frame_duration = info->frame_duration >> 10; frame_duration; frame_duration >>= 1 )
1489 ++ param->FrameDuration;
1490 /* pcmSampleDepth */
1491 param->pcmSampleDepth = 0;
1492 for( int nExtSSIndex = exss_index_start; nExtSSIndex < DTS_MAX_NUM_EXSS; nExtSSIndex++ )
1494 dts_extension_info_t *exss = &info->exss[nExtSSIndex];
1495 if( exss->nuNumAudioPresnt == 0 || exss->nuNumAssets == 0 )
1496 continue;
1497 param->pcmSampleDepth = LSMASH_MAX( param->pcmSampleDepth, exss->bit_resolution );
1498 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1500 dts_audio_asset_t *asset = &exss->asset[nAst];
1501 param->pcmSampleDepth = LSMASH_MAX( param->pcmSampleDepth, asset->core.pcm_resolution );
1502 param->pcmSampleDepth = LSMASH_MAX( param->pcmSampleDepth, asset->lbr.sample_size );
1503 param->pcmSampleDepth = LSMASH_MAX( param->pcmSampleDepth, asset->xll.pcm_resolution );
1506 param->pcmSampleDepth = param->pcmSampleDepth > 16 ? 24 : 16;
1507 /* StreamConstruction */
1508 param->StreamConstruction = lsmash_dts_get_stream_construction( info->flags );
1509 /* CoreLFEPresent */
1510 param->CoreLFEPresent = !!(info->core.channel_layout & DTS_CHANNEL_LAYOUT_LFE1);
1511 /* CoreLayout */
1512 if( param->StreamConstruction == 0 /* Unknown */
1513 || param->StreamConstruction >= 17 /* No core substream */ )
1514 /* Use ChannelLayout. */
1515 param->CoreLayout = 31;
1516 else
1518 if( info->core.channel_arrangement != 1
1519 && info->core.channel_arrangement != 3
1520 && info->core.channel_arrangement <= 9 )
1521 param->CoreLayout = info->core.channel_arrangement;
1522 else
1523 /* Use ChannelLayout. */
1524 param->CoreLayout = 31;
1526 /* CoreSize
1527 * The specification says this field is the size of a core substream AU in bytes.
1528 * If we don't assume CoreSize is the copy of FSIZE, when FSIZE equals 0x3FFF, this field overflows and becomes 0. */
1529 param->CoreSize = info->core.frame_size ? LSMASH_MIN( info->core.frame_size - 1, 0x3FFF ) : 0;
1530 /* StereoDownmix */
1531 param->StereoDownmix = 0;
1532 for( int nExtSSIndex = exss_index_start; nExtSSIndex < DTS_MAX_NUM_EXSS; nExtSSIndex++ )
1534 dts_extension_info_t *exss = &info->exss[nExtSSIndex];
1535 param->StereoDownmix |= exss->stereo_downmix;
1536 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1538 param->StereoDownmix |= exss->asset[nAst].lbr.stereo_downmix;
1539 param->StereoDownmix |= exss->asset[nAst].xll.stereo_downmix;
1542 /* RepresentationType
1543 * Available only when core substream is absent and ChannelLayout is set to 0. */
1544 for( int nExtSSIndex = exss_index_start; nExtSSIndex < DTS_MAX_NUM_EXSS; nExtSSIndex++ )
1546 dts_extension_info_t *exss = &info->exss[nExtSSIndex];
1547 if( exss->nuNumAudioPresnt == 0 || exss->nuNumAssets == 0 )
1548 continue;
1549 for( uint8_t nAuPr = 0; nAuPr < exss->nuNumAudioPresnt; nAuPr++ )
1551 int asset_count = 0;
1552 for( int nSS = 0; nSS <= nExtSSIndex; nSS++ )
1553 if( (exss->nuActiveExSSMask[nAuPr] >> nSS) & 0x1 )
1554 asset_count += lsmash_count_bits( exss->nuActiveAssetMask[nAuPr][nSS] );
1555 if( asset_count > 1 )
1557 /* An audio presentation has mulple audio assets.
1558 * Audio asset designated for mixing with another audio asset. */
1559 param->RepresentationType = 0;
1560 nExtSSIndex = DTS_MAX_NUM_EXSS;
1561 break;
1563 for( int nSS = 0; nSS <= nExtSSIndex; nSS++ )
1564 if( (exss->nuActiveExSSMask[nAuPr] >> nSS) & 0x1 )
1565 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1566 if( (exss->nuActiveAssetMask[nAuPr][nSS] >> nAst) & 0x1 )
1568 dts_audio_asset_t *asset = &exss->asset[nAst];
1569 if( asset->nuRepresentationType == info->exss[exss_index_start].asset[0].nuRepresentationType )
1570 param->RepresentationType = asset->nuRepresentationType;
1571 else
1573 /* Detected different representation types. Use ChannelLayout. */
1574 param->RepresentationType = 0;
1575 nAuPr = exss->nuNumAudioPresnt;
1576 nExtSSIndex = DTS_MAX_NUM_EXSS;
1577 break;
1582 /* ChannelLayout
1583 * complete information on channels coded in the audio stream including core and extensions */
1584 param->ChannelLayout = 0;
1585 if( param->RepresentationType == 0 )
1586 for( int nExtSSIndex = exss_index_start; nExtSSIndex < DTS_MAX_NUM_EXSS; nExtSSIndex++ )
1588 dts_extension_info_t *exss = &info->exss[nExtSSIndex];
1589 if( exss->nuNumAudioPresnt == 0 || exss->nuNumAssets == 0 )
1590 continue;
1591 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1593 dts_audio_asset_t *asset = &exss->asset[nAst];
1594 param->ChannelLayout |= asset->channel_layout;
1595 param->ChannelLayout |= asset->core.channel_layout;
1596 param->ChannelLayout |= asset->core.xxch.channel_layout;
1597 param->ChannelLayout |= asset->lbr.channel_layout;
1598 param->ChannelLayout |= asset->xll.channel_layout;
1601 /* MultiAssetFlag
1602 * When multiple assets exist, the remaining parameters in the DTSSpecificBox only reflect the coding parameters of the first asset. */
1603 param->MultiAssetFlag = ((info->exss[0].nuNumAssets
1604 + info->exss[1].nuNumAssets
1605 + info->exss[2].nuNumAssets
1606 + info->exss[3].nuNumAssets) > 1);
1607 /* LBRDurationMod */
1608 param->LBRDurationMod = info->exss[exss_index_start].asset[0].lbr.duration_modifier;
1609 info->ddts_param_initialized = 1;
1610 /* DTSExpansionBox[] */
1611 for( int nExtSSIndex = 0; nExtSSIndex < DTS_MAX_NUM_EXSS; nExtSSIndex++ )
1613 dts_extension_info_t *exss = &info->exss[nExtSSIndex];
1614 for( uint8_t nAst = 0; nAst < exss->nuNumAssets; nAst++ )
1616 dts_audio_asset_t *asset = &exss->asset[nAst];
1617 if( asset->xll.dtsx_extension_present )
1619 /* Add DTSXParameters Box so that its presence indicates DTS:X extensions are present in the bitstream.
1620 * Here, treat as unknown whether dialog level control for dialog objects in the bitstream is present or not. */
1621 static const uint8_t dxpb[] =
1623 0x00, 0x00, 0x00, 0x0c, /* size = 12 */
1624 0x64, 0x78, 0x70, 0x62, /* type = 'dxpb' */
1625 0x00, 0x00, 0x00, 0x00 /* version = 0, flags = 0x000000 (no dialog_control_info_present flag) */
1627 lsmash_remove_dts_reserved_box( param );
1628 lsmash_append_dts_reserved_box( param, dxpb, sizeof(dxpb) );
1629 /* No error checks and just return. */
1630 return;
1636 int dts_construct_specific_parameters( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src )
1638 assert( dst && dst->data.structured && src && src->data.unstructured );
1639 if( src->size < DTS_SPECIFIC_BOX_MIN_LENGTH )
1640 return LSMASH_ERR_INVALID_DATA;
1641 lsmash_dts_specific_parameters_t *param = (lsmash_dts_specific_parameters_t *)dst->data.structured;
1642 uint8_t *data = src->data.unstructured;
1643 uint64_t size = LSMASH_GET_BE32( data );
1644 int dts_specific_box_min_length = DTS_SPECIFIC_BOX_MIN_LENGTH;
1645 data += ISOM_BASEBOX_COMMON_SIZE;
1646 if( size == 1 )
1648 size = LSMASH_GET_BE64( data );
1649 dts_specific_box_min_length += 8;
1650 data += 8;
1652 if( size != src->size )
1653 return LSMASH_ERR_INVALID_DATA;
1654 param->DTSSamplingFrequency = LSMASH_GET_BE32( &data[0] );
1655 param->maxBitrate = LSMASH_GET_BE32( &data[4] );
1656 param->avgBitrate = LSMASH_GET_BE32( &data[8] );
1657 param->pcmSampleDepth = LSMASH_GET_BYTE( &data[12] );
1658 param->FrameDuration = (data[13] >> 6) & 0x03;
1659 param->StreamConstruction = (data[13] >> 1) & 0x1F;
1660 param->CoreLFEPresent = data[13] & 0x01;
1661 param->CoreLayout = (data[14] >> 2) & 0x3F;
1662 param->CoreSize = ((data[14] & 0x03) << 12) | (data[15] << 4) | ((data[16] >> 4) & 0x0F);
1663 param->StereoDownmix = (data[16] >> 3) & 0x01;
1664 param->RepresentationType = data[16] & 0x07;
1665 param->ChannelLayout = (data[17] << 8) | data[18];
1666 param->MultiAssetFlag = (data[19] >> 7) & 0x01;
1667 param->LBRDurationMod = (data[19] >> 6) & 0x01;
1668 int reserved_box_present = ((data[19] >> 5) & 0x01) && (size > DTS_SPECIFIC_BOX_MIN_LENGTH);
1669 if( reserved_box_present )
1670 lsmash_append_dts_reserved_box( param, data + 20, size - DTS_SPECIFIC_BOX_MIN_LENGTH );
1671 return 0;
1674 int dts_copy_codec_specific( lsmash_codec_specific_t *dst, lsmash_codec_specific_t *src )
1676 assert( src && src->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED && src->data.structured );
1677 assert( dst && dst->format == LSMASH_CODEC_SPECIFIC_FORMAT_STRUCTURED && dst->data.structured );
1678 lsmash_dts_specific_parameters_t *src_data = (lsmash_dts_specific_parameters_t *)src->data.structured;
1679 lsmash_dts_specific_parameters_t *dst_data = (lsmash_dts_specific_parameters_t *)dst->data.structured;
1680 lsmash_remove_dts_reserved_box( dst_data );
1681 *dst_data = *src_data;
1682 if( !src_data->box || !src_data->box->data || src_data->box->size == 0 )
1684 lsmash_remove_dts_reserved_box( dst_data );
1685 return 0;
1687 return lsmash_append_dts_reserved_box( dst_data, src_data->box->data, src_data->box->size );
1690 int dts_print_codec_specific( FILE *fp, lsmash_file_t *file, isom_box_t *box, int level )
1692 assert( fp && file && box && (box->manager & LSMASH_BINARY_CODED_BOX) );
1693 int indent = level;
1694 lsmash_ifprintf( fp, indent++, "[%s: DTS Specific Box]\n", isom_4cc2str( box->type.fourcc ) );
1695 lsmash_ifprintf( fp, indent, "position = %"PRIu64"\n", box->pos );
1696 lsmash_ifprintf( fp, indent, "size = %"PRIu64"\n", box->size );
1697 if( box->size < DTS_SPECIFIC_BOX_MIN_LENGTH )
1698 return LSMASH_ERR_INVALID_DATA;
1699 uint8_t *data = box->binary;
1700 isom_skip_box_common( &data );
1701 uint32_t DTSSamplingFrequency = LSMASH_GET_BE32( &data[0] );
1702 uint32_t maxBitrate = LSMASH_GET_BE32( &data[4] );
1703 uint32_t avgBitrate = LSMASH_GET_BE32( &data[8] );
1704 uint8_t pcmSampleDepth = LSMASH_GET_BYTE( &data[12] );
1705 uint8_t FrameDuration = (data[13] >> 6) & 0x03;
1706 uint8_t StreamConstruction = (data[13] >> 1) & 0x1F;
1707 uint8_t CoreLFEPresent = data[13] & 0x01;
1708 uint8_t CoreLayout = (data[14] >> 2) & 0x3F;
1709 uint16_t CoreSize = ((data[14] & 0x03) << 12) | (data[15] << 4) | ((data[16] >> 4) & 0x0F);
1710 uint8_t StereoDownmix = (data[16] >> 3) & 0x01;
1711 uint8_t RepresentationType = data[16] & 0x07;
1712 uint16_t ChannelLayout = (data[17] << 8) | data[18];
1713 uint8_t MultiAssetFlag = (data[19] >> 7) & 0x01;
1714 uint8_t LBRDurationMod = (data[19] >> 6) & 0x01;
1715 uint8_t ReservedBoxPresent = (data[19] >> 5) & 0x01;
1716 uint8_t Reserved = data[19] & 0x1F;
1717 uint32_t frame_duration = 512 << FrameDuration;
1718 int construction_flags = StreamConstruction <= DTS_MAX_STREAM_CONSTRUCTION ? construction_info[StreamConstruction] : 0;
1719 static const char *core_layout_description[64] =
1721 "Mono (1/0)",
1722 "Undefined",
1723 "Stereo (2/0)",
1724 "Undefined",
1725 "LT,RT (2/0)",
1726 "L, C, R (3/0)",
1727 "L, R, S (2/1)",
1728 "L, C, R, S (3/1)",
1729 "L, R, LS, RS (2/2)",
1730 "L, C, R, LS, RS (3/2)",
1731 [31] = "use ChannelLayout"
1733 static const char *representation_type_description[8] =
1735 "Audio asset designated for mixing with another audio asset",
1736 "Reserved",
1737 "Lt/Rt Encoded for matrix surround decoding",
1738 "Audio processed for headphone playback",
1739 "Reserved",
1740 "Reserved",
1741 "Reserved",
1742 "Reserved"
1744 static const char *channel_layout_description[16] =
1746 "Center in front of listener",
1747 "Left/Right in front",
1748 "Left/Right surround on side in rear",
1749 "Low frequency effects subwoofer",
1750 "Center surround in rear",
1751 "Left/Right height in front",
1752 "Left/Right surround in rear",
1753 "Center Height in front",
1754 "Over the listener's head",
1755 "Between left/right and center in front",
1756 "Left/Right on side in front",
1757 "Left/Right surround on side",
1758 "Second low frequency effects subwoofer",
1759 "Left/Right height on side",
1760 "Center height in rear",
1761 "Left/Right height in rear"
1763 lsmash_ifprintf( fp, indent, "DTSSamplingFrequency = %"PRIu32" Hz\n", DTSSamplingFrequency );
1764 lsmash_ifprintf( fp, indent, "maxBitrate = %"PRIu32" bit/s\n", maxBitrate );
1765 lsmash_ifprintf( fp, indent, "avgBitrate = %"PRIu32" bit/s\n", avgBitrate );
1766 lsmash_ifprintf( fp, indent, "pcmSampleDepth = %"PRIu8" bits\n", pcmSampleDepth );
1767 lsmash_ifprintf( fp, indent, "FrameDuration = %"PRIu8" (%"PRIu32" samples)\n", FrameDuration, frame_duration );
1768 lsmash_ifprintf( fp, indent, "StreamConstruction = 0x%02"PRIx8"\n", StreamConstruction );
1769 if( construction_flags & (DTS_CORE_SUBSTREAM_CORE_FLAG | DTS_CORE_SUBSTREAM_XCH_FLAG | DTS_CORE_SUBSTREAM_X96_FLAG | DTS_CORE_SUBSTREAM_XXCH_FLAG) )
1771 lsmash_ifprintf( fp, indent + 1, "Core substream\n" );
1772 if( construction_flags & DTS_CORE_SUBSTREAM_CORE_FLAG )
1773 lsmash_ifprintf( fp, indent + 2, "Core\n" );
1774 if( construction_flags & DTS_CORE_SUBSTREAM_XCH_FLAG )
1775 lsmash_ifprintf( fp, indent + 2, "XCH\n" );
1776 if( construction_flags & DTS_CORE_SUBSTREAM_X96_FLAG )
1777 lsmash_ifprintf( fp, indent + 2, "X96\n" );
1778 if( construction_flags & DTS_CORE_SUBSTREAM_XXCH_FLAG )
1779 lsmash_ifprintf( fp, indent + 2, "XXCH\n" );
1781 if( construction_flags & (DTS_EXT_SUBSTREAM_CORE_FLAG | DTS_EXT_SUBSTREAM_XXCH_FLAG | DTS_EXT_SUBSTREAM_X96_FLAG
1782 | DTS_EXT_SUBSTREAM_XBR_FLAG | DTS_EXT_SUBSTREAM_XLL_FLAG | DTS_EXT_SUBSTREAM_LBR_FLAG) )
1784 lsmash_ifprintf( fp, indent + 1, "Extension substream\n" );
1785 if( construction_flags & DTS_EXT_SUBSTREAM_CORE_FLAG )
1786 lsmash_ifprintf( fp, indent + 2, "Core\n" );
1787 if( construction_flags & DTS_EXT_SUBSTREAM_XXCH_FLAG )
1788 lsmash_ifprintf( fp, indent + 2, "XXCH\n" );
1789 if( construction_flags & DTS_EXT_SUBSTREAM_X96_FLAG )
1790 lsmash_ifprintf( fp, indent + 2, "X96\n" );
1791 if( construction_flags & DTS_EXT_SUBSTREAM_XBR_FLAG )
1792 lsmash_ifprintf( fp, indent + 2, "XBR\n" );
1793 if( construction_flags & DTS_EXT_SUBSTREAM_XLL_FLAG )
1794 lsmash_ifprintf( fp, indent + 2, "XLL\n" );
1795 if( construction_flags & DTS_EXT_SUBSTREAM_LBR_FLAG )
1796 lsmash_ifprintf( fp, indent + 2, "LBR\n" );
1798 lsmash_ifprintf( fp, indent, "CoreLFEPresent = %s\n", CoreLFEPresent ? "1 (LFE exists)" : "0 (no LFE)" );
1799 if( core_layout_description[CoreLayout] )
1800 lsmash_ifprintf( fp, indent, "CoreLayout = %"PRIu8" (%s)\n", CoreLayout, core_layout_description[CoreLayout] );
1801 else
1802 lsmash_ifprintf( fp, indent, "CoreLayout = %"PRIu8" (Undefined)\n", CoreLayout );
1803 if( CoreSize )
1804 lsmash_ifprintf( fp, indent, "CoreSize = %"PRIu16"\n", CoreSize );
1805 else
1806 lsmash_ifprintf( fp, indent, "CoreSize = 0 (no core substream exists)\n" );
1807 lsmash_ifprintf( fp, indent, "StereoDownmix = %s\n", StereoDownmix ? "1 (embedded downmix present)" : "0 (no embedded downmix)" );
1808 lsmash_ifprintf( fp, indent, "RepresentationType = %"PRIu8" (%s)\n", RepresentationType, representation_type_description[RepresentationType] );
1809 lsmash_ifprintf( fp, indent, "ChannelLayout = 0x%04"PRIx16"\n", ChannelLayout );
1810 if( ChannelLayout )
1811 for( int i = 0; i < 16; i++ )
1812 if( (ChannelLayout >> i) & 0x01 )
1813 lsmash_ifprintf( fp, indent + 1, "%s\n", channel_layout_description[i] );
1814 lsmash_ifprintf( fp, indent, "MultiAssetFlag = %s\n", MultiAssetFlag ? "1 (multiple asset)" : "0 (single asset)" );
1815 if( LBRDurationMod )
1816 lsmash_ifprintf( fp, indent, "LBRDurationMod = 1 (%"PRIu32" -> %"PRIu32" samples)\n", frame_duration, (frame_duration * 3) / 2 );
1817 else
1818 lsmash_ifprintf( fp, indent, "LBRDurationMod = 0 (no LBR duration modifier)\n" );
1819 lsmash_ifprintf( fp, indent, "ReservedBoxPresent = %s\n", ReservedBoxPresent ? "1 (ReservedBox present)" : "0 (no ReservedBox)" );
1820 lsmash_ifprintf( fp, indent, "Reserved = 0x%02"PRIx8"\n", Reserved );
1821 return 0;
1824 int dts_update_bitrate( isom_stbl_t *stbl, isom_mdhd_t *mdhd, uint32_t sample_description_index )
1826 isom_audio_entry_t *dts_audio = (isom_audio_entry_t *)lsmash_get_entry_data( &stbl->stsd->list, sample_description_index );
1827 if( !dts_audio )
1828 return LSMASH_ERR_INVALID_DATA;
1829 isom_box_t *ext = isom_get_extension_box( &dts_audio->extensions, ISOM_BOX_TYPE_DDTS );
1830 if( !(ext && (ext->manager & LSMASH_BINARY_CODED_BOX) && ext->binary && ext->size >= 28) )
1831 return LSMASH_ERR_INVALID_DATA;
1832 uint32_t bufferSizeDB;
1833 uint32_t maxBitrate;
1834 uint32_t avgBitrate;
1835 int err = isom_calculate_bitrate_description( stbl, mdhd, &bufferSizeDB, &maxBitrate, &avgBitrate, sample_description_index );
1836 if( err < 0 )
1837 return err;
1838 if( !isom_is_variable_size( stbl ) )
1839 maxBitrate = avgBitrate;
1840 uint8_t *exdata = ext->binary + 12;
1841 LSMASH_SET_BE32( &exdata[0], maxBitrate );
1842 LSMASH_SET_BE32( &exdata[4], avgBitrate );
1843 return 0;