1 /*****************************************************************************
2 * ttml.c : TTML subtitles demux
3 *****************************************************************************
4 * Copyright (C) 2015-2017 VLC authors and VideoLAN
6 * Authors: Hugo Beauzée-Luyssen <hugo@beauzee.fr>
7 * Sushma Reddy <sushma.reddy@research.iiit.ac.in>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
28 #include <vlc_common.h>
29 #include <vlc_demux.h>
31 #include <vlc_strings.h>
32 #include <vlc_memory.h>
33 #include <vlc_memstream.h>
34 #include <vlc_es_out.h>
35 #include <vlc_charset.h> /* FromCharset */
41 #include "../codec/ttml/ttml.h"
43 //#define TTML_DEMUX_DEBUG
/* Members of the demuxer's private state, reached below through
 * p_demux->p_sys (typed demux_sys_t in the functions of this file).
 * NOTE(review): the enclosing struct declaration and the other members used
 * below (p_xml, p_es, b_slave, b_first_time, times) are not visible in this
 * listing. */
/* XML reader: created with xml_ReaderCreate() in tt_OpenDemux(), consumed by
 * ReadTTML() and released with xml_ReaderDelete() in tt_CloseDemux(). */
48 xml_reader_t
* p_reader
;
/* Demux clock: reported and overwritten by Control(), compared against the
 * stored timings in Demux(), which advances it by CLOCK_FREQ / 8 when the
 * demuxer is not a slave. */
50 int64_t i_next_demux_time
;
/* Root of the parsed document tree: built by ReadTTML() and destroyed with
 * tt_node_RecursiveDelete() in tt_CloseDemux(). */
54 tt_node_t
*p_rootnode
;
/* Timing context handed to tt_timings_Resolve(); tt_OpenDemux() initialises
 * it as TT_TIMINGS_PARALLEL with begin.base set to 0. */
56 tt_timings_t temporal_extent
;
59 * All timings are stored unique and ordered.
60 * Each one is the begin or end time of a sub sequence,
61 * so we use them as 'points of change' for output filtering.
/* Formats a tt_time_t as a newly allocated time string.
 *
 * Three output shapes are visible below:
 *   "HH:MM:SS.<fraction>"  fraction with leading zeroes kept and trailing
 *                          zeroes stripped,
 *   "HH:MM:SS:<frames>"    frames padded to at least two digits,
 *   "HH:MM:SS"             plain form.
 *
 * t is taken by value, so the caller's time is never modified.
 * Returns the string produced by asprintf(), which the caller owns, or NULL
 * when asprintf() reports failure (negative result).
 *
 * NOTE(review): several lines of this function are missing from this listing
 * (the declarations of i_ret and psz, the conditions choosing between the
 * three formats, some argument lists). The comments below only describe the
 * visible fragments. */
71 static char *tt_genTiming( tt_time_t t
)
/* Validity check on the input; the statement it guards is not visible here. */
73 if( !tt_time_Valid( &t
) )
/* f: sub-second remainder of the time base, in CLOCK_FREQ units. */
75 unsigned f
= t
.base
% CLOCK_FREQ
;
/* Hours / minutes / seconds split.
 * NOTE(review): these divisors treat t.base as whole seconds, so presumably
 * t.base is divided by CLOCK_FREQ on a line not shown here - TODO confirm. */
77 unsigned h
= t
.base
/ 3600;
78 unsigned m
= t
.base
% 3600 / 60;
79 unsigned s
= t
.base
% 60;
/* psz_lz starts on the terminating NUL of "000000", i.e. as an empty string. */
85 const char *lz
= "000000";
86 const char *psz_lz
= &lz
[6];
87 /* add leading zeroes */
/* Iterates once per decimal digit that f is short of CLOCK_FREQ's magnitude.
 * The loop body is not visible; presumably it moves psz_lz one character
 * back so that one more '0' is printed - TODO confirm. */
88 for( unsigned i
=10*f
; i
<CLOCK_FREQ
; i
*= 10 )
90 /* strip trailing zeroes */
/* Empty-bodied loop: all the work happens in the for() clauses. */
91 for( ; f
> 0 && (f
% 10) == 0; f
/= 10 );
/* Fractional form (the argument list is not visible in this listing). */
92 i_ret
= asprintf( &psz
, "%02u:%02u:%02u.%s%u",
/* Frame form: a single-digit frame count gets a "0" prefix. */
97 i_ret
= asprintf( &psz
, "%02u:%02u:%02u:%s%u",
98 h
, m
, s
, t
.frames
< 10 ? "0" : "", t
.frames
);
/* Plain form (the argument list is not visible in this listing). */
102 i_ret
= asprintf( &psz
, "%02u:%02u:%02u",
/* psz is only returned when asprintf() succeeded. */
106 return i_ret
< 0 ? NULL
: psz
;
/* Appends the attributes of p_node to p_stream as ` key="value"` pairs.
 *
 * Walks every bucket of the node's attribute dictionary and every entry
 * chained in that bucket. "begin", "end", "dur" and "timeContainer" are
 * singled out (the existing comments say they are removed); any other key
 * takes its value from the entry's p_value. After the walk, begin/end are
 * written from p_node->timings, formatted by tt_genTiming(), whenever the
 * corresponding time is valid.
 *
 * NOTE(review): the bodies of the special-case branches, every use of
 * b_timed_node after its declaration, and any release of the strings returned
 * by tt_genTiming() are not visible in this listing. */
109 static void tt_node_AttributesToText( struct vlc_memstream
*p_stream
, const tt_node_t
* p_node
)
111 bool b_timed_node
= false;
112 const vlc_dictionary_t
* p_attr_dict
= &p_node
->attr_dict
;
/* Outer loop: one iteration per hash bucket. */
113 for( int i
= 0; i
< p_attr_dict
->i_size
; ++i
)
/* Inner loop: follow the p_next chain of the bucket. */
115 for ( vlc_dictionary_entry_t
* p_entry
= p_attr_dict
->p_entries
[i
];
116 p_entry
!= NULL
; p_entry
= p_entry
->p_next
)
118 const char *psz_value
= NULL
;
/* Timing attributes of the source document. */
120 if( !strcmp(p_entry
->psz_key
, "begin") ||
121 !strcmp(p_entry
->psz_key
, "end") ||
122 !strcmp(p_entry
->psz_key
, "dur") )
125 /* will remove duration */
128 else if( !strcmp(p_entry
->psz_key
, "timeContainer") )
130 /* also remove sequential timings info (all abs now) */
/* Any other attribute: the dictionary value is used as a C string. */
135 psz_value
= (char const*)p_entry
->p_value
;
/* Entries without a value; the action taken is not visible here. */
138 if( psz_value
== NULL
)
/* NOTE(review): the value is printed as stored; no escaping step is visible
 * in this function. */
141 vlc_memstream_printf( p_stream
, " %s=\"%s\"",
142 p_entry
->psz_key
, psz_value
);
/* Re-emit begin/end from the node's timings rather than the original text. */
148 if( tt_time_Valid( &p_node
->timings
.begin
) )
/* NOTE(review): tt_genTiming() can return NULL and no check is visible
 * before the "%s" conversion - TODO confirm. */
150 char *psz
= tt_genTiming( p_node
->timings
.begin
);
151 vlc_memstream_printf( p_stream
, " begin=\"%s\"", psz
);
155 if( tt_time_Valid( &p_node
->timings
.end
) )
157 char *psz
= tt_genTiming( p_node
->timings
.end
);
158 vlc_memstream_printf( p_stream
, " end=\"%s\"", psz
);
/* Serialises p_basenode (and, recursively, its children) as markup text into
 * p_stream.
 *
 * Element nodes are written as "<name attrs>children</name>" when they have
 * children and as "<name attrs/>" otherwise; attributes come from
 * tt_node_AttributesToText(). Text nodes contribute their psz_text.
 *
 * playbacktime acts as a filter: when it is a valid time, an element whose
 * timings do not contain it is tested for before anything is written.
 * The same playbacktime is passed down to every child.
 *
 * NOTE(review): the statement executed for a filtered-out element and the
 * `else` keywords separating the branches are not visible in this listing;
 * presumably a filtered element is skipped together with its subtree. */
164 static void tt_node_ToText( struct vlc_memstream
*p_stream
, const tt_basenode_t
*p_basenode
,
165 const tt_time_t
*playbacktime
)
167 if( p_basenode
->i_type
== TT_NODE_TYPE_ELEMENT
)
169 const tt_node_t
*p_node
= (const tt_node_t
*) p_basenode
;
/* Time filter: only applies when a valid playback time was supplied. */
171 if( tt_time_Valid( playbacktime
) &&
172 !tt_timings_Contains( &p_node
->timings
, playbacktime
) )
/* Opening tag: '<', element name, then the attribute list. */
175 vlc_memstream_putc( p_stream
, '<' );
176 vlc_memstream_puts( p_stream
, p_node
->psz_node_name
);
178 tt_node_AttributesToText( p_stream
, p_node
);
180 if( tt_node_HasChild( p_node
) )
182 vlc_memstream_putc( p_stream
, '>' );
/* Debug builds embed the resolved begin/end of the node as a markup comment.
 * NOTE(review): tt_time_Convert() results are stored in int64_t elsewhere in
 * this file, so "%ld" may not match on targets where long is 32 bits. */
184 #ifdef TTML_DEMUX_DEBUG
185 vlc_memstream_printf( p_stream
, "<!-- starts %ld ends %ld -->",
186 tt_time_Convert( &p_node
->timings
.begin
),
187 tt_time_Convert( &p_node
->timings
.end
) );
/* Depth-first walk over the sibling list of children. */
190 for( const tt_basenode_t
*p_child
= p_node
->p_child
;
191 p_child
; p_child
= p_child
->p_next
)
193 tt_node_ToText( p_stream
, p_child
, playbacktime
);
/* Closing tag for an element that had children. */
196 vlc_memstream_printf( p_stream
, "</%s>", p_node
->psz_node_name
);
/* Self-closing form (presumably the branch for childless elements). */
199 vlc_memstream_puts( p_stream
, "/>" );
/* Text node: written as stored. */
203 const tt_textnode_t
*p_textnode
= (const tt_textnode_t
*) p_basenode
;
204 vlc_memstream_puts( p_stream
, p_textnode
->psz_text
);
/* Demuxer control callback (installed as p_demux->pf_control).
 *
 * Dispatches on i_query and reads the query-specific arguments from args.
 * Time and position queries are answered from p_sys->i_next_demux_time and
 * from the p_sys->times array; the last element of that array is used as
 * the reference end time.
 *
 * NOTE(review): several case labels, the declarations of pi64 / i64 / pf /
 * f / b, and all return / break statements are missing from this listing,
 * so the result of each query cannot be read from here. */
208 static int Control( demux_t
* p_demux
, int i_query
, va_list args
)
210 demux_sys_t
*p_sys
= p_demux
->p_sys
;
/* Stores true into the caller-supplied bool (case label not visible). */
218 *va_arg( args
, bool * ) = true;
/* Reports the current demux time (case label not visible). */
221 pi64
= va_arg( args
, int64_t * );
222 *pi64
= p_sys
->i_next_demux_time
;
/* Seek to a time (case label not visible): only attempted when at least one
 * timing is stored. The requested time is made relative to VLC_TS_0, looked
 * up with tt_timings_FindLowerIndex(), and the resulting index becomes the
 * current one. b_first_time is set again, which makes Demux() re-send a PCR
 * when not in slave mode. */
225 i64
= va_arg( args
, int64_t );
226 if( p_sys
->times
.i_count
)
228 tt_time_t t
= tt_time_Create( i64
- VLC_TS_0
);
229 size_t i_index
= tt_timings_FindLowerIndex( p_sys
->times
.p_array
,
230 p_sys
->times
.i_count
, t
, &b
);
231 p_sys
->times
.i_current
= i_index
;
232 p_sys
->b_first_time
= true;
/* An external clock drives this demuxer: record the time and enter slave
 * mode, in which Demux() no longer sets the PCR or advances the time. */
236 case DEMUX_SET_NEXT_DEMUX_TIME
:
237 i64
= va_arg( args
, int64_t );
238 p_sys
->i_next_demux_time
= i64
;
239 p_sys
->b_slave
= true;
/* Length = last stored timing minus the begin of the temporal extent. */
241 case DEMUX_GET_LENGTH
:
242 pi64
= va_arg( args
, int64_t * );
243 if( p_sys
->times
.i_count
)
245 tt_time_t t
= tt_time_Sub( p_sys
->times
.p_array
[p_sys
->times
.i_count
- 1],
246 p_sys
->temporal_extent
.begin
);
247 *pi64
= tt_time_Convert( &t
);
/* Position as a fraction. The value stored when the current index has run
 * past the array, and the one stored when the array is empty, are on lines
 * not visible here. */
251 case DEMUX_GET_POSITION
:
252 pf
= va_arg( args
, double * );
253 if( p_sys
->times
.i_current
>= p_sys
->times
.i_count
)
257 else if( p_sys
->times
.i_count
> 0 )
/* Ratio of the demux time to the last timing; the + 0.5 presumably keeps
 * the divisor non-zero when that timing converts to 0. */
259 i64
= tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_count
- 1] );
260 *pf
= (double) p_sys
->i_next_demux_time
/ (i64
+ 0.5);
/* Seek to a fraction of the last timing, then same index lookup as for the
 * time-based seek above. */
267 case DEMUX_SET_POSITION
:
268 f
= va_arg( args
, double );
269 if( p_sys
->times
.i_count
)
271 i64
= f
* tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_count
- 1] );
272 tt_time_t t
= tt_time_Create( i64
);
273 size_t i_index
= tt_timings_FindLowerIndex( p_sys
->times
.p_array
,
274 p_sys
->times
.i_count
, t
, &b
);
275 p_sys
->times
.i_current
= i_index
;
276 p_sys
->b_first_time
= true;
/* Pause and pace queries are delegated to the generic helper on the
 * underlying stream. */
280 case DEMUX_CAN_PAUSE
:
281 case DEMUX_SET_PAUSE_STATE
:
282 case DEMUX_CAN_CONTROL_PACE
:
283 return demux_vaControlHelper( p_demux
->s
, 0, -1, 0, 1, i_query
, args
);
/* Queries grouped together with no handling visible in this listing. */
285 case DEMUX_GET_PTS_DELAY
:
288 case DEMUX_GET_ATTACHMENTS
:
289 case DEMUX_GET_TITLE_INFO
:
290 case DEMUX_HAS_UNSUPPORTED_META
:
291 case DEMUX_CAN_RECORD
:
/* Reads the XML document through p_sys->p_reader and builds the node tree
 * rooted at p_sys->p_rootnode.
 *
 * Visible steps: fetch the next XML node and its name; on a start element,
 * test the name against "tt" and whether a root already exists, create the
 * root with tt_node_New() and load its content with tt_nodes_Read(); on an
 * end element, test it against the root's name. A final check looks at
 * whether a root node was produced at all.
 *
 * tt_OpenDemux() treats any result other than VLC_SUCCESS as a failure.
 *
 * NOTE(review): the surrounding loop/switch, the use made of b_empty, and
 * the statement taken by each test are not visible in this listing.
 * tt_node_NameCompare() is presumably strcmp-like (non-zero on mismatch) -
 * TODO confirm against its declaration. */
299 static int ReadTTML( demux_t
* p_demux
)
301 demux_sys_t
* p_sys
= p_demux
->p_sys
;
302 const char* psz_node_name
;
/* Advance the reader; psz_node_name receives the name of the node read. */
306 int i_type
= xml_ReaderNextNode( p_sys
->p_reader
, &psz_node_name
);
307 bool b_empty
= xml_ReaderIsEmptyElement( p_sys
->p_reader
);
/* Reader returned XML_READER_NONE or below (action not visible here). */
309 if( i_type
<= XML_READER_NONE
)
317 case XML_READER_STARTELEM
:
/* Name test against "tt", or a root node that already exists. */
318 if( tt_node_NameCompare( psz_node_name
, "tt" ) ||
319 p_sys
->p_rootnode
!= NULL
)
/* The root is created with a NULL parent. */
322 p_sys
->p_rootnode
= tt_node_New( p_sys
->p_reader
, NULL
, psz_node_name
);
/* Creation failure, or failure while reading the content of the root. */
325 if( !p_sys
->p_rootnode
||
326 tt_nodes_Read( p_sys
->p_reader
, p_sys
->p_rootnode
) != VLC_SUCCESS
)
330 case XML_READER_ENDELEM
:
/* End tag seen without a root, or name test against the root's name. */
331 if( !p_sys
->p_rootnode
||
332 tt_node_NameCompare( psz_node_name
, p_sys
->p_rootnode
->psz_node_name
) )
/* Final check: was a root node produced? */
339 if( p_sys
->p_rootnode
== NULL
)
/* Demux callback (installed as p_demux->pf_demux).
 *
 * For every stored timing that is not later than i_next_demux_time (the very
 * last timing excluded), serialises the document tree filtered at that
 * timing and sends it as one block whose PTS is the timing and whose length
 * runs up to one tick before the next timing.
 *
 * When the demuxer is not a slave, it also drives the clock: the PCR is set
 * on the first block after start or seek and after each call, and
 * i_next_demux_time advances by CLOCK_FREQ / 8 per call.
 *
 * Returns VLC_DEMUXER_EGENERIC if the memory stream cannot be opened,
 * VLC_DEMUXER_EOF once only the last timing remains, VLC_DEMUXER_SUCCESS
 * otherwise.
 *
 * NOTE(review): braces and a few lines are missing from this listing, so
 * the exact extent of each conditional is inferred from statement order. */
345 static int Demux( demux_t
* p_demux
)
347 demux_sys_t
* p_sys
= p_demux
->p_sys
;
349 /* Last one must be an end time */
350 while( p_sys
->times
.i_current
+ 1 < p_sys
->times
.i_count
&&
351 tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_current
] ) <= p_sys
->i_next_demux_time
)
/* Display interval: [current timing, next timing - 1]. */
353 const int64_t i_playbacktime
=
354 tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_current
] );
355 const int64_t i_playbackendtime
=
356 tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_current
+ 1] ) - 1;
/* First block after open or seek: publish the PCR before sending data. */
358 if ( !p_sys
->b_slave
&& p_sys
->b_first_time
)
360 es_out_SetPCR( p_demux
->out
, VLC_TS_0
+ i_playbacktime
);
361 p_sys
->b_first_time
= false;
364 struct vlc_memstream stream
;
366 if( vlc_memstream_open( &stream
) )
367 return VLC_DEMUXER_EGENERIC
;
/* Serialise the tree, filtered by the current timing. */
369 tt_node_ToText( &stream
, (tt_basenode_t
*) p_sys
->p_rootnode
,
370 &p_sys
->times
.p_array
[p_sys
->times
.i_current
] );
372 if( vlc_memstream_close( &stream
) == VLC_SUCCESS
)
/* The stream buffer is passed to block_heap_Alloc() to build the block.
 * NOTE(review): no NULL check of p_block is visible before it is
 * dereferenced below - TODO confirm against the full source. */
374 block_t
* p_block
= block_heap_Alloc( stream
.ptr
, stream
.length
);
378 p_block
->i_pts
= VLC_TS_0
+ i_playbacktime
;
379 p_block
->i_length
= i_playbackendtime
- i_playbacktime
;
381 es_out_Send( p_demux
->out
, p_sys
->p_es
, p_block
);
/* Move on to the next point of change. */
385 p_sys
->times
.i_current
++;
/* Self-clocked mode: publish the PCR and step the demux time. */
388 if ( !p_sys
->b_slave
)
390 es_out_SetPCR( p_demux
->out
, VLC_TS_0
+ p_sys
->i_next_demux_time
);
391 p_sys
->i_next_demux_time
+= CLOCK_FREQ
/ 8;
/* Only the final timing is left: nothing more to output. */
394 if( p_sys
->times
.i_current
+ 1 >= p_sys
->times
.i_count
)
395 return VLC_DEMUXER_EOF
;
397 return VLC_DEMUXER_SUCCESS
;
/* Module open function: probes the stream for a TTML document and, when it
 * looks like one, parses it and sets up the demuxer.
 *
 * Visible steps:
 *  1. Peek up to 2048 bytes and sniff the encoding from the first 8 bytes;
 *     UTF-16 input is converted with FromCharset().
 *  2. Look for a "tt" element name and for one of the known TTML namespace
 *     declarations in the peeked text.
 *  3. Allocate and initialise p_sys, create the XML parser and reader, run
 *     ReadTTML() and resolve all timings with tt_timings_Resolve().
 *  4. Install Demux / Control and register one SPU_ES elementary stream
 *     with codec VLC_CODEC_TTML.
 * A failure path calls tt_CloseDemux().
 *
 * NOTE(review): return statements, break statements in the switch, several
 * declarations (p_sys, fmt, t) and the release of psz_alloc are not visible
 * in this listing. */
400 int tt_OpenDemux( vlc_object_t
* p_this
)
402 demux_t
*p_demux
= (demux_t
*)p_this
;
405 const uint8_t *p_peek
;
406 ssize_t i_peek
= vlc_stream_Peek( p_demux
->s
, &p_peek
, 2048 );
/* 32 bytes or fewer (or a peek error) is flagged as the unlikely case; the
 * action taken is not visible here. */
407 if( unlikely( i_peek
<= 32 ) )
/* By default the probe runs directly on the peeked bytes. */
410 const char *psz_xml
= (const char *) p_peek
;
411 size_t i_xml
= i_peek
;
413 /* Try to probe without xml module/loading the full document */
414 char *psz_alloc
= NULL
;
/* The first 8 bytes, read big-endian, identify BOM and encoding. */
415 switch( GetQWBE(p_peek
) )
417 /* See RFC 3023 Part 4 */
418 case UINT64_C(0xFFFE3C003F007800): /* UTF16 BOM<? */
419 case UINT64_C(0xFFFE3C003F007400): /* UTF16 BOM<t */
420 case UINT64_C(0xFEFF003C003F0078): /* UTF16 BOM<? */
421 case UINT64_C(0xFEFF003C003F0074): /* UTF16 BOM<t */
422 psz_alloc
= FromCharset( "UTF-16", p_peek
, i_peek
);
424 case UINT64_C(0x3C003F0078006D00): /* UTF16-LE <?xm */
425 case UINT64_C(0x3C003F0074007400): /* UTF16-LE <tt */
426 psz_alloc
= FromCharset( "UTF-16LE", p_peek
, i_peek
);
428 case UINT64_C(0x003C003F0078006D): /* UTF16-BE <?xm */
429 case UINT64_C(0x003C003F00740074): /* UTF16-BE <tt */
430 psz_alloc
= FromCharset( "UTF-16BE", p_peek
, i_peek
);
432 case UINT64_C(0xEFBBBF3C3F786D6C): /* UTF8 BOM<?xml */
433 case UINT64_C(0x3C3F786D6C207665): /* UTF8 <?xml ve */
434 case UINT64_C(0xEFBBBF3C74742078): /* UTF8 BOM<tt x*/
/* 0x3C747420 is the ASCII byte sequence "<tt ". */
437 if(GetDWBE(p_peek
) != UINT32_C(0x3C747420)) /* tt node without xml document marker */
/* Length of the converted text (presumably only reached when psz_alloc was
 * set and psz_xml redirected to it on a line not shown - TODO confirm). */
444 i_xml
= strlen( psz_alloc
);
447 /* Simplified probing. Valid TTML must have a namespace declaration */
/* Conditions tested on the first "tt" found: it exists, is not at the very
 * start, does not end exactly at the end of the buffer, is not followed by
 * a letter, and is preceded by ':' or '<'.
 * NOTE(review): the pointer difference is taken against p_peek; if psz_xml
 * points into the converted buffer these are different objects - TODO
 * confirm. isalpha() is given a plain char; a cast to unsigned char is the
 * usual precaution. */
448 const char *psz_tt
= strnstr( psz_xml
, "tt", i_xml
);
449 if( !psz_tt
|| psz_tt
== psz_xml
||
450 ((size_t)(&psz_tt
[2] - (const char*)p_peek
)) == i_xml
|| isalpha(psz_tt
[2]) ||
451 (psz_tt
[-1] != ':' && psz_tt
[-1] != '<') )
/* Namespace URIs accepted as TTML, each written as an attribute value. */
458 const char * const rgsz
[] =
460 "=\"http://www.w3.org/ns/ttml\"",
461 "=\"http://www.w3.org/2004/11/ttaf1\"",
462 "=\"http://www.w3.org/2006/04/ttaf1\"",
463 "=\"http://www.w3.org/2006/10/ttaf1\"",
/* Stop at the first namespace string found.
 * NOTE(review): the search starts at psz_xml but is bounded by the length
 * remaining after psz_tt, so the tail of the buffer is not searched -
 * confirm whether psz_tt was meant to be the starting point. */
465 const char *psz_ns
= NULL
;
466 for( size_t i
=0; i
<ARRAY_SIZE(rgsz
) && !psz_ns
; i
++ )
468 psz_ns
= strnstr( psz_xml
, rgsz
[i
],
469 i_xml
- (psz_tt
- psz_xml
) );
/* Zero-initialised private state. */
476 p_demux
->p_sys
= p_sys
= calloc( 1, sizeof( *p_sys
) );
477 if( unlikely( p_sys
== NULL
) )
/* b_first_time makes Demux() publish a PCR with the first block. */
480 p_sys
->b_first_time
= true;
/* Root timing context: parallel container, all times initialised, then
 * begin forced to base 0. */
481 p_sys
->temporal_extent
.i_type
= TT_TIMINGS_PARALLEL
;
482 tt_time_Init( &p_sys
->temporal_extent
.begin
);
483 tt_time_Init( &p_sys
->temporal_extent
.end
);
484 tt_time_Init( &p_sys
->temporal_extent
.dur
);
485 p_sys
->temporal_extent
.begin
.base
= 0;
487 p_sys
->p_xml
= xml_Create( p_demux
);
/* The reader consumes the demuxer's input stream directly. */
491 p_sys
->p_reader
= xml_ReaderCreate( p_sys
->p_xml
, p_demux
->s
);
492 if( !p_sys
->p_reader
)
/* Outside debug builds the reader object is flagged quiet. */
495 #ifndef TTML_DEMUX_DEBUG
496 p_sys
->p_reader
->obj
.flags
|= OBJECT_FLAGS_QUIET
;
499 if( ReadTTML( p_demux
) != VLC_SUCCESS
)
/* Fills p_sys->times with the timings resolved from the whole tree. */
502 tt_timings_Resolve( (tt_basenode_t
*) p_sys
->p_rootnode
, &p_sys
->temporal_extent
,
503 &p_sys
->times
.p_array
, &p_sys
->times
.i_count
);
/* Debug builds dump the whole document: an invalid time is passed so that
 * tt_node_ToText() applies no time filter, and the text is NUL-terminated
 * before being logged. */
505 #ifdef TTML_DEMUX_DEBUG
507 struct vlc_memstream stream
;
509 if( vlc_memstream_open( &stream
) )
514 tt_node_ToText( &stream
, (tt_basenode_t
*)p_sys
->p_rootnode
, &t
/* invalid */ );
516 vlc_memstream_putc( &stream
, '\0' );
518 if( vlc_memstream_close( &stream
) == VLC_SUCCESS
)
520 msg_Dbg( p_demux
, "%s", stream
.ptr
);
/* Install the callbacks. */
526 p_demux
->pf_demux
= Demux
;
527 p_demux
->pf_control
= Control
;
/* One subtitle elementary stream carrying TTML. */
530 es_format_Init( &fmt
, SPU_ES
, VLC_CODEC_TTML
);
531 p_sys
->p_es
= es_out_Add( p_demux
->out
, &fmt
);
535 es_format_Clean( &fmt
);
/* Error path: release whatever was set up so far. */
540 tt_CloseDemux( p_demux
);
/* Releases the resources held in p_demux->p_sys: the parsed node tree, the
 * elementary stream, the XML reader and parser, and the timings array.
 * Also called from tt_OpenDemux() on its failure path.
 *
 * NOTE(review): the guards in front of es_out_Del() and xml_Delete(), and
 * the end of the function (including any release of p_sys itself), are not
 * visible in this listing. */
545 void tt_CloseDemux( demux_t
* p_demux
)
547 demux_sys_t
* p_sys
= p_demux
->p_sys
;
/* The tree may be absent if parsing never produced a root. */
549 if( p_sys
->p_rootnode
)
550 tt_node_RecursiveDelete( p_sys
->p_rootnode
);
553 es_out_Del( p_demux
->out
, p_sys
->p_es
);
/* The reader may be absent if xml_ReaderCreate() failed. */
555 if( p_sys
->p_reader
)
556 xml_ReaderDelete( p_sys
->p_reader
);
559 xml_Delete( p_sys
->p_xml
);
561 free( p_sys
->times
.p_array
);