/*****************************************************************************
 * ttml.c : TTML subtitles demux
 *****************************************************************************
 * Copyright (C) 2015-2017 VLC authors and VideoLAN
 *
 * Authors: Hugo Beauzée-Luyssen <hugo@beauzee.fr>
 *          Sushma Reddy <sushma.reddy@research.iiit.ac.in>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/
#include <vlc_common.h>
#include <vlc_demux.h>
#include <vlc_xml.h>
#include <vlc_strings.h>
#include <vlc_memstream.h>
#include <vlc_es_out.h>
#include <vlc_charset.h>          /* FromCharset */

#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "../codec/ttml/ttml.h"
42 //#define TTML_DEMUX_DEBUG
47 xml_reader_t
* p_reader
;
49 vlc_tick_t i_next_demux_time
;
53 tt_node_t
*p_rootnode
;
55 tt_timings_t temporal_extent
;
58 * All timings are stored unique and ordered.
59 * Being begin or end times of sub sequence,
60 * we use them as 'point of change' for output filtering.
70 static char *tt_genTiming( tt_time_t t
)
72 if( !tt_time_Valid( &t
) )
74 unsigned f
= t
.base
% CLOCK_FREQ
;
76 unsigned h
= t
.base
/ 3600;
77 unsigned m
= t
.base
% 3600 / 60;
78 unsigned s
= t
.base
% 60;
84 const char *lz
= "000000";
85 const char *psz_lz
= &lz
[6];
86 /* add leading zeroes */
87 for( unsigned i
=10*f
; i
<CLOCK_FREQ
; i
*= 10 )
89 /* strip trailing zeroes */
90 for( ; f
> 0 && (f
% 10) == 0; f
/= 10 );
91 i_ret
= asprintf( &psz
, "%02u:%02u:%02u.%s%u",
96 i_ret
= asprintf( &psz
, "%02u:%02u:%02u:%s%u",
97 h
, m
, s
, t
.frames
< 10 ? "0" : "", t
.frames
);
101 i_ret
= asprintf( &psz
, "%02u:%02u:%02u",
105 return i_ret
< 0 ? NULL
: psz
;
/**
 * Appends a string to the memory stream with XML special characters
 * replaced by entities (via vlc_xml_encode()).
 *
 * Nothing is written if the encoding step fails.
 *
 * \param p_stream output memory stream
 * \param psz      raw text to encode and append
 */
static void tt_MemstreamPutEntities( struct vlc_memstream *p_stream, const char *psz )
{
    char *psz_entities = vlc_xml_encode( psz );
    if( psz_entities )
    {
        vlc_memstream_puts( p_stream, psz_entities );
        free( psz_entities );
    }
}
118 static void tt_node_AttributesToText( struct vlc_memstream
*p_stream
, const tt_node_t
* p_node
)
120 bool b_timed_node
= false;
121 const vlc_dictionary_t
* p_attr_dict
= &p_node
->attr_dict
;
122 for( int i
= 0; i
< p_attr_dict
->i_size
; ++i
)
124 for ( vlc_dictionary_entry_t
* p_entry
= p_attr_dict
->p_entries
[i
];
125 p_entry
!= NULL
; p_entry
= p_entry
->p_next
)
127 const char *psz_value
= NULL
;
129 if( !strcmp(p_entry
->psz_key
, "begin") ||
130 !strcmp(p_entry
->psz_key
, "end") ||
131 !strcmp(p_entry
->psz_key
, "dur") )
134 /* will remove duration */
137 else if( !strcmp(p_entry
->psz_key
, "timeContainer") )
139 /* also remove sequential timings info (all abs now) */
144 psz_value
= p_entry
->p_value
;
147 if( psz_value
== NULL
)
150 vlc_memstream_printf( p_stream
, " %s=\"", p_entry
->psz_key
);
151 tt_MemstreamPutEntities( p_stream
, psz_value
);
152 vlc_memstream_putc( p_stream
, '"' );
158 if( tt_time_Valid( &p_node
->timings
.begin
) )
160 char *psz
= tt_genTiming( p_node
->timings
.begin
);
161 vlc_memstream_printf( p_stream
, " begin=\"%s\"", psz
);
165 if( tt_time_Valid( &p_node
->timings
.end
) )
167 char *psz
= tt_genTiming( p_node
->timings
.end
);
168 vlc_memstream_printf( p_stream
, " end=\"%s\"", psz
);
174 static void tt_node_ToText( struct vlc_memstream
*p_stream
, const tt_basenode_t
*p_basenode
,
175 const tt_time_t
*playbacktime
)
177 if( p_basenode
->i_type
== TT_NODE_TYPE_ELEMENT
)
179 const tt_node_t
*p_node
= (const tt_node_t
*) p_basenode
;
181 if( tt_time_Valid( playbacktime
) &&
182 !tt_timings_Contains( &p_node
->timings
, playbacktime
) )
185 vlc_memstream_putc( p_stream
, '<' );
186 tt_MemstreamPutEntities( p_stream
, p_node
->psz_node_name
);
188 tt_node_AttributesToText( p_stream
, p_node
);
190 if( tt_node_HasChild( p_node
) )
192 vlc_memstream_putc( p_stream
, '>' );
194 #ifdef TTML_DEMUX_DEBUG
195 vlc_memstream_printf( p_stream
, "<!-- starts %ld ends %ld -->",
196 tt_time_Convert( &p_node
->timings
.begin
),
197 tt_time_Convert( &p_node
->timings
.end
) );
200 for( const tt_basenode_t
*p_child
= p_node
->p_child
;
201 p_child
; p_child
= p_child
->p_next
)
203 tt_node_ToText( p_stream
, p_child
, playbacktime
);
206 vlc_memstream_puts( p_stream
, "</" );
207 tt_MemstreamPutEntities( p_stream
, p_node
->psz_node_name
);
208 vlc_memstream_putc( p_stream
, '>' );
211 vlc_memstream_puts( p_stream
, "/>" );
215 const tt_textnode_t
*p_textnode
= (const tt_textnode_t
*) p_basenode
;
216 tt_MemstreamPutEntities( p_stream
, p_textnode
->psz_text
);
220 static int Control( demux_t
* p_demux
, int i_query
, va_list args
)
222 demux_sys_t
*p_sys
= p_demux
->p_sys
;
230 *va_arg( args
, bool * ) = true;
233 *va_arg( args
, vlc_tick_t
* ) = p_sys
->i_next_demux_time
;
236 if( p_sys
->times
.i_count
)
238 tt_time_t t
= tt_time_Create( va_arg( args
, vlc_tick_t
) - VLC_TICK_0
);
239 size_t i_index
= tt_timings_FindLowerIndex( p_sys
->times
.p_array
,
240 p_sys
->times
.i_count
, t
, &b
);
241 p_sys
->times
.i_current
= i_index
;
242 p_sys
->b_first_time
= true;
246 case DEMUX_SET_NEXT_DEMUX_TIME
:
247 p_sys
->i_next_demux_time
= va_arg( args
, vlc_tick_t
);
248 p_sys
->b_slave
= true;
250 case DEMUX_GET_LENGTH
:
251 if( p_sys
->times
.i_count
)
253 tt_time_t t
= tt_time_Sub( p_sys
->times
.p_array
[p_sys
->times
.i_count
- 1],
254 p_sys
->temporal_extent
.begin
);
255 *va_arg( args
, vlc_tick_t
* ) = tt_time_Convert( &t
);
259 case DEMUX_GET_POSITION
:
260 pf
= va_arg( args
, double * );
261 if( p_sys
->times
.i_current
>= p_sys
->times
.i_count
)
265 else if( p_sys
->times
.i_count
> 0 )
267 i64
= tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_count
- 1] );
268 *pf
= (double) p_sys
->i_next_demux_time
/ (i64
+ VLC_TICK_FROM_MS(500));
275 case DEMUX_SET_POSITION
:
276 f
= va_arg( args
, double );
277 if( p_sys
->times
.i_count
)
279 i64
= f
* tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_count
- 1] );
280 tt_time_t t
= tt_time_Create( i64
);
281 size_t i_index
= tt_timings_FindLowerIndex( p_sys
->times
.p_array
,
282 p_sys
->times
.i_count
, t
, &b
);
283 p_sys
->times
.i_current
= i_index
;
284 p_sys
->b_first_time
= true;
288 case DEMUX_CAN_PAUSE
:
289 case DEMUX_SET_PAUSE_STATE
:
290 case DEMUX_CAN_CONTROL_PACE
:
291 return demux_vaControlHelper( p_demux
->s
, 0, -1, 0, 1, i_query
, args
);
293 case DEMUX_GET_PTS_DELAY
:
296 case DEMUX_GET_ATTACHMENTS
:
297 case DEMUX_GET_TITLE_INFO
:
298 case DEMUX_HAS_UNSUPPORTED_META
:
299 case DEMUX_CAN_RECORD
:
307 static int ReadTTML( demux_t
* p_demux
)
309 demux_sys_t
* p_sys
= p_demux
->p_sys
;
310 const char* psz_node_name
;
314 int i_type
= xml_ReaderNextNode( p_sys
->p_reader
, &psz_node_name
);
315 bool b_empty
= xml_ReaderIsEmptyElement( p_sys
->p_reader
);
317 if( i_type
<= XML_READER_NONE
)
325 case XML_READER_STARTELEM
:
326 if( tt_node_NameCompare( psz_node_name
, "tt" ) ||
327 p_sys
->p_rootnode
!= NULL
)
330 p_sys
->p_rootnode
= tt_node_New( p_sys
->p_reader
, NULL
, psz_node_name
);
333 if( !p_sys
->p_rootnode
||
334 tt_nodes_Read( p_sys
->p_reader
, p_sys
->p_rootnode
) != VLC_SUCCESS
)
338 case XML_READER_ENDELEM
:
339 if( !p_sys
->p_rootnode
||
340 tt_node_NameCompare( psz_node_name
, p_sys
->p_rootnode
->psz_node_name
) )
347 if( p_sys
->p_rootnode
== NULL
)
353 static int Demux( demux_t
* p_demux
)
355 demux_sys_t
* p_sys
= p_demux
->p_sys
;
357 /* Last one must be an end time */
358 while( p_sys
->times
.i_current
+ 1 < p_sys
->times
.i_count
&&
359 tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_current
] ) <= p_sys
->i_next_demux_time
)
361 const vlc_tick_t i_playbacktime
=
362 tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_current
] );
363 const vlc_tick_t i_playbackendtime
=
364 tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_current
+ 1] ) - 1;
366 if ( !p_sys
->b_slave
&& p_sys
->b_first_time
)
368 es_out_SetPCR( p_demux
->out
, VLC_TICK_0
+ i_playbacktime
);
369 p_sys
->b_first_time
= false;
372 struct vlc_memstream stream
;
374 if( vlc_memstream_open( &stream
) )
375 return VLC_DEMUXER_EGENERIC
;
377 tt_node_ToText( &stream
, (tt_basenode_t
*) p_sys
->p_rootnode
,
378 &p_sys
->times
.p_array
[p_sys
->times
.i_current
] );
380 if( vlc_memstream_close( &stream
) == VLC_SUCCESS
)
382 block_t
* p_block
= block_heap_Alloc( stream
.ptr
, stream
.length
);
386 p_block
->i_pts
= VLC_TICK_0
+ i_playbacktime
;
387 p_block
->i_length
= i_playbackendtime
- i_playbacktime
;
389 es_out_Send( p_demux
->out
, p_sys
->p_es
, p_block
);
393 p_sys
->times
.i_current
++;
396 if ( !p_sys
->b_slave
)
398 es_out_SetPCR( p_demux
->out
, VLC_TICK_0
+ p_sys
->i_next_demux_time
);
399 p_sys
->i_next_demux_time
+= VLC_TICK_FROM_MS(125);
402 if( p_sys
->times
.i_current
+ 1 >= p_sys
->times
.i_count
)
403 return VLC_DEMUXER_EOF
;
405 return VLC_DEMUXER_SUCCESS
;
408 int tt_OpenDemux( vlc_object_t
* p_this
)
410 demux_t
*p_demux
= (demux_t
*)p_this
;
413 const uint8_t *p_peek
;
414 ssize_t i_peek
= vlc_stream_Peek( p_demux
->s
, &p_peek
, 2048 );
415 if( unlikely( i_peek
<= 32 ) )
418 const char *psz_xml
= (const char *) p_peek
;
419 size_t i_xml
= i_peek
;
421 /* Try to probe without xml module/loading the full document */
422 char *psz_alloc
= NULL
;
423 switch( GetQWBE(p_peek
) )
425 /* See RFC 3023 Part 4 */
426 case UINT64_C(0xFFFE3C003F007800): /* UTF16 BOM<? */
427 case UINT64_C(0xFFFE3C003F007400): /* UTF16 BOM<t */
428 case UINT64_C(0xFEFF003C003F0078): /* UTF16 BOM<? */
429 case UINT64_C(0xFEFF003C003F0074): /* UTF16 BOM<t */
430 psz_alloc
= FromCharset( "UTF-16", p_peek
, i_peek
);
432 case UINT64_C(0x3C003F0078006D00): /* UTF16-LE <?xm */
433 case UINT64_C(0x3C003F0074007400): /* UTF16-LE <tt */
434 psz_alloc
= FromCharset( "UTF-16LE", p_peek
, i_peek
);
436 case UINT64_C(0x003C003F0078006D): /* UTF16-BE <?xm */
437 case UINT64_C(0x003C003F00740074): /* UTF16-BE <tt */
438 psz_alloc
= FromCharset( "UTF-16BE", p_peek
, i_peek
);
440 case UINT64_C(0xEFBBBF3C3F786D6C): /* UTF8 BOM<?xml */
441 case UINT64_C(0x3C3F786D6C207665): /* UTF8 <?xml ve */
442 case UINT64_C(0xEFBBBF3C74742078): /* UTF8 BOM<tt x*/
445 if(GetDWBE(p_peek
) != UINT32_C(0x3C747420)) /* tt node without xml document marker */
452 i_xml
= strlen( psz_alloc
);
455 /* Simplified probing. Valid TTML must have a namespace declaration */
456 const char *psz_tt
= strnstr( psz_xml
, "tt", i_xml
);
457 if( !psz_tt
|| psz_tt
== psz_xml
||
458 ((size_t)(&psz_tt
[2] - (const char*)p_peek
)) == i_xml
|| isalpha(psz_tt
[2]) ||
459 (psz_tt
[-1] != ':' && psz_tt
[-1] != '<') )
466 const char * const rgsz
[] =
468 "=\"http://www.w3.org/ns/ttml\"",
469 "=\"http://www.w3.org/2004/11/ttaf1\"",
470 "=\"http://www.w3.org/2006/04/ttaf1\"",
471 "=\"http://www.w3.org/2006/10/ttaf1\"",
473 const char *psz_ns
= NULL
;
474 for( size_t i
=0; i
<ARRAY_SIZE(rgsz
) && !psz_ns
; i
++ )
476 psz_ns
= strnstr( psz_xml
, rgsz
[i
],
477 i_xml
- (psz_tt
- psz_xml
) );
484 p_demux
->p_sys
= p_sys
= calloc( 1, sizeof( *p_sys
) );
485 if( unlikely( p_sys
== NULL
) )
488 p_sys
->b_first_time
= true;
489 p_sys
->temporal_extent
.i_type
= TT_TIMINGS_PARALLEL
;
490 tt_time_Init( &p_sys
->temporal_extent
.begin
);
491 tt_time_Init( &p_sys
->temporal_extent
.end
);
492 tt_time_Init( &p_sys
->temporal_extent
.dur
);
493 p_sys
->temporal_extent
.begin
.base
= 0;
495 p_sys
->p_xml
= xml_Create( p_demux
);
499 p_sys
->p_reader
= xml_ReaderCreate( p_sys
->p_xml
, p_demux
->s
);
500 if( !p_sys
->p_reader
)
503 #ifndef TTML_DEMUX_DEBUG
504 p_sys
->p_reader
->obj
.logger
= NULL
;
507 if( ReadTTML( p_demux
) != VLC_SUCCESS
)
510 tt_timings_Resolve( (tt_basenode_t
*) p_sys
->p_rootnode
, &p_sys
->temporal_extent
,
511 &p_sys
->times
.p_array
, &p_sys
->times
.i_count
);
513 #ifdef TTML_DEMUX_DEBUG
515 struct vlc_memstream stream
;
517 if( vlc_memstream_open( &stream
) )
522 tt_node_ToText( &stream
, (tt_basenode_t
*)p_sys
->p_rootnode
, &t
/* invalid */ );
524 vlc_memstream_putc( &stream
, '\0' );
526 if( vlc_memstream_close( &stream
) == VLC_SUCCESS
)
528 msg_Dbg( p_demux
, "%s", stream
.ptr
);
534 p_demux
->pf_demux
= Demux
;
535 p_demux
->pf_control
= Control
;
538 es_format_Init( &fmt
, SPU_ES
, VLC_CODEC_TTML
);
540 p_sys
->p_es
= es_out_Add( p_demux
->out
, &fmt
);
544 es_format_Clean( &fmt
);
549 tt_CloseDemux( p_this
);
554 void tt_CloseDemux( vlc_object_t
* p_this
)
556 demux_t
*p_demux
= (demux_t
*)p_this
;
557 demux_sys_t
* p_sys
= p_demux
->p_sys
;
559 if( p_sys
->p_rootnode
)
560 tt_node_RecursiveDelete( p_sys
->p_rootnode
);
563 es_out_Del( p_demux
->out
, p_sys
->p_es
);
565 if( p_sys
->p_reader
)
566 xml_ReaderDelete( p_sys
->p_reader
);
569 xml_Delete( p_sys
->p_xml
);
571 free( p_sys
->times
.p_array
);