1 /*****************************************************************************
2 * ttml.c : TTML subtitles demux
3 *****************************************************************************
4 * Copyright (C) 2015-2017 VLC authors and VideoLAN
6 * Authors: Hugo Beauzée-Luyssen <hugo@beauzee.fr>
7 * Sushma Reddy <sushma.reddy@research.iiit.ac.in>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
28 #include <vlc_common.h>
29 #include <vlc_demux.h>
31 #include <vlc_strings.h>
32 #include <vlc_memory.h>
33 #include <vlc_memstream.h>
34 #include <vlc_es_out.h>
35 #include <vlc_charset.h> /* FromCharset */
40 #include "../codec/ttml/ttml.h"
42 //#define TTML_DEMUX_DEBUG
/* Members of the demuxer private state (reached through p_demux->p_sys in
 * this file). NOTE(review): the enclosing declaration and several other
 * members (e.g. p_xml, p_es, b_slave, b_first_time, times) are not visible
 * in this excerpt. */
/* XML reader created in tt_OpenDemux and consumed by ReadTTML. */
47 xml_reader_t
* p_reader
;
/* Time bound compared against the stored time points in Demux(); Control()
 * copies it out to the caller and DEMUX_SET_NEXT_DEMUX_TIME overwrites it. */
49 int64_t i_next_demux_time
;
/* Root of the parsed document tree: created with tt_node_New in ReadTTML,
 * released with tt_node_RecursiveDelete in tt_CloseDemux. */
53 tt_node_t
*p_rootnode
;
/* Timing extent handed to tt_timings_Resolve; its begin value is subtracted
 * from the last time point to answer DEMUX_GET_LENGTH. */
55 tt_timings_t temporal_extent
;
58 * All timings are stored uniquely and in order.
59 * Being the begin or end times of sub sequences,
60 * we use them as 'points of change' for output filtering.
/* Formats a tt_time_t as a clock-time string.
 *
 * Three output shapes are visible below:
 *   "%02u:%02u:%02u.%s%u"  hours:minutes:seconds plus a fractional part,
 *   "%02u:%02u:%02u:%s%u"  hours:minutes:seconds plus a frame count
 *                          (zero-padded to two digits),
 *   "%02u:%02u:%02u"       hours:minutes:seconds only.
 *
 * Returns the string allocated by asprintf, or NULL when asprintf reports
 * failure (i_ret < 0).
 *
 * NOTE(review): this excerpt is missing several lines of the function
 * (braces, the conditions selecting between the three formats, the
 * declarations of i_ret/psz and part of the asprintf argument lists), so the
 * selection logic cannot be confirmed from here. */
70 static char *tt_genTiming( tt_time_t t
)
/* Invalid input is tested first; the statement executed in that case is not
 * visible in this excerpt. */
72 if( !tt_time_Valid( &t
) )
/* f: sub-second remainder of t.base in CLOCK_FREQ units. */
74 unsigned f
= t
.base
% CLOCK_FREQ
;
/* h/m/s treat t.base as a count of seconds; presumably t.base was divided by
 * CLOCK_FREQ on a line missing from this excerpt -- TODO confirm. */
76 unsigned h
= t
.base
/ 3600;
77 unsigned m
= t
.base
% 3600 / 60;
78 unsigned s
= t
.base
% 60;
/* psz_lz initially points at the terminating NUL of lz, i.e. an empty
 * zero-prefix. */
84 const char *lz
= "000000";
85 const char *psz_lz
= &lz
[6];
86 /* add leading zeroes */
/* Iterates once per decimal digit that f lacks relative to CLOCK_FREQ; the
 * loop body is not visible here (presumably it steps psz_lz back by one). */
87 for( unsigned i
=10*f
; i
<CLOCK_FREQ
; i
*= 10 )
89 /* strip trailing zeroes */
/* Empty-bodied loop: divides f by 10 while it is non-zero and divisible. */
90 for( ; f
> 0 && (f
% 10) == 0; f
/= 10 );
91 i_ret
= asprintf( &psz
, "%02u:%02u:%02u.%s%u",
96 i_ret
= asprintf( &psz
, "%02u:%02u:%02u:%s%u",
97 h
, m
, s
, t
.frames
< 10 ? "0" : "", t
.frames
);
101 i_ret
= asprintf( &psz
, "%02u:%02u:%02u",
/* psz is only meaningful when asprintf succeeded. */
105 return i_ret
< 0 ? NULL
: psz
;
/* Writes the attributes of an element node to p_stream as a sequence of
 *  key="value"  pairs (each preceded by a space).
 *
 * p_stream: memory stream receiving the text.
 * p_node:   element whose attr_dict and timings are serialized.
 *
 * The attribute dictionary is walked bucket by bucket. The keys "begin",
 * "end", "dur" and "timeContainer" are special-cased; per the existing
 * comments, duration and sequential timing information are dropped. After
 * the dictionary walk, begin= and end= are written again from
 * p_node->timings through tt_genTiming whenever those times are valid.
 *
 * NOTE(review): braces and some statements are missing from this excerpt,
 * including whatever uses b_timed_node and whatever happens when psz_value
 * is NULL. */
108 static void tt_node_AttributesToText( struct vlc_memstream
*p_stream
, const tt_node_t
* p_node
)
110 bool b_timed_node
= false;
111 const vlc_dictionary_t
* p_attr_dict
= &p_node
->attr_dict
;
/* Outer loop: every hash bucket of the dictionary. */
112 for( int i
= 0; i
< p_attr_dict
->i_size
; ++i
)
/* Inner loop: every entry chained in bucket i. */
114 for ( vlc_dictionary_entry_t
* p_entry
= p_attr_dict
->p_entries
[i
];
115 p_entry
!= NULL
; p_entry
= p_entry
->p_next
)
/* psz_value stays NULL unless a branch below assigns it. */
117 const char *psz_value
= NULL
;
/* Timing attributes: the branch body is not visible in this excerpt. */
119 if( !strcmp(p_entry
->psz_key
, "begin") ||
120 !strcmp(p_entry
->psz_key
, "end") ||
121 !strcmp(p_entry
->psz_key
, "dur") )
124 /* will remove duration */
127 else if( !strcmp(p_entry
->psz_key
, "timeContainer") )
129 /* also remove sequential timings info (all abs now) */
/* The stored dictionary value is used as the attribute text. */
134 psz_value
= (char const*)p_entry
->p_value
;
/* NULL value: the statement taken here is not visible (presumably the entry
 * is skipped). */
137 if( psz_value
== NULL
)
/* The value is written verbatim between double quotes. */
140 vlc_memstream_printf( p_stream
, " %s=\"%s\"",
141 p_entry
->psz_key
, psz_value
);
/* Regenerate begin= from the node timings when valid.
 * NOTE(review): tt_genTiming can return NULL (asprintf failure) and psz is
 * passed to "%s" without a visible check; no release of psz is visible in
 * this excerpt either -- confirm against the full file. */
147 if( tt_time_Valid( &p_node
->timings
.begin
) )
149 char *psz
= tt_genTiming( p_node
->timings
.begin
);
150 vlc_memstream_printf( p_stream
, " begin=\"%s\"", psz
);
/* Same treatment for end=. */
154 if( tt_time_Valid( &p_node
->timings
.end
) )
156 char *psz
= tt_genTiming( p_node
->timings
.end
);
157 vlc_memstream_printf( p_stream
, " end=\"%s\"", psz
);
/* Serializes a node, and recursively its children, into p_stream as XML
 * text.
 *
 * p_stream:     memory stream receiving the text.
 * p_basenode:   node to serialize; handled as a tt_node_t when its i_type is
 *               TT_NODE_TYPE_ELEMENT, otherwise as a tt_textnode_t.
 * playbacktime: when valid, element timings are tested against it with
 *               tt_timings_Contains.
 *
 * Element output is "<name attrs>children</name>" when the node has children
 * and "<name attrs/>" otherwise. Text nodes are written as-is.
 *
 * NOTE(review): braces, else keywords and the statement executed when an
 * element does not contain playbacktime are missing from this excerpt
 * (presumably that element is skipped). */
163 static void tt_node_ToText( struct vlc_memstream
*p_stream
, const tt_basenode_t
*p_basenode
,
164 const tt_time_t
*playbacktime
)
166 if( p_basenode
->i_type
== TT_NODE_TYPE_ELEMENT
)
168 const tt_node_t
*p_node
= (const tt_node_t
*) p_basenode
;
/* Time filter: only applies when playbacktime is valid. */
170 if( tt_time_Valid( playbacktime
) &&
171 !tt_timings_Contains( &p_node
->timings
, playbacktime
) )
/* Opening tag and attributes. */
174 vlc_memstream_putc( p_stream
, '<' );
175 vlc_memstream_puts( p_stream
, p_node
->psz_node_name
);
177 tt_node_AttributesToText( p_stream
, p_node
);
179 if( tt_node_HasChild( p_node
) )
181 vlc_memstream_putc( p_stream
, '>' );
/* Debug builds annotate each element with its begin/end times.
 * NOTE(review): tt_time_Convert results are stored in int64_t elsewhere in
 * this file, so "%ld" may not match on platforms where long is 32-bit. */
183 #ifdef TTML_DEMUX_DEBUG
184 vlc_memstream_printf( p_stream
, "<!-- starts %ld ends %ld -->",
185 tt_time_Convert( &p_node
->timings
.begin
),
186 tt_time_Convert( &p_node
->timings
.end
) );
/* Recurse into every child with the same playback time. */
189 for( const tt_basenode_t
*p_child
= p_node
->p_child
;
190 p_child
; p_child
= p_child
->p_next
)
192 tt_node_ToText( p_stream
, p_child
, playbacktime
);
/* Closing tag. */
195 vlc_memstream_printf( p_stream
, "</%s>", p_node
->psz_node_name
);
/* Childless element: self-closing form. */
198 vlc_memstream_puts( p_stream
, "/>" );
/* Non-element node: emit its text verbatim. */
202 const tt_textnode_t
*p_textnode
= (const tt_textnode_t
*) p_basenode
;
203 vlc_memstream_puts( p_stream
, p_textnode
->psz_text
);
/* Demuxer control callback (installed as p_demux->pf_control).
 *
 * p_demux: demuxer instance; its p_sys holds the state used here.
 * i_query: request identifier.
 * args:    request-specific arguments, read with va_arg.
 *
 * Case labels visible in this excerpt: DEMUX_SET_NEXT_DEMUX_TIME,
 * DEMUX_GET_LENGTH, DEMUX_GET_POSITION, DEMUX_SET_POSITION,
 * DEMUX_GET_PTS_DELAY, DEMUX_GET_ATTACHMENTS, DEMUX_GET_TITLE_INFO,
 * DEMUX_HAS_UNSUPPORTED_META and DEMUX_CAN_RECORD.
 *
 * NOTE(review): the switch statement, several case labels, the local
 * declarations (pi64, i64, pf, f, b) and every return statement are missing
 * from this excerpt, so return values cannot be documented from here. */
207 static int Control( demux_t
* p_demux
, int i_query
, va_list args
)
209 demux_sys_t
*p_sys
= p_demux
->p_sys
;
/* Boolean query answered with true (case label not visible). */
217 *va_arg( args
, bool * ) = true;
/* Reports the current i_next_demux_time (case label not visible). */
220 pi64
= va_arg( args
, int64_t * );
221 *pi64
= p_sys
->i_next_demux_time
;
/* Time-based repositioning (case label not visible): the requested time,
 * less VLC_TS_0, is looked up in the ordered time array and the resulting
 * index becomes the current one. b_first_time is re-armed so Demux() sets
 * the PCR again on its next emitted block. */
224 i64
= va_arg( args
, int64_t );
225 if( p_sys
->times
.i_count
)
227 tt_time_t t
= tt_time_Create( i64
- VLC_TS_0
);
228 size_t i_index
= tt_timings_FindLowerIndex( p_sys
->times
.p_array
,
229 p_sys
->times
.i_count
, t
, &b
);
230 p_sys
->times
.i_current
= i_index
;
231 p_sys
->b_first_time
= true;
/* Stores the externally supplied time bound and marks the demuxer as
 * slaved; Demux() then stops driving the PCR and the time bound itself. */
235 case DEMUX_SET_NEXT_DEMUX_TIME
:
236 i64
= va_arg( args
, int64_t );
237 p_sys
->i_next_demux_time
= i64
;
238 p_sys
->b_slave
= true;
/* Length = last stored time point minus the start of the temporal extent. */
240 case DEMUX_GET_LENGTH
:
241 pi64
= va_arg( args
, int64_t * );
242 if( p_sys
->times
.i_count
)
244 tt_time_t t
= tt_time_Sub( p_sys
->times
.p_array
[p_sys
->times
.i_count
- 1],
245 p_sys
->temporal_extent
.begin
);
246 *pi64
= tt_time_Convert( &t
);
/* Position = i_next_demux_time relative to the last time point. The value
 * written when i_current is past the end is not visible in this excerpt.
 * The + 0.5 keeps the divisor non-zero when the last time point converts
 * to 0. */
250 case DEMUX_GET_POSITION
:
251 pf
= va_arg( args
, double * );
252 if( p_sys
->times
.i_current
>= p_sys
->times
.i_count
)
256 else if( p_sys
->times
.i_count
> 0 )
258 i64
= tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_count
- 1] );
259 *pf
= (double) p_sys
->i_next_demux_time
/ (i64
+ 0.5);
/* Fractional repositioning: the fraction is scaled by the last time point,
 * then handled like the time-based lookup above. */
266 case DEMUX_SET_POSITION
:
267 f
= va_arg( args
, double );
268 if( p_sys
->times
.i_count
)
270 i64
= f
* tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_count
- 1] );
271 tt_time_t t
= tt_time_Create( i64
);
272 size_t i_index
= tt_timings_FindLowerIndex( p_sys
->times
.p_array
,
273 p_sys
->times
.i_count
, t
, &b
);
274 p_sys
->times
.i_current
= i_index
;
275 p_sys
->b_first_time
= true;
/* The handling of the remaining queries is not visible in this excerpt. */
279 case DEMUX_GET_PTS_DELAY
:
282 case DEMUX_GET_ATTACHMENTS
:
283 case DEMUX_GET_TITLE_INFO
:
284 case DEMUX_HAS_UNSUPPORTED_META
:
285 case DEMUX_CAN_RECORD
:
/* Parses the document available through p_sys->p_reader and builds the node
 * tree rooted at p_sys->p_rootnode.
 *
 * p_demux: demuxer whose p_sys supplies the reader and receives the tree.
 *
 * The caller (tt_OpenDemux) compares the result against VLC_SUCCESS.
 *
 * NOTE(review): the enclosing loop/switch, all braces and every return or
 * break statement are missing from this excerpt; the outcome of each test
 * below is therefore not visible. */
293 static int ReadTTML( demux_t
* p_demux
)
295 demux_sys_t
* p_sys
= p_demux
->p_sys
;
296 const char* psz_node_name
;
/* Fetch the next XML node and note whether it is an empty element. */
300 int i_type
= xml_ReaderNextNode( p_sys
->p_reader
, &psz_node_name
);
301 bool b_empty
= xml_ReaderIsEmptyElement( p_sys
->p_reader
);
/* Reader exhausted or failed (values at or below XML_READER_NONE). */
303 if( i_type
<= XML_READER_NONE
)
/* Start element: tests the name against "tt" and whether a root already
 * exists, then creates the root node and reads its subtree with
 * tt_nodes_Read. Presumably a non-"tt" or second root is rejected -- the
 * statement is not visible. */
311 case XML_READER_STARTELEM
:
312 if( tt_node_NameCompare( psz_node_name
, "tt" ) ||
313 p_sys
->p_rootnode
!= NULL
)
316 p_sys
->p_rootnode
= tt_node_New( p_sys
->p_reader
, NULL
, psz_node_name
);
/* Either a failed allocation or a failed subtree read is tested here. */
319 if( !p_sys
->p_rootnode
||
320 tt_nodes_Read( p_sys
->p_reader
, p_sys
->p_rootnode
) != VLC_SUCCESS
)
/* End element: checked against the name of the existing root node. */
324 case XML_READER_ENDELEM
:
325 if( !p_sys
->p_rootnode
||
326 tt_node_NameCompare( psz_node_name
, p_sys
->p_rootnode
->psz_node_name
) )
/* Final check that a root node was produced. */
333 if( p_sys
->p_rootnode
== NULL
)
/* Demux callback (installed as p_demux->pf_demux).
 *
 * For every stored time point that is not the last one and that is at or
 * before i_next_demux_time, the document tree is serialized as it applies at
 * that time point and sent to the ES as one block. The block's pts is
 * VLC_TS_0 plus the time point; its length runs to one unit before the next
 * time point.
 *
 * When not slaved, the PCR is set to VLC_TS_0 + i_next_demux_time and
 * i_next_demux_time advances by CLOCK_FREQ / 8.
 *
 * Returns VLC_DEMUXER_EGENERIC if the memory stream cannot be opened,
 * VLC_DEMUXER_EOF once i_current + 1 reaches i_count, and
 * VLC_DEMUXER_SUCCESS otherwise.
 *
 * NOTE(review): braces and a few statements are missing from this excerpt. */
339 static int Demux( demux_t
* p_demux
)
341 demux_sys_t
* p_sys
= p_demux
->p_sys
;
343 /* Last one must be an end time */
344 while( p_sys
->times
.i_current
+ 1 < p_sys
->times
.i_count
&&
345 tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_current
] ) <= p_sys
->i_next_demux_time
)
/* Interval covered by this block: [current point, next point - 1]. */
347 const int64_t i_playbacktime
=
348 tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_current
] );
349 const int64_t i_playbackendtime
=
350 tt_time_Convert( &p_sys
->times
.p_array
[p_sys
->times
.i_current
+ 1] ) - 1;
/* First block after opening or repositioning, when not slaved: set the PCR
 * to this block's start. */
352 if ( !p_sys
->b_slave
&& p_sys
->b_first_time
)
354 es_out_SetPCR( p_demux
->out
, VLC_TS_0
+ i_playbacktime
);
355 p_sys
->b_first_time
= false;
358 struct vlc_memstream stream
;
360 if( vlc_memstream_open( &stream
) )
361 return VLC_DEMUXER_EGENERIC
;
/* Serialize the tree filtered by the current time point. */
363 tt_node_ToText( &stream
, (tt_basenode_t
*) p_sys
->p_rootnode
,
364 &p_sys
->times
.p_array
[p_sys
->times
.i_current
] );
/* On successful close, the stream buffer is handed to a heap block.
 * NOTE(review): p_block is dereferenced below and no NULL check on the
 * block_heap_Alloc result is visible in this excerpt -- confirm one exists
 * in the full file. */
366 if( vlc_memstream_close( &stream
) == VLC_SUCCESS
)
368 block_t
* p_block
= block_heap_Alloc( stream
.ptr
, stream
.length
);
372 p_block
->i_pts
= VLC_TS_0
+ i_playbacktime
;
373 p_block
->i_length
= i_playbackendtime
- i_playbacktime
;
375 es_out_Send( p_demux
->out
, p_sys
->p_es
, p_block
);
/* Move on to the next time point. */
379 p_sys
->times
.i_current
++;
/* Self-clocked mode: publish the PCR and advance the time bound. */
382 if ( !p_sys
->b_slave
)
384 es_out_SetPCR( p_demux
->out
, VLC_TS_0
+ p_sys
->i_next_demux_time
);
385 p_sys
->i_next_demux_time
+= CLOCK_FREQ
/ 8;
/* No interval left to emit. */
388 if( p_sys
->times
.i_current
+ 1 >= p_sys
->times
.i_count
)
389 return VLC_DEMUXER_EOF
;
391 return VLC_DEMUXER_SUCCESS
;
/* Module open callback for the TTML demuxer.
 *
 * p_this: object cast to demux_t.
 *
 * Steps visible in this excerpt:
 *  1. Peek at the start of the stream and probe it without the XML module:
 *     detect the encoding from the first bytes, locate a "tt " element name
 *     and one of the known TTML namespace declarations.
 *  2. Allocate the zeroed private state and initialise the temporal extent.
 *  3. Create the XML handle and reader, parse the document with ReadTTML and
 *     resolve all timings into the ordered p_sys->times array.
 *  4. Install the Demux/Control callbacks and add one SPU elementary stream
 *     with codec VLC_CODEC_TTML.
 *
 * NOTE(review): braces, local declarations (p_sys, fmt, t), goto/return
 * statements and the return values are missing from this excerpt. */
394 int tt_OpenDemux( vlc_object_t
* p_this
)
396 demux_t
*p_demux
= (demux_t
*)p_this
;
/* Peek up to 2048 bytes. The test below covers 32 bytes or fewer, so the
 * 8-byte and 4-byte reads further down are presumably only reached with
 * more than 32 bytes available -- the statement taken is not visible. */
399 const uint8_t *p_peek
;
400 ssize_t i_peek
= vlc_stream_Peek( p_demux
->s
, &p_peek
, 2048 );
401 if( unlikely( i_peek
<= 32 ) )
404 const char *psz_xml
= (const char *) p_peek
;
405 size_t i_xml
= i_peek
;
407 /* Try to probe without xml module/loading the full document */
408 char *psz_alloc
= NULL
;
/* Dispatch on the first 8 bytes read as a big-endian 64-bit value. UTF-16
 * inputs are converted with FromCharset into psz_alloc. */
409 switch( GetQWBE(p_peek
) )
411 /* See RFC 3023 Part 4 */
412 case UINT64_C(0xFFFE3C003F007800): /* UTF16 BOM<? */
413 case UINT64_C(0xFFFE3C003F007400): /* UTF16 BOM<t */
414 case UINT64_C(0xFEFF003C003F0078): /* UTF16 BOM<? */
415 case UINT64_C(0xFEFF003C003F0074): /* UTF16 BOM<t */
416 psz_alloc
= FromCharset( "UTF-16", p_peek
, i_peek
);
418 case UINT64_C(0x3C003F0078006D00): /* UTF16-LE <?xm */
419 case UINT64_C(0x3C003F0074007400): /* UTF16-LE <tt */
420 psz_alloc
= FromCharset( "UTF-16LE", p_peek
, i_peek
);
422 case UINT64_C(0x003C003F0078006D): /* UTF16-BE <?xm */
423 case UINT64_C(0x003C003F00740074): /* UTF16-BE <tt */
424 psz_alloc
= FromCharset( "UTF-16BE", p_peek
, i_peek
);
426 case UINT64_C(0xEFBBBF3C3F786D6C): /* UTF8 BOM<?xml */
427 case UINT64_C(0x3C3F786D6C207665): /* UTF8 <?xml ve */
428 case UINT64_C(0xEFBBBF3C74742078): /* UTF8 BOM<tt x*/
/* No recognised prefix: the first four bytes are compared with "<tt "
 * (0x3C 0x74 0x74 0x20). */
431 if(GetDWBE(p_peek
) != UINT32_C(0x3C747420)) /* tt node without xml document marker */
/* Length of the converted text; presumably psz_xml is pointed at psz_alloc
 * on a line missing from this excerpt -- TODO confirm. */
438 i_xml
= strlen( psz_alloc
);
441 /* Simplified probing. Valid TTML must have a namespace declaration */
/* "tt " must be present, must not sit at offset 0 (which also keeps the
 * [-1] reads in bounds) and must follow ':' or '<'. */
442 const char *psz_tt
= strnstr( psz_xml
, "tt ", i_xml
);
443 if( !psz_tt
|| psz_tt
== psz_xml
||
444 (psz_tt
[-1] != ':' && psz_tt
[-1] != '<') )
/* Accepted namespace declarations. */
451 const char * const rgsz
[] =
453 "=\"http://www.w3.org/ns/ttml\"",
454 "=\"http://www.w3.org/2004/11/ttaf1\"",
455 "=\"http://www.w3.org/2006/04/ttaf1\"",
456 "=\"http://www.w3.org/2006/10/ttaf1\"",
/* Stop at the first namespace that is found.
 * NOTE(review): the search starts at psz_xml while the length bound is
 * i_xml minus the offset of psz_tt, so the last (psz_tt - psz_xml) bytes of
 * the buffer are never searched. Presumably the search was meant to start
 * at psz_tt -- confirm. */
458 const char *psz_ns
= NULL
;
459 for( size_t i
=0; i
<ARRAY_SIZE(rgsz
) && !psz_ns
; i
++ )
461 psz_ns
= strnstr( psz_xml
, rgsz
[i
],
462 i_xml
- (psz_tt
- psz_xml
) );
/* Zero-initialised private state. */
469 p_demux
->p_sys
= p_sys
= calloc( 1, sizeof( *p_sys
) );
470 if( unlikely( p_sys
== NULL
) )
/* Temporal extent: parallel container, all times initialised, begin base
 * set to 0. */
473 p_sys
->b_first_time
= true;
474 p_sys
->temporal_extent
.i_type
= TT_TIMINGS_PARALLEL
;
475 tt_time_Init( &p_sys
->temporal_extent
.begin
);
476 tt_time_Init( &p_sys
->temporal_extent
.end
);
477 tt_time_Init( &p_sys
->temporal_extent
.dur
);
478 p_sys
->temporal_extent
.begin
.base
= 0;
/* XML handle, then a reader over the demuxer's input stream. */
480 p_sys
->p_xml
= xml_Create( p_demux
);
484 p_sys
->p_reader
= xml_ReaderCreate( p_sys
->p_xml
, p_demux
->s
);
485 if( !p_sys
->p_reader
)
/* Outside debug builds the reader object gets OBJECT_FLAGS_QUIET
 * (presumably to silence its log output). */
488 #ifndef TTML_DEMUX_DEBUG
489 p_sys
->p_reader
->obj
.flags
|= OBJECT_FLAGS_QUIET
;
/* Parse the whole document into the node tree. */
492 if( ReadTTML( p_demux
) != VLC_SUCCESS
)
/* Resolve timings over the tree and fill the ordered array of time points. */
495 tt_timings_Resolve( (tt_basenode_t
*) p_sys
->p_rootnode
, &p_sys
->temporal_extent
,
496 &p_sys
->times
.p_array
, &p_sys
->times
.i_count
);
/* Debug builds dump the unfiltered tree (invalid time passed) to the log. */
498 #ifdef TTML_DEMUX_DEBUG
500 struct vlc_memstream stream
;
502 if( vlc_memstream_open( &stream
) )
507 tt_node_ToText( &stream
, (tt_basenode_t
*)p_sys
->p_rootnode
, &t
/* invalid */ );
509 vlc_memstream_putc( &stream
, '\0' );
511 if( vlc_memstream_close( &stream
) == VLC_SUCCESS
)
513 msg_Dbg( p_demux
, "%s", stream
.ptr
);
/* Install callbacks. */
519 p_demux
->pf_demux
= Demux
;
520 p_demux
->pf_control
= Control
;
/* One subtitle ES carrying TTML. */
523 es_format_Init( &fmt
, SPU_ES
, VLC_CODEC_TTML
);
524 p_sys
->p_es
= es_out_Add( p_demux
->out
, &fmt
);
528 es_format_Clean( &fmt
);
/* Cleanup through tt_CloseDemux; presumably the error path (label not
 * visible in this excerpt). */
533 tt_CloseDemux( p_demux
);
/* Releases everything owned by the demuxer private state: the parsed node
 * tree, the elementary stream, the XML reader, the XML handle and the array
 * of time points.
 *
 * p_demux: demuxer whose p_sys is torn down. tt_OpenDemux also calls this
 *          function itself.
 *
 * NOTE(review): the guards (if any) in front of es_out_Del and xml_Delete,
 * and the release of p_sys itself, are not visible in this excerpt --
 * confirm they tolerate a partially initialised state. */
538 void tt_CloseDemux( demux_t
* p_demux
)
540 demux_sys_t
* p_sys
= p_demux
->p_sys
;
/* Node tree built by ReadTTML. */
542 if( p_sys
->p_rootnode
)
543 tt_node_RecursiveDelete( p_sys
->p_rootnode
);
/* Elementary stream added in tt_OpenDemux. */
546 es_out_Del( p_demux
->out
, p_sys
->p_es
);
548 if( p_sys
->p_reader
)
549 xml_ReaderDelete( p_sys
->p_reader
);
552 xml_Delete( p_sys
->p_xml
);
/* Time points filled by tt_timings_Resolve. */
554 free( p_sys
->times
.p_array
);