1 /*****************************************************************************
2 * substtml.c : TTML subtitles decoder
3 *****************************************************************************
4 * Copyright (C) 2015-2017 VLC authors and VideoLAN
6 * Authors: Hugo Beauzée-Luyssen <hugo@beauzee.fr>
7 * Sushma Reddy <sushma.reddy@research.iiit.ac.in>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
27 #include <vlc_common.h>
28 #include <vlc_codec.h>
30 #include <vlc_stream.h>
31 #include <vlc_text_style.h>
32 #include <vlc_charset.h>
42 /*****************************************************************************
44 *****************************************************************************/
50 TTML_UNIT_UNKNOWN
= 0,
57 #define TTML_DEFAULT_CELL_RESOLUTION_H 32
58 #define TTML_DEFAULT_CELL_RESOLUTION_V 15
59 #define TTML_LINE_TO_HEIGHT_RATIO 1.06
63 text_style_t
* font_style
;
64 ttml_length_t font_size
;
65 ttml_length_t extent_h
, extent_v
;
69 bool b_preserve_space
;
72 TTML_DISPLAY_UNKNOWN
= 0,
80 vlc_dictionary_t regions
;
81 tt_node_t
* p_rootnode
; /* for now. FIXME: split header */
82 ttml_length_t root_extent_h
, root_extent_v
;
83 unsigned i_cell_resolution_v
;
88 subpicture_updater_sys_region_t updt
;
89 text_segment_t
**pp_last_segment
;
100 UNICODE_BIDI_RTL
= 1,
101 UNICODE_BIDI_EMBEDDED
= 2,
102 UNICODE_BIDI_OVERRIDE
= 4,
/*
 * TTML parsing and inheritance order:
 * Each time a text node is found and belongs to our time interval,
 * we backward merge the attribute dictionaries up to the root.
 * Then we convert attributes, merging with style by id or region
 * style, and sets from parent node.
 */
112 static tt_node_t
*ParseTTML( decoder_t
*, const uint8_t *, size_t );
114 static void ttml_style_Delete( ttml_style_t
* p_ttml_style
)
116 text_style_Delete( p_ttml_style
->font_style
);
117 free( p_ttml_style
);
120 static ttml_style_t
* ttml_style_New( )
122 ttml_style_t
*p_ttml_style
= calloc( 1, sizeof( ttml_style_t
) );
123 if( unlikely( !p_ttml_style
) )
126 p_ttml_style
->extent_h
.i_value
= 100;
127 p_ttml_style
->extent_h
.unit
= TTML_UNIT_PERCENT
;
128 p_ttml_style
->extent_v
.i_value
= 100;
129 p_ttml_style
->extent_v
.unit
= TTML_UNIT_PERCENT
;
130 p_ttml_style
->font_size
.i_value
= 1.0;
131 p_ttml_style
->font_size
.unit
= TTML_UNIT_CELL
;
132 p_ttml_style
->font_style
= text_style_Create( STYLE_NO_DEFAULTS
);
133 if( unlikely( !p_ttml_style
->font_style
) )
135 free( p_ttml_style
);
141 static void ttml_region_Delete( ttml_region_t
*p_region
)
143 SubpictureUpdaterSysRegionClean( &p_region
->updt
);
147 static ttml_style_t
* ttml_style_Duplicate( const ttml_style_t
*p_src
)
149 ttml_style_t
*p_dup
= ttml_style_New( );
153 p_dup
->font_style
= text_style_Duplicate( p_src
->font_style
);
158 static void ttml_style_Merge( const ttml_style_t
*p_src
, ttml_style_t
*p_dst
)
162 if( p_src
->font_style
)
164 if( p_dst
->font_style
)
165 text_style_Merge( p_dst
->font_style
, p_src
->font_style
, true );
167 p_dst
->font_style
= text_style_Duplicate( p_src
->font_style
);
170 if( p_src
->b_direction_set
)
172 p_dst
->b_direction_set
= true;
173 p_dst
->i_direction
= p_src
->i_direction
;
176 if( p_src
->display
!= TTML_DISPLAY_UNKNOWN
)
177 p_dst
->display
= p_src
->display
;
181 static ttml_region_t
*ttml_region_New( )
183 ttml_region_t
*p_ttml_region
= calloc( 1, sizeof( ttml_region_t
) );
184 if( unlikely( !p_ttml_region
) )
187 SubpictureUpdaterSysRegionInit( &p_ttml_region
->updt
);
188 p_ttml_region
->pp_last_segment
= &p_ttml_region
->updt
.p_segments
;
189 /* Align to bottom by default. !Warn: center align is obtained with NO flags */
190 p_ttml_region
->updt
.align
= SUBPICTURE_ALIGN_BOTTOM
;
192 return p_ttml_region
;
195 static ttml_length_t
ttml_read_length( const char *psz
)
197 ttml_length_t len
= { 0.0, TTML_UNIT_UNKNOWN
};
199 char* psz_end
= NULL
;
200 float size
= us_strtof( psz
, &psz_end
);
204 if( *psz_end
== 'c' || *psz_end
== 'r' )
205 len
.unit
= TTML_UNIT_CELL
;
206 else if( *psz_end
== '%' )
207 len
.unit
= TTML_UNIT_PERCENT
;
208 else if( *psz_end
== 'p' && *(psz_end
+ 1) == 'x' )
209 len
.unit
= TTML_UNIT_PIXELS
;
214 static ttml_length_t
ttml_rebase_length( ttml_length_t value
,
215 ttml_length_t reference
,
216 unsigned i_cell_resolution
)
218 if( value
.unit
== TTML_UNIT_PERCENT
)
220 value
.i_value
*= reference
.i_value
/ 100.0;
221 value
.unit
= reference
.unit
;
223 else if( value
.unit
== TTML_UNIT_CELL
)
225 value
.i_value
*= reference
.i_value
/ i_cell_resolution
;
226 value
.unit
= reference
.unit
;
232 static tt_node_t
* FindNode( tt_node_t
*p_node
, const char *psz_nodename
,
233 size_t i_maxdepth
, const char *psz_id
)
235 if( !tt_node_NameCompare( p_node
->psz_node_name
, psz_nodename
) )
239 char *psz
= vlc_dictionary_value_for_key( &p_node
->attr_dict
, "xml:id" );
240 if( psz
&& !strcmp( psz
, psz_id
) )
246 if( i_maxdepth
== 0 )
249 for( tt_basenode_t
*p_child
= p_node
->p_child
;
250 p_child
; p_child
= p_child
->p_next
)
252 if( p_child
->i_type
== TT_NODE_TYPE_TEXT
)
255 p_node
= FindNode( (tt_node_t
*) p_child
, psz_nodename
, i_maxdepth
- 1, psz_id
);
263 static void FillTextStyle( const char *psz_attr
, const char *psz_val
,
264 text_style_t
*p_text_style
)
266 if( !strcasecmp ( "tts:fontFamily", psz_attr
) )
268 free( p_text_style
->psz_fontname
);
269 p_text_style
->psz_fontname
= strdup( psz_val
);
271 else if( !strcasecmp( "tts:opacity", psz_attr
) )
273 p_text_style
->i_background_alpha
= atoi( psz_val
);
274 p_text_style
->i_font_alpha
= atoi( psz_val
);
275 p_text_style
->i_features
|= STYLE_HAS_BACKGROUND_ALPHA
| STYLE_HAS_FONT_ALPHA
;
277 else if( !strcasecmp( "tts:color", psz_attr
) )
279 unsigned int i_color
= vlc_html_color( psz_val
, NULL
);
280 p_text_style
->i_font_color
= (i_color
& 0xffffff);
281 p_text_style
->i_font_alpha
= (i_color
& 0xFF000000) >> 24;
282 p_text_style
->i_features
|= STYLE_HAS_FONT_COLOR
| STYLE_HAS_FONT_ALPHA
;
284 else if( !strcasecmp( "tts:backgroundColor", psz_attr
) )
286 unsigned int i_color
= vlc_html_color( psz_val
, NULL
);
287 p_text_style
->i_background_color
= i_color
& 0xFFFFFF;
288 p_text_style
->i_background_alpha
= (i_color
& 0xFF000000) >> 24;
289 p_text_style
->i_features
|= STYLE_HAS_BACKGROUND_COLOR
290 | STYLE_HAS_BACKGROUND_ALPHA
;
291 p_text_style
->i_style_flags
|= STYLE_BACKGROUND
;
293 else if( !strcasecmp( "tts:fontStyle", psz_attr
) )
295 if( !strcasecmp ( "italic", psz_val
) || !strcasecmp ( "oblique", psz_val
) )
296 p_text_style
->i_style_flags
|= STYLE_ITALIC
;
298 p_text_style
->i_style_flags
&= ~STYLE_ITALIC
;
299 p_text_style
->i_features
|= STYLE_HAS_FLAGS
;
301 else if( !strcasecmp ( "tts:fontWeight", psz_attr
) )
303 if( !strcasecmp ( "bold", psz_val
) )
304 p_text_style
->i_style_flags
|= STYLE_BOLD
;
306 p_text_style
->i_style_flags
&= ~STYLE_BOLD
;
307 p_text_style
->i_features
|= STYLE_HAS_FLAGS
;
309 else if( !strcasecmp ( "tts:textDecoration", psz_attr
) )
311 if( !strcasecmp ( "underline", psz_val
) )
312 p_text_style
->i_style_flags
|= STYLE_UNDERLINE
;
313 else if( !strcasecmp ( "noUnderline", psz_val
) )
314 p_text_style
->i_style_flags
&= ~STYLE_UNDERLINE
;
315 if( !strcasecmp ( "lineThrough", psz_val
) )
316 p_text_style
->i_style_flags
|= STYLE_STRIKEOUT
;
317 else if( !strcasecmp ( "noLineThrough", psz_val
) )
318 p_text_style
->i_style_flags
&= ~STYLE_STRIKEOUT
;
319 p_text_style
->i_features
|= STYLE_HAS_FLAGS
;
321 else if( !strcasecmp( "tts:textOutline", psz_attr
) )
323 char *value
= strdup( psz_val
);
324 char* psz_saveptr
= NULL
;
325 char* token
= (value
) ? strtok_r( value
, " ", &psz_saveptr
) : NULL
;
326 // <color>? <length> <length>?
330 unsigned int color
= vlc_html_color( token
, &b_ok
);
333 p_text_style
->i_outline_color
= color
& 0xFFFFFF;
334 p_text_style
->i_outline_alpha
= (color
& 0xFF000000) >> 24;
335 token
= strtok_r( NULL
, " ", &psz_saveptr
);
338 char* psz_end
= NULL
;
339 int i_outline_width
= strtol( token
, &psz_end
, 10 );
340 if( psz_end
!= token
)
342 // Assume unit is pixel, and ignore border radius
343 p_text_style
->i_outline_width
= i_outline_width
;
352 static void FillRegionStyle( const char *psz_attr
, const char *psz_val
,
353 ttml_region_t
*p_region
)
355 if( !strcasecmp( "tts:displayAlign", psz_attr
) )
357 if( !strcasecmp ( "top", psz_val
) )
358 p_region
->updt
.align
= SUBPICTURE_ALIGN_TOP
;
359 else if( !strcasecmp ( "center", psz_val
) )
360 p_region
->updt
.align
= 0;
362 p_region
->updt
.align
= SUBPICTURE_ALIGN_BOTTOM
;
364 else if( !strcasecmp ( "tts:origin", psz_attr
) )
366 const char *psz_token
= psz_val
;
367 while( isspace( *psz_token
) )
370 const char *psz_separator
= strchr( psz_token
, ' ' );
371 if( psz_separator
== NULL
)
373 const char *psz_percent_sign
= strchr( psz_token
, '%' );
375 p_region
->updt
.origin
.x
= atoi( psz_token
);
376 if( psz_percent_sign
!= NULL
&& psz_percent_sign
< psz_separator
)
378 p_region
->updt
.origin
.x
/= 100.0;
379 p_region
->updt
.flags
|= UPDT_REGION_ORIGIN_X_IS_PERCENTILE
;
382 while( isspace( *psz_separator
) )
384 psz_token
= psz_separator
;
385 psz_percent_sign
= strchr( psz_token
, '%' );
387 p_region
->updt
.origin
.y
= atoi( psz_token
);
388 if( psz_percent_sign
!= NULL
)
390 p_region
->updt
.origin
.y
/= 100.0;
391 p_region
->updt
.flags
|= UPDT_REGION_ORIGIN_Y_IS_PERCENTILE
;
393 p_region
->updt
.align
= SUBPICTURE_ALIGN_TOP
|SUBPICTURE_ALIGN_LEFT
;
397 static void ReadTTMLExtent( const char *value
, ttml_length_t
*h
, ttml_length_t
*v
)
399 ttml_length_t vals
[2] = { { 0.0, TTML_UNIT_UNKNOWN
},
400 { 0.0, TTML_UNIT_UNKNOWN
} };
401 char *dup
= strdup( value
);
402 char* psz_saveptr
= NULL
;
403 char* token
= (dup
) ? strtok_r( dup
, " ", &psz_saveptr
) : NULL
;
404 for(int i
=0; i
<2 && token
!= NULL
; i
++)
406 token
= strtok_r( NULL
, " ", &psz_saveptr
);
408 vals
[i
] = ttml_read_length( token
);
412 if( vals
[0].unit
!= TTML_UNIT_UNKNOWN
&&
413 vals
[1].unit
!= TTML_UNIT_UNKNOWN
)
420 static void ComputeTTMLStyles( ttml_context_t
*p_ctx
, const vlc_dictionary_t
*p_dict
,
421 ttml_style_t
*p_ttml_style
)
424 /* Values depending on multiple others are converted last
425 * Default value conversion must also not depend on attribute presence */
426 text_style_t
*p_text_style
= p_ttml_style
->font_style
;
427 ttml_length_t len
= p_ttml_style
->font_size
;
428 len
= ttml_rebase_length( len
, p_ctx
->root_extent_h
,
429 p_ctx
->i_cell_resolution_v
);
430 if( len
.unit
== TTML_UNIT_CELL
)
431 p_text_style
->f_font_relsize
= 100.0 * len
.i_value
/
432 (p_ctx
->i_cell_resolution_v
/ TTML_LINE_TO_HEIGHT_RATIO
);
433 else if( len
.unit
== TTML_UNIT_PERCENT
)
434 p_text_style
->f_font_relsize
= len
.i_value
;
435 else if( len
.unit
== TTML_UNIT_PIXELS
)
436 p_text_style
->i_font_size
= (int)( len
.i_value
+ 0.5 );
439 static void FillTTMLStyle( const char *psz_attr
, const char *psz_val
,
440 ttml_style_t
*p_ttml_style
)
442 if( !strcasecmp( "tts:extent", psz_attr
) )
444 ReadTTMLExtent( psz_attr
, &p_ttml_style
->extent_h
,
445 &p_ttml_style
->extent_v
);
447 else if( !strcasecmp( "tts:textAlign", psz_attr
) )
449 if( !strcasecmp ( "left", psz_val
) )
450 p_ttml_style
->i_text_align
= SUBPICTURE_ALIGN_LEFT
;
451 else if( !strcasecmp ( "right", psz_val
) )
452 p_ttml_style
->i_text_align
= SUBPICTURE_ALIGN_RIGHT
;
453 else if( !strcasecmp ( "center", psz_val
) )
454 p_ttml_style
->i_text_align
= 0;
455 else if( !strcasecmp ( "start", psz_val
) ) /* FIXME: should be BIDI based */
456 p_ttml_style
->i_text_align
= SUBPICTURE_ALIGN_LEFT
;
457 else if( !strcasecmp ( "end", psz_val
) ) /* FIXME: should be BIDI based */
458 p_ttml_style
->i_text_align
= SUBPICTURE_ALIGN_RIGHT
;
460 else if( !strcasecmp( "tts:fontSize", psz_attr
) )
462 ttml_length_t len
= ttml_read_length( psz_val
);
463 if( len
.unit
!= TTML_UNIT_UNKNOWN
&& len
.i_value
> 0.0 )
464 p_ttml_style
->font_size
= len
;
466 else if( !strcasecmp( "tts:direction", psz_attr
) )
468 if( !strcasecmp( "rtl", psz_val
) )
470 p_ttml_style
->i_direction
|= UNICODE_BIDI_RTL
;
471 p_ttml_style
->b_direction_set
= true;
473 else if( !strcasecmp( "ltr", psz_val
) )
475 p_ttml_style
->i_direction
|= UNICODE_BIDI_LTR
;
476 p_ttml_style
->b_direction_set
= true;
479 else if( !strcasecmp( "tts:unicodeBidi", psz_attr
) )
481 if( !strcasecmp( "bidiOverride", psz_val
) )
482 p_ttml_style
->i_direction
|= UNICODE_BIDI_OVERRIDE
& ~UNICODE_BIDI_EMBEDDED
;
483 else if( !strcasecmp( "embed", psz_val
) )
484 p_ttml_style
->i_direction
|= UNICODE_BIDI_EMBEDDED
& ~UNICODE_BIDI_OVERRIDE
;
486 else if( !strcasecmp( "tts:writingMode", psz_attr
) )
488 if( !strcasecmp( "rl", psz_val
) || !strcasecmp( "rltb", psz_val
) )
490 p_ttml_style
->i_direction
= UNICODE_BIDI_RTL
| UNICODE_BIDI_OVERRIDE
;
491 //p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
492 p_ttml_style
->b_direction_set
= true;
494 else if( !strcasecmp( "lr", psz_val
) || !strcasecmp( "lrtb", psz_val
) )
496 p_ttml_style
->i_direction
= UNICODE_BIDI_LTR
| UNICODE_BIDI_OVERRIDE
;
497 //p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
498 p_ttml_style
->b_direction_set
= true;
501 else if( !strcmp( "tts:display", psz_attr
) )
503 if( !strcmp( "none", psz_val
) )
504 p_ttml_style
->display
= TTML_DISPLAY_NONE
;
506 p_ttml_style
->display
= TTML_DISPLAY_AUTO
;
508 else if( !strcasecmp( "xml:space", psz_attr
) )
510 p_ttml_style
->b_preserve_space
= !strcmp( "preserve", psz_val
);
512 else FillTextStyle( psz_attr
, psz_val
, p_ttml_style
->font_style
);
515 static void DictionaryMerge( const vlc_dictionary_t
*p_src
, vlc_dictionary_t
*p_dst
)
517 for( int i
= 0; i
< p_src
->i_size
; ++i
)
519 for ( const vlc_dictionary_entry_t
* p_entry
= p_src
->p_entries
[i
];
520 p_entry
!= NULL
; p_entry
= p_entry
->p_next
)
522 if( ( !strncmp( "tts:", p_entry
->psz_key
, 4 ) ||
523 !strncmp( "ttp:", p_entry
->psz_key
, 4 ) ||
524 !strcmp( "xml:space", p_entry
->psz_key
) ) &&
525 !vlc_dictionary_has_key( p_dst
, p_entry
->psz_key
) )
526 vlc_dictionary_insert( p_dst
, p_entry
->psz_key
, p_entry
->p_value
);
531 static void DictMergeWithStyleID( ttml_context_t
*p_ctx
, const char *psz_id
,
532 vlc_dictionary_t
*p_dst
)
534 assert(p_ctx
->p_rootnode
);
535 if( psz_id
&& p_ctx
->p_rootnode
)
537 /* Lookup referenced style ID */
538 const tt_node_t
*p_node
= FindNode( p_ctx
->p_rootnode
,
539 "style", -1, psz_id
);
541 DictionaryMerge( &p_node
->attr_dict
, p_dst
);
545 static void DictMergeWithRegionID( ttml_context_t
*p_ctx
, const char *psz_id
,
546 vlc_dictionary_t
*p_dst
)
548 assert(p_ctx
->p_rootnode
);
549 if( psz_id
&& p_ctx
->p_rootnode
)
551 const tt_node_t
*p_regionnode
= FindNode( p_ctx
->p_rootnode
,
552 "region", -1, psz_id
);
556 DictionaryMerge( &p_regionnode
->attr_dict
, p_dst
);
558 const char *psz_styleid
= (const char *)
559 vlc_dictionary_value_for_key( &p_regionnode
->attr_dict
, "style" );
561 DictMergeWithStyleID( p_ctx
, psz_styleid
, p_dst
);
563 for( const tt_basenode_t
*p_child
= p_regionnode
->p_child
;
564 p_child
; p_child
= p_child
->p_next
)
566 if( unlikely( p_child
->i_type
== TT_NODE_TYPE_TEXT
) )
569 const tt_node_t
*p_node
= (const tt_node_t
*) p_child
;
570 if( !tt_node_NameCompare( p_node
->psz_node_name
, "style" ) )
572 DictionaryMerge( &p_node
->attr_dict
, p_dst
);
578 static void DictToTTMLStyle( ttml_context_t
*p_ctx
, const vlc_dictionary_t
*p_dict
,
579 ttml_style_t
*p_ttml_style
)
581 for( int i
= 0; i
< p_dict
->i_size
; ++i
)
583 for ( vlc_dictionary_entry_t
* p_entry
= p_dict
->p_entries
[i
];
584 p_entry
!= NULL
; p_entry
= p_entry
->p_next
)
586 FillTTMLStyle( p_entry
->psz_key
, p_entry
->p_value
, p_ttml_style
);
589 ComputeTTMLStyles( p_ctx
, p_dict
, p_ttml_style
);
592 static ttml_style_t
* InheritTTMLStyles( ttml_context_t
*p_ctx
, tt_node_t
*p_node
)
595 ttml_style_t
*p_ttml_style
= NULL
;
596 vlc_dictionary_t merged
;
597 vlc_dictionary_init( &merged
, 0 );
599 /* Merge dics backwards without overwriting */
600 for( ; p_node
; p_node
= p_node
->p_parent
)
602 DictionaryMerge( &p_node
->attr_dict
, &merged
);
604 const char *psz_styleid
= (const char *)
605 vlc_dictionary_value_for_key( &p_node
->attr_dict
, "style" );
607 DictMergeWithStyleID( p_ctx
, psz_styleid
, &merged
);
609 const char *psz_regionid
= (const char *)
610 vlc_dictionary_value_for_key( &p_node
->attr_dict
, "region" );
612 DictMergeWithRegionID( p_ctx
, psz_regionid
, &merged
);
615 if( !vlc_dictionary_is_empty( &merged
) && (p_ttml_style
= ttml_style_New()) )
617 DictToTTMLStyle( p_ctx
, &merged
, p_ttml_style
);
620 vlc_dictionary_clear( &merged
, NULL
, NULL
);
625 static int ParseTTMLChunk( xml_reader_t
*p_reader
, tt_node_t
**pp_rootnode
)
627 const char* psz_node_name
;
631 int i_type
= xml_ReaderNextNode( p_reader
, &psz_node_name
);
633 if( i_type
<= XML_READER_NONE
)
641 case XML_READER_STARTELEM
:
642 if( tt_node_NameCompare( psz_node_name
, "tt" ) ||
643 *pp_rootnode
!= NULL
)
646 *pp_rootnode
= tt_node_New( p_reader
, NULL
, psz_node_name
);
648 tt_nodes_Read( p_reader
, *pp_rootnode
) != VLC_SUCCESS
)
652 case XML_READER_ENDELEM
:
654 tt_node_NameCompare( psz_node_name
, (*pp_rootnode
)->psz_node_name
) )
661 if( *pp_rootnode
== NULL
)
667 static void BIDIConvert( text_segment_t
*p_segment
, int i_direction
)
670 * For bidirectionnal support, we use different enum
671 * to recognize different cases, en then we add the
672 * corresponding unicode character to the text of
677 const char* psz_uni_start
;
678 const char* psz_uni_end
;
680 { "\u2066", "\u2069" },
681 { "\u2067", "\u2069" },
682 { "\u202A", "\u202C" },
683 { "\u202B", "\u202C" },
684 { "\u202D", "\u202C" },
685 { "\u202E", "\u202C" },
688 if( unlikely((size_t)i_direction
>= ARRAY_SIZE(p_bidi
)) )
691 char *psz_text
= NULL
;
692 if( asprintf( &psz_text
, "%s%s%s", p_bidi
[i_direction
].psz_uni_start
,
693 p_segment
->psz_text
, p_bidi
[i_direction
].psz_uni_end
) < 0 )
695 free( p_segment
->psz_text
);
696 p_segment
->psz_text
= psz_text
;
700 static void StripSpacing( text_segment_t
*p_segment
)
702 /* Newlines must be replaced */
703 char *p
= p_segment
->psz_text
;
704 while( (p
= strchr( p
, '\n' )) )
708 static ttml_region_t
*GetTTMLRegion( ttml_context_t
*p_ctx
, const char *psz_region_id
)
710 ttml_region_t
*p_region
= ( ttml_region_t
* )
711 vlc_dictionary_value_for_key( &p_ctx
->regions
, psz_region_id
? psz_region_id
: "" );
712 if( p_region
== NULL
)
714 if( psz_region_id
&& strcmp( psz_region_id
, "" ) ) /* not default region */
716 /* Create region if if missing */
718 vlc_dictionary_t merged
;
719 vlc_dictionary_init( &merged
, 0 );
720 /* Get all attributes, including region > style */
721 DictMergeWithRegionID( p_ctx
, psz_region_id
, &merged
);
722 if( (p_region
= ttml_region_New()) )
724 /* Fill from its own attributes */
725 for( int i
= 0; i
< merged
.i_size
; ++i
)
727 for ( vlc_dictionary_entry_t
* p_entry
= merged
.p_entries
[i
];
728 p_entry
!= NULL
; p_entry
= p_entry
->p_next
)
730 FillRegionStyle( p_entry
->psz_key
, p_entry
->p_value
, p_region
);
734 vlc_dictionary_clear( &merged
, NULL
, NULL
);
736 vlc_dictionary_insert( &p_ctx
->regions
, psz_region_id
, p_region
);
738 else if( (p_region
= ttml_region_New()) ) /* create default */
740 vlc_dictionary_insert( &p_ctx
->regions
, "", p_region
);
746 static void AppendLineBreakToRegion( ttml_region_t
*p_region
)
748 text_segment_t
*p_segment
= text_segment_New( "\n" );
751 *p_region
->pp_last_segment
= p_segment
;
752 p_region
->pp_last_segment
= &p_segment
->p_next
;
756 static void AppendTextToRegion( ttml_context_t
*p_ctx
, const tt_textnode_t
*p_ttnode
,
757 const ttml_style_t
*p_set_styles
, ttml_region_t
*p_region
)
759 text_segment_t
*p_segment
;
761 if( p_region
== NULL
)
764 p_segment
= text_segment_New( p_ttnode
->psz_text
);
767 bool b_preserve_space
= false;
768 ttml_style_t
*s
= InheritTTMLStyles( p_ctx
, p_ttnode
->p_parent
);
772 ttml_style_Merge( p_set_styles
, s
);
774 p_segment
->style
= s
->font_style
;
775 s
->font_style
= NULL
;
777 b_preserve_space
= s
->b_preserve_space
;
778 if( s
->b_direction_set
)
779 BIDIConvert( p_segment
, s
->i_direction
);
781 if( s
->display
== TTML_DISPLAY_NONE
)
783 /* Must not display, but still occupies space */
784 p_segment
->style
->i_features
&= ~(STYLE_BACKGROUND
|STYLE_OUTLINE
|STYLE_STRIKEOUT
|STYLE_SHADOW
);
785 p_segment
->style
->i_font_alpha
= STYLE_ALPHA_TRANSPARENT
;
786 p_segment
->style
->i_features
|= STYLE_HAS_FONT_ALPHA
;
789 ttml_style_Delete( s
);
792 if( !b_preserve_space
)
793 StripSpacing( p_segment
);
796 *p_region
->pp_last_segment
= p_segment
;
797 p_region
->pp_last_segment
= &p_segment
->p_next
;
800 static void ConvertNodesToRegionContent( ttml_context_t
*p_ctx
, const tt_node_t
*p_node
,
801 ttml_region_t
*p_region
,
802 const ttml_style_t
*p_upper_set_styles
,
803 tt_time_t playbacktime
)
805 if( tt_time_Valid( &playbacktime
) &&
806 !tt_timings_Contains( &p_node
->timings
, &playbacktime
) )
809 const char *psz_regionid
= (const char *)
810 vlc_dictionary_value_for_key( &p_node
->attr_dict
, "region" );
812 /* Region isn't set or is changing */
813 if( psz_regionid
|| p_region
== NULL
)
814 p_region
= GetTTMLRegion( p_ctx
, psz_regionid
);
816 /* awkward paragraph handling */
817 if( !tt_node_NameCompare( p_node
->psz_node_name
, "p" ) &&
818 p_region
->updt
.p_segments
)
820 AppendLineBreakToRegion( p_region
);
823 /* Styles from <set> element */
824 ttml_style_t
*p_set_styles
= (p_upper_set_styles
)
825 ? ttml_style_Duplicate( p_upper_set_styles
)
828 for( const tt_basenode_t
*p_child
= p_node
->p_child
;
829 p_child
; p_child
= p_child
->p_next
)
831 if( p_child
->i_type
== TT_NODE_TYPE_TEXT
)
833 AppendTextToRegion( p_ctx
, (const tt_textnode_t
*) p_child
,
834 p_set_styles
, p_region
);
836 else if( !tt_node_NameCompare( ((const tt_node_t
*)p_child
)->psz_node_name
, "set" ) )
838 const tt_node_t
*p_set
= (const tt_node_t
*)p_child
;
839 if( !tt_time_Valid( &playbacktime
) ||
840 tt_timings_Contains( &p_set
->timings
, &playbacktime
) )
842 if( p_set_styles
!= NULL
|| (p_set_styles
= ttml_style_New()) )
844 /* Merge with or create a local set of styles to apply to following childs */
845 DictToTTMLStyle( p_ctx
, &p_set
->attr_dict
, p_set_styles
);
849 else if( !tt_node_NameCompare( ((const tt_node_t
*)p_child
)->psz_node_name
, "br" ) )
851 AppendLineBreakToRegion( p_region
);
855 ConvertNodesToRegionContent( p_ctx
, (const tt_node_t
*) p_child
,
856 p_region
, p_set_styles
, playbacktime
);
861 ttml_style_Delete( p_set_styles
);
864 static tt_node_t
*ParseTTML( decoder_t
*p_dec
, const uint8_t *p_buffer
, size_t i_buffer
)
867 xml_reader_t
* p_xml_reader
;
869 p_sub
= vlc_stream_MemoryNew( p_dec
, (uint8_t*) p_buffer
, i_buffer
, true );
870 if( unlikely( p_sub
== NULL
) )
873 p_xml_reader
= xml_ReaderCreate( p_dec
, p_sub
);
874 if( unlikely( p_xml_reader
== NULL
) )
876 vlc_stream_Delete( p_sub
);
880 tt_node_t
*p_rootnode
= NULL
;
881 if( ParseTTMLChunk( p_xml_reader
, &p_rootnode
) != VLC_SUCCESS
)
884 tt_node_RecursiveDelete( p_rootnode
);
888 xml_ReaderDelete( p_xml_reader
);
889 vlc_stream_Delete( p_sub
);
894 static void InitTTMLContext( tt_node_t
*p_rootnode
, ttml_context_t
*p_ctx
)
896 p_ctx
->p_rootnode
= p_rootnode
;
897 /* set defaults required for size/cells computation */
898 p_ctx
->root_extent_h
.i_value
= 100;
899 p_ctx
->root_extent_h
.unit
= TTML_UNIT_PERCENT
;
900 p_ctx
->root_extent_v
.i_value
= 100;
901 p_ctx
->root_extent_v
.unit
= TTML_UNIT_PERCENT
;
902 p_ctx
->i_cell_resolution_v
= TTML_DEFAULT_CELL_RESOLUTION_V
;
903 /* and override them */
904 const char *value
= vlc_dictionary_value_for_key( &p_rootnode
->attr_dict
,
906 if( value
!= kVLCDictionaryNotFound
)
908 ReadTTMLExtent( value
, &p_ctx
->root_extent_h
,
909 &p_ctx
->root_extent_v
);
911 value
= vlc_dictionary_value_for_key( &p_rootnode
->attr_dict
,
912 "ttp:cellResolution" );
913 if( value
!= kVLCDictionaryNotFound
)
916 if( sscanf( value
, "%u %u", &w
, &h
) == 2 && w
&& h
)
917 p_ctx
->i_cell_resolution_v
= h
;
921 static ttml_region_t
*GenerateRegions( tt_node_t
*p_rootnode
, tt_time_t playbacktime
)
923 ttml_region_t
* p_regions
= NULL
;
924 ttml_region_t
** pp_region_last
= &p_regions
;
926 if( !tt_node_NameCompare( p_rootnode
->psz_node_name
, "tt" ) )
928 const tt_node_t
*p_bodynode
= FindNode( p_rootnode
, "body", 1, NULL
);
931 ttml_context_t context
;
932 InitTTMLContext( p_rootnode
, &context
);
933 context
.p_rootnode
= p_rootnode
;
935 vlc_dictionary_init( &context
.regions
, 1 );
936 ConvertNodesToRegionContent( &context
, p_bodynode
, NULL
, NULL
, playbacktime
);
938 for( int i
= 0; i
< context
.regions
.i_size
; ++i
)
940 for ( const vlc_dictionary_entry_t
* p_entry
= context
.regions
.p_entries
[i
];
941 p_entry
!= NULL
; p_entry
= p_entry
->p_next
)
943 *pp_region_last
= (ttml_region_t
*) p_entry
->p_value
;
944 pp_region_last
= (ttml_region_t
**) &(*pp_region_last
)->updt
.p_next
;
948 vlc_dictionary_clear( &context
.regions
, NULL
, NULL
);
951 else if ( !tt_node_NameCompare( p_rootnode
->psz_node_name
, "div" ) ||
952 !tt_node_NameCompare( p_rootnode
->psz_node_name
, "p" ) )
960 static int ParseBlock( decoder_t
*p_dec
, const block_t
*p_block
)
962 tt_time_t
*p_timings_array
= NULL
;
963 size_t i_timings_count
= 0;
965 /* We Only support absolute timings */
966 tt_timings_t temporal_extent
;
967 temporal_extent
.i_type
= TT_TIMINGS_PARALLEL
;
968 tt_time_Init( &temporal_extent
.begin
);
969 tt_time_Init( &temporal_extent
.end
);
970 tt_time_Init( &temporal_extent
.dur
);
971 temporal_extent
.begin
.base
= 0;
973 if( p_block
->i_flags
& BLOCK_FLAG_CORRUPTED
)
974 return VLCDEC_SUCCESS
;
976 /* We cannot display a subpicture with no date */
977 if( p_block
->i_pts
<= VLC_TS_INVALID
)
979 msg_Warn( p_dec
, "subtitle without a date" );
980 return VLCDEC_SUCCESS
;
983 tt_node_t
*p_rootnode
= ParseTTML( p_dec
, p_block
->p_buffer
, p_block
->i_buffer
);
985 return VLCDEC_SUCCESS
;
987 tt_timings_Resolve( (tt_basenode_t
*) p_rootnode
, &temporal_extent
,
988 &p_timings_array
, &i_timings_count
);
991 for( size_t i
=0; i
<i_timings_count
; i
++ )
992 printf("%ld ", tt_time_Convert( &p_timings_array
[i
] ) );
996 for( size_t i
=0; i
+1 < i_timings_count
; i
++ )
998 /* We Only support absolute timings (2) */
999 if( tt_time_Convert( &p_timings_array
[i
] ) + VLC_TS_0
< p_block
->i_dts
)
1002 if( tt_time_Convert( &p_timings_array
[i
] ) + VLC_TS_0
> p_block
->i_dts
+ p_block
->i_length
)
1005 subpicture_t
*p_spu
= NULL
;
1006 ttml_region_t
*p_regions
= GenerateRegions( p_rootnode
, p_timings_array
[i
] );
1007 if( p_regions
&& ( p_spu
= decoder_NewSubpictureText( p_dec
) ) )
1009 p_spu
->i_start
= VLC_TS_0
+ tt_time_Convert( &p_timings_array
[i
] );
1010 p_spu
->i_stop
= VLC_TS_0
+ tt_time_Convert( &p_timings_array
[i
+1] ) - 1;
1011 p_spu
->b_ephemer
= true;
1012 p_spu
->b_absolute
= false;
1014 subpicture_updater_sys_t
*p_spu_sys
= p_spu
->updater
.p_sys
;
1015 subpicture_updater_sys_region_t
*p_updtregion
= NULL
;
1017 /* Create region update info from each ttml region */
1018 for( ttml_region_t
*p_region
= p_regions
;
1019 p_region
; p_region
= (ttml_region_t
*) p_region
->updt
.p_next
)
1021 if( p_updtregion
== NULL
)
1023 p_updtregion
= &p_spu_sys
->region
;
1027 p_updtregion
= SubpictureUpdaterSysRegionNew();
1028 if( p_updtregion
== NULL
)
1030 SubpictureUpdaterSysRegionAdd( &p_spu_sys
->region
, p_updtregion
);
1033 /* broken legacy align var (can't handle center...) */
1034 if( p_dec
->p_sys
->i_align
& SUBPICTURE_ALIGN_MASK
)
1036 p_spu_sys
->region
.align
= p_dec
->p_sys
->i_align
& (SUBPICTURE_ALIGN_BOTTOM
|SUBPICTURE_ALIGN_TOP
);
1037 p_spu_sys
->region
.inner_align
= p_dec
->p_sys
->i_align
& (SUBPICTURE_ALIGN_LEFT
|SUBPICTURE_ALIGN_RIGHT
);
1040 /* copy and take ownership of pointeds */
1041 *p_updtregion
= p_region
->updt
;
1042 p_updtregion
->p_next
= NULL
;
1043 p_region
->updt
.p_region_style
= NULL
;
1044 p_region
->updt
.p_segments
= NULL
;
1052 ttml_region_t
*p_nextregion
= (ttml_region_t
*) p_regions
->updt
.p_next
;
1053 ttml_region_Delete( p_regions
);
1054 p_regions
= p_nextregion
;
1058 decoder_QueueSub( p_dec
, p_spu
);
1061 tt_node_RecursiveDelete( p_rootnode
);
1063 free( p_timings_array
);
1065 return VLCDEC_SUCCESS
;
1070 /****************************************************************************
1071 * DecodeBlock: the whole thing
1072 ****************************************************************************/
1073 static int DecodeBlock( decoder_t
*p_dec
, block_t
*p_block
)
1075 if( p_block
== NULL
) /* No Drain */
1076 return VLCDEC_SUCCESS
;
1078 int ret
= ParseBlock( p_dec
, p_block
);
1080 if( p_block
->i_buffer
)
1082 p_block
->p_buffer
[p_block
->i_buffer
- 1] = 0;
1083 msg_Dbg(p_dec
,"time %ld %s", p_block
->i_dts
, p_block
->p_buffer
);
1086 block_Release( p_block
);
1090 /*****************************************************************************
1091 * OpenDecoder: probe the decoder and return score
1092 *****************************************************************************/
1093 int OpenDecoder( vlc_object_t
*p_this
)
1095 decoder_t
*p_dec
= (decoder_t
*)p_this
;
1096 decoder_sys_t
*p_sys
;
1098 if( p_dec
->fmt_in
.i_codec
!= VLC_CODEC_TTML
)
1099 return VLC_EGENERIC
;
1101 /* Allocate the memory needed to store the decoder's structure */
1102 p_dec
->p_sys
= p_sys
= calloc( 1, sizeof( *p_sys
) );
1103 if( unlikely( p_sys
== NULL
) )
1106 p_dec
->pf_decode
= DecodeBlock
;
1107 p_sys
->i_align
= var_InheritInteger( p_dec
, "ttml-align" );
1112 /*****************************************************************************
1113 * CloseDecoder: clean up the decoder
1114 *****************************************************************************/
1115 void CloseDecoder( vlc_object_t
*p_this
)
1117 decoder_t
*p_dec
= (decoder_t
*)p_this
;
1118 decoder_sys_t
*p_sys
= p_dec
->p_sys
;