/*****************************************************************************
 * substtml.c : TTML subtitles decoder
 *****************************************************************************
 * Copyright (C) 2015 VLC authors and VideoLAN
 *
 * Authors: Hugo Beauzée-Luyssen <hugo@beauzee.fr>
 *          Sushma Reddy <sushma.reddy@research.iiit.ac.in>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/
#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_modules.h>
#include <vlc_codec.h>
#include <vlc_stream.h>
#include <vlc_text_style.h>
#include <vlc_charset.h>

#include <ctype.h>
/* Short label and long description of the "ttml-align" module option. */
#define ALIGN_TEXT N_("Subtitle justification")
#define ALIGN_LONGTEXT N_("Set the justification of subtitles")
44 /*****************************************************************************
46 *****************************************************************************/
47 static int OpenDecoder ( vlc_object_t
* );
48 static void CloseDecoder ( vlc_object_t
* );
50 static text_segment_t
*ParseTTMLSubtitles( decoder_t
*, subpicture_updater_sys_t
*, char * );
53 set_capability( "decoder", 10 )
54 set_shortname( N_("TTML decoder"))
55 set_description( N_("TTML subtitles decoder") )
56 set_callbacks( OpenDecoder
, CloseDecoder
)
57 set_category( CAT_INPUT
)
58 set_subcategory( SUBCAT_INPUT_SCODEC
)
59 add_integer( "ttml-align", 0, ALIGN_TEXT
, ALIGN_LONGTEXT
, false )
62 /*****************************************************************************
64 *****************************************************************************/
69 text_style_t
* font_style
;
73 int i_margin_percent_h
;
74 int i_margin_percent_v
;
82 ttml_style_t
** pp_styles
;
/*
 * Text direction values stored in ttml_style_t.i_direction.
 * The combined value is used by ParseTTMLSubtitles() as an index into its
 * table of Unicode directional formatting characters.
 */
enum
{
    UNICODE_BIDI_LTR = 0,
    UNICODE_BIDI_RTL = 1,
    UNICODE_BIDI_EMBEDDED = 2,
    UNICODE_BIDI_OVERRIDE = 4,
};
/**
 * Compares an XML tag name with an expected name, ignoring case and an
 * optional leading "tt:" prefix on the tag name.
 *
 * \param tagname tag name as found in the document
 * \param needle  expected name, without prefix
 * \return 0 when the names match, otherwise the strcasecmp() result
 */
static int tagnamecmp( char const* tagname, char const* needle )
{
    /* Skip the "tt:" prefix so that "tt:p" and "p" compare equal. */
    if( !strncasecmp( "tt:", tagname, 3 ) )
        tagname += 3;

    return strcasecmp( tagname, needle );
}
102 static void MergeTTMLStyle( ttml_style_t
*p_dst
, const ttml_style_t
*p_src
)
104 text_style_Merge( p_dst
->font_style
, p_src
->font_style
, false );
105 if( !( p_dst
->i_align
& SUBPICTURE_ALIGN_MASK
) )
106 p_dst
->i_align
|= p_src
->i_align
;
108 if( !p_dst
->i_margin_h
)
109 p_dst
->i_margin_h
= p_src
->i_margin_h
;
111 if( !p_dst
->i_margin_v
)
112 p_dst
->i_margin_v
= p_src
->i_margin_v
;
114 if( !p_dst
->i_margin_percent_h
)
115 p_dst
->i_margin_percent_h
= p_src
->i_margin_percent_h
;
117 if( !p_dst
->i_margin_percent_v
)
118 p_dst
->i_margin_percent_v
= p_src
->i_margin_percent_v
;
120 if( !p_dst
->b_direction_set
)
122 p_dst
->i_direction
= p_src
->i_direction
;
123 p_dst
->b_direction_set
= p_src
->b_direction_set
;
127 static ttml_style_t
* DuplicateStyle( ttml_style_t
* p_style_src
)
129 ttml_style_t
* p_style
= calloc( 1, sizeof( *p_style
) );
130 if( unlikely( p_style
== NULL
) )
133 *p_style
= *p_style_src
;
134 p_style
->psz_styleid
= strdup( p_style_src
->psz_styleid
);
135 if( unlikely( p_style
->psz_styleid
== NULL
) )
141 p_style
->font_style
= text_style_Duplicate( p_style_src
->font_style
);
142 if( unlikely( p_style
->font_style
== NULL
) )
144 free( p_style
->psz_styleid
);
151 static void CleanupStyle( ttml_style_t
* p_ttml_style
)
153 text_style_Delete( p_ttml_style
->font_style
);
154 free( p_ttml_style
->psz_styleid
);
155 free( p_ttml_style
);
158 static ttml_style_t
*FindTextStyle( decoder_t
*p_dec
, const char *psz_style
)
160 decoder_sys_t
*p_sys
= p_dec
->p_sys
;
162 for( size_t i
= 0; i
< p_sys
->i_styles
; i
++ )
164 if( !strcmp( p_sys
->pp_styles
[i
]->psz_styleid
, psz_style
) )
165 return DuplicateStyle( p_sys
->pp_styles
[i
] );
171 typedef struct style_stack_t
173 ttml_style_t
* p_style
;
174 struct style_stack_t
* p_next
;
177 static bool PushStyle( style_stack_t
**pp_stack
, ttml_style_t
* p_style
)
179 style_stack_t
* p_entry
= malloc( sizeof( *p_entry
) );
180 if( unlikely( p_entry
== NULL
) )
182 p_entry
->p_style
= p_style
;
183 p_entry
->p_next
= *pp_stack
;
188 static void PopStyle( style_stack_t
** pp_stack
)
190 if( *pp_stack
== NULL
)
192 style_stack_t
* p_next
= (*pp_stack
)->p_next
;
193 CleanupStyle( (*pp_stack
)->p_style
);
198 static void ClearStack( style_stack_t
* p_stack
)
200 while( p_stack
!= NULL
)
202 style_stack_t
* p_next
= p_stack
->p_next
;
203 CleanupStyle( p_stack
->p_style
);
209 static text_style_t
* CurrentStyle( style_stack_t
* p_stack
)
211 if( p_stack
== NULL
)
212 return text_style_Create( STYLE_NO_DEFAULTS
);
214 return text_style_Duplicate( p_stack
->p_style
->font_style
);
217 static ttml_style_t
* ParseTTMLStyle( decoder_t
*p_dec
, xml_reader_t
* p_reader
, const char* psz_node_name
)
219 decoder_sys_t
* p_sys
= p_dec
->p_sys
;
220 ttml_style_t
*p_ttml_style
= NULL
;
221 ttml_style_t
*p_base_style
= NULL
;
223 p_ttml_style
= calloc( 1, sizeof( ttml_style_t
) );
224 if( unlikely( !p_ttml_style
) )
227 p_ttml_style
->font_style
= text_style_Create( STYLE_NO_DEFAULTS
);
228 if( unlikely( !p_ttml_style
->font_style
) )
230 free( p_ttml_style
);
234 const char *attr
, *val
;
236 while( (attr
= xml_ReaderNextAttr( p_reader
, &val
) ) )
238 /* searching previous styles for inheritence */
239 if( !strcasecmp( attr
, "style" ) || !strcasecmp( attr
, "region" ) )
241 if( !tagnamecmp( psz_node_name
, "style" ) || !tagnamecmp( psz_node_name
, "region" ) )
243 for( size_t i
= 0; i
< p_sys
->i_styles
; i
++ )
245 if( !strcasecmp( p_sys
->pp_styles
[i
]->psz_styleid
, val
) )
247 p_base_style
= p_sys
->pp_styles
[i
];
253 * In p nodes, style attribute has this format :
254 * style="style1 style2 style3" where style1 and style2 are
255 * style applied on the parents of p in that order.
257 * In span node, we can apply several styles in the same order than
258 * in p nodes with the same inheritance order.
260 * In order to preserve this style predominance, we merge the styles
261 * in the from right to left ( the right one being predominant ) .
263 else if( !tagnamecmp( psz_node_name
, "p" ) || !tagnamecmp( psz_node_name
, "span" ) )
266 char *value
= strdup( val
);
267 if( unlikely( value
== NULL
) )
269 CleanupStyle( p_ttml_style
);
273 char *token
= strtok_r( value
, " ", &tmp
);
277 msg_Warn( p_dec
, "No IDREF specified in attribute "
278 "'%s' on tag '%s', ignoring.", attr
,
284 ttml_style_t
* p_style
= FindTextStyle( p_dec
, token
);
285 if( p_style
== NULL
)
287 msg_Warn( p_dec
, "IDREF '%s' in '%s' not found", token
, attr
);
292 while( ( token
= strtok_r( NULL
, " ", &tmp
) ) != NULL
)
294 ttml_style_t
* p_next_style
= FindTextStyle( p_dec
, token
);
295 if( p_next_style
== NULL
)
297 msg_Warn( p_dec
, "IDREF '%s' in '%s' not found", token
, attr
);
300 MergeTTMLStyle( p_next_style
, p_style
);
301 CleanupStyle( p_style
);
302 p_style
= p_next_style
;
304 MergeTTMLStyle( p_style
, p_ttml_style
);
306 CleanupStyle( p_ttml_style
);
307 p_ttml_style
= p_style
;
311 ttml_style_t
* p_style
= FindTextStyle( p_dec
, val
);
312 if( p_style
== NULL
)
314 msg_Warn( p_dec
, "IDREF '%s' in '%s' not found", val
, attr
);
317 MergeTTMLStyle( p_style
, p_ttml_style
);
318 CleanupStyle( p_ttml_style
);
319 p_ttml_style
= p_style
;
322 else if( !strcasecmp( "xml:id", attr
) )
324 free( p_ttml_style
->psz_styleid
);
325 p_ttml_style
->psz_styleid
= strdup( val
);
327 else if( !strcasecmp ( "tts:fontFamily", attr
) )
329 free( p_ttml_style
->font_style
->psz_fontname
);
330 p_ttml_style
->font_style
->psz_fontname
= strdup( val
);
331 if( unlikely( p_ttml_style
->font_style
->psz_fontname
== NULL
) )
333 CleanupStyle( p_ttml_style
);
337 else if( !strcasecmp( "tts:opacity", attr
) )
339 p_ttml_style
->font_style
->i_background_alpha
= atoi( val
);
340 p_ttml_style
->font_style
->i_font_alpha
= atoi( val
);
341 p_ttml_style
->font_style
->i_features
|= STYLE_HAS_BACKGROUND_ALPHA
| STYLE_HAS_FONT_ALPHA
;
343 else if( !strcasecmp( "tts:fontSize", attr
) )
345 char* psz_end
= NULL
;
346 float size
= us_strtof( val
, &psz_end
);
347 if( *psz_end
== '%' )
348 p_ttml_style
->font_style
->f_font_relsize
= size
;
350 p_ttml_style
->font_style
->i_font_size
= (int)( size
+ 0.5 );
352 else if( !strcasecmp( "tts:color", attr
) )
354 unsigned int i_color
= vlc_html_color( val
, NULL
);
355 p_ttml_style
->font_style
->i_font_color
= (i_color
& 0xffffff);
356 p_ttml_style
->font_style
->i_font_alpha
= (i_color
& 0xFF000000) >> 24;
357 p_ttml_style
->font_style
->i_features
|= STYLE_HAS_FONT_COLOR
| STYLE_HAS_FONT_ALPHA
;
359 else if( !strcasecmp( "tts:backgroundColor", attr
) )
361 unsigned int i_color
= vlc_html_color( val
, NULL
);
362 p_ttml_style
->font_style
->i_background_color
= i_color
& 0xFFFFFF;
363 p_ttml_style
->font_style
->i_background_alpha
= (i_color
& 0xFF000000) >> 24;
364 p_ttml_style
->font_style
->i_features
|= STYLE_HAS_BACKGROUND_COLOR
365 | STYLE_HAS_BACKGROUND_ALPHA
;
366 p_ttml_style
->font_style
->i_style_flags
|= STYLE_BACKGROUND
;
368 else if( !strcasecmp( "tts:textAlign", attr
) )
370 if( !strcasecmp ( "left", val
) )
371 p_ttml_style
->i_align
= SUBPICTURE_ALIGN_TOP
| SUBPICTURE_ALIGN_LEFT
;
372 else if( !strcasecmp ( "right", val
) )
373 p_ttml_style
->i_align
= SUBPICTURE_ALIGN_TOP
| SUBPICTURE_ALIGN_RIGHT
;
374 else if( !strcasecmp ( "center", val
) )
375 p_ttml_style
->i_align
= SUBPICTURE_ALIGN_BOTTOM
;
376 else if( !strcasecmp ( "start", val
) )
377 p_ttml_style
->i_align
= SUBPICTURE_ALIGN_BOTTOM
| SUBPICTURE_ALIGN_LEFT
;
378 else if( !strcasecmp ( "end", val
) )
379 p_ttml_style
->i_align
= SUBPICTURE_ALIGN_BOTTOM
| SUBPICTURE_ALIGN_RIGHT
;
381 else if( !strcasecmp( "tts:fontStyle", attr
) )
383 if( !strcasecmp ( "italic", val
) || !strcasecmp ( "oblique", val
) )
384 p_ttml_style
->font_style
->i_style_flags
|= STYLE_ITALIC
;
386 p_ttml_style
->font_style
->i_style_flags
&= ~STYLE_ITALIC
;
387 p_ttml_style
->font_style
->i_features
|= STYLE_HAS_FLAGS
;
389 else if( !strcasecmp ( "tts:fontWeight", attr
) )
391 if( !strcasecmp ( "bold", val
) )
392 p_ttml_style
->font_style
->i_style_flags
|= STYLE_BOLD
;
394 p_ttml_style
->font_style
->i_style_flags
&= ~STYLE_BOLD
;
395 p_ttml_style
->font_style
->i_features
|= STYLE_HAS_FLAGS
;
397 else if( !strcasecmp ( "tts:textDecoration", attr
) )
399 if( !strcasecmp ( "underline", val
) )
400 p_ttml_style
->font_style
->i_style_flags
|= STYLE_UNDERLINE
;
401 else if( !strcasecmp ( "noUnderline", val
) )
402 p_ttml_style
->font_style
->i_style_flags
&= ~STYLE_UNDERLINE
;
403 if( !strcasecmp ( "lineThrough", val
) )
404 p_ttml_style
->font_style
->i_style_flags
|= STYLE_STRIKEOUT
;
405 else if( !strcasecmp ( "noLineThrough", val
) )
406 p_ttml_style
->font_style
->i_style_flags
&= ~STYLE_STRIKEOUT
;
407 p_ttml_style
->font_style
->i_features
|= STYLE_HAS_FLAGS
;
409 else if( !strcasecmp ( "tts:origin", attr
) )
411 const char *psz_token
= val
;
412 while( isspace( *psz_token
) )
415 const char *psz_separator
= strchr( psz_token
, ' ' );
416 if( psz_separator
== NULL
)
418 msg_Warn( p_dec
, "Invalid origin attribute: \"%s\"", val
);
421 const char *psz_percent_sign
= strchr( psz_token
, '%' );
423 if( psz_percent_sign
!= NULL
&& psz_percent_sign
< psz_separator
)
425 p_ttml_style
->i_margin_h
= 0;
426 p_ttml_style
->i_margin_percent_h
= atoi( psz_token
);
430 p_ttml_style
->i_margin_h
= atoi( psz_token
);
431 p_ttml_style
->i_margin_percent_h
= 0;
433 while( isspace( *psz_separator
) )
435 psz_token
= psz_separator
;
436 psz_percent_sign
= strchr( psz_token
, '%' );
437 if( psz_percent_sign
!= NULL
)
439 p_ttml_style
->i_margin_v
= 0;
440 p_ttml_style
->i_margin_percent_v
= atoi( val
);
444 p_ttml_style
->i_margin_v
= atoi( val
);
445 p_ttml_style
->i_margin_percent_v
= 0;
448 else if( !strcasecmp( "tts:textOutline", attr
) )
450 char *value
= strdup( val
);
451 char* psz_saveptr
= NULL
;
452 char* token
= strtok_r( value
, " ", &psz_saveptr
);
453 // <color>? <length> <length>?
455 unsigned int color
= vlc_html_color( token
, &b_ok
);
458 p_ttml_style
->font_style
->i_outline_color
= color
& 0xFFFFFF;
459 p_ttml_style
->font_style
->i_outline_alpha
= (color
& 0xFF000000) >> 24;
460 token
= strtok_r( NULL
, " ", &psz_saveptr
);
462 char* psz_end
= NULL
;
463 int i_outline_width
= strtol( token
, &psz_end
, 10 );
464 if( psz_end
!= token
)
466 // Assume unit is pixel, and ignore border radius
467 p_ttml_style
->font_style
->i_outline_width
= i_outline_width
;
471 else if( !strcasecmp( "tts:direction", attr
) )
473 if( !strcasecmp( "rtl", val
) )
475 p_ttml_style
->i_direction
|= UNICODE_BIDI_RTL
;
476 p_ttml_style
->b_direction_set
= true;
478 else if( !strcasecmp( "ltr", val
) )
480 p_ttml_style
->i_direction
|= UNICODE_BIDI_LTR
;
481 p_ttml_style
->b_direction_set
= true;
484 else if( !strcasecmp( "tts:unicodeBidi", attr
) )
486 if( !strcasecmp( "bidiOverride", val
) )
487 p_ttml_style
->i_direction
|= UNICODE_BIDI_OVERRIDE
& ~UNICODE_BIDI_EMBEDDED
;
488 else if( !strcasecmp( "embed", val
) )
489 p_ttml_style
->i_direction
|= UNICODE_BIDI_EMBEDDED
& ~UNICODE_BIDI_OVERRIDE
;
491 else if( !strcasecmp( "tts:writingMode", attr
) )
493 if( !strcasecmp( "rl", val
) || !strcasecmp( "rltb", val
) )
495 p_ttml_style
->i_direction
= UNICODE_BIDI_RTL
| UNICODE_BIDI_OVERRIDE
;
496 p_ttml_style
->i_align
= SUBPICTURE_ALIGN_BOTTOM
| SUBPICTURE_ALIGN_RIGHT
;
497 p_ttml_style
->b_direction_set
= true;
499 else if( !strcasecmp( "lr", val
) || !strcasecmp( "lrtb", val
) )
501 p_ttml_style
->i_direction
= UNICODE_BIDI_LTR
| UNICODE_BIDI_OVERRIDE
;
502 p_ttml_style
->i_align
= SUBPICTURE_ALIGN_BOTTOM
| SUBPICTURE_ALIGN_LEFT
;
503 p_ttml_style
->b_direction_set
= true;
507 if( p_base_style
!= NULL
)
509 MergeTTMLStyle( p_ttml_style
, p_base_style
);
511 if( p_ttml_style
->psz_styleid
== NULL
)
513 CleanupStyle( p_ttml_style
);
519 static void ParseTTMLStyles( decoder_t
* p_dec
)
521 stream_t
* p_stream
= vlc_stream_MemoryNew( p_dec
, (uint8_t*)p_dec
->fmt_in
.p_extra
, p_dec
->fmt_in
.i_extra
, true );
522 if( unlikely( p_stream
== NULL
) )
525 xml_reader_t
* p_reader
= xml_ReaderCreate( p_dec
, p_stream
);
526 if( unlikely( p_reader
== NULL
) )
528 vlc_stream_Delete( p_stream
);
531 const char* psz_node_name
;
532 int i_type
= xml_ReaderNextNode( p_reader
, &psz_node_name
);
534 if( i_type
== XML_READER_STARTELEM
&& !tagnamecmp( psz_node_name
, "tt" ) )
536 int i_type
= xml_ReaderNextNode( p_reader
, &psz_node_name
);
538 while( i_type
!= XML_READER_STARTELEM
|| tagnamecmp( psz_node_name
, "head" ) )
539 i_type
= xml_ReaderNextNode( p_reader
, &psz_node_name
);
543 /* region and style tag are respectively inside layout and styling tags */
544 if( !tagnamecmp( psz_node_name
, "styling" ) || !tagnamecmp( psz_node_name
, "layout" ) )
546 i_type
= xml_ReaderNextNode( p_reader
, &psz_node_name
);
547 while( i_type
!= XML_READER_ENDELEM
)
549 ttml_style_t
* p_ttml_style
= ParseTTMLStyle( p_dec
, p_reader
, psz_node_name
);
550 if ( p_ttml_style
== NULL
)
552 xml_ReaderDelete( p_reader
);
553 vlc_stream_Delete( p_stream
);
556 decoder_sys_t
* p_sys
= p_dec
->p_sys
;
557 TAB_APPEND( p_sys
->i_styles
, p_sys
->pp_styles
, p_ttml_style
);
558 i_type
= xml_ReaderNextNode( p_reader
, &psz_node_name
);
561 i_type
= xml_ReaderNextNode( p_reader
, &psz_node_name
);
562 }while( i_type
!= XML_READER_ENDELEM
|| tagnamecmp( psz_node_name
, "head" ) );
564 xml_ReaderDelete( p_reader
);
565 vlc_stream_Delete( p_stream
);
568 static text_segment_t
*ParseTTMLSubtitles( decoder_t
*p_dec
, subpicture_updater_sys_t
*p_update_sys
, char *psz_subtitle
)
570 stream_t
* p_sub
= NULL
;
571 xml_reader_t
* p_xml_reader
= NULL
;
572 text_segment_t
* p_first_segment
= NULL
;
573 text_segment_t
* p_current_segment
= NULL
;
574 style_stack_t
* p_style_stack
= NULL
;
575 ttml_style_t
* p_style
= NULL
;
577 p_sub
= vlc_stream_MemoryNew( p_dec
, (uint8_t*)psz_subtitle
, strlen( psz_subtitle
), true );
578 if( unlikely( p_sub
== NULL
) )
581 p_xml_reader
= xml_ReaderCreate( p_dec
, p_sub
);
582 if( unlikely( p_xml_reader
== NULL
) )
584 vlc_stream_Delete( p_sub
);
591 i_type
= xml_ReaderNextNode( p_xml_reader
, &node
);
592 while( i_type
!= XML_READER_NONE
&& i_type
> 0 )
595 * We parse the styles and put them on the style stack
596 * until we reach a text node.
598 if( i_type
== XML_READER_STARTELEM
&& ( !tagnamecmp( node
, "p") || !tagnamecmp( node
, "span" ) ) )
600 p_style
= ParseTTMLStyle( p_dec
, p_xml_reader
, node
);
601 if( unlikely( p_style
== NULL
) )
604 if( p_style_stack
!= NULL
&& p_style_stack
->p_style
!= NULL
)
605 MergeTTMLStyle( p_style
, p_style_stack
->p_style
);
607 if( PushStyle( &p_style_stack
, p_style
) == false )
609 CleanupStyle( p_style
);
614 else if( i_type
== XML_READER_TEXT
)
617 * Once we have a text node, we create a segment, apply the
618 * latest style put on the style stack and fill it with the
619 * content of the node.
621 text_segment_t
* p_segment
= text_segment_New( NULL
);
622 if( unlikely( p_segment
== NULL
) )
625 p_segment
->psz_text
= strdup( node
);
626 if( unlikely( p_segment
->psz_text
== NULL
) )
628 text_segment_Delete( p_segment
);
632 vlc_xml_decode( p_segment
->psz_text
);
633 if( p_segment
->style
== NULL
&& p_style_stack
== NULL
)
635 p_segment
->style
= text_style_Create( STYLE_NO_DEFAULTS
);
637 else if( p_segment
->style
== NULL
)
639 p_segment
->style
= CurrentStyle( p_style_stack
);
640 if( p_segment
->style
->f_font_relsize
&& !p_segment
->style
->i_font_size
)
641 p_segment
->style
->i_font_size
= (int)( ( p_segment
->style
->f_font_relsize
* STYLE_DEFAULT_FONT_SIZE
/ 100 ) + 0.5 );
643 if( p_style_stack
->p_style
->i_margin_h
)
644 p_update_sys
->x
= p_style_stack
->p_style
->i_margin_h
;
646 p_update_sys
->x
= p_style_stack
->p_style
->i_margin_percent_h
;
648 if( p_style_stack
->p_style
->i_margin_v
)
649 p_update_sys
->y
= p_style_stack
->p_style
->i_margin_v
;
651 p_update_sys
->y
= p_style_stack
->p_style
->i_margin_percent_v
;
653 p_update_sys
->align
|= p_style_stack
->p_style
->i_align
;
655 * For bidirectionnal support, we use different enum
656 * to recognize different cases, en then we add the
657 * corresponding unicode character to the text of
660 int i_direction
= p_style_stack
->p_style
->i_direction
;
663 const char* psz_uni_start
;
664 const char* psz_uni_end
;
666 { "\u2066", "\u2069" },
667 { "\u2067", "\u2069" },
668 { "\u202A", "\u202C" },
669 { "\u202B", "\u202C" },
670 { "\u202D", "\u202C" },
671 { "\u202E", "\u202C" },
673 if( p_style_stack
->p_style
->b_direction_set
)
675 char* psz_text
= NULL
;
676 if( asprintf( &psz_text
, "%s%s%s", p_bidi
[i_direction
].psz_uni_start
, p_segment
->psz_text
, p_bidi
[i_direction
].psz_uni_end
) < 0 )
678 text_segment_Delete( p_segment
);
682 free( p_segment
->psz_text
);
683 p_segment
->psz_text
= psz_text
;
686 if( p_first_segment
== NULL
)
688 p_first_segment
= p_segment
;
689 p_current_segment
= p_segment
;
691 else if( p_current_segment
->psz_text
!= NULL
)
693 p_current_segment
->p_next
= p_segment
;
694 p_current_segment
= p_segment
;
699 * If p_first_segment isn't NULL but p_current_segment->psz_text is NULL
700 * this means that something went wrong in the decoding of the
701 * first segment text:
703 * Indeed, to allocate p_first_segment ( aka non NULL ), we must have
704 * - i_type == XML_READER_TEXT
705 * - passed the allocation of p_segment->psz_text without any error
707 * This would mean that vlc_xml_decode failed and p_first_segment->psz_text
710 text_segment_Delete( p_segment
);
714 else if( i_type
== XML_READER_ENDELEM
&& !tagnamecmp( node
, "span" ) )
716 if( p_style_stack
->p_next
)
717 PopStyle( &p_style_stack
);
719 else if( i_type
== XML_READER_ENDELEM
&& !tagnamecmp( node
, "p" ) )
721 PopStyle( &p_style_stack
);
722 p_current_segment
->p_next
= NULL
;
724 else if( i_type
== XML_READER_STARTELEM
&& !strcasecmp( node
, "br" ) )
726 if( p_current_segment
!= NULL
&& p_current_segment
->psz_text
!= NULL
)
728 char* psz_text
= NULL
;
729 if( asprintf( &psz_text
, "%s\n", p_current_segment
->psz_text
) != -1 )
731 free( p_current_segment
->psz_text
);
732 p_current_segment
->psz_text
= psz_text
;
736 i_type
= xml_ReaderNextNode( p_xml_reader
, &node
);
738 ClearStack( p_style_stack
);
739 xml_ReaderDelete( p_xml_reader
);
740 vlc_stream_Delete( p_sub
);
742 return p_first_segment
;
745 text_segment_ChainDelete( p_first_segment
);
746 ClearStack( p_style_stack
);
747 xml_ReaderDelete( p_xml_reader
);
748 vlc_stream_Delete( p_sub
);
752 static subpicture_t
*ParseText( decoder_t
*p_dec
, block_t
*p_block
)
754 decoder_sys_t
*p_sys
= p_dec
->p_sys
;
755 subpicture_t
*p_spu
= NULL
;
756 char *psz_subtitle
= NULL
;
758 if( p_block
->i_flags
& BLOCK_FLAG_CORRUPTED
)
761 /* We cannot display a subpicture with no date */
762 if( p_block
->i_pts
<= VLC_TS_INVALID
)
764 msg_Warn( p_dec
, "subtitle without a date" );
768 /* Check validity of packet data */
769 /* An "empty" line containing only \0 can be used to force
770 and ephemer picture from the screen */
772 if( p_block
->i_buffer
< 1 )
774 msg_Warn( p_dec
, "no subtitle data" );
778 psz_subtitle
= malloc( p_block
->i_buffer
);
779 if( unlikely( psz_subtitle
== NULL
) )
781 memcpy( psz_subtitle
, p_block
->p_buffer
, p_block
->i_buffer
);
783 /* Create the subpicture unit */
784 p_spu
= decoder_NewSubpictureText( p_dec
);
787 free( psz_subtitle
);
790 p_spu
->i_start
= p_block
->i_pts
;
791 p_spu
->i_stop
= p_block
->i_pts
+ p_block
->i_length
;
792 p_spu
->b_ephemer
= (p_block
->i_length
== 0);
793 p_spu
->b_absolute
= false;
795 subpicture_updater_sys_t
*p_spu_sys
= p_spu
->updater
.p_sys
;
797 p_spu_sys
->align
= SUBPICTURE_ALIGN_BOTTOM
| p_sys
->i_align
;
798 p_spu_sys
->p_segments
= ParseTTMLSubtitles( p_dec
, p_spu_sys
, psz_subtitle
);
799 free( psz_subtitle
);
806 /****************************************************************************
807 * DecodeBlock: the whole thing
808 ****************************************************************************/
809 static subpicture_t
*DecodeBlock( decoder_t
*p_dec
, block_t
**pp_block
)
811 if( !pp_block
|| *pp_block
== NULL
)
814 block_t
* p_block
= *pp_block
;
815 subpicture_t
*p_spu
= ParseText( p_dec
, p_block
);
817 block_Release( p_block
);
823 /*****************************************************************************
824 * OpenDecoder: probe the decoder and return score
825 *****************************************************************************/
826 static int OpenDecoder( vlc_object_t
*p_this
)
828 decoder_t
*p_dec
= (decoder_t
*)p_this
;
829 decoder_sys_t
*p_sys
;
831 if( p_dec
->fmt_in
.i_codec
!= VLC_CODEC_TTML
)
834 /* Allocate the memory needed to store the decoder's structure */
835 p_dec
->p_sys
= p_sys
= calloc( 1, sizeof( *p_sys
) );
836 if( unlikely( p_sys
== NULL
) )
839 if( p_dec
->fmt_in
.p_extra
!= NULL
&& p_dec
->fmt_in
.i_extra
> 0 )
840 ParseTTMLStyles( p_dec
);
842 p_dec
->pf_decode_sub
= DecodeBlock
;
843 p_dec
->fmt_out
.i_cat
= SPU_ES
;
844 p_sys
->i_align
= var_InheritInteger( p_dec
, "ttml-align" );
849 /*****************************************************************************
850 * CloseDecoder: clean up the decoder
851 *****************************************************************************/
852 static void CloseDecoder( vlc_object_t
*p_this
)
854 decoder_t
*p_dec
= (decoder_t
*)p_this
;
855 decoder_sys_t
*p_sys
= p_dec
->p_sys
;
857 for( size_t i
= 0; i
< p_sys
->i_styles
; ++i
)
859 free( p_sys
->pp_styles
[i
]->psz_styleid
);
860 text_style_Delete( p_sys
->pp_styles
[i
]->font_style
);
861 free( p_sys
->pp_styles
[i
] );
863 TAB_CLEAN( p_sys
->i_styles
, p_sys
->pp_styles
);