codec: ass: Don't force fonts that aren't shipped anymore in the winstore app
[vlc.git] / modules / codec / substtml.c
blob9986f297210431afc8e1b52bb3a42df578e56739
1 /*****************************************************************************
2 * substtml.c : TTML subtitles decoder
3 *****************************************************************************
4 * Copyright (C) 2015 VLC authors and VideoLAN
6 * Authors: Hugo Beauzée-Luyssen <hugo@beauzee.fr>
7 * Sushma Reddy <sushma.reddy@research.iiit.ac.in>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
24 #ifdef HAVE_CONFIG_H
25 # include "config.h"
26 #endif
28 #include <vlc_common.h>
29 #include <vlc_plugin.h>
30 #include <vlc_modules.h>
31 #include <vlc_codec.h>
32 #include <vlc_xml.h>
33 #include <vlc_stream.h>
34 #include <vlc_text_style.h>
35 #include <vlc_charset.h>
37 #include "substext.h"
39 #include <ctype.h>
41 #define ALIGN_TEXT N_("Subtitle justification")
42 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
44 /*****************************************************************************
45 * Module descriptor.
46 *****************************************************************************/
47 static int OpenDecoder ( vlc_object_t * );
48 static void CloseDecoder ( vlc_object_t * );
50 static text_segment_t *ParseTTMLSubtitles( decoder_t *, subpicture_updater_sys_t *, char * );
52 vlc_module_begin ()
53 set_capability( "decoder", 10 )
54 set_shortname( N_("TTML decoder"))
55 set_description( N_("TTML subtitles decoder") )
56 set_callbacks( OpenDecoder, CloseDecoder )
57 set_category( CAT_INPUT )
58 set_subcategory( SUBCAT_INPUT_SCODEC )
59 add_integer( "ttml-align", 0, ALIGN_TEXT, ALIGN_LONGTEXT, false )
60 vlc_module_end ();
62 /*****************************************************************************
63 * Local prototypes
64 *****************************************************************************/
66 typedef struct
68 char* psz_styleid;
69 text_style_t* font_style;
70 int i_align;
71 int i_margin_h;
72 int i_margin_v;
73 int i_margin_percent_h;
74 int i_margin_percent_v;
75 int i_direction;
76 bool b_direction_set;
77 } ttml_style_t;
79 struct decoder_sys_t
81 int i_align;
82 ttml_style_t** pp_styles;
83 size_t i_styles;
/*
 * Text direction values. They are OR-ed together into
 * ttml_style_t.i_direction, and the result is used by ParseTTMLSubtitles()
 * as an index into its table of Unicode directional control characters.
 */
enum
{
    UNICODE_BIDI_LTR      = 0,
    UNICODE_BIDI_RTL      = 1,
    UNICODE_BIDI_EMBEDDED = 2,
    UNICODE_BIDI_OVERRIDE = 4,
};
/*
 * Case-insensitive comparison of an XML tag name against needle, ignoring an
 * optional leading "tt:" prefix on the tag name.
 * Returns 0 when the names match, like strcasecmp().
 */
static int tagnamecmp( char const* tagname, char const* needle )
{
    const char *psz_local = tagname;

    if( strncasecmp( "tt:", psz_local, 3 ) == 0 )
        psz_local += 3;

    return strcasecmp( psz_local, needle );
}
102 static void MergeTTMLStyle( ttml_style_t *p_dst, const ttml_style_t *p_src)
104 text_style_Merge( p_dst->font_style, p_src->font_style, false );
105 if( !( p_dst->i_align & SUBPICTURE_ALIGN_MASK ) )
106 p_dst->i_align |= p_src->i_align;
108 if( !p_dst->i_margin_h )
109 p_dst->i_margin_h = p_src->i_margin_h;
111 if( !p_dst->i_margin_v )
112 p_dst->i_margin_v = p_src->i_margin_v;
114 if( !p_dst->i_margin_percent_h )
115 p_dst->i_margin_percent_h = p_src->i_margin_percent_h;
117 if( !p_dst->i_margin_percent_v )
118 p_dst->i_margin_percent_v = p_src->i_margin_percent_v;
120 if( !p_dst->b_direction_set )
122 p_dst->i_direction = p_src->i_direction;
123 p_dst->b_direction_set = p_src->b_direction_set;
127 static ttml_style_t* DuplicateStyle( ttml_style_t* p_style_src )
129 ttml_style_t* p_style = calloc( 1, sizeof( *p_style ) );
130 if( unlikely( p_style == NULL ) )
131 return NULL;
133 *p_style = *p_style_src;
134 p_style->psz_styleid = strdup( p_style_src->psz_styleid );
135 if( unlikely( p_style->psz_styleid == NULL ) )
137 free( p_style );
138 return NULL;
141 p_style->font_style = text_style_Duplicate( p_style_src->font_style );
142 if( unlikely( p_style->font_style == NULL ) )
144 free( p_style->psz_styleid );
145 free( p_style );
146 return NULL;
148 return p_style;
151 static void CleanupStyle( ttml_style_t* p_ttml_style )
153 text_style_Delete( p_ttml_style->font_style );
154 free( p_ttml_style->psz_styleid );
155 free( p_ttml_style );
158 static ttml_style_t *FindTextStyle( decoder_t *p_dec, const char *psz_style )
160 decoder_sys_t *p_sys = p_dec->p_sys;
162 for( size_t i = 0; i < p_sys->i_styles; i++ )
164 if( !strcmp( p_sys->pp_styles[i]->psz_styleid, psz_style ) )
165 return DuplicateStyle( p_sys->pp_styles[i] );
168 return NULL;
171 typedef struct style_stack_t
173 ttml_style_t* p_style;
174 struct style_stack_t* p_next;
175 } style_stack_t ;
177 static bool PushStyle( style_stack_t **pp_stack, ttml_style_t* p_style )
179 style_stack_t* p_entry = malloc( sizeof( *p_entry ) );
180 if( unlikely( p_entry == NULL ) )
181 return false;
182 p_entry->p_style = p_style;
183 p_entry->p_next = *pp_stack;
184 *pp_stack = p_entry;
185 return true;
188 static void PopStyle( style_stack_t** pp_stack )
190 if( *pp_stack == NULL )
191 return;
192 style_stack_t* p_next = (*pp_stack)->p_next;
193 CleanupStyle( (*pp_stack)->p_style );
194 free( *pp_stack );
195 *pp_stack = p_next;
198 static void ClearStack( style_stack_t* p_stack )
200 while( p_stack != NULL )
202 style_stack_t* p_next = p_stack->p_next;
203 CleanupStyle( p_stack->p_style );
204 free( p_stack );
205 p_stack = p_next;
209 static text_style_t* CurrentStyle( style_stack_t* p_stack )
211 if( p_stack == NULL )
212 return text_style_Create( STYLE_NO_DEFAULTS );
214 return text_style_Duplicate( p_stack->p_style->font_style );
217 static ttml_style_t* ParseTTMLStyle( decoder_t *p_dec, xml_reader_t* p_reader, const char* psz_node_name )
219 decoder_sys_t* p_sys = p_dec->p_sys;
220 ttml_style_t *p_ttml_style = NULL;
221 ttml_style_t *p_base_style = NULL;
223 p_ttml_style = calloc( 1, sizeof( ttml_style_t ) );
224 if( unlikely( !p_ttml_style ) )
225 return NULL;
227 p_ttml_style->font_style = text_style_Create( STYLE_NO_DEFAULTS );
228 if( unlikely( !p_ttml_style->font_style ) )
230 free( p_ttml_style );
231 return NULL;
234 const char *attr, *val;
236 while( (attr = xml_ReaderNextAttr( p_reader, &val ) ) )
238 /* searching previous styles for inheritence */
239 if( !strcasecmp( attr, "style" ) || !strcasecmp( attr, "region" ) )
241 if( !tagnamecmp( psz_node_name, "style" ) || !tagnamecmp( psz_node_name, "region" ) )
243 for( size_t i = 0; i < p_sys->i_styles; i++ )
245 if( !strcasecmp( p_sys->pp_styles[i]->psz_styleid, val ) )
247 p_base_style = p_sys->pp_styles[i];
248 break;
253 * In p nodes, style attribute has this format :
254 * style="style1 style2 style3" where style1 and style2 are
255 * style applied on the parents of p in that order.
257 * In span node, we can apply several styles in the same order than
258 * in p nodes with the same inheritance order.
260 * In order to preserve this style predominance, we merge the styles
261 * in the from right to left ( the right one being predominant ) .
263 else if( !tagnamecmp( psz_node_name, "p" ) || !tagnamecmp( psz_node_name, "span" ) )
265 char *tmp;
266 char *value = strdup( val );
267 if( unlikely( value == NULL ) )
269 CleanupStyle( p_ttml_style );
270 return NULL;
273 char *token = strtok_r( value , " ", &tmp );
275 if( token == NULL )
277 msg_Warn( p_dec, "No IDREF specified in attribute "
278 "'%s' on tag '%s', ignoring.", attr,
279 psz_node_name );
280 free( value );
281 continue;
284 ttml_style_t* p_style = FindTextStyle( p_dec, token );
285 if( p_style == NULL )
287 msg_Warn( p_dec, "IDREF '%s' in '%s' not found", token, attr );
288 free( value );
289 break;
292 while( ( token = strtok_r( NULL, " ", &tmp) ) != NULL )
294 ttml_style_t* p_next_style = FindTextStyle( p_dec, token );
295 if( p_next_style == NULL )
297 msg_Warn( p_dec, "IDREF '%s' in '%s' not found", token, attr );
298 break;
300 MergeTTMLStyle( p_next_style, p_style );
301 CleanupStyle( p_style );
302 p_style = p_next_style;
304 MergeTTMLStyle( p_style, p_ttml_style );
305 free( value );
306 CleanupStyle( p_ttml_style );
307 p_ttml_style = p_style;
309 else
311 ttml_style_t* p_style = FindTextStyle( p_dec, val );
312 if( p_style == NULL )
314 msg_Warn( p_dec, "IDREF '%s' in '%s' not found", val, attr );
315 break;
317 MergeTTMLStyle( p_style , p_ttml_style );
318 CleanupStyle( p_ttml_style );
319 p_ttml_style = p_style;
322 else if( !strcasecmp( "xml:id", attr ) )
324 free( p_ttml_style->psz_styleid );
325 p_ttml_style->psz_styleid = strdup( val );
327 else if( !strcasecmp ( "tts:fontFamily", attr ) )
329 free( p_ttml_style->font_style->psz_fontname );
330 p_ttml_style->font_style->psz_fontname = strdup( val );
331 if( unlikely( p_ttml_style->font_style->psz_fontname == NULL ) )
333 CleanupStyle( p_ttml_style );
334 return NULL;
337 else if( !strcasecmp( "tts:opacity", attr ) )
339 p_ttml_style->font_style->i_background_alpha = atoi( val );
340 p_ttml_style->font_style->i_font_alpha = atoi( val );
341 p_ttml_style->font_style->i_features |= STYLE_HAS_BACKGROUND_ALPHA | STYLE_HAS_FONT_ALPHA;
343 else if( !strcasecmp( "tts:fontSize", attr ) )
345 char* psz_end = NULL;
346 float size = us_strtof( val, &psz_end );
347 if( *psz_end == '%' )
348 p_ttml_style->font_style->f_font_relsize = size;
349 else
350 p_ttml_style->font_style->i_font_size = (int)( size + 0.5 );
352 else if( !strcasecmp( "tts:color", attr ) )
354 unsigned int i_color = vlc_html_color( val, NULL );
355 p_ttml_style->font_style->i_font_color = (i_color & 0xffffff);
356 p_ttml_style->font_style->i_font_alpha = (i_color & 0xFF000000) >> 24;
357 p_ttml_style->font_style->i_features |= STYLE_HAS_FONT_COLOR | STYLE_HAS_FONT_ALPHA;
359 else if( !strcasecmp( "tts:backgroundColor", attr ) )
361 unsigned int i_color = vlc_html_color( val, NULL );
362 p_ttml_style->font_style->i_background_color = i_color & 0xFFFFFF;
363 p_ttml_style->font_style->i_background_alpha = (i_color & 0xFF000000) >> 24;
364 p_ttml_style->font_style->i_features |= STYLE_HAS_BACKGROUND_COLOR
365 | STYLE_HAS_BACKGROUND_ALPHA;
366 p_ttml_style->font_style->i_style_flags |= STYLE_BACKGROUND;
368 else if( !strcasecmp( "tts:textAlign", attr ) )
370 if( !strcasecmp ( "left", val ) )
371 p_ttml_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_LEFT;
372 else if( !strcasecmp ( "right", val ) )
373 p_ttml_style->i_align = SUBPICTURE_ALIGN_TOP | SUBPICTURE_ALIGN_RIGHT;
374 else if( !strcasecmp ( "center", val ) )
375 p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM;
376 else if( !strcasecmp ( "start", val ) )
377 p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
378 else if( !strcasecmp ( "end", val ) )
379 p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
381 else if( !strcasecmp( "tts:fontStyle", attr ) )
383 if( !strcasecmp ( "italic", val ) || !strcasecmp ( "oblique", val ) )
384 p_ttml_style->font_style->i_style_flags |= STYLE_ITALIC;
385 else
386 p_ttml_style->font_style->i_style_flags &= ~STYLE_ITALIC;
387 p_ttml_style->font_style->i_features |= STYLE_HAS_FLAGS;
389 else if( !strcasecmp ( "tts:fontWeight", attr ) )
391 if( !strcasecmp ( "bold", val ) )
392 p_ttml_style->font_style->i_style_flags |= STYLE_BOLD;
393 else
394 p_ttml_style->font_style->i_style_flags &= ~STYLE_BOLD;
395 p_ttml_style->font_style->i_features |= STYLE_HAS_FLAGS;
397 else if( !strcasecmp ( "tts:textDecoration", attr ) )
399 if( !strcasecmp ( "underline", val ) )
400 p_ttml_style->font_style->i_style_flags |= STYLE_UNDERLINE;
401 else if( !strcasecmp ( "noUnderline", val ) )
402 p_ttml_style->font_style->i_style_flags &= ~STYLE_UNDERLINE;
403 if( !strcasecmp ( "lineThrough", val ) )
404 p_ttml_style->font_style->i_style_flags |= STYLE_STRIKEOUT;
405 else if( !strcasecmp ( "noLineThrough", val ) )
406 p_ttml_style->font_style->i_style_flags &= ~STYLE_STRIKEOUT;
407 p_ttml_style->font_style->i_features |= STYLE_HAS_FLAGS;
409 else if( !strcasecmp ( "tts:origin", attr ) )
411 const char *psz_token = val;
412 while( isspace( *psz_token ) )
413 psz_token++;
415 const char *psz_separator = strchr( psz_token, ' ' );
416 if( psz_separator == NULL )
418 msg_Warn( p_dec, "Invalid origin attribute: \"%s\"", val );
419 continue;
421 const char *psz_percent_sign = strchr( psz_token, '%' );
423 if( psz_percent_sign != NULL && psz_percent_sign < psz_separator )
425 p_ttml_style->i_margin_h = 0;
426 p_ttml_style->i_margin_percent_h = atoi( psz_token );
428 else
430 p_ttml_style->i_margin_h = atoi( psz_token );
431 p_ttml_style->i_margin_percent_h = 0;
433 while( isspace( *psz_separator ) )
434 psz_separator++;
435 psz_token = psz_separator;
436 psz_percent_sign = strchr( psz_token, '%' );
437 if( psz_percent_sign != NULL )
439 p_ttml_style->i_margin_v = 0;
440 p_ttml_style->i_margin_percent_v = atoi( val );
442 else
444 p_ttml_style->i_margin_v = atoi( val );
445 p_ttml_style->i_margin_percent_v = 0;
448 else if( !strcasecmp( "tts:textOutline", attr ) )
450 char *value = strdup( val );
451 char* psz_saveptr = NULL;
452 char* token = strtok_r( value, " ", &psz_saveptr );
453 // <color>? <length> <length>?
454 bool b_ok = false;
455 unsigned int color = vlc_html_color( token, &b_ok );
456 if( b_ok )
458 p_ttml_style->font_style->i_outline_color = color & 0xFFFFFF;
459 p_ttml_style->font_style->i_outline_alpha = (color & 0xFF000000) >> 24;
460 token = strtok_r( NULL, " ", &psz_saveptr );
462 char* psz_end = NULL;
463 int i_outline_width = strtol( token, &psz_end, 10 );
464 if( psz_end != token )
466 // Assume unit is pixel, and ignore border radius
467 p_ttml_style->font_style->i_outline_width = i_outline_width;
469 free( value );
471 else if( !strcasecmp( "tts:direction", attr ) )
473 if( !strcasecmp( "rtl", val ) )
475 p_ttml_style->i_direction |= UNICODE_BIDI_RTL;
476 p_ttml_style->b_direction_set = true;
478 else if( !strcasecmp( "ltr", val ) )
480 p_ttml_style->i_direction |= UNICODE_BIDI_LTR;
481 p_ttml_style->b_direction_set = true;
484 else if( !strcasecmp( "tts:unicodeBidi", attr ) )
486 if( !strcasecmp( "bidiOverride", val ) )
487 p_ttml_style->i_direction |= UNICODE_BIDI_OVERRIDE & ~UNICODE_BIDI_EMBEDDED;
488 else if( !strcasecmp( "embed", val ) )
489 p_ttml_style->i_direction |= UNICODE_BIDI_EMBEDDED & ~UNICODE_BIDI_OVERRIDE;
491 else if( !strcasecmp( "tts:writingMode", attr ) )
493 if( !strcasecmp( "rl", val ) || !strcasecmp( "rltb", val ) )
495 p_ttml_style->i_direction = UNICODE_BIDI_RTL | UNICODE_BIDI_OVERRIDE;
496 p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_RIGHT;
497 p_ttml_style->b_direction_set = true;
499 else if( !strcasecmp( "lr", val ) || !strcasecmp( "lrtb", val ) )
501 p_ttml_style->i_direction = UNICODE_BIDI_LTR | UNICODE_BIDI_OVERRIDE;
502 p_ttml_style->i_align = SUBPICTURE_ALIGN_BOTTOM | SUBPICTURE_ALIGN_LEFT;
503 p_ttml_style->b_direction_set = true;
507 if( p_base_style != NULL )
509 MergeTTMLStyle( p_ttml_style, p_base_style );
511 if( p_ttml_style->psz_styleid == NULL )
513 CleanupStyle( p_ttml_style );
514 return NULL;
516 return p_ttml_style;
519 static void ParseTTMLStyles( decoder_t* p_dec )
521 stream_t* p_stream = vlc_stream_MemoryNew( p_dec, (uint8_t*)p_dec->fmt_in.p_extra, p_dec->fmt_in.i_extra, true );
522 if( unlikely( p_stream == NULL ) )
523 return ;
525 xml_reader_t* p_reader = xml_ReaderCreate( p_dec, p_stream );
526 if( unlikely( p_reader == NULL ) )
528 vlc_stream_Delete( p_stream );
529 return ;
531 const char* psz_node_name;
532 int i_type = xml_ReaderNextNode( p_reader, &psz_node_name );
534 if( i_type == XML_READER_STARTELEM && !tagnamecmp( psz_node_name, "tt" ) )
536 int i_type = xml_ReaderNextNode( p_reader, &psz_node_name );
538 while( i_type != XML_READER_STARTELEM || tagnamecmp( psz_node_name, "head" ) )
539 i_type = xml_ReaderNextNode( p_reader, &psz_node_name );
543 /* region and style tag are respectively inside layout and styling tags */
544 if( !tagnamecmp( psz_node_name, "styling" ) || !tagnamecmp( psz_node_name, "layout" ) )
546 i_type = xml_ReaderNextNode( p_reader, &psz_node_name );
547 while( i_type != XML_READER_ENDELEM )
549 ttml_style_t* p_ttml_style = ParseTTMLStyle( p_dec, p_reader, psz_node_name );
550 if ( p_ttml_style == NULL )
552 xml_ReaderDelete( p_reader );
553 vlc_stream_Delete( p_stream );
554 return;
556 decoder_sys_t* p_sys = p_dec->p_sys;
557 TAB_APPEND( p_sys->i_styles, p_sys->pp_styles, p_ttml_style );
558 i_type = xml_ReaderNextNode( p_reader, &psz_node_name );
561 i_type = xml_ReaderNextNode( p_reader, &psz_node_name );
562 }while( i_type != XML_READER_ENDELEM || tagnamecmp( psz_node_name, "head" ) );
564 xml_ReaderDelete( p_reader );
565 vlc_stream_Delete( p_stream );
568 static text_segment_t *ParseTTMLSubtitles( decoder_t *p_dec, subpicture_updater_sys_t *p_update_sys, char *psz_subtitle )
570 stream_t* p_sub = NULL;
571 xml_reader_t* p_xml_reader = NULL;
572 text_segment_t* p_first_segment = NULL;
573 text_segment_t* p_current_segment = NULL;
574 style_stack_t* p_style_stack = NULL;
575 ttml_style_t* p_style = NULL;
577 p_sub = vlc_stream_MemoryNew( p_dec, (uint8_t*)psz_subtitle, strlen( psz_subtitle ), true );
578 if( unlikely( p_sub == NULL ) )
579 return NULL;
581 p_xml_reader = xml_ReaderCreate( p_dec, p_sub );
582 if( unlikely( p_xml_reader == NULL ) )
584 vlc_stream_Delete( p_sub );
585 return NULL;
588 const char *node;
589 int i_type;
591 i_type = xml_ReaderNextNode( p_xml_reader, &node );
592 while( i_type != XML_READER_NONE && i_type > 0 )
595 * We parse the styles and put them on the style stack
596 * until we reach a text node.
598 if( i_type == XML_READER_STARTELEM && ( !tagnamecmp( node, "p") || !tagnamecmp( node, "span" ) ) )
600 p_style = ParseTTMLStyle( p_dec, p_xml_reader, node );
601 if( unlikely( p_style == NULL ) )
602 goto fail;
604 if( p_style_stack != NULL && p_style_stack->p_style != NULL )
605 MergeTTMLStyle( p_style, p_style_stack->p_style );
607 if( PushStyle( &p_style_stack, p_style ) == false )
609 CleanupStyle( p_style );
610 goto fail;
614 else if( i_type == XML_READER_TEXT )
617 * Once we have a text node, we create a segment, apply the
618 * latest style put on the style stack and fill it with the
619 * content of the node.
621 text_segment_t* p_segment = text_segment_New( NULL );
622 if( unlikely( p_segment == NULL ) )
623 goto fail;
625 p_segment->psz_text = strdup( node );
626 if( unlikely( p_segment->psz_text == NULL ) )
628 text_segment_Delete( p_segment );
629 goto fail;
632 vlc_xml_decode( p_segment->psz_text );
633 if( p_segment->style == NULL && p_style_stack == NULL )
635 p_segment->style = text_style_Create( STYLE_NO_DEFAULTS );
637 else if( p_segment->style == NULL )
639 p_segment->style = CurrentStyle( p_style_stack );
640 if( p_segment->style->f_font_relsize && !p_segment->style->i_font_size )
641 p_segment->style->i_font_size = (int)( ( p_segment->style->f_font_relsize * STYLE_DEFAULT_FONT_SIZE / 100 ) + 0.5 );
643 if( p_style_stack->p_style->i_margin_h )
644 p_update_sys->x = p_style_stack->p_style->i_margin_h;
645 else
646 p_update_sys->x = p_style_stack->p_style->i_margin_percent_h;
648 if( p_style_stack->p_style->i_margin_v )
649 p_update_sys->y = p_style_stack->p_style->i_margin_v;
650 else
651 p_update_sys->y = p_style_stack->p_style->i_margin_percent_v;
653 p_update_sys->align |= p_style_stack->p_style->i_align;
655 * For bidirectionnal support, we use different enum
656 * to recognize different cases, en then we add the
657 * corresponding unicode character to the text of
658 * the text_segment.
660 int i_direction = p_style_stack->p_style->i_direction;
661 static const struct
663 const char* psz_uni_start;
664 const char* psz_uni_end;
665 }p_bidi[] = {
666 { "\u2066", "\u2069" },
667 { "\u2067", "\u2069" },
668 { "\u202A", "\u202C" },
669 { "\u202B", "\u202C" },
670 { "\u202D", "\u202C" },
671 { "\u202E", "\u202C" },
673 if( p_style_stack->p_style->b_direction_set )
675 char* psz_text = NULL;
676 if( asprintf( &psz_text, "%s%s%s", p_bidi[i_direction].psz_uni_start, p_segment->psz_text, p_bidi[i_direction].psz_uni_end ) < 0 )
678 text_segment_Delete( p_segment );
679 goto fail;
682 free( p_segment->psz_text );
683 p_segment->psz_text = psz_text;
686 if( p_first_segment == NULL )
688 p_first_segment = p_segment;
689 p_current_segment = p_segment;
691 else if( p_current_segment->psz_text != NULL )
693 p_current_segment->p_next = p_segment;
694 p_current_segment = p_segment;
696 else
699 * If p_first_segment isn't NULL but p_current_segment->psz_text is NULL
700 * this means that something went wrong in the decoding of the
701 * first segment text:
703 * Indeed, to allocate p_first_segment ( aka non NULL ), we must have
704 * - i_type == XML_READER_TEXT
705 * - passed the allocation of p_segment->psz_text without any error
707 * This would mean that vlc_xml_decode failed and p_first_segment->psz_text
708 * is NULL.
710 text_segment_Delete( p_segment );
711 goto fail;
714 else if( i_type == XML_READER_ENDELEM && !tagnamecmp( node, "span" ) )
716 if( p_style_stack->p_next )
717 PopStyle( &p_style_stack);
719 else if( i_type == XML_READER_ENDELEM && !tagnamecmp( node, "p" ) )
721 PopStyle( &p_style_stack );
722 p_current_segment->p_next = NULL;
724 else if( i_type == XML_READER_STARTELEM && !strcasecmp( node, "br" ) )
726 if( p_current_segment != NULL && p_current_segment->psz_text != NULL )
728 char* psz_text = NULL;
729 if( asprintf( &psz_text, "%s\n", p_current_segment->psz_text ) != -1 )
731 free( p_current_segment->psz_text );
732 p_current_segment->psz_text = psz_text;
736 i_type = xml_ReaderNextNode( p_xml_reader, &node );
738 ClearStack( p_style_stack );
739 xml_ReaderDelete( p_xml_reader );
740 vlc_stream_Delete( p_sub );
742 return p_first_segment;
744 fail:
745 text_segment_ChainDelete( p_first_segment );
746 ClearStack( p_style_stack );
747 xml_ReaderDelete( p_xml_reader );
748 vlc_stream_Delete( p_sub );
749 return NULL;
752 static subpicture_t *ParseText( decoder_t *p_dec, block_t *p_block )
754 decoder_sys_t *p_sys = p_dec->p_sys;
755 subpicture_t *p_spu = NULL;
756 char *psz_subtitle = NULL;
758 if( p_block->i_flags & BLOCK_FLAG_CORRUPTED )
759 return NULL;
761 /* We cannot display a subpicture with no date */
762 if( p_block->i_pts <= VLC_TS_INVALID )
764 msg_Warn( p_dec, "subtitle without a date" );
765 return NULL;
768 /* Check validity of packet data */
769 /* An "empty" line containing only \0 can be used to force
770 and ephemer picture from the screen */
772 if( p_block->i_buffer < 1 )
774 msg_Warn( p_dec, "no subtitle data" );
775 return NULL;
778 psz_subtitle = malloc( p_block->i_buffer );
779 if( unlikely( psz_subtitle == NULL ) )
780 return NULL;
781 memcpy( psz_subtitle, p_block->p_buffer, p_block->i_buffer );
783 /* Create the subpicture unit */
784 p_spu = decoder_NewSubpictureText( p_dec );
785 if( !p_spu )
787 free( psz_subtitle );
788 return NULL;
790 p_spu->i_start = p_block->i_pts;
791 p_spu->i_stop = p_block->i_pts + p_block->i_length;
792 p_spu->b_ephemer = (p_block->i_length == 0);
793 p_spu->b_absolute = false;
795 subpicture_updater_sys_t *p_spu_sys = p_spu->updater.p_sys;
797 p_spu_sys->align = SUBPICTURE_ALIGN_BOTTOM | p_sys->i_align;
798 p_spu_sys->p_segments = ParseTTMLSubtitles( p_dec, p_spu_sys, psz_subtitle );
799 free( psz_subtitle );
801 return p_spu;
806 /****************************************************************************
807 * DecodeBlock: the whole thing
808 ****************************************************************************/
809 static subpicture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block )
811 if( !pp_block || *pp_block == NULL )
812 return NULL;
814 block_t* p_block = *pp_block;
815 subpicture_t *p_spu = ParseText( p_dec, p_block );
817 block_Release( p_block );
818 *pp_block = NULL;
820 return p_spu;
823 /*****************************************************************************
824 * OpenDecoder: probe the decoder and return score
825 *****************************************************************************/
826 static int OpenDecoder( vlc_object_t *p_this )
828 decoder_t *p_dec = (decoder_t*)p_this;
829 decoder_sys_t *p_sys;
831 if( p_dec->fmt_in.i_codec != VLC_CODEC_TTML )
832 return VLC_EGENERIC;
834 /* Allocate the memory needed to store the decoder's structure */
835 p_dec->p_sys = p_sys = calloc( 1, sizeof( *p_sys ) );
836 if( unlikely( p_sys == NULL ) )
837 return VLC_ENOMEM;
839 if( p_dec->fmt_in.p_extra != NULL && p_dec->fmt_in.i_extra > 0 )
840 ParseTTMLStyles( p_dec );
842 p_dec->pf_decode_sub = DecodeBlock;
843 p_dec->fmt_out.i_cat = SPU_ES;
844 p_sys->i_align = var_InheritInteger( p_dec, "ttml-align" );
846 return VLC_SUCCESS;
849 /*****************************************************************************
850 * CloseDecoder: clean up the decoder
851 *****************************************************************************/
852 static void CloseDecoder( vlc_object_t *p_this )
854 decoder_t *p_dec = (decoder_t *)p_this;
855 decoder_sys_t *p_sys = p_dec->p_sys;
857 for( size_t i = 0; i < p_sys->i_styles; ++i )
859 free( p_sys->pp_styles[i]->psz_styleid );
860 text_style_Delete( p_sys->pp_styles[i]->font_style );
861 free( p_sys->pp_styles[i] );
863 TAB_CLEAN( p_sys->i_styles, p_sys->pp_styles );
865 free( p_sys );