1 /*****************************************************************************
2 * subsdec.c : text subtitle decoder
3 *****************************************************************************
4 * Copyright (C) 2000-2006 VLC authors and VideoLAN
7 * Authors: Gildas Bazin <gbazin@videolan.org>
8 * Samuel Hocevar <sam@zoy.org>
9 * Derk-Jan Hartman <hartman at videolan dot org>
10 * Bernie Purcell <bitmap@videolan.org>
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU Lesser General Public License as published by
14 * the Free Software Foundation; either version 2.1 of the License, or
15 * (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public License
23 * along with this program; if not, write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
25 *****************************************************************************/
27 /*****************************************************************************
29 *****************************************************************************/
38 #include <vlc_common.h>
39 #include <vlc_plugin.h>
40 #include <vlc_codec.h>
41 #include <vlc_charset.h>
46 /*****************************************************************************
48 *****************************************************************************/
49 static const char *const ppsz_encodings
[] = {
95 static const char *const ppsz_encoding_names
[] = {
97 The character encoding name in parenthesis corresponds to that used for
98 the GetACP translation. "Windows-1252" applies to Western European
99 languages using the Latin alphabet. */
100 N_("Default (Windows-1252)"),
101 N_("System codeset"),
102 N_("Universal (UTF-8)"),
103 N_("Universal (UTF-16)"),
104 N_("Universal (big endian UTF-16)"),
105 N_("Universal (little endian UTF-16)"),
106 N_("Universal, Chinese (GB18030)"),
108 /* ISO 8859 and the likes */
110 N_("Western European (Latin-9)"), /* mostly superset of Latin-1 */
111 N_("Western European (Windows-1252)"),
112 N_("Western European (IBM 00850)"),
114 N_("Eastern European (Latin-2)"),
115 N_("Eastern European (Windows-1250)"),
117 N_("Esperanto (Latin-3)"),
119 N_("Nordic (Latin-6)"), /* Latin 6 supersedes Latin 4 */
121 N_("Cyrillic (Windows-1251)"), /* ISO 8859-5 is not practically used */
122 N_("Russian (KOI8-R)"),
123 N_("Ukrainian (KOI8-U)"),
125 N_("Arabic (ISO 8859-6)"),
126 N_("Arabic (Windows-1256)"),
128 N_("Greek (ISO 8859-7)"),
129 N_("Greek (Windows-1253)"),
131 N_("Hebrew (ISO 8859-8)"),
132 N_("Hebrew (Windows-1255)"),
134 N_("Turkish (ISO 8859-9)"),
135 N_("Turkish (Windows-1254)"),
138 N_("Thai (TIS 620-2533/ISO 8859-11)"),
139 N_("Thai (Windows-874)"),
141 N_("Baltic (Latin-7)"),
142 N_("Baltic (Windows-1257)"),
143 /* 12 -> /dev/null */
145 N_("Celtic (Latin-8)"),
148 N_("South-Eastern European (Latin-10)"),
150 N_("Simplified Chinese (ISO-2022-CN-EXT)"),
151 N_("Simplified Chinese Unix (EUC-CN)"),
152 N_("Japanese (7-bits JIS/ISO-2022-JP-2)"),
153 N_("Japanese Unix (EUC-JP)"),
154 N_("Japanese (Shift JIS)"),
155 N_("Korean (EUC-KR/CP949)"),
156 N_("Korean (ISO-2022-KR)"),
157 N_("Traditional Chinese (Big5)"),
158 N_("Traditional Chinese Unix (EUC-TW)"),
159 N_("Hong-Kong Supplementary (HKSCS)"),
161 N_("Vietnamese (VISCII)"),
162 N_("Vietnamese (Windows-1258)"),
165 static const int pi_justification
[] = { -1, 0, 1, 2 };
166 static const char *const ppsz_justification_text
[] = {
167 N_("Auto"),N_("Center"),N_("Left"),N_("Right")
170 #define ENCODING_TEXT N_("Subtitle text encoding")
171 #define ENCODING_LONGTEXT N_("Set the encoding used in text subtitles")
172 #define ALIGN_TEXT N_("Subtitle justification")
173 #define ALIGN_LONGTEXT N_("Set the justification of subtitles")
174 #define AUTODETECT_UTF8_TEXT N_("UTF-8 subtitle autodetection")
175 #define AUTODETECT_UTF8_LONGTEXT N_("This enables automatic detection of " \
176 "UTF-8 encoding within subtitle files.")
178 static int OpenDecoder ( vlc_object_t
* );
179 static void CloseDecoder ( vlc_object_t
* );
182 set_shortname( N_("Subtitles"))
183 set_description( N_("Text subtitle decoder") )
184 set_capability( "spu decoder", 50 )
185 set_callbacks( OpenDecoder
, CloseDecoder
)
186 set_category( CAT_INPUT
)
187 set_subcategory( SUBCAT_INPUT_SCODEC
)
189 add_integer( "subsdec-align", -1, ALIGN_TEXT
, ALIGN_LONGTEXT
,
191 change_integer_list( pi_justification
, ppsz_justification_text
)
192 add_string( "subsdec-encoding", "",
193 ENCODING_TEXT
, ENCODING_LONGTEXT
, false )
194 change_string_list( ppsz_encodings
, ppsz_encoding_names
)
195 add_bool( "subsdec-autodetect-utf8", true,
196 AUTODETECT_UTF8_TEXT
, AUTODETECT_UTF8_LONGTEXT
, false )
199 /*****************************************************************************
201 *****************************************************************************/
202 #define NO_BREAKING_SPACE " "
206 int i_align
; /* Subtitles alignment on the vout */
208 vlc_iconv_t iconv_handle
; /* handle to iconv instance */
209 bool b_autodetect_utf8
;
213 static int DecodeBlock ( decoder_t
*, block_t
* );
214 static subpicture_t
*ParseText ( decoder_t
*, block_t
* );
215 static text_segment_t
*ParseSubtitles(int *pi_align
, const char * );
217 /*****************************************************************************
218 * OpenDecoder: probe the decoder and return score
219 *****************************************************************************
220 * Tries to launch a decoder and return score so that the interface is able
222 *****************************************************************************/
223 static int OpenDecoder( vlc_object_t
*p_this
)
225 decoder_t
*p_dec
= (decoder_t
*)p_this
;
226 decoder_sys_t
*p_sys
;
228 switch( p_dec
->fmt_in
.i_codec
)
231 case VLC_CODEC_ITU_T140
:
237 /* Allocate the memory needed to store the decoder's structure */
238 p_dec
->p_sys
= p_sys
= calloc( 1, sizeof( *p_sys
) );
242 p_dec
->pf_decode
= DecodeBlock
;
243 p_dec
->fmt_out
.i_codec
= 0;
247 p_sys
->iconv_handle
= (vlc_iconv_t
)-1;
248 p_sys
->b_autodetect_utf8
= false;
250 const char *encoding
;
253 /* First try demux-specified encoding */
254 if( p_dec
->fmt_in
.i_codec
== VLC_CODEC_ITU_T140
)
255 encoding
= "UTF-8"; /* IUT T.140 is always using UTF-8 */
257 if( p_dec
->fmt_in
.subs
.psz_encoding
&& *p_dec
->fmt_in
.subs
.psz_encoding
)
259 encoding
= p_dec
->fmt_in
.subs
.psz_encoding
;
260 msg_Dbg (p_dec
, "trying demuxer-specified character encoding: %s",
265 /* Second, try configured encoding */
266 if ((var
= var_InheritString (p_dec
, "subsdec-encoding")) != NULL
)
268 msg_Dbg (p_dec
, "trying configured character encoding: %s", var
);
269 if (!strcmp (var
, "system"))
274 /* ^ iconv() treats "" as nl_langinfo(CODESET) */
280 /* Third, try "local" encoding */
283 The Windows ANSI code page most commonly used for this language.
284 VLC uses this as a guess of the subtitle files character set
285 (if UTF-8 and UTF-16 autodetection fails).
286 Western European languages normally use "CP1252", which is a
287 Microsoft-variant of ISO 8859-1. That suits the Latin alphabet.
288 Other scripts use other code pages.
290 This MUST be a valid iconv character set. If unsure, please refer
291 the VideoLAN translators mailing list. */
292 encoding
= vlc_pgettext("GetACP", "CP1252");
293 msg_Dbg (p_dec
, "trying default character encoding: %s", encoding
);
296 /* Check UTF-8 autodetection */
297 if (var_InheritBool (p_dec
, "subsdec-autodetect-utf8"))
299 msg_Dbg (p_dec
, "using automatic UTF-8 detection");
300 p_sys
->b_autodetect_utf8
= true;
304 if (strcasecmp (encoding
, "UTF-8") && strcasecmp (encoding
, "utf8"))
306 p_sys
->iconv_handle
= vlc_iconv_open ("UTF-8", encoding
);
307 if (p_sys
->iconv_handle
== (vlc_iconv_t
)(-1))
308 msg_Err (p_dec
, "cannot convert from %s: %s", encoding
,
309 vlc_strerror_c(errno
));
313 p_sys
->i_align
= var_InheritInteger( p_dec
, "subsdec-align" );
318 /****************************************************************************
319 * DecodeBlock: the whole thing
320 ****************************************************************************
321 * This function must be fed with complete subtitles units.
322 ****************************************************************************/
323 static int DecodeBlock( decoder_t
*p_dec
, block_t
*p_block
)
327 if( p_block
== NULL
) /* No Drain */
328 return VLCDEC_SUCCESS
;
330 if( p_block
->i_flags
& BLOCK_FLAG_CORRUPTED
)
332 block_Release( p_block
);
333 return VLCDEC_SUCCESS
;
336 p_spu
= ParseText( p_dec
, p_block
);
338 block_Release( p_block
);
340 decoder_QueueSub( p_dec
, p_spu
);
341 return VLCDEC_SUCCESS
;
344 /*****************************************************************************
345 * CloseDecoder: clean up the decoder
346 *****************************************************************************/
347 static void CloseDecoder( vlc_object_t
*p_this
)
349 decoder_t
*p_dec
= (decoder_t
*)p_this
;
350 decoder_sys_t
*p_sys
= p_dec
->p_sys
;
352 if( p_sys
->iconv_handle
!= (vlc_iconv_t
)-1 )
353 vlc_iconv_close( p_sys
->iconv_handle
);
358 /*****************************************************************************
359 * ParseText: parse an text subtitle packet and send it to the video output
360 *****************************************************************************/
361 static subpicture_t
*ParseText( decoder_t
*p_dec
, block_t
*p_block
)
363 decoder_sys_t
*p_sys
= p_dec
->p_sys
;
364 subpicture_t
*p_spu
= NULL
;
366 if( p_block
->i_flags
& BLOCK_FLAG_CORRUPTED
)
369 /* We cannot display a subpicture with no date */
370 if( p_block
->i_pts
== VLC_TICK_INVALID
)
372 msg_Warn( p_dec
, "subtitle without a date" );
376 /* Check validity of packet data */
377 /* An "empty" line containing only \0 can be used to force
378 and ephemer picture from the screen */
379 if( p_block
->i_buffer
< 1 )
381 msg_Warn( p_dec
, "no subtitle data" );
385 char *psz_subtitle
= NULL
;
387 /* Should be resiliant against bad subtitles */
388 if( p_sys
->iconv_handle
== (vlc_iconv_t
)-1 ||
389 p_sys
->b_autodetect_utf8
)
391 psz_subtitle
= malloc( p_block
->i_buffer
+ 1 );
392 if( psz_subtitle
== NULL
)
394 memcpy( psz_subtitle
, p_block
->p_buffer
, p_block
->i_buffer
);
395 psz_subtitle
[p_block
->i_buffer
] = '\0';
398 if( p_sys
->iconv_handle
== (vlc_iconv_t
)-1 )
400 if (EnsureUTF8( psz_subtitle
) == NULL
)
402 msg_Err( p_dec
, "failed to convert subtitle encoding.\n"
403 "Try manually setting a character-encoding "
404 "before you open the file." );
409 if( p_sys
->b_autodetect_utf8
)
411 if( IsUTF8( psz_subtitle
) == NULL
)
413 msg_Dbg( p_dec
, "invalid UTF-8 sequence: "
414 "disabling UTF-8 subtitles autodetection" );
415 p_sys
->b_autodetect_utf8
= false;
419 if( !p_sys
->b_autodetect_utf8
)
421 size_t inbytes_left
= p_block
->i_buffer
;
422 size_t outbytes_left
= 6 * inbytes_left
;
423 char *psz_new_subtitle
= xmalloc( outbytes_left
+ 1 );
424 char *psz_convert_buffer_out
= psz_new_subtitle
;
425 const char *psz_convert_buffer_in
=
426 psz_subtitle
? psz_subtitle
: (char *)p_block
->p_buffer
;
428 size_t ret
= vlc_iconv( p_sys
->iconv_handle
,
429 &psz_convert_buffer_in
, &inbytes_left
,
430 &psz_convert_buffer_out
, &outbytes_left
);
432 *psz_convert_buffer_out
++ = '\0';
433 free( psz_subtitle
);
435 if( ( ret
== (size_t)(-1) ) || inbytes_left
)
437 free( psz_new_subtitle
);
438 msg_Err( p_dec
, "failed to convert subtitle encoding.\n"
439 "Try manually setting a character-encoding "
440 "before you open the file." );
444 psz_subtitle
= realloc( psz_new_subtitle
,
445 psz_convert_buffer_out
- psz_new_subtitle
);
447 psz_subtitle
= psz_new_subtitle
;
451 /* Create the subpicture unit */
452 p_spu
= decoder_NewSubpictureText( p_dec
);
455 free( psz_subtitle
);
458 p_spu
->i_start
= p_block
->i_pts
;
459 p_spu
->i_stop
= p_block
->i_pts
+ p_block
->i_length
;
460 p_spu
->b_ephemer
= (p_block
->i_length
== VLC_TICK_INVALID
);
461 p_spu
->b_absolute
= false;
463 subtext_updater_sys_t
*p_spu_sys
= p_spu
->updater
.p_sys
;
465 int i_inline_align
= -1;
466 p_spu_sys
->region
.p_segments
= ParseSubtitles( &i_inline_align
, psz_subtitle
);
467 free( psz_subtitle
);
468 if( p_sys
->i_align
>= 0 ) /* bottom ; left, right or centered */
470 p_spu_sys
->region
.align
= SUBPICTURE_ALIGN_BOTTOM
| p_sys
->i_align
;
471 p_spu_sys
->region
.inner_align
= p_sys
->i_align
;
473 else if( i_inline_align
>= 0 )
475 p_spu_sys
->region
.align
= i_inline_align
;
476 p_spu_sys
->region
.inner_align
= i_inline_align
;
478 else /* default, bottom ; centered */
480 p_spu_sys
->region
.align
= SUBPICTURE_ALIGN_BOTTOM
;
481 p_spu_sys
->region
.inner_align
= 0;
487 static bool AppendCharacter( text_segment_t
* p_segment
, char c
)
490 if ( asprintf( &tmp
, "%s%c", p_segment
->psz_text
? p_segment
->psz_text
: "", c
) < 0 )
492 free( p_segment
->psz_text
);
493 p_segment
->psz_text
= tmp
;
497 static bool AppendString( text_segment_t
* p_segment
, const char* psz_str
)
500 if ( asprintf( &tmp
, "%s%s", p_segment
->psz_text
? p_segment
->psz_text
: "", psz_str
) < 0 )
502 free( p_segment
->psz_text
);
503 p_segment
->psz_text
= tmp
;
/*
 * Reads one `name=value` attribute from inside a tag.
 *
 * On entry *ppsz_subtitle points somewhere after the tag name. Leading spaces
 * are skipped, then a name made of a letter followed by letters or '-' is
 * read ('-' is accepted so that names such as "outline-color" reach the
 * caller whole). The value may be wrapped in single or double quotes; an
 * unquoted value ends at the first whitespace or '>'.
 *
 * Returns the attribute name (to be freed by the caller), or NULL when no
 * name is found, the string ends right after the name, or memory runs out.
 * On a NULL return *ppsz_subtitle is left unchanged; otherwise it is moved
 * past what was consumed. *ppsz_attribute_value receives the value (to be
 * freed by the caller) or NULL when the attribute has no value.
 */
static char* ConsumeAttribute( const char** ppsz_subtitle, char** ppsz_attribute_value )
{
    const char *psz_cur = *ppsz_subtitle;
    *ppsz_attribute_value = NULL;

    while ( *psz_cur == ' ' )
        psz_cur++;

    /* <ctype.h> functions need an unsigned char value: subtitle text is
     * UTF-8, so plain char bytes may well be negative */
    const char *psz_name = psz_cur;
    if ( isalpha( (unsigned char)*psz_cur ) )
    {
        do
            psz_cur++;
        while ( isalpha( (unsigned char)*psz_cur ) || *psz_cur == '-' );
    }
    const size_t i_name_len = (size_t)( psz_cur - psz_name );
    if ( *psz_cur == '\0' || i_name_len == 0 )
        return NULL;

    char *psz_attribute_name = malloc( i_name_len + 1 );
    if ( psz_attribute_name == NULL )
        return NULL;
    memcpy( psz_attribute_name, psz_name, i_name_len );
    psz_attribute_name[i_name_len] = '\0';

    // Skip over to the attribute value
    while ( *psz_cur && *psz_cur != '=' )
        psz_cur++;
    if ( *psz_cur == '\0' )
    {
        *ppsz_subtitle = psz_cur;
        return psz_attribute_name;
    }
    // Skip the '=' sign
    psz_cur++;

    // Acknowledge the delimiter if any
    while ( isspace( (unsigned char)*psz_cur ) )
        psz_cur++;

    char delimiter = 0;
    if ( *psz_cur == '\'' || *psz_cur == '"' )
    {
        // Save the delimiter and skip it
        delimiter = *psz_cur;
        psz_cur++;
    }

    // Skip spaces, just in case
    while ( isspace( (unsigned char)*psz_cur ) )
        psz_cur++;

    const char *psz_value = psz_cur;
    if ( delimiter != 0 )
    {
        while ( *psz_cur && *psz_cur != delimiter )
            psz_cur++;
    }
    else
    {
        while ( *psz_cur && !isspace( (unsigned char)*psz_cur ) && *psz_cur != '>' )
            psz_cur++;
    }
    const size_t i_value_len = (size_t)( psz_cur - psz_value );
    if ( i_value_len == 0 )
    {
        *ppsz_subtitle = psz_cur;
        return psz_attribute_name;
    }

    *ppsz_attribute_value = malloc( i_value_len + 1 );
    if ( *ppsz_attribute_value == NULL )
    {
        free( psz_attribute_name );
        return NULL;
    }
    memcpy( *ppsz_attribute_value, psz_value, i_value_len );
    (*ppsz_attribute_value)[i_value_len] = '\0';

    // Finally, skip over the final delimiter
    if ( delimiter != 0 && *psz_cur )
        psz_cur++;
    *ppsz_subtitle = psz_cur;
    return psz_attribute_name;
}
// Returns the next tag and consumes the string up to after the tag name, or
// returns NULL and doesn't advance if the angle bracket was not a tag opening.
// For instance, if *ppsz_subtitle == "<some_tag attribute=value>"
// GetTag will return "some_tag", and will advance up to the space before
// "attribute". When b_closing is true, a '/' right after the '<' is accepted
// and skipped. A tag name is a letter followed by letters, digits or '_'.
// The returned value must be freed. NULL is also returned if memory runs out.
static char* GetTag( const char** ppsz_subtitle, bool b_closing )
{
    const char *psz_cur = *ppsz_subtitle;
    if ( *psz_cur != '<' )
        return NULL;
    // Skip the '<'
    psz_cur++;
    if ( b_closing && *psz_cur == '/' )
        psz_cur++;
    // Skip potential spaces
    while ( *psz_cur == ' ' )
        psz_cur++;

    // Now we need to verify if what comes next is a valid tag.
    // <ctype.h> functions need an unsigned char value: subtitle text is
    // UTF-8, so plain char bytes may well be negative.
    if ( !isalpha( (unsigned char)*psz_cur ) )
        return NULL;
    size_t tag_size = 1;
    while ( isalnum( (unsigned char)psz_cur[tag_size] ) || psz_cur[tag_size] == '_' )
        tag_size++;

    char *psz_tagname = malloc( tag_size + 1 );
    if ( psz_tagname == NULL )
        return NULL;
    memcpy( psz_tagname, psz_cur, tag_size );
    psz_tagname[tag_size] = '\0';

    *ppsz_subtitle = psz_cur + tag_size;
    return psz_tagname;
}
/*
 * Tells whether psz_subtitle contains a closing tag for psz_tagname, that is
 * "</", optional spaces, the tag name (compared without regard to case),
 * optional spaces, then ">".
 *
 * Every occurrence of the tag name is examined, so a tag whose name also
 * shows up in the enclosed text is still recognised as closed. The backward
 * scan never looks before the start of psz_subtitle.
 */
static bool IsClosed( const char* psz_subtitle, const char* psz_tagname )
{
    const size_t i_taglen = strlen( psz_tagname );
    if ( i_taglen == 0 )
        return false;

    for ( const char *psz_tagpos = strcasestr( psz_subtitle, psz_tagname );
          psz_tagpos != NULL;
          psz_tagpos = strcasestr( psz_tagpos + 1, psz_tagname ) )
    {
        // Search for '>' immediately after (minding the potential spaces)
        const char *psz_after = psz_tagpos + i_taglen;
        while ( *psz_after == ' ' )
            psz_after++;
        if ( *psz_after != '>' )
            continue;

        // Search for '</' immediately before (minding the potential spaces)
        const char *psz_before = psz_tagpos;
        while ( psz_before > psz_subtitle && psz_before[-1] == ' ' )
            psz_before--;
        if ( psz_before - psz_subtitle >= 2 &&
             psz_before[-1] == '/' && psz_before[-2] == '<' )
            return true;
    }
    return false;
}
/*
 * Stack of the unknown tags that are currently open. Each entry owns its
 * tag name.
 */
typedef struct tag_stack tag_stack_t;
struct tag_stack
{
    char* psz_tagname;
    tag_stack_t *p_next;
};

/*
 * Pushes psz_tagname on top of the stack. Ownership of psz_tagname is taken
 * in every case: if the entry cannot be allocated the name is freed, since
 * the caller no longer keeps track of it.
 */
static void AppendTag( tag_stack_t **pp_stack, char* psz_tagname )
{
    tag_stack_t *p_elem = malloc( sizeof( *p_elem ) );
    if ( p_elem == NULL )
    {
        free( psz_tagname );
        return;
    }
    p_elem->psz_tagname = psz_tagname;
    p_elem->p_next = *pp_stack;
    *pp_stack = p_elem;
}

/*
 * Looks for psz_tagname in the stack, ignoring case. The first matching
 * entry, starting from the top, is unlinked and freed.
 * Returns true when an entry was found and removed.
 */
static bool HasTag( tag_stack_t **pp_stack, const char* psz_tagname )
{
    /* pp_link always designates the pointer that leads to the entry under
     * test, so unlinking is the same operation at the top and further down */
    for ( tag_stack_t **pp_link = pp_stack; *pp_link != NULL;
          pp_link = &(*pp_link)->p_next )
    {
        tag_stack_t *p_current = *pp_link;
        if ( strcasecmp( psz_tagname, p_current->psz_tagname ) != 0 )
            continue;

        *pp_link = p_current->p_next;
        free( p_current->psz_tagname );
        free( p_current );
        return true;
    }
    return false;
}
683 * mini style stack implementation
685 typedef struct style_stack style_stack_t
;
688 text_style_t
* p_style
;
689 style_stack_t
* p_next
;
692 static text_style_t
* DuplicateAndPushStyle(style_stack_t
** pp_stack
)
694 text_style_t
* p_dup
= ( *pp_stack
) ? text_style_Duplicate( (*pp_stack
)->p_style
) : text_style_Create( STYLE_NO_DEFAULTS
);
695 if ( unlikely( !p_dup
) )
697 style_stack_t
* p_entry
= malloc( sizeof( *p_entry
) );
698 if ( unlikely( !p_entry
) )
700 text_style_Delete( p_dup
);
703 // Give the style ownership to the segment.
704 p_entry
->p_style
= p_dup
;
705 p_entry
->p_next
= *pp_stack
;
710 static void PopStyle(style_stack_t
** pp_stack
)
712 style_stack_t
* p_old
= *pp_stack
;
715 *pp_stack
= p_old
->p_next
;
716 // Don't free the style, it is now owned by the text_segment_t
720 static text_segment_t
* NewTextSegmentPushStyle( text_segment_t
* p_segment
, style_stack_t
** pp_stack
)
722 text_segment_t
* p_new
= text_segment_New( NULL
);
723 if ( unlikely( p_new
== NULL
) )
725 text_style_t
* p_style
= DuplicateAndPushStyle( pp_stack
);
726 p_new
->style
= p_style
;
727 p_segment
->p_next
= p_new
;
731 static text_segment_t
* NewTextSegmentPopStyle( text_segment_t
* p_segment
, style_stack_t
** pp_stack
)
733 text_segment_t
* p_new
= text_segment_New( NULL
);
734 if ( unlikely( p_new
== NULL
) )
736 // We shouldn't have an empty stack since this happens when closing a tag,
737 // but better be safe than sorry if (/when) we encounter a broken subtitle file.
738 PopStyle( pp_stack
);
739 text_style_t
* p_dup
= ( *pp_stack
) ? text_style_Duplicate( (*pp_stack
)->p_style
) : text_style_Create( STYLE_NO_DEFAULTS
);
740 p_new
->style
= p_dup
;
741 p_segment
->p_next
= p_new
;
745 static text_segment_t
* ParseSubtitles( int *pi_align
, const char *psz_subtitle
)
747 text_segment_t
* p_segment
;
748 text_segment_t
* p_first_segment
;
749 style_stack_t
* p_stack
= NULL
;
750 tag_stack_t
* p_tag_stack
= NULL
;
752 //FIXME: Remove initial allocation? Might make the below code more complicated
753 p_first_segment
= p_segment
= text_segment_New( "" );
758 while( *psz_subtitle
)
760 /* HTML extensions */
761 if( *psz_subtitle
== '<' )
763 char *psz_tagname
= GetTag( &psz_subtitle
, false );
764 if ( psz_tagname
!= NULL
)
766 if( !strcasecmp( psz_tagname
, "br" ) )
768 if ( !AppendCharacter( p_segment
, '\n' ) )
774 else if( !strcasecmp( psz_tagname
, "b" ) )
776 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
777 p_segment
->style
->i_style_flags
|= STYLE_BOLD
;
778 p_segment
->style
->i_features
|= STYLE_HAS_FLAGS
;
780 else if( !strcasecmp( psz_tagname
, "i" ) )
782 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
783 p_segment
->style
->i_style_flags
|= STYLE_ITALIC
;
784 p_segment
->style
->i_features
|= STYLE_HAS_FLAGS
;
786 else if( !strcasecmp( psz_tagname
, "u" ) )
788 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
789 p_segment
->style
->i_style_flags
|= STYLE_UNDERLINE
;
790 p_segment
->style
->i_features
|= STYLE_HAS_FLAGS
;
792 else if( !strcasecmp( psz_tagname
, "s" ) )
794 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
795 p_segment
->style
->i_style_flags
|= STYLE_STRIKEOUT
;
796 p_segment
->style
->i_features
|= STYLE_HAS_FLAGS
;
798 else if( !strcasecmp( psz_tagname
, "font" ) )
800 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
802 char* psz_attribute_name
;
803 char* psz_attribute_value
;
805 while( ( psz_attribute_name
= ConsumeAttribute( &psz_subtitle
, &psz_attribute_value
) ) )
807 if ( !psz_attribute_value
)
809 free( psz_attribute_name
);
812 if ( !strcasecmp( psz_attribute_name
, "face" ) )
814 free(p_segment
->style
->psz_fontname
);
815 p_segment
->style
->psz_fontname
= psz_attribute_value
;
816 // We don't want to free the attribute value since it has become our fontname
817 psz_attribute_value
= NULL
;
819 else if ( !strcasecmp( psz_attribute_name
, "family" ) )
821 free(p_segment
->style
->psz_monofontname
);
822 p_segment
->style
->psz_monofontname
= psz_attribute_value
;
823 psz_attribute_value
= NULL
;
825 else if ( !strcasecmp( psz_attribute_name
, "size" ) )
827 int size
= atoi( psz_attribute_value
);
830 p_segment
->style
->i_font_size
= size
;
831 p_segment
->style
->f_font_relsize
= STYLE_DEFAULT_REL_FONT_SIZE
*
832 STYLE_DEFAULT_FONT_SIZE
/ p_segment
->style
->i_font_size
;
835 else if ( !strcasecmp( psz_attribute_name
, "color" ) )
837 p_segment
->style
->i_font_color
= vlc_html_color( psz_attribute_value
, NULL
);
838 p_segment
->style
->i_features
|= STYLE_HAS_FONT_COLOR
;
840 else if ( !strcasecmp( psz_attribute_name
, "outline-color" ) )
842 p_segment
->style
->i_outline_color
= vlc_html_color( psz_attribute_value
, NULL
);
843 p_segment
->style
->i_features
|= STYLE_HAS_OUTLINE_COLOR
;
845 else if ( !strcasecmp( psz_attribute_name
, "shadow-color" ) )
847 p_segment
->style
->i_shadow_color
= vlc_html_color( psz_attribute_value
, NULL
);
848 p_segment
->style
->i_features
|= STYLE_HAS_SHADOW_COLOR
;
850 else if ( !strcasecmp( psz_attribute_name
, "outline-level" ) )
852 p_segment
->style
->i_outline_width
= atoi( psz_attribute_value
);
854 else if ( !strcasecmp( psz_attribute_name
, "shadow-level" ) )
856 p_segment
->style
->i_shadow_width
= atoi( psz_attribute_value
);
858 else if ( !strcasecmp( psz_attribute_name
, "back-color" ) )
860 p_segment
->style
->i_background_color
= vlc_html_color( psz_attribute_value
, NULL
);
861 p_segment
->style
->i_features
|= STYLE_HAS_BACKGROUND_COLOR
;
863 else if ( !strcasecmp( psz_attribute_name
, "alpha" ) )
865 p_segment
->style
->i_font_alpha
= atoi( psz_attribute_value
);
866 p_segment
->style
->i_features
|= STYLE_HAS_FONT_ALPHA
;
869 free( psz_attribute_name
);
870 free( psz_attribute_value
);
875 // This is an unknown tag. We need to hide it if it's properly closed, and display it otherwise
876 if ( !IsClosed( psz_subtitle
, psz_tagname
) )
878 AppendCharacter( p_segment
, '<' );
879 AppendString( p_segment
, psz_tagname
);
880 AppendCharacter( p_segment
, '>' );
884 AppendTag( &p_tag_stack
, psz_tagname
);
885 // We don't want to free the tagname now, it will be freed when the tag
886 // gets poped from the stack.
889 // In any case, fall through and skip to the closing tag.
891 // Skip potential spaces & end tag
892 while ( *psz_subtitle
&& *psz_subtitle
!= '>' )
894 if ( *psz_subtitle
== '>' )
899 else if( !strncmp( psz_subtitle
, "</", 2 ))
901 char* psz_tagname
= GetTag( &psz_subtitle
, true );
902 if ( psz_tagname
!= NULL
)
904 if ( !strcasecmp( psz_tagname
, "b" ) ||
905 !strcasecmp( psz_tagname
, "i" ) ||
906 !strcasecmp( psz_tagname
, "u" ) ||
907 !strcasecmp( psz_tagname
, "s" ) ||
908 !strcasecmp( psz_tagname
, "font" ) )
910 // A closing tag for one of the tags we handle, meaning
911 // we pushed a style onto the stack earlier
912 p_segment
= NewTextSegmentPopStyle( p_segment
, &p_stack
);
916 // Unknown closing tag. If it is closing an unknown tag, ignore it. Otherwise, display it
917 if ( !HasTag( &p_tag_stack
, psz_tagname
) )
919 AppendString( p_segment
, "</" );
920 AppendString( p_segment
, psz_tagname
);
921 AppendCharacter( p_segment
, '>' );
924 while ( *psz_subtitle
== ' ' )
926 if ( *psz_subtitle
== '>' )
933 * This doesn't appear to be a valid tag closing syntax.
934 * Simply append the text
936 AppendString( p_segment
, "</" );
942 /* We have an unknown tag, just append it, and move on.
943 * The rest of the string won't be recognized as a tag, and
944 * we will ignore unknown closing tag
946 AppendCharacter( p_segment
, '<' );
951 else if( psz_subtitle
[0] == '{' && psz_subtitle
[1] == '\\' &&
952 strchr( psz_subtitle
, '}' ) )
954 /* Check for forced alignment */
956 !strncmp( psz_subtitle
, "{\\an", 4 ) && psz_subtitle
[4] >= '1' && psz_subtitle
[4] <= '9' && psz_subtitle
[5] == '}' )
958 static const int pi_vertical
[3] = { SUBPICTURE_ALIGN_BOTTOM
, 0, SUBPICTURE_ALIGN_TOP
};
959 static const int pi_horizontal
[3] = { SUBPICTURE_ALIGN_LEFT
, 0, SUBPICTURE_ALIGN_RIGHT
};
960 const int i_id
= psz_subtitle
[4] - '1';
962 *pi_align
= pi_vertical
[i_id
/3] | pi_horizontal
[i_id
%3];
964 /* TODO fr -> rotation */
966 /* Hide {\stupidity} */
967 psz_subtitle
= strchr( psz_subtitle
, '}' ) + 1;
969 /* MicroDVD extensions */
971 * - Currently, we don't do difference between X and x, and we should:
972 * Capital Letters applies to the whole text and not one line
973 * - We don't support Position and Coordinates
974 * - We don't support the DEFAULT flag (HEADER)
977 else if( psz_subtitle
[0] == '{' && psz_subtitle
[1] != 0 &&
978 psz_subtitle
[2] == ':' && strchr( &psz_subtitle
[2], '}' ) )
980 const char *psz_tag_end
= strchr( &psz_subtitle
[2], '}' );
981 size_t i_len
= psz_tag_end
- &psz_subtitle
[3];
983 if( psz_subtitle
[1] == 'Y' || psz_subtitle
[1] == 'y' )
985 if( psz_subtitle
[3] == 'i' )
987 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
988 p_segment
->style
->i_style_flags
|= STYLE_ITALIC
;
989 p_segment
->style
->i_features
|= STYLE_HAS_FLAGS
;
992 if( psz_subtitle
[3] == 'b' )
994 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
995 p_segment
->style
->i_style_flags
|= STYLE_BOLD
;
996 p_segment
->style
->i_features
|= STYLE_HAS_FLAGS
;
999 if( psz_subtitle
[3] == 'u' )
1001 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
1002 p_segment
->style
->i_style_flags
|= STYLE_UNDERLINE
;
1003 p_segment
->style
->i_features
|= STYLE_HAS_FLAGS
;
1007 else if( (psz_subtitle
[1] == 'C' || psz_subtitle
[1] == 'c' )
1008 && psz_subtitle
[3] == '$' && i_len
>= 7 )
1010 /* Yes, they use BBGGRR, instead of RRGGBB */
1012 psz_color
[0] = psz_subtitle
[8]; psz_color
[1] = psz_subtitle
[9];
1013 psz_color
[2] = psz_subtitle
[6]; psz_color
[3] = psz_subtitle
[7];
1014 psz_color
[4] = psz_subtitle
[4]; psz_color
[5] = psz_subtitle
[5];
1015 psz_color
[6] = '\0';
1016 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
1017 p_segment
->style
->i_font_color
= vlc_html_color( psz_color
, NULL
);
1018 p_segment
->style
->i_features
|= STYLE_HAS_FONT_COLOR
;
1020 else if( psz_subtitle
[1] == 'F' || psz_subtitle
[1] == 'f' )
1022 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
1023 free(p_segment
->style
->psz_fontname
);
1024 p_segment
->style
->psz_fontname
= strndup( &psz_subtitle
[3], i_len
);
1026 else if( psz_subtitle
[1] == 'S' || psz_subtitle
[1] == 's' )
1028 int size
= atoi( &psz_subtitle
[3] );
1031 p_segment
= NewTextSegmentPushStyle( p_segment
, &p_stack
);
1032 p_segment
->style
->i_font_size
= size
;
1033 p_segment
->style
->f_font_relsize
= STYLE_DEFAULT_REL_FONT_SIZE
*
1034 STYLE_DEFAULT_FONT_SIZE
/ p_segment
->style
->i_font_size
;
1038 /* Currently unsupported since we don't have access to the i_align flag here
1039 else if( psz_subtitle[1] == 'P' )
1041 if( psz_subtitle[3] == "1" )
1042 i_align = SUBPICTURE_ALIGN_TOP;
1043 else if( psz_subtitle[3] == "0" )
1044 i_align = SUBPICTURE_ALIGN_BOTTOM;
1046 // Hide other {x:y} atrocities, notably {o:x}
1047 psz_subtitle
= psz_tag_end
+ 1;
1051 if( *psz_subtitle
== '\n' || !strncasecmp( psz_subtitle
, "\\n", 2 ) )
1053 if ( !AppendCharacter( p_segment
, '\n' ) )
1055 if ( *psz_subtitle
== '\n' )
1060 else if( !strncasecmp( psz_subtitle
, "\\h", 2 ) )
1062 if ( !AppendString( p_segment
, "\xC2\xA0" ) )
1068 //FIXME: Highly inneficient
1069 AppendCharacter( p_segment
, *psz_subtitle
);
1075 PopStyle( &p_stack
);
1076 while ( p_tag_stack
)
1078 tag_stack_t
*p_tag
= p_tag_stack
;
1079 p_tag_stack
= p_tag_stack
->p_next
;
1080 free( p_tag
->psz_tagname
);
1084 return p_first_segment
;
1087 text_segment_ChainDelete( p_first_segment
);