3 Copyright (c) 2003-2015 HandBrake Team
4 This file is part of the HandBrake source code
5 Homepage: <http://handbrake.fr/>.
6 It may be used under the terms of the GNU General Public License v2.
7 For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
11 * Converts SSA subtitles to either:
12 * (1) TEXTSUB format: UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>), or
13 * (2) PICTURESUB format, using libass.
15 * SSA format references:
16 * http://www.matroska.org/technical/specs/subtitles/ssa.html
17 * http://moodub.free.fr/video/ass-specs.doc
18 * vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString
22 * vlc-1.0.4/modules/codec/libass.c
24 * @author David Foster (davidfstr)
32 #include "decssasub.h"
35 struct hb_work_private_s
37 // If decoding to PICTURESUB format:
43 #define SSA_2_HB_TIME(hr,min,sec,centi) \
44 ( 90L * ( hr * 1000L * 60 * 60 +\
49 #define SSA_VERBOSE_PACKETS 0
51 static int ssa_update_style(char *ssa
, hb_subtitle_style_t
*style
)
59 while (ssa
[pos
] != '}' && ssa
[pos
] != '\0')
63 // Skip any malformed markup junk
64 while (strchr("\\}", ssa
[pos
]) == NULL
) pos
++;
66 // Check for an index that is in some markup (e.g. font color)
67 if (isdigit(ssa
[pos
]))
69 index
= ssa
[pos
++] - 0x30;
71 // Find the end of this markup clause
73 while (strchr("\\}", ssa
[end
]) == NULL
) end
++;
74 // Handle simple integer valued attributes
75 if (strchr("ibu", ssa
[pos
]) != NULL
&& isdigit(ssa
[pos
+1]))
77 int val
= strtol(ssa
+ pos
+ 1, NULL
, 0);
81 style
->flags
= (style
->flags
& ~HB_STYLE_FLAG_ITALIC
) |
82 !!val
* HB_STYLE_FLAG_ITALIC
;
85 style
->flags
= (style
->flags
& ~HB_STYLE_FLAG_BOLD
) |
86 !!val
* HB_STYLE_FLAG_BOLD
;
89 style
->flags
= (style
->flags
& ~HB_STYLE_FLAG_UNDERLINE
) |
90 !!val
* HB_STYLE_FLAG_UNDERLINE
;
94 if (ssa
[pos
] == 'c' && ssa
[pos
+1] == '&' && ssa
[pos
+2] == 'H')
100 bgr
= strtol(ssa
+ pos
+ 3, &endptr
, 16);
107 style
->fg_rgb
= HB_BGR_TO_RGB(bgr
);
110 style
->alt_rgb
= HB_BGR_TO_RGB(bgr
);
113 style
->ol_rgb
= HB_BGR_TO_RGB(bgr
);
116 style
->bg_rgb
= HB_BGR_TO_RGB(bgr
);
119 // Unknown color index, ignore
124 if ((ssa
[pos
] == 'a' && ssa
[pos
+1] == '&' && ssa
[pos
+2] == 'H') ||
125 (!strcmp(ssa
+pos
, "alpha") && ssa
[pos
+5] == '&' && ssa
[pos
+6] == 'H'))
135 alpha
= strtol(ssa
+ pos
+ alpha_pos
, &endptr
, 16);
138 // SSA alpha is inverted 0 is opaque
144 style
->fg_alpha
= alpha
;
147 style
->alt_alpha
= alpha
;
150 style
->ol_alpha
= alpha
;
153 style
->bg_alpha
= alpha
;
156 // Unknown alpha index, ignore
168 char * hb_ssa_to_text(char *in
, int *consumed
, hb_subtitle_style_t
*style
)
173 char *out
= malloc(strlen(in
) + 1); // out will never be longer than in
175 for (in_pos
= 0; in
[in_pos
] != '\0'; in_pos
++)
177 if ((markup_len
= ssa_update_style(in
+ in_pos
, style
)))
179 *consumed
= in_pos
+ markup_len
;
180 out
[out_pos
++] = '\0';
183 // Check escape codes
184 if (in
[in_pos
] == '\\')
194 out
[out_pos
++] = '\n';
197 out
[out_pos
++] = ' ';
200 out
[out_pos
++] = in
[in_pos
];
206 out
[out_pos
++] = in
[in_pos
];
210 out
[out_pos
++] = '\0';
214 void hb_ssa_style_init(hb_subtitle_style_t
*style
)
218 style
->fg_rgb
= 0x00FFFFFF;
219 style
->alt_rgb
= 0x00FFFFFF;
220 style
->ol_rgb
= 0x000F0F0F;
221 style
->bg_rgb
= 0x000F0F0F;
223 style
->fg_alpha
= 0xFF;
224 style
->alt_alpha
= 0xFF;
225 style
->ol_alpha
= 0xFF;
226 style
->bg_alpha
= 0xFF;
229 static hb_buffer_t
*ssa_decode_line_to_mkv_ssa( hb_work_object_t
* w
, uint8_t *in_data
, int in_size
, int in_sequence
);
232 * Decodes a single SSA packet to one or more TEXTSUB or PICTURESUB subtitle packets.
235 * ( Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text CR LF ) +
236 * 1 2 3 4 5 6 7 8 9 10
238 static hb_buffer_t
*ssa_decode_packet( hb_work_object_t
* w
, hb_buffer_t
*in
)
240 // Store NULL after the end of the buffer to make using string processing safe
241 hb_buffer_realloc(in
, ++in
->size
);
242 in
->data
[in
->size
- 1] = '\0';
244 hb_buffer_t
*out_list
= NULL
;
245 hb_buffer_t
**nextPtr
= &out_list
;
247 const char *EOL
= "\r\n";
248 char *curLine
, *curLine_parserData
;
249 for ( curLine
= strtok_r( (char *) in
->data
, EOL
, &curLine_parserData
);
251 curLine
= strtok_r( NULL
, EOL
, &curLine_parserData
) )
253 // Skip empty lines and spaces between adjacent CR and LF
254 if (curLine
[0] == '\0')
257 // Decode an individual SSA line
259 out
= ssa_decode_line_to_mkv_ssa(w
, (uint8_t *)curLine
, strlen(curLine
), in
->sequence
);
263 // Append 'out' to 'out_list'
265 nextPtr
= &out
->next
;
268 // For point-to-point encoding, when the start time of the stream
269 // may be offset, the timestamps of the subtitles must be offset as well.
271 // HACK: Here we are making the assumption that, under normal circumstances,
272 // the output display time of the first output packet is equal to the
273 // display time of the input packet.
275 // During point-to-point encoding, the display time of the input
276 // packet will be offset to compensate.
278 // Therefore we offset all of the output packets by a slip amount
279 // such that first output packet's display time aligns with the
280 // input packet's display time. This should give the correct time
281 // when point-to-point encoding is in effect.
282 if (out_list
&& out_list
->s
.start
> in
->s
.start
)
284 int64_t slip
= out_list
->s
.start
- in
->s
.start
;
290 out
->s
.start
-= slip
;
300 * Parses the start and stop time from the specified SSA packet.
302 * Returns true if parsing failed; false otherwise.
304 static int parse_timing_from_ssa_packet( char *in_data
, int64_t *in_start
, int64_t *in_stop
)
307 * Parse Start and End fields for timing information
309 int start_hr
, start_min
, start_sec
, start_centi
;
310 int end_hr
, end_min
, end_sec
, end_centi
;
311 // SSA subtitles have an empty layer field (bare ','). The scanf
312 // format specifier "%*128[^,]" will not match on a bare ','. There
313 // must be at least one non ',' character in the match. So the format
314 // specifier is placed directly next to the ':' so that the next
315 // expected ' ' after the ':' will be the character it matches on
316 // when there is no layer field.
317 int numPartsRead
= sscanf( (char *) in_data
, "Dialogue:%*128[^,],"
318 "%d:%d:%d.%d," // Start
319 "%d:%d:%d.%d,", // End
320 &start_hr
, &start_min
, &start_sec
, &start_centi
,
321 &end_hr
, &end_min
, &end_sec
, &end_centi
);
322 if ( numPartsRead
!= 8 )
325 *in_start
= SSA_2_HB_TIME(start_hr
, start_min
, start_sec
, start_centi
);
326 *in_stop
= SSA_2_HB_TIME( end_hr
, end_min
, end_sec
, end_centi
);
331 static uint8_t *find_field( uint8_t *pos
, uint8_t *end
, int fieldNum
)
339 if ( curFieldID
== fieldNum
)
348 * Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
349 * 1 2 3 4 5 6 7 8 9 10
351 * MKV-SSA packet format:
352 * ReadOrder,Marked, Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
355 static hb_buffer_t
*ssa_decode_line_to_mkv_ssa( hb_work_object_t
* w
, uint8_t *in_data
, int in_size
, int in_sequence
)
357 hb_work_private_t
* pv
= w
->private_data
;
360 // Parse values for in->s.start and in->s.stop
361 int64_t in_start
, in_stop
;
362 if ( parse_timing_from_ssa_packet( (char *) in_data
, &in_start
, &in_stop
) )
365 // Convert the SSA packet to MKV-SSA format, which is what libass expects
368 char *styleToTextFields
;
369 char *layerField
= malloc( in_size
);
371 // SSA subtitles have an empty layer field (bare ','). The scanf
372 // format specifier "%*128[^,]" will not match on a bare ','. There
373 // must be at least one non ',' character in the match. So the format
374 // specifier is placed directly next to the ':' so that the next
375 // expected ' ' after the ':' will be the character it matches on
376 // when there is no layer field.
377 numPartsRead
= sscanf( (char *)in_data
, "Dialogue:%128[^,],", layerField
);
378 if ( numPartsRead
!= 1 )
381 styleToTextFields
= (char *)find_field( in_data
, in_data
+ in_size
, 4 );
382 if ( styleToTextFields
== NULL
) {
387 // The sscanf conversion above will result in an extra space
388 // before the layerField. Strip the space.
389 char *stripLayerField
= layerField
;
390 for(; *stripLayerField
== ' '; stripLayerField
++);
392 out
= hb_buffer_init( in_size
+ 1 );
393 mkvIn
= (char*)out
->data
;
396 sprintf(mkvIn
, "%d", pv
->readOrder
++); // ReadOrder: make this up
397 strcat( mkvIn
, "," );
398 strcat( mkvIn
, stripLayerField
);
399 strcat( mkvIn
, "," );
400 strcat( mkvIn
, (char *)styleToTextFields
);
402 out
->size
= strlen(mkvIn
) + 1;
403 out
->s
.frametype
= HB_FRAME_SUBTITLE
;
404 out
->s
.start
= in_start
;
405 out
->s
.stop
= in_stop
;
406 out
->sequence
= in_sequence
;
410 hb_buffer_close(&out
);
418 hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size
, in_data
);
422 static int decssaInit( hb_work_object_t
* w
, hb_job_t
* job
)
424 hb_work_private_t
* pv
;
426 pv
= calloc( 1, sizeof( hb_work_private_t
) );
427 w
->private_data
= pv
;
433 static int decssaWork( hb_work_object_t
* w
, hb_buffer_t
** buf_in
,
434 hb_buffer_t
** buf_out
)
436 hb_buffer_t
* in
= *buf_in
;
438 #if SSA_VERBOSE_PACKETS
439 printf("\nPACKET(%"PRId64
",%"PRId64
"): %.*s\n", in
->s
.start
/90, in
->s
.stop
/90, in
->size
, in
->data
);
442 if (in
->s
.flags
& HB_BUF_FLAG_EOF
)
449 *buf_out
= ssa_decode_packet(w
, in
);
454 static void decssaClose( hb_work_object_t
* w
)
456 free( w
->private_data
);
459 hb_work_object_t hb_decssasub
=
462 "SSA Subtitle Decoder",