WinGui: Fix another instance of the Caliburn vs Json.net sillyness where objects...
[HandBrake.git] / libhb / decssasub.c
blob4a974ac383e7d50340ec667d21892bb4bd45ce90
1 /* decssasub.c
3 Copyright (c) 2003-2015 HandBrake Team
4 This file is part of the HandBrake source code
5 Homepage: <http://handbrake.fr/>.
6 It may be used under the terms of the GNU General Public License v2.
7 For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
8 */
/**
 * Converts SSA subtitles to either:
 * (1) TEXTSUB format: UTF-8 subtitles with limited HTML-style markup (<b>, <i>, <u>), or
 * (2) PICTURESUB format, using libass.
 *
 * SSA format references:
 * http://www.matroska.org/technical/specs/subtitles/ssa.html
 * http://moodub.free.fr/video/ass-specs.doc
 * vlc-1.0.4/modules/codec/subtitles/subsass.c:ParseSSAString
 *
 * libass references:
 * libass-0.9.9/ass.h
 * vlc-1.0.4/modules/codec/libass.c
 *
 * @author David Foster (davidfstr)
 */
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <ctype.h>
29 #include "hb.h"
31 #include <ass/ass.h>
32 #include "decssasub.h"
33 #include "colormap.h"
35 struct hb_work_private_s
37 // If decoding to PICTURESUB format:
38 int readOrder;
40 hb_job_t *job;
/*
 * Converts an SSA timestamp given as hours, minutes, seconds and
 * centiseconds into HandBrake time units (90 ticks per millisecond).
 *
 * Every argument is parenthesised so that expressions such as `h + 1`
 * expand correctly, and the arithmetic is carried out in `long long` so
 * the result cannot overflow on platforms where `long` is only 32 bits.
 */
#define SSA_2_HB_TIME(hr,min,sec,centi) \
    ( 90LL * ( (hr)    * 1000LL * 60 * 60 + \
               (min)   * 1000LL * 60 + \
               (sec)   * 1000LL + \
               (centi) * 10LL ) )

// Set to 1 to dump every incoming packet to stdout (debugging aid).
#define SSA_VERBOSE_PACKETS 0
51 static int ssa_update_style(char *ssa, hb_subtitle_style_t *style)
53 int pos, end, index;
55 if (ssa[0] != '{')
56 return 0;
58 pos = 1;
59 while (ssa[pos] != '}' && ssa[pos] != '\0')
61 index = -1;
63 // Skip any malformed markup junk
64 while (strchr("\\}", ssa[pos]) == NULL) pos++;
65 pos++;
66 // Check for an index that is in some markup (e.g. font color)
67 if (isdigit(ssa[pos]))
69 index = ssa[pos++] - 0x30;
71 // Find the end of this markup clause
72 end = pos;
73 while (strchr("\\}", ssa[end]) == NULL) end++;
74 // Handle simple integer valued attributes
75 if (strchr("ibu", ssa[pos]) != NULL && isdigit(ssa[pos+1]))
77 int val = strtol(ssa + pos + 1, NULL, 0);
78 switch (ssa[pos])
80 case 'i':
81 style->flags = (style->flags & ~HB_STYLE_FLAG_ITALIC) |
82 !!val * HB_STYLE_FLAG_ITALIC;
83 break;
84 case 'b':
85 style->flags = (style->flags & ~HB_STYLE_FLAG_BOLD) |
86 !!val * HB_STYLE_FLAG_BOLD;
87 break;
88 case 'u':
89 style->flags = (style->flags & ~HB_STYLE_FLAG_UNDERLINE) |
90 !!val * HB_STYLE_FLAG_UNDERLINE;
91 break;
94 if (ssa[pos] == 'c' && ssa[pos+1] == '&' && ssa[pos+2] == 'H')
96 // Font color markup
97 char *endptr;
98 uint32_t bgr;
100 bgr = strtol(ssa + pos + 3, &endptr, 16);
101 if (*endptr == '&')
103 switch (index)
105 case -1:
106 case 1:
107 style->fg_rgb = HB_BGR_TO_RGB(bgr);
108 break;
109 case 2:
110 style->alt_rgb = HB_BGR_TO_RGB(bgr);
111 break;
112 case 3:
113 style->ol_rgb = HB_BGR_TO_RGB(bgr);
114 break;
115 case 4:
116 style->bg_rgb = HB_BGR_TO_RGB(bgr);
117 break;
118 default:
119 // Unknown color index, ignore
120 break;
124 if ((ssa[pos] == 'a' && ssa[pos+1] == '&' && ssa[pos+2] == 'H') ||
125 (!strcmp(ssa+pos, "alpha") && ssa[pos+5] == '&' && ssa[pos+6] == 'H'))
127 // Font alpha markup
128 char *endptr;
129 uint8_t alpha;
130 int alpha_pos = 3;
132 if (ssa[1] == 'l')
133 alpha_pos = 7;
135 alpha = strtol(ssa + pos + alpha_pos, &endptr, 16);
136 if (*endptr == '&')
138 // SSA alpha is inverted 0 is opaque
139 alpha = 255 - alpha;
140 switch (index)
142 case -1:
143 case 1:
144 style->fg_alpha = alpha;
145 break;
146 case 2:
147 style->alt_alpha = alpha;
148 break;
149 case 3:
150 style->ol_alpha = alpha;
151 break;
152 case 4:
153 style->bg_alpha = alpha;
154 break;
155 default:
156 // Unknown alpha index, ignore
157 break;
161 pos = end;
163 if (ssa[pos] == '}')
164 pos++;
165 return pos;
168 char * hb_ssa_to_text(char *in, int *consumed, hb_subtitle_style_t *style)
170 int markup_len = 0;
171 int in_pos = 0;
172 int out_pos = 0;
173 char *out = malloc(strlen(in) + 1); // out will never be longer than in
175 for (in_pos = 0; in[in_pos] != '\0'; in_pos++)
177 if ((markup_len = ssa_update_style(in + in_pos, style)))
179 *consumed = in_pos + markup_len;
180 out[out_pos++] = '\0';
181 return out;
183 // Check escape codes
184 if (in[in_pos] == '\\')
186 in_pos++;
187 switch (in[in_pos])
189 case '\0':
190 in_pos--;
191 break;
192 case 'N':
193 case 'n':
194 out[out_pos++] = '\n';
195 break;
196 case 'h':
197 out[out_pos++] = ' ';
198 break;
199 default:
200 out[out_pos++] = in[in_pos];
201 break;
204 else
206 out[out_pos++] = in[in_pos];
209 *consumed = in_pos;
210 out[out_pos++] = '\0';
211 return out;
214 void hb_ssa_style_init(hb_subtitle_style_t *style)
216 style->flags = 0;
218 style->fg_rgb = 0x00FFFFFF;
219 style->alt_rgb = 0x00FFFFFF;
220 style->ol_rgb = 0x000F0F0F;
221 style->bg_rgb = 0x000F0F0F;
223 style->fg_alpha = 0xFF;
224 style->alt_alpha = 0xFF;
225 style->ol_alpha = 0xFF;
226 style->bg_alpha = 0xFF;
229 static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *in_data, int in_size, int in_sequence );
232 * Decodes a single SSA packet to one or more TEXTSUB or PICTURESUB subtitle packets.
234 * SSA packet format:
235 * ( Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text CR LF ) +
236 * 1 2 3 4 5 6 7 8 9 10
238 static hb_buffer_t *ssa_decode_packet( hb_work_object_t * w, hb_buffer_t *in )
240 // Store NULL after the end of the buffer to make using string processing safe
241 hb_buffer_realloc(in, ++in->size);
242 in->data[in->size - 1] = '\0';
244 hb_buffer_t *out_list = NULL;
245 hb_buffer_t **nextPtr = &out_list;
247 const char *EOL = "\r\n";
248 char *curLine, *curLine_parserData;
249 for ( curLine = strtok_r( (char *) in->data, EOL, &curLine_parserData );
250 curLine;
251 curLine = strtok_r( NULL, EOL, &curLine_parserData ) )
253 // Skip empty lines and spaces between adjacent CR and LF
254 if (curLine[0] == '\0')
255 continue;
257 // Decode an individual SSA line
258 hb_buffer_t *out;
259 out = ssa_decode_line_to_mkv_ssa(w, (uint8_t *)curLine, strlen(curLine), in->sequence);
260 if ( out == NULL )
261 continue;
263 // Append 'out' to 'out_list'
264 *nextPtr = out;
265 nextPtr = &out->next;
268 // For point-to-point encoding, when the start time of the stream
269 // may be offset, the timestamps of the subtitles must be offset as well.
271 // HACK: Here we are making the assumption that, under normal circumstances,
272 // the output display time of the first output packet is equal to the
273 // display time of the input packet.
275 // During point-to-point encoding, the display time of the input
276 // packet will be offset to compensate.
278 // Therefore we offset all of the output packets by a slip amount
279 // such that first output packet's display time aligns with the
280 // input packet's display time. This should give the correct time
281 // when point-to-point encoding is in effect.
282 if (out_list && out_list->s.start > in->s.start)
284 int64_t slip = out_list->s.start - in->s.start;
285 hb_buffer_t *out;
287 out = out_list;
288 while (out)
290 out->s.start -= slip;
291 out->s.stop -= slip;
292 out = out->next;
296 return out_list;
/*
 * Parses the start and stop time from the specified SSA packet.
 *
 * On success the times are stored, converted with SSA_2_HB_TIME, in
 * '*in_start' and '*in_stop'; on failure neither is written.
 *
 * Returns true if parsing failed; false otherwise.
 */
static int parse_timing_from_ssa_packet( char *in_data, int64_t *in_start, int64_t *in_stop )
{
    // hours, minutes, seconds, centiseconds
    int begin[4];
    int finish[4];
    int matched;

    // SSA subtitles have an empty layer field (bare ','). The scanf
    // format specifier "%*128[^,]" will not match on a bare ','. There
    // must be at least one non ',' character in the match. So the format
    // specifier is placed directly next to the ':' so that the next
    // expected ' ' after the ':' will be the character it matches on
    // when there is no layer field.
    matched = sscanf( in_data,
                      "Dialogue:%*128[^,],"
                      "%d:%d:%d.%d,"    // Start
                      "%d:%d:%d.%d,",   // End
                      &begin[0],  &begin[1],  &begin[2],  &begin[3],
                      &finish[0], &finish[1], &finish[2], &finish[3] );
    if ( matched == 8 )
    {
        *in_start = SSA_2_HB_TIME(begin[0],  begin[1],  begin[2],  begin[3]);
        *in_stop  = SSA_2_HB_TIME(finish[0], finish[1], finish[2], finish[3]);
        return 0;
    }

    return 1;
}
/*
 * Locates the start of the comma separated field number 'fieldNum'
 * (counting from 1) inside the byte range [pos, end).
 *
 * Returns a pointer to the first byte after the (fieldNum - 1)'th comma,
 * which may equal 'end' when that comma is the last byte. Returns NULL
 * if the range holds fewer commas than that, and also for fieldNum <= 1,
 * because a result is only ever produced right after a comma.
 */
static uint8_t *find_field( uint8_t *pos, uint8_t *end, int fieldNum )
{
    int      commasSeen = 0;
    uint8_t *cursor;

    for (cursor = pos; cursor < end; cursor++)
    {
        if (*cursor != ',')
            continue;

        commasSeen++;
        if (commasSeen + 1 == fieldNum)
            return cursor + 1;
    }

    return NULL;
}
347 * SSA line format:
348 * Dialogue: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
349 * 1 2 3 4 5 6 7 8 9 10
351 * MKV-SSA packet format:
352 * ReadOrder,Marked, Style,Name,MarginL,MarginR,MarginV,Effect,Text '\0'
353 * 1 2 3 4 5 6 7 8 9
355 static hb_buffer_t *ssa_decode_line_to_mkv_ssa( hb_work_object_t * w, uint8_t *in_data, int in_size, int in_sequence )
357 hb_work_private_t * pv = w->private_data;
358 hb_buffer_t * out;
360 // Parse values for in->s.start and in->s.stop
361 int64_t in_start, in_stop;
362 if ( parse_timing_from_ssa_packet( (char *) in_data, &in_start, &in_stop ) )
363 goto fail;
365 // Convert the SSA packet to MKV-SSA format, which is what libass expects
366 char *mkvIn;
367 int numPartsRead;
368 char *styleToTextFields;
369 char *layerField = malloc( in_size );
371 // SSA subtitles have an empty layer field (bare ','). The scanf
372 // format specifier "%*128[^,]" will not match on a bare ','. There
373 // must be at least one non ',' character in the match. So the format
374 // specifier is placed directly next to the ':' so that the next
375 // expected ' ' after the ':' will be the character it matches on
376 // when there is no layer field.
377 numPartsRead = sscanf( (char *)in_data, "Dialogue:%128[^,],", layerField );
378 if ( numPartsRead != 1 )
379 goto fail;
381 styleToTextFields = (char *)find_field( in_data, in_data + in_size, 4 );
382 if ( styleToTextFields == NULL ) {
383 free( layerField );
384 goto fail;
387 // The sscanf conversion above will result in an extra space
388 // before the layerField. Strip the space.
389 char *stripLayerField = layerField;
390 for(; *stripLayerField == ' '; stripLayerField++);
392 out = hb_buffer_init( in_size + 1 );
393 mkvIn = (char*)out->data;
395 mkvIn[0] = '\0';
396 sprintf(mkvIn, "%d", pv->readOrder++); // ReadOrder: make this up
397 strcat( mkvIn, "," );
398 strcat( mkvIn, stripLayerField );
399 strcat( mkvIn, "," );
400 strcat( mkvIn, (char *)styleToTextFields );
402 out->size = strlen(mkvIn) + 1;
403 out->s.frametype = HB_FRAME_SUBTITLE;
404 out->s.start = in_start;
405 out->s.stop = in_stop;
406 out->sequence = in_sequence;
408 if( out->size == 0 )
410 hb_buffer_close(&out);
413 free( layerField );
415 return out;
417 fail:
418 hb_log( "decssasub: malformed SSA subtitle packet: %.*s\n", in_size, in_data );
419 return NULL;
422 static int decssaInit( hb_work_object_t * w, hb_job_t * job )
424 hb_work_private_t * pv;
426 pv = calloc( 1, sizeof( hb_work_private_t ) );
427 w->private_data = pv;
428 pv->job = job;
430 return 0;
433 static int decssaWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
434 hb_buffer_t ** buf_out )
436 hb_buffer_t * in = *buf_in;
438 #if SSA_VERBOSE_PACKETS
439 printf("\nPACKET(%"PRId64",%"PRId64"): %.*s\n", in->s.start/90, in->s.stop/90, in->size, in->data);
440 #endif
442 if (in->s.flags & HB_BUF_FLAG_EOF)
444 *buf_out = in;
445 *buf_in = NULL;
446 return HB_WORK_DONE;
449 *buf_out = ssa_decode_packet(w, in);
451 return HB_WORK_OK;
454 static void decssaClose( hb_work_object_t * w )
456 free( w->private_data );
459 hb_work_object_t hb_decssasub =
461 WORK_DECSSASUB,
462 "SSA Subtitle Decoder",
463 decssaInit,
464 decssaWork,
465 decssaClose