subassconvert: handle unquoted attributes in subrip font tags
[mplayer.git] / sub / subassconvert.c
blob77b27267dce66b4f28ece4212d47ef4b1353edb6
1 /*
2 * Subtitles converter to SSA/ASS in order to allow special formatting
4 * This file is part of MPlayer.
6 * MPlayer is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * MPlayer is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 #include <string.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <stdarg.h>
26 #include <stdbool.h>
28 #include "mp_msg.h"
29 #include "subassconvert.h"
30 #include "bstr.h"
31 #include "libavutil/common.h"
/* Output buffer being filled by the converters.
 * buf is caller-provided storage of bufsize bytes; len is the number of
 * bytes written so far, not counting any terminating NUL. */
struct line {
    char *buf;
    int bufsize;
    int len;
};

#ifdef __GNUC__
static void append_text(struct line *dst, char *fmt, ...) __attribute__ ((format(printf, 2, 3)));
#endif

/* Append printf-style formatted text to dst.
 *
 * The output is truncated if it does not fit. After a truncation dst->len
 * is set to dst->bufsize - 1, i.e. the index of the NUL that vsnprintf()
 * stored, so that a following "dst->buf[dst->len] = 0" stays inside the
 * buffer. Nothing is written when no room is left (including the case
 * where dst->len already reached or passed dst->bufsize).
 * On a formatting error dst->len is left unchanged. */
static void append_text(struct line *dst, char *fmt, ...)
{
    int avail = dst->bufsize - dst->len;
    if (avail <= 0)
        return;

    va_list va;
    va_start(va, fmt);
    int ret = vsnprintf(dst->buf + dst->len, avail, fmt, va);
    va_end(va);

    if (ret < 0)
        return;

    /* vsnprintf() returns the length the full output would have had;
     * ret >= avail therefore means the text was cut short. */
    if (ret >= avail)
        dst->len = dst->bufsize - 1;
    else
        dst->len += ret;
}
/* Return the offset of the first occurrence of c in s, or -1 if c does
 * not occur. As with strchr(), the terminating NUL counts as part of the
 * string, so c == 0 yields the string length. */
static int indexof(const char *s, int c)
{
    const char *hit = strchr(s, c);

    if (!hit)
        return -1;
    return (int)(hit - s);
}
/*
 *      SubRip
 *
 *      Support basic tags (italic, bold, underline, strike-through)
 *      and font tag with size, color and face attributes.
 */
73 struct font_tag {
74 int size;
75 uint32_t color;
76 struct bstr face;
77 bool has_size : 1;
78 bool has_color : 1;
79 bool has_face : 1;
/* Literal SubRip input sequences and the ASS text that replaces them.
 * Besides the simple style tags this also escapes braces and turns every
 * kind of line break into an ASS "\N". "\r\n" is listed ahead of "\n" and
 * "\r" so a CRLF pair is matched as a whole. */
static const struct tag_conv {
    char *from;
    char *to;
} subrip_basic_tags[] = {
    /* italic */
    {"<i>",  "{\\i1}"},
    {"</i>", "{\\i0}"},
    /* bold */
    {"<b>",  "{\\b1}"},
    {"</b>", "{\\b0}"},
    /* underline */
    {"<u>",  "{\\u1}"},
    {"</u>", "{\\u0}"},
    /* strike-through */
    {"<s>",  "{\\s1}"},
    {"</s>", "{\\s0}"},
    /* literal braces */
    {"{",    "\\{"},
    {"}",    "\\}"},
    /* line breaks */
    {"\r\n", "\\N"},
    {"\n",   "\\N"},
    {"\r",   "\\N"},
};
/* The 16 named HTML colors. Values are stored in BGR byte order. */
static const struct {
    char *s;
    uint32_t v;
} subrip_web_colors[] = {
    {"red",     0x0000ff},
    {"blue",    0xff0000},
    {"lime",    0x00ff00},
    {"aqua",    0xffff00},
    {"purple",  0x800080},
    {"yellow",  0x00ffff},
    {"fuchsia", 0xff00ff},
    {"white",   0xffffff},
    {"gray",    0x808080},
    {"maroon",  0x000080},
    {"olive",   0x008080},
    {"black",   0x000000},
    {"silver",  0xc0c0c0},
    {"teal",    0x808000},
    {"green",   0x008000},
    {"navy",    0x800000},
};
/* Number of nested <font> tags the SubRip converter keeps track of. */
#define SUBRIP_MAX_STACKED_FONT_TAGS 16
109 /* Read the attribute value starting at *s, and skip *s past the value.
110 * Set out_value to the parsed value, with possible '"' stripped.
111 * Return whether the attribute is well formed. */
112 static bool read_value(char **s, struct bstr *out_value)
114 char term = 0;
115 if (**s == '"') {
116 term = '"';
117 (*s)++;
119 out_value->start = *s;
120 out_value->len = 0;
121 unsigned char *start = *s;
122 unsigned char *end = term ? strchr(start, term) : strpbrk(start, " >");
123 if (!end)
124 return false;
125 out_value->len = end - out_value->start;
126 *s = end + (term ? 1 : 0);
127 return true;
130 void subassconvert_subrip(const char *orig, char *dest, int dest_buffer_size)
132 /* line is not const to avoid warnings with strtol, etc.
133 * orig content won't be changed */
134 char *line = (char *)orig;
135 struct line new_line = {
136 .buf = dest,
137 .bufsize = dest_buffer_size,
139 struct font_tag font_stack[SUBRIP_MAX_STACKED_FONT_TAGS + 1];
140 font_stack[0] = (struct font_tag){0}; // type with all defaults
141 int sp = 0;
143 while (*line && new_line.len < new_line.bufsize - 1) {
144 char *orig_line = line;
146 for (int i = 0; i < FF_ARRAY_ELEMS(subrip_basic_tags); i++) {
147 const struct tag_conv *tag = &subrip_basic_tags[i];
148 int from_len = strlen(tag->from);
149 if (strncmp(line, tag->from, from_len) == 0) {
150 append_text(&new_line, "%s", tag->to);
151 line += from_len;
155 if (strncmp(line, "</font>", 7) == 0) {
156 /* Closing font tag */
157 line += 7;
159 if (sp > 0) {
160 struct font_tag *tag = &font_stack[sp];
161 struct font_tag *last_tag = &tag[-1];
162 sp--;
164 if (tag->has_size) {
165 if (!last_tag->has_size)
166 append_text(&new_line, "{\\fs}");
167 else if (last_tag->size != tag->size)
168 append_text(&new_line, "{\\fs%d}", last_tag->size);
171 if (tag->has_color) {
172 if (!last_tag->has_color)
173 append_text(&new_line, "{\\c}");
174 else if (last_tag->color != tag->color)
175 append_text(&new_line, "{\\c&H%06X&}", last_tag->color);
178 if (tag->has_face) {
179 if (!last_tag->has_face)
180 append_text(&new_line, "{\\fn}");
181 else if (bstrcmp(last_tag->face, tag->face) != 0)
182 append_text(&new_line, "{\\fn%.*s}",
183 BSTR_P(last_tag->face));
186 } else if (strncmp(line, "<font ", 6) == 0
187 && sp + 1 < FF_ARRAY_ELEMS(font_stack)) {
188 /* Opening font tag */
189 char *potential_font_tag_start = line;
190 int len_backup = new_line.len;
191 struct font_tag *tag = &font_stack[sp + 1];
192 bool has_valid_attr = false;
194 *tag = tag[-1]; // keep values from previous tag
195 line += 6;
197 while (*line && *line != '>') {
198 if (strncmp(line, "size=", 5) == 0) {
199 line += 5;
200 struct bstr val;
201 if (!read_value(&line, &val))
202 break;
203 tag->size = bstrtoll(val, &val, 10);
204 if (val.len)
205 break;
206 append_text(&new_line, "{\\fs%d}", tag->size);
207 tag->has_size = true;
208 has_valid_attr = true;
209 } else if (strncmp(line, "color=", 6) == 0) {
210 line += 6;
211 struct bstr val;
212 if (!read_value(&line, &val))
213 break;
214 if (bstr_eatstart(&val, bstr("#"))) {
215 // #RRGGBB format
216 tag->color = bstrtoll(val, &val, 16) & 0x00ffffff;
217 if (val.len)
218 break;
219 tag->color = ((tag->color & 0xff) << 16)
220 | (tag->color & 0xff00)
221 | ((tag->color & 0xff0000) >> 16);
222 } else {
223 // Standard web colors
224 for (int i = 0; i < FF_ARRAY_ELEMS(subrip_web_colors); i++) {
225 char *color = subrip_web_colors[i].s;
226 if (bstrcasecmp(val, bstr(color)) == 0) {
227 tag->color = subrip_web_colors[i].v;
228 goto foundcolor;
232 /* We didn't find any matching color */
233 mp_tmsg(MSGT_SUBREADER, MSGL_WARN,
234 "SubRip: unknown font color in subtitle: %s\n", orig);
235 append_text(&new_line, "{\\c}");
236 continue;
238 foundcolor: ;
240 append_text(&new_line, "{\\c&H%06X&}", tag->color);
241 tag->has_color = true;
242 has_valid_attr = true;
243 } else if (strncmp(line, "face=", 5) == 0) {
244 /* Font face attribute */
245 line += 5;
246 struct bstr val;
247 if (!read_value(&line, &val))
248 break;
249 tag->face = val;
250 append_text(&new_line, "{\\fn%.*s}", BSTR_P(tag->face));
251 tag->has_face = true;
252 has_valid_attr = true;
253 } else
254 line++;
257 if (!has_valid_attr || *line != '>') { /* Not valid font tag */
258 line = potential_font_tag_start;
259 new_line.len = len_backup;
260 } else {
261 sp++;
262 line++;
266 /* Tag conversion code didn't match */
267 if (line == orig_line)
268 new_line.buf[new_line.len++] = *line++;
270 new_line.buf[new_line.len] = 0;
/*
 *      MicroDVD
 *
 *      Based on the specifications found here:
 *      https://trac.videolan.org/vlc/ticket/1825#comment:6
 */
281 struct microdvd_tag {
282 char key;
283 int persistent;
284 uint32_t data1;
285 uint32_t data2;
286 struct bstr data_string;
/* Values of microdvd_tag.persistent:
 *   OFF    - the tag is closed and dropped at the next line separator
 *   ON     - the tag stays across lines and has not been written out yet
 *   OPENED - the tag stays across lines and has already been written out */
#define MICRODVD_PERSISTENT_OFF     0
#define MICRODVD_PERSISTENT_ON      1
#define MICRODVD_PERSISTENT_OPENED  2

/* Tag keys; the position of a key in this string is its slot in the tag
 * array. */
// Color, Font, Size, cHarset, stYle, Position, cOordinate
#define MICRODVD_TAGS "cfshyYpo"
296 static void microdvd_set_tag(struct microdvd_tag *tags, struct microdvd_tag tag)
298 int tag_index = indexof(MICRODVD_TAGS, tag.key);
300 if (tag_index < 0)
301 return;
302 memcpy(&tags[tag_index], &tag, sizeof(tag));
305 // italic, bold, underline, strike-through
306 #define MICRODVD_STYLES "ibus"
308 static char *microdvd_load_tags(struct microdvd_tag *tags, char *s)
310 while (*s == '{') {
311 char *start = s;
312 char tag_char = *(s + 1);
313 struct microdvd_tag tag = {0};
315 if (!tag_char || *(s + 2) != ':')
316 break;
317 s += 3;
319 switch (tag_char) {
321 /* Style */
322 case 'Y':
323 tag.persistent = MICRODVD_PERSISTENT_ON;
324 case 'y':
325 while (*s && *s != '}') {
326 int style_index = indexof(MICRODVD_STYLES, *s);
328 if (style_index >= 0)
329 tag.data1 |= (1 << style_index);
330 s++;
332 if (*s != '}')
333 break;
334 /* We must distinguish persistent and non-persistent styles
335 * to handle this kind of style tags: {y:ib}{Y:us} */
336 tag.key = tag_char;
337 break;
339 /* Color */
340 case 'C':
341 tag.persistent = MICRODVD_PERSISTENT_ON;
342 case 'c':
343 tag.data1 = strtol(s, &s, 16) & 0x00ffffff;
344 if (*s != '}')
345 break;
346 tag.key = 'c';
347 break;
349 /* Font name */
350 case 'F':
351 tag.persistent = MICRODVD_PERSISTENT_ON;
352 case 'f':
354 int len = indexof(s, '}');
355 if (len < 0)
356 break;
357 tag.data_string.start = s;
358 tag.data_string.len = len;
359 s += len;
360 tag.key = 'f';
361 break;
364 /* Font size */
365 case 'S':
366 tag.persistent = MICRODVD_PERSISTENT_ON;
367 case 's':
368 tag.data1 = strtol(s, &s, 10);
369 if (*s != '}')
370 break;
371 tag.key = 's';
372 break;
374 /* Charset */
375 case 'H':
377 //TODO: not yet handled, just parsed.
378 int len = indexof(s, '}');
379 if (len < 0)
380 break;
381 tag.data_string.start = s;
382 tag.data_string.len = len;
383 s += len;
384 tag.key = 'h';
385 break;
388 /* Position */
389 case 'P':
390 tag.persistent = MICRODVD_PERSISTENT_ON;
391 tag.data1 = (*s++ == '1');
392 if (*s != '}')
393 break;
394 tag.key = 'p';
395 break;
397 /* Coordinates */
398 case 'o':
399 tag.persistent = MICRODVD_PERSISTENT_ON;
400 tag.data1 = strtol(s, &s, 10);
401 if (*s != ',')
402 break;
403 s++;
404 tag.data2 = strtol(s, &s, 10);
405 if (*s != '}')
406 break;
407 tag.key = 'o';
408 break;
410 default: /* Unknown tag, we consider it to be text */
411 break;
414 if (tag.key == 0)
415 return start;
417 microdvd_set_tag(tags, tag);
418 s++;
420 return s;
423 static void microdvd_open_tags(struct line *new_line, struct microdvd_tag *tags)
425 for (int i = 0; i < sizeof(MICRODVD_TAGS) - 1; i++) {
426 if (tags[i].persistent == MICRODVD_PERSISTENT_OPENED)
427 continue;
428 switch (tags[i].key) {
429 case 'Y':
430 case 'y':
431 for (int sidx = 0; sidx < sizeof(MICRODVD_STYLES) - 1; sidx++)
432 if (tags[i].data1 & (1 << sidx))
433 append_text(new_line, "{\\%c1}", MICRODVD_STYLES[sidx]);
434 break;
436 case 'c':
437 append_text(new_line, "{\\c&H%06X&}", tags[i].data1);
438 break;
440 case 'f':
441 append_text(new_line, "{\\fn%.*s}", BSTR_P(tags[i].data_string));
442 break;
444 case 's':
445 append_text(new_line, "{\\fs%d}", tags[i].data1);
446 break;
448 case 'p':
449 if (tags[i].data1 == 0)
450 append_text(new_line, "{\\an8}");
451 break;
453 case 'o':
454 append_text(new_line, "{\\pos(%d,%d)}",
455 tags[i].data1, tags[i].data2);
456 break;
458 if (tags[i].persistent == MICRODVD_PERSISTENT_ON)
459 tags[i].persistent = MICRODVD_PERSISTENT_OPENED;
463 static void microdvd_close_no_persistent_tags(struct line *new_line,
464 struct microdvd_tag *tags)
466 int i;
468 for (i = sizeof(MICRODVD_TAGS) - 2; i; i--) {
469 if (tags[i].persistent != MICRODVD_PERSISTENT_OFF)
470 continue;
471 switch (tags[i].key) {
473 case 'y':
474 for (int sidx = sizeof(MICRODVD_STYLES) - 2; sidx >= 0; sidx--)
475 if (tags[i].data1 & (1 << sidx))
476 append_text(new_line, "{\\%c0}", MICRODVD_STYLES[sidx]);
477 break;
479 case 'c':
480 append_text(new_line, "{\\c}");
481 break;
483 case 'f':
484 append_text(new_line, "{\\fn}");
485 break;
487 case 's':
488 append_text(new_line, "{\\fs}");
489 break;
491 tags[i].key = 0;
495 void subassconvert_microdvd(const char *orig, char *dest, int dest_buffer_size)
497 /* line is not const to avoid warnings with strtol, etc.
498 * orig content won't be changed */
499 char *line = (char *)orig;
500 struct line new_line = {
501 .buf = dest,
502 .bufsize = dest_buffer_size,
504 struct microdvd_tag tags[sizeof(MICRODVD_TAGS) - 1] = {{0}};
506 while (*line) {
507 line = microdvd_load_tags(tags, line);
508 microdvd_open_tags(&new_line, tags);
510 while (*line && *line != '|')
511 new_line.buf[new_line.len++] = *line++;
513 if (*line == '|') {
514 microdvd_close_no_persistent_tags(&new_line, tags);
515 append_text(&new_line, "\\N");
516 line++;
519 new_line.buf[new_line.len] = 0;