subassconvert: do not escape likely ASS override tags
[mplayer.git] / sub / subassconvert.c
blob6bf3c5bfa42880fbed4181cb516f520b322bebb7
/*
 * Subtitles converter to SSA/ASS in order to allow special formatting
 *
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
21 #include <string.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <stdarg.h>
26 #include <stdbool.h>
27 #include <ctype.h>
29 #include "mp_msg.h"
30 #include "subassconvert.h"
31 #include "bstr.h"
32 #include "libavutil/common.h"
/*
 * Output accumulator used by the converters below.
 *
 * Invariant maintained by append_text(): whenever bufsize > 0 and the
 * cursor started inside the buffer, len <= bufsize - 1 and buf[len] is
 * the NUL terminator of the text written so far.
 */
struct line {
    char *buf;      // destination storage, supplied by the caller
    int bufsize;    // capacity of buf in bytes, terminator included
    int len;        // number of characters stored so far
};

#ifdef __GNUC__
static void append_text(struct line *dst, const char *fmt, ...) __attribute__ ((format(printf, 2, 3)));
#endif

/*
 * Append printf-style formatted text to dst.
 *
 * The output is truncated if it does not fit. On truncation dst->len is
 * clamped to dst->bufsize - 1 (the index of the terminator vsnprintf()
 * wrote), never to dst->bufsize, so that a later buf[len] access stays
 * inside the buffer. If there is no room left at all, or vsnprintf()
 * reports an error, dst is left unchanged.
 */
static void append_text(struct line *dst, const char *fmt, ...)
{
    int room = dst->bufsize - dst->len;

    /* A non-positive size must not reach vsnprintf(): it would be
     * converted to a huge size_t. */
    if (room <= 0)
        return;

    va_list va;
    va_start(va, fmt);
    int ret = vsnprintf(dst->buf + dst->len, room, fmt, va);
    va_end(va);

    if (ret < 0)
        return;

    /* vsnprintf() returns the untruncated length; only room - 1
     * characters (plus the terminator) were actually stored. */
    dst->len += ret < room ? ret : room - 1;
}
/*
 * Append at most `length` characters of the string at `start` to dst.
 *
 * A non-positive length appends nothing. The explicit check is needed
 * because printf treats a negative "%.*s" precision as if no precision
 * had been given, which would append the whole string instead.
 */
static void append_text_n(struct line *dst, const char *start, int length)
{
    if (length <= 0)
        return;
    append_text(dst, "%.*s", length, start);
}
/*
 * Return the index of the first occurrence of character c in s,
 * or -1 if c does not occur.
 *
 * The NUL terminator is never reported as a match: strchr() would find
 * it at index strlen(s), which is not a valid position for callers that
 * use the result to index a table with one entry per character of s.
 */
static int indexof(const char *s, int c)
{
    if ((char)c == '\0')
        return -1;

    const char *f = strchr(s, c);
    return f ? (int)(f - s) : -1;
}
/*
 *      SubRip
 *
 *      Support basic tags (italic, bold, underline, strike-through)
 *      and font tag with size, color and face attributes.
 *
 */
79 struct font_tag {
80 int size;
81 uint32_t color;
82 struct bstr face;
83 bool has_size : 1;
84 bool has_color : 1;
85 bool has_face : 1;
/*
 * Literal SubRip -> ASS substitutions tried at every input position.
 * "\r\n" is listed before "\n" and "\r" so that a CRLF pair is turned
 * into a single line break.
 */
static const struct tag_conv {
    char *from;     // text to look for in the input
    char *to;       // text written to the output instead
} subrip_basic_tags[] = {
    /* italic */
    { .from = "<i>",  .to = "{\\i1}" },
    { .from = "</i>", .to = "{\\i0}" },
    /* bold */
    { .from = "<b>",  .to = "{\\b1}" },
    { .from = "</b>", .to = "{\\b0}" },
    /* underline */
    { .from = "<u>",  .to = "{\\u1}" },
    { .from = "</u>", .to = "{\\u0}" },
    /* strike-through */
    { .from = "<s>",  .to = "{\\s1}" },
    { .from = "</s>", .to = "{\\s0}" },
    /* escape a closing brace */
    { .from = "}",    .to = "\\}" },
    /* line breaks */
    { .from = "\r\n", .to = "\\N" },
    { .from = "\n",   .to = "\\N" },
    { .from = "\r",   .to = "\\N" },
};
/*
 * Colour keywords accepted in <font color=...>, mapped to the value
 * emitted in the ASS \c override.
 */
static const struct {
    char *s;        // colour keyword
    uint32_t v;     // colour value, blue in the top byte, red in the low byte
} subrip_web_colors[] = {
    /* Named CSS3 colors in BGR format; a subset of those
       at http://www.w3.org/TR/css3-color/#svg-color */
    { .s = "aqua",    .v = 0xffff00 },
    { .s = "black",   .v = 0x000000 },
    { .s = "blue",    .v = 0xff0000 },
    { .s = "cyan",    .v = 0xffff00 },
    { .s = "fuchsia", .v = 0xff00ff },
    { .s = "gray",    .v = 0x808080 },
    { .s = "green",   .v = 0x008000 },
    { .s = "grey",    .v = 0x808080 },
    { .s = "lime",    .v = 0x00ff00 },
    { .s = "magenta", .v = 0xff00ff },
    { .s = "maroon",  .v = 0x000080 },
    { .s = "navy",    .v = 0x800000 },
    { .s = "olive",   .v = 0x008080 },
    { .s = "orange",  .v = 0x00a5ff },
    { .s = "pink",    .v = 0xcbc0ff },
    { .s = "purple",  .v = 0x800080 },
    { .s = "red",     .v = 0x0000ff },
    { .s = "silver",  .v = 0xc0c0c0 },
    { .s = "teal",    .v = 0x808000 },
    { .s = "white",   .v = 0xffffff },
    { .s = "yellow",  .v = 0x00ffff },
};
/* Deepest <font> nesting tracked by subassconvert_subrip(); its stack is
 * sized one entry larger so that slot 0 can hold the all-defaults state.
 * Opening tags beyond this depth are not treated as font tags. */
#define SUBRIP_MAX_STACKED_FONT_TAGS 16
131 /* Read the HTML-style attribute starting at *s, and skip *s past the value.
132 * Set attr and val to the parsed attribute name and value.
133 * Return 0 on success, or -1 if no valid attribute was found.
135 static int read_attr(char **s, struct bstr *attr, struct bstr *val)
137 char *eq = strchr(*s, '=');
138 if (!eq)
139 return -1;
140 attr->start = *s;
141 attr->len = eq - *s;
142 for (int i = 0; i < attr->len; i++)
143 if (!isalnum(attr->start[i]))
144 return -1;
145 val->start = eq + 1;
146 bool quoted = val->start[0] == '"';
147 if (quoted)
148 val->start++;
149 unsigned char *end = strpbrk(val->start, quoted ? "\"" : " >");
150 if (!end)
151 return -1;
152 val->len = end - val->start;
153 *s = end + quoted;
154 return 0;
157 void subassconvert_subrip(const char *orig, char *dest, int dest_buffer_size)
159 /* line is not const to avoid warnings with strtol, etc.
160 * orig content won't be changed */
161 char *line = (char *)orig;
162 struct line new_line = {
163 .buf = dest,
164 .bufsize = dest_buffer_size,
166 struct font_tag font_stack[SUBRIP_MAX_STACKED_FONT_TAGS + 1];
167 font_stack[0] = (struct font_tag){0}; // type with all defaults
168 int sp = 0;
170 while (*line && new_line.len < new_line.bufsize - 1) {
171 char *orig_line = line;
173 for (int i = 0; i < FF_ARRAY_ELEMS(subrip_basic_tags); i++) {
174 const struct tag_conv *tag = &subrip_basic_tags[i];
175 int from_len = strlen(tag->from);
176 if (strncmp(line, tag->from, from_len) == 0) {
177 append_text(&new_line, "%s", tag->to);
178 line += from_len;
182 if (strncmp(line, "</font>", 7) == 0) {
183 /* Closing font tag */
184 line += 7;
186 if (sp > 0) {
187 struct font_tag *tag = &font_stack[sp];
188 struct font_tag *last_tag = &tag[-1];
189 sp--;
191 if (tag->has_size) {
192 if (!last_tag->has_size)
193 append_text(&new_line, "{\\fs}");
194 else if (last_tag->size != tag->size)
195 append_text(&new_line, "{\\fs%d}", last_tag->size);
198 if (tag->has_color) {
199 if (!last_tag->has_color)
200 append_text(&new_line, "{\\c}");
201 else if (last_tag->color != tag->color)
202 append_text(&new_line, "{\\c&H%06X&}", last_tag->color);
205 if (tag->has_face) {
206 if (!last_tag->has_face)
207 append_text(&new_line, "{\\fn}");
208 else if (bstrcmp(last_tag->face, tag->face) != 0)
209 append_text(&new_line, "{\\fn%.*s}",
210 BSTR_P(last_tag->face));
213 } else if (strncmp(line, "<font ", 6) == 0
214 && sp + 1 < FF_ARRAY_ELEMS(font_stack)) {
215 /* Opening font tag */
216 char *potential_font_tag_start = line;
217 int len_backup = new_line.len;
218 struct font_tag *tag = &font_stack[sp + 1];
219 bool has_valid_attr = false;
221 *tag = tag[-1]; // keep values from previous tag
222 line += 6;
224 while (*line && *line != '>') {
225 if (*line == ' ') {
226 line++;
227 continue;
229 struct bstr attr, val;
230 if (read_attr(&line, &attr, &val) < 0)
231 break;
232 if (!bstrcmp0(attr, "size")) {
233 tag->size = bstrtoll(val, &val, 10);
234 if (val.len)
235 break;
236 append_text(&new_line, "{\\fs%d}", tag->size);
237 tag->has_size = true;
238 has_valid_attr = true;
239 } else if (!bstrcmp0(attr, "color")) {
240 // Treat unrecognized color names as valid attributes
241 tag->has_color = true;
242 has_valid_attr = true;
243 // Standard web colors
244 for (int i = 0; i < FF_ARRAY_ELEMS(subrip_web_colors); i++) {
245 char *color = subrip_web_colors[i].s;
246 if (bstrcasecmp(val, bstr(color)) == 0) {
247 tag->color = subrip_web_colors[i].v;
248 goto foundcolor;
251 // Try to parse as hex even if there is no '#'
252 bstr_eatstart(&val, bstr("#"));
253 // #RRGGBB format
254 tag->color = bstrtoll(val, &val, 16) & 0x00ffffff;
255 tag->color = ((tag->color & 0xff) << 16)
256 | (tag->color & 0xff00)
257 | ((tag->color & 0xff0000) >> 16);
258 if (val.len) {
259 /* We didn't find any matching color */
260 mp_tmsg(MSGT_SUBREADER, MSGL_WARN,
261 "SubRip: unknown font color in subtitle: %s\n",
262 orig);
263 append_text(&new_line, "{\\c}");
264 } else {
265 foundcolor:
266 append_text(&new_line, "{\\c&H%06X&}", tag->color);
268 } else if (!bstrcmp0(attr, "face")) {
269 /* Font face attribute */
270 tag->face = val;
271 append_text(&new_line, "{\\fn%.*s}", BSTR_P(tag->face));
272 tag->has_face = true;
273 has_valid_attr = true;
274 } else
275 mp_tmsg(MSGT_SUBREADER, MSGL_WARN,"SubRip: unrecognized "
276 "attribute \"%.*s\" in font tag\n", BSTR_P(attr));
279 if (!has_valid_attr || *line != '>') { /* Not valid font tag */
280 line = potential_font_tag_start;
281 new_line.len = len_backup;
282 } else {
283 sp++;
284 line++;
286 } else if (*line == '{') {
287 char *end = strchr(line, '}');
288 if (line[1] == '\\' && end) {
289 /* Likely ASS tag, pass them through.
290 * Note that ASS tags like {something\an8} are legal too (i.e.
291 * the first character after '{' doesn't have to be '\'), but
292 * consider these fringe cases not worth supporting. */
293 append_text_n(&new_line, line, end - line + 1);
294 line = end + 1;
295 } else {
296 append_text(&new_line, "\\{"); // escape '{'
297 line++;
301 /* Tag conversion code didn't match */
302 if (line == orig_line)
303 new_line.buf[new_line.len++] = *line++;
305 new_line.buf[new_line.len] = 0;
/*
 *      MicroDVD
 *
 *      Based on the specifications found here:
 *      https://trac.videolan.org/vlc/ticket/1825#comment:6
 */
316 struct microdvd_tag {
317 char key;
318 int persistent;
319 uint32_t data1;
320 uint32_t data2;
321 struct bstr data_string;
/* Values of microdvd_tag.persistent:
 *  OFF    - the tag is closed again at the next line break ('|');
 *  ON     - the tag survives line breaks and still has to be emitted;
 *  OPENED - a persistent tag that has already been emitted once. */
#define MICRODVD_PERSISTENT_OFF     0
#define MICRODVD_PERSISTENT_ON      1
#define MICRODVD_PERSISTENT_OPENED  2

// Color, Font, Size, cHarset, stYle, Position, cOordinate
// The position of a key letter in this string is the slot of that tag in
// the tags array.
#define MICRODVD_TAGS "cfshyYpo"
331 static void microdvd_set_tag(struct microdvd_tag *tags, struct microdvd_tag tag)
333 int tag_index = indexof(MICRODVD_TAGS, tag.key);
335 if (tag_index < 0)
336 return;
337 memcpy(&tags[tag_index], &tag, sizeof(tag));
// italic, bold, underline, strike-through
// The position of a letter in this string is the bit number used for that
// style in a style tag's data1; the letter itself is used as the name of
// the ASS override that is written out.
#define MICRODVD_STYLES "ibus"
343 static char *microdvd_load_tags(struct microdvd_tag *tags, char *s)
345 while (*s == '{') {
346 char *start = s;
347 char tag_char = *(s + 1);
348 struct microdvd_tag tag = {0};
350 if (!tag_char || *(s + 2) != ':')
351 break;
352 s += 3;
354 switch (tag_char) {
356 /* Style */
357 case 'Y':
358 tag.persistent = MICRODVD_PERSISTENT_ON;
359 case 'y':
360 while (*s && *s != '}') {
361 int style_index = indexof(MICRODVD_STYLES, *s);
363 if (style_index >= 0)
364 tag.data1 |= (1 << style_index);
365 s++;
367 if (*s != '}')
368 break;
369 /* We must distinguish persistent and non-persistent styles
370 * to handle this kind of style tags: {y:ib}{Y:us} */
371 tag.key = tag_char;
372 break;
374 /* Color */
375 case 'C':
376 tag.persistent = MICRODVD_PERSISTENT_ON;
377 case 'c':
378 tag.data1 = strtol(s, &s, 16) & 0x00ffffff;
379 if (*s != '}')
380 break;
381 tag.key = 'c';
382 break;
384 /* Font name */
385 case 'F':
386 tag.persistent = MICRODVD_PERSISTENT_ON;
387 case 'f':
389 int len = indexof(s, '}');
390 if (len < 0)
391 break;
392 tag.data_string.start = s;
393 tag.data_string.len = len;
394 s += len;
395 tag.key = 'f';
396 break;
399 /* Font size */
400 case 'S':
401 tag.persistent = MICRODVD_PERSISTENT_ON;
402 case 's':
403 tag.data1 = strtol(s, &s, 10);
404 if (*s != '}')
405 break;
406 tag.key = 's';
407 break;
409 /* Charset */
410 case 'H':
412 //TODO: not yet handled, just parsed.
413 int len = indexof(s, '}');
414 if (len < 0)
415 break;
416 tag.data_string.start = s;
417 tag.data_string.len = len;
418 s += len;
419 tag.key = 'h';
420 break;
423 /* Position */
424 case 'P':
425 tag.persistent = MICRODVD_PERSISTENT_ON;
426 tag.data1 = (*s++ == '1');
427 if (*s != '}')
428 break;
429 tag.key = 'p';
430 break;
432 /* Coordinates */
433 case 'o':
434 tag.persistent = MICRODVD_PERSISTENT_ON;
435 tag.data1 = strtol(s, &s, 10);
436 if (*s != ',')
437 break;
438 s++;
439 tag.data2 = strtol(s, &s, 10);
440 if (*s != '}')
441 break;
442 tag.key = 'o';
443 break;
445 default: /* Unknown tag, we consider it to be text */
446 break;
449 if (tag.key == 0)
450 return start;
452 microdvd_set_tag(tags, tag);
453 s++;
455 return s;
458 static void microdvd_open_tags(struct line *new_line, struct microdvd_tag *tags)
460 for (int i = 0; i < sizeof(MICRODVD_TAGS) - 1; i++) {
461 if (tags[i].persistent == MICRODVD_PERSISTENT_OPENED)
462 continue;
463 switch (tags[i].key) {
464 case 'Y':
465 case 'y':
466 for (int sidx = 0; sidx < sizeof(MICRODVD_STYLES) - 1; sidx++)
467 if (tags[i].data1 & (1 << sidx))
468 append_text(new_line, "{\\%c1}", MICRODVD_STYLES[sidx]);
469 break;
471 case 'c':
472 append_text(new_line, "{\\c&H%06X&}", tags[i].data1);
473 break;
475 case 'f':
476 append_text(new_line, "{\\fn%.*s}", BSTR_P(tags[i].data_string));
477 break;
479 case 's':
480 append_text(new_line, "{\\fs%d}", tags[i].data1);
481 break;
483 case 'p':
484 if (tags[i].data1 == 0)
485 append_text(new_line, "{\\an8}");
486 break;
488 case 'o':
489 append_text(new_line, "{\\pos(%d,%d)}",
490 tags[i].data1, tags[i].data2);
491 break;
493 if (tags[i].persistent == MICRODVD_PERSISTENT_ON)
494 tags[i].persistent = MICRODVD_PERSISTENT_OPENED;
498 static void microdvd_close_no_persistent_tags(struct line *new_line,
499 struct microdvd_tag *tags)
501 int i;
503 for (i = sizeof(MICRODVD_TAGS) - 2; i; i--) {
504 if (tags[i].persistent != MICRODVD_PERSISTENT_OFF)
505 continue;
506 switch (tags[i].key) {
508 case 'y':
509 for (int sidx = sizeof(MICRODVD_STYLES) - 2; sidx >= 0; sidx--)
510 if (tags[i].data1 & (1 << sidx))
511 append_text(new_line, "{\\%c0}", MICRODVD_STYLES[sidx]);
512 break;
514 case 'c':
515 append_text(new_line, "{\\c}");
516 break;
518 case 'f':
519 append_text(new_line, "{\\fn}");
520 break;
522 case 's':
523 append_text(new_line, "{\\fs}");
524 break;
526 tags[i].key = 0;
530 void subassconvert_microdvd(const char *orig, char *dest, int dest_buffer_size)
532 /* line is not const to avoid warnings with strtol, etc.
533 * orig content won't be changed */
534 char *line = (char *)orig;
535 struct line new_line = {
536 .buf = dest,
537 .bufsize = dest_buffer_size,
539 struct microdvd_tag tags[sizeof(MICRODVD_TAGS) - 1] = {{0}};
541 while (*line) {
542 line = microdvd_load_tags(tags, line);
543 microdvd_open_tags(&new_line, tags);
545 while (*line && *line != '|')
546 new_line.buf[new_line.len++] = *line++;
548 if (*line == '|') {
549 microdvd_close_no_persistent_tags(&new_line, tags);
550 append_text(&new_line, "\\N");
551 line++;
554 new_line.buf[new_line.len] = 0;