find_subfiles: try to determine if a .sub file is text or vobsub
[mplayer/greg.git] / sub / subreader.c
blob6acecb5c54fbb884422cde12ce5c2f55631a8a4f
1 /*
2 * Subtitle reader with format autodetection
4 * Copyright (c) 2001 laaz
5 * Some code cleanup & realloc() by A'rpi/ESP-team
7 * This file is part of MPlayer.
9 * MPlayer is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * MPlayer is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <sys/types.h>
28 #include <dirent.h>
30 #include "config.h"
31 #include "mp_msg.h"
32 #include "subreader.h"
33 #include "mpcommon.h"
34 #include "subassconvert.h"
35 #include "options.h"
36 #include "stream/stream.h"
37 #include "libavutil/common.h"
38 #include "libavutil/avstring.h"
40 #ifdef CONFIG_ENCA
41 #include <enca.h>
42 #endif
/* Sentinel pointer the line readers in this file return on a hard error
 * (as opposed to NULL, which they return when no more input is available). */
#define ERR ((void *) -1)

#ifdef CONFIG_ICONV
#include <iconv.h>
char *sub_cp=NULL;
#endif
#ifdef CONFIG_FRIBIDI
#include <fribidi/fribidi.h>
char *fribidi_charset = NULL;   ///character set that will be passed to FriBiDi
int flip_hebrew = 1;            ///flip subtitles using fribidi
int fribidi_flip_commas = 0;    ///flip comma when fribidi is used
#endif
// Parameter struct for the format-specific readline functions
struct readline_args {
    int utf16;            // passed through to stream_read_line() by every reader
    struct MPOpts *opts;  // player options; readers look at opts->ass_enabled
};
63 /* Maximal length of line of a subtitle */
64 #define LINE_LEN 1000
65 static float mpsub_position=0;
66 static float mpsub_multiplier=1.;
67 static int sub_slacktime = 20000; //20 sec
69 int sub_no_text_pp=0; // 1 => do not apply text post-processing
70 // like {\...} elimination in SSA format.
72 int sub_match_fuzziness=0; // level of sub name matching fuzziness
74 /* Use the SUB_* constant defined in the header file */
75 int sub_format=SUB_INVALID;
76 #ifdef CONFIG_SORTSUB
78 Some subtitling formats, namely AQT and Subrip09, define the end of a
79 subtitle as the beginning of the following. Since currently we read one
80 subtitle at time, for these format we keep two global *subtitle,
81 previous_aqt_sub and previous_subrip09_sub, pointing to previous subtitle,
82 so we can change its end when we read current subtitle starting time.
83 When CONFIG_SORTSUB is defined, we use a single global unsigned long,
84 previous_sub_end, for both (and even future) formats, to store the end of
85 the previous sub: it is initialized to 0 in sub_read_file and eventually
86 modified by sub_read_aqt_line or sub_read_subrip09_line.
88 unsigned long previous_sub_end;
89 #endif
/* Return non-zero when p ends a line: carriage return, line feed or the
 * NUL terminator. */
static int eol(char p) {
    switch (p) {
    case '\r':
    case '\n':
    case '\0':
        return 1;
    default:
        return 0;
    }
}
/* Remove leading and trailing space from s, in place.
 *
 * s must be a writable NUL-terminated string. The text is shifted with
 * memmove() because source and destination overlap (strcpy() on overlapping
 * buffers is undefined). Characters are cast to unsigned char before being
 * handed to isspace(), which is undefined for negative values. */
static void trail_space(char *s) {
    size_t lead = 0;
    size_t len;

    while (isspace((unsigned char)s[lead]))
        ++lead;
    len = strlen(s + lead);
    if (lead)
        memmove(s, s + lead, len + 1);   /* +1 moves the terminator too */

    /* s[0] is now either NUL or a non-space, so this stops at index 0 */
    while (len > 0 && isspace((unsigned char)s[len - 1]))
        s[--len] = '\0';
}
/* Case-insensitive strstr().
 *
 * Returns a pointer to the first position in haystack where needle matches
 * ignoring case, or NULL when there is no match or either argument is NULL.
 * An empty haystack never matches, not even an empty needle. */
static char *stristr(const char *haystack, const char *needle) {
    size_t len;
    const char *p;

    if (!haystack || !needle)
        return NULL;

    len = strlen(needle);
    for (p = haystack; *p != '\0'; p++) {
        if (strncasecmp(p, needle, len) == 0)
            return (char *)p;
    }

    return NULL;
}
119 static void sami_add_line(subtitle *current, char *buffer, char **pos) {
120 char *p = *pos;
121 *p = 0;
122 trail_space(buffer);
123 if (*buffer && current->lines < SUB_MAX_TEXT)
124 current->text[current->lines++] = strdup(buffer);
125 *pos = buffer;
128 static subtitle *sub_read_line_sami(stream_t* st, subtitle *current,
129 struct readline_args *args)
131 int utf16 = args->utf16;
132 static char line[LINE_LEN+1];
133 static char *s = NULL, *slacktime_s;
134 char text[LINE_LEN+1], *p=NULL, *q;
135 int state;
137 current->lines = current->start = current->end = 0;
138 current->alignment = SUB_ALIGNMENT_BOTTOMCENTER;
139 state = 0;
141 /* read the first line */
142 if (!s)
143 if (!(s = stream_read_line(st, line, LINE_LEN, utf16))) return 0;
145 do {
146 switch (state) {
148 case 0: /* find "START=" or "Slacktime:" */
149 slacktime_s = stristr (s, "Slacktime:");
150 if (slacktime_s)
151 sub_slacktime = strtol (slacktime_s+10, NULL, 0) / 10;
153 s = stristr (s, "Start=");
154 if (s) {
155 current->start = strtol (s + 6, &s, 0) / 10;
156 /* eat '>' */
157 for (; *s != '>' && *s != '\0'; s++);
158 s++;
159 state = 1; continue;
161 break;
163 case 1: /* find (optional) "<P", skip other TAGs */
164 for (; *s == ' ' || *s == '\t'; s++); /* strip blanks, if any */
165 if (*s == '\0') break;
166 if (*s != '<') { state = 3; p = text; continue; } /* not a TAG */
167 s++;
168 if (*s == 'P' || *s == 'p') { s++; state = 2; continue; } /* found '<P' */
169 for (; *s != '>' && *s != '\0'; s++); /* skip remains of non-<P> TAG */
170 if (s == '\0')
171 break;
172 s++;
173 continue;
175 case 2: /* find ">" */
176 if ((s = strchr (s, '>'))) { s++; state = 3; p = text; continue; }
177 break;
179 case 3: /* get all text until '<' appears */
180 if (*s == '\0') break;
181 else if (!strncasecmp (s, "<br>", 4)) {
182 sami_add_line(current, text, &p);
183 s += 4;
185 else if ((*s == '{') && !sub_no_text_pp) { state = 5; ++s; continue; }
186 else if (*s == '<') { state = 4; }
187 else if (!strncasecmp (s, "&nbsp;", 6)) { *p++ = ' '; s += 6; }
188 else if (*s == '\t') { *p++ = ' '; s++; }
189 else if (*s == '\r' || *s == '\n') { s++; }
190 else *p++ = *s++;
192 /* skip duplicated space */
193 if (p > text + 2) if (*(p-1) == ' ' && *(p-2) == ' ') p--;
195 continue;
197 case 4: /* get current->end or skip <TAG> */
198 q = stristr (s, "Start=");
199 if (q) {
200 current->end = strtol (q + 6, &q, 0) / 10 - 1;
201 *p = '\0'; trail_space (text);
202 if (text[0] != '\0')
203 current->text[current->lines++] = strdup (text);
204 if (current->lines > 0) { state = 99; break; }
205 state = 0; continue;
207 s = strchr (s, '>');
208 if (s) { s++; state = 3; continue; }
209 break;
210 case 5: /* get rid of {...} text, but read the alignment code */
211 if ((*s == '\\') && (*(s + 1) == 'a') && !sub_no_text_pp) {
212 if (stristr(s, "\\a1") != NULL) {
213 current->alignment = SUB_ALIGNMENT_BOTTOMLEFT;
214 s = s + 3;
216 if (stristr(s, "\\a2") != NULL) {
217 current->alignment = SUB_ALIGNMENT_BOTTOMCENTER;
218 s = s + 3;
219 } else if (stristr(s, "\\a3") != NULL) {
220 current->alignment = SUB_ALIGNMENT_BOTTOMRIGHT;
221 s = s + 3;
222 } else if ((stristr(s, "\\a4") != NULL) || (stristr(s, "\\a5") != NULL) || (stristr(s, "\\a8") != NULL)) {
223 current->alignment = SUB_ALIGNMENT_TOPLEFT;
224 s = s + 3;
225 } else if (stristr(s, "\\a6") != NULL) {
226 current->alignment = SUB_ALIGNMENT_TOPCENTER;
227 s = s + 3;
228 } else if (stristr(s, "\\a7") != NULL) {
229 current->alignment = SUB_ALIGNMENT_TOPRIGHT;
230 s = s + 3;
231 } else if (stristr(s, "\\a9") != NULL) {
232 current->alignment = SUB_ALIGNMENT_MIDDLELEFT;
233 s = s + 3;
234 } else if (stristr(s, "\\a10") != NULL) {
235 current->alignment = SUB_ALIGNMENT_MIDDLECENTER;
236 s = s + 4;
237 } else if (stristr(s, "\\a11") != NULL) {
238 current->alignment = SUB_ALIGNMENT_MIDDLERIGHT;
239 s = s + 4;
242 if (*s == '}') state = 3;
243 ++s;
244 continue;
247 /* read next line */
248 if (state != 99 && !(s = stream_read_line (st, line, LINE_LEN, utf16))) {
249 if (current->start > 0) {
250 break; // if it is the last subtitle
251 } else {
252 return 0;
256 } while (state != 99);
258 // For the last subtitle
259 if (current->end <= 0) {
260 current->end = current->start + sub_slacktime;
261 sami_add_line(current, text, &p);
264 return current;
268 static char *sub_readtext(char *source, char **dest) {
269 int len=0;
270 char *p=source;
272 // printf("src=%p dest=%p \n",source,dest);
274 while ( !eol(*p) && *p!= '|' ) {
275 p++,len++;
278 *dest= malloc (len+1);
279 if (!dest) {return ERR;}
281 strncpy(*dest, source, len);
282 (*dest)[len]=0;
284 while (*p=='\r' || *p=='\n' || *p=='|') p++;
286 if (*p) return p; // not-last text field
287 else return NULL; // last text field
290 static subtitle *sub_read_line_microdvd(stream_t *st,subtitle *current,
291 struct readline_args *args)
293 int utf16 = args->utf16;
294 char line[LINE_LEN+1];
295 char line2[LINE_LEN+1];
296 char *p, *next;
297 int i;
299 do {
300 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
301 } while ((sscanf (line,
302 "{%ld}{}%[^\r\n]",
303 &(current->start), line2) < 2) &&
304 (sscanf (line,
305 "{%ld}{%ld}%[^\r\n]",
306 &(current->start), &(current->end), line2) < 3));
308 if (args->opts->ass_enabled) {
309 subassconvert_microdvd(line2, line, LINE_LEN + 1);
310 p = line;
311 } else
312 p = line2;
314 next=p, i=0;
315 while ((next =sub_readtext (next, &(current->text[i])))) {
316 if (current->text[i]==ERR) {return ERR;}
317 i++;
318 if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
320 current->lines= ++i;
322 return current;
325 static subtitle *sub_read_line_mpl2(stream_t *st,subtitle *current,
326 struct readline_args *args)
328 int utf16 = args->utf16;
329 char line[LINE_LEN+1];
330 char line2[LINE_LEN+1];
331 char *p, *next;
332 int i;
334 do {
335 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
336 } while ((sscanf (line,
337 "[%ld][%ld]%[^\r\n]",
338 &(current->start), &(current->end), line2) < 3));
339 current->start *= 10;
340 current->end *= 10;
341 p=line2;
343 next=p, i=0;
344 while ((next =sub_readtext (next, &(current->text[i])))) {
345 if (current->text[i]==ERR) {return ERR;}
346 i++;
347 if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
349 current->lines= ++i;
351 return current;
354 static subtitle *sub_read_line_subrip(stream_t* st, subtitle *current,
355 struct readline_args *args)
357 int utf16 = args->utf16;
358 char line[LINE_LEN+1];
359 int a1,a2,a3,a4,b1,b2,b3,b4;
360 char *p=NULL, *q=NULL;
361 int len;
363 while (1) {
364 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
365 if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) continue;
366 current->start = a1*360000+a2*6000+a3*100+a4;
367 current->end = b1*360000+b2*6000+b3*100+b4;
369 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
371 p=q=line;
372 for (current->lines=1; current->lines < SUB_MAX_TEXT; current->lines++) {
373 for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' && strncmp(p,"[br]",4); p++,len++);
374 current->text[current->lines-1]=malloc (len+1);
375 if (!current->text[current->lines-1]) return ERR;
376 strncpy (current->text[current->lines-1], q, len);
377 current->text[current->lines-1][len]='\0';
378 if (!*p || *p=='\r' || *p=='\n') break;
379 if (*p=='|') p++;
380 else while (*p++!=']');
382 break;
384 return current;
387 static subtitle *sub_ass_read_line_subviewer(stream_t *st, subtitle *current,
388 struct readline_args *args)
390 int utf16 = args->utf16;
391 int a1, a2, a3, a4, b1, b2, b3, b4, j = 0;
393 while (!current->text[0]) {
394 char line[LINE_LEN + 1], full_line[LINE_LEN + 1], sep;
395 int i;
397 /* Parse SubRip header */
398 if (!stream_read_line(st, line, LINE_LEN, utf16))
399 return NULL;
400 if (sscanf(line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",
401 &a1, &a2, &a3, &sep, &a4, &b1, &b2, &b3, &sep, &b4) < 10)
402 continue;
404 current->start = a1 * 360000 + a2 * 6000 + a3 * 100 + a4 / 10;
405 current->end = b1 * 360000 + b2 * 6000 + b3 * 100 + b4 / 10;
407 /* Concat lines */
408 full_line[0] = 0;
409 for (i = 0; i < SUB_MAX_TEXT; i++) {
410 int blank = 1, len = 0;
411 char *p;
413 if (!stream_read_line(st, line, LINE_LEN, utf16))
414 break;
416 for (p = line; *p != '\n' && *p != '\r' && *p; p++, len++)
417 if (*p != ' ' && *p != '\t')
418 blank = 0;
420 if (blank)
421 break;
423 *p = 0;
425 if (!(j + 1 + len < sizeof(full_line) - 1))
426 break;
428 if (j != 0)
429 full_line[j++] = '\n';
430 strcpy(&full_line[j], line);
431 j += len;
434 /* Use the ASS/SSA converter to transform the whole lines */
435 if (full_line[0]) {
436 char converted_line[LINE_LEN + 1];
437 subassconvert_subrip(full_line, converted_line, LINE_LEN + 1);
438 current->text[0] = strdup(converted_line);
439 current->lines = 1;
442 return current;
445 static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current,
446 struct readline_args *args)
448 int utf16 = args->utf16;
449 char line[LINE_LEN+1];
450 int a1,a2,a3,a4,b1,b2,b3,b4;
451 char *p=NULL;
452 int i,len;
454 if (args->opts->ass_enabled)
455 return sub_ass_read_line_subviewer(st, current, args);
456 while (!current->text[0]) {
457 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
458 if ((len=sscanf (line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",&a1,&a2,&a3,(char *)&i,&a4,&b1,&b2,&b3,(char *)&i,&b4)) < 10)
459 continue;
460 current->start = a1*360000+a2*6000+a3*100+a4/10;
461 current->end = b1*360000+b2*6000+b3*100+b4/10;
462 for (i=0; i<SUB_MAX_TEXT;) {
463 int blank = 1;
464 if (!stream_read_line (st, line, LINE_LEN, utf16)) break;
465 len=0;
466 for (p=line; *p!='\n' && *p!='\r' && *p; p++,len++)
467 if (*p != ' ' && *p != '\t')
468 blank = 0;
469 if (len && !blank) {
470 int j=0,skip=0;
471 char *curptr=current->text[i]=malloc (len+1);
472 if (!current->text[i]) return ERR;
473 //strncpy (current->text[i], line, len); current->text[i][len]='\0';
474 for(; j<len; j++) {
475 /* let's filter html tags ::atmos */
476 if(line[j]=='>') {
477 skip=0;
478 continue;
480 if(line[j]=='<') {
481 skip=1;
482 continue;
484 if(skip) {
485 continue;
487 *curptr=line[j];
488 curptr++;
490 *curptr='\0';
492 i++;
493 } else {
494 break;
497 current->lines=i;
499 return current;
502 static subtitle *sub_read_line_subviewer2(stream_t *st,subtitle *current,
503 struct readline_args *args)
505 int utf16 = args->utf16;
506 char line[LINE_LEN+1];
507 int a1,a2,a3,a4;
508 char *p=NULL;
509 int i,len;
511 while (!current->text[0]) {
512 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
513 if (line[0]!='{')
514 continue;
515 if ((len=sscanf (line, "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4)) < 4)
516 continue;
517 current->start = a1*360000+a2*6000+a3*100+a4/10;
518 for (i=0; i<SUB_MAX_TEXT;) {
519 if (!stream_read_line (st, line, LINE_LEN, utf16)) break;
520 if (line[0]=='}') break;
521 len=0;
522 for (p=line; *p!='\n' && *p!='\r' && *p; ++p,++len);
523 if (len) {
524 current->text[i]=malloc (len+1);
525 if (!current->text[i]) return ERR;
526 strncpy (current->text[i], line, len); current->text[i][len]='\0';
527 ++i;
528 } else {
529 break;
532 current->lines=i;
534 return current;
538 static subtitle *sub_read_line_vplayer(stream_t *st,subtitle *current,
539 struct readline_args *args)
541 int utf16 = args->utf16;
542 char line[LINE_LEN+1];
543 int a1,a2,a3;
544 char *p=NULL, *next,separator;
545 int i,len,plen;
547 while (!current->text[0]) {
548 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
549 if ((len=sscanf (line, "%d:%d:%d%c%n",&a1,&a2,&a3,&separator,&plen)) < 4)
550 continue;
552 if (!(current->start = a1*360000+a2*6000+a3*100))
553 continue;
554 /* removed by wodzu
555 p=line;
556 // finds the body of the subtitle
557 for (i=0; i<3; i++){
558 p=strchr(p,':');
559 if (p==NULL) break;
560 ++p;
562 if (p==NULL) {
563 printf("SUB: Skipping incorrect subtitle line!\n");
564 continue;
567 // by wodzu: hey! this time we know what length it has! what is
568 // that magic for? it can't deal with space instead of third
569 // colon! look, what simple it can be:
570 p = &line[ plen ];
572 i=0;
573 if (*p!='|') {
575 next = p,i=0;
576 while ((next =sub_readtext (next, &(current->text[i])))) {
577 if (current->text[i]==ERR) {return ERR;}
578 i++;
579 if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
581 current->lines=i+1;
584 return current;
587 static subtitle *sub_read_line_rt(stream_t *st,subtitle *current,
588 struct readline_args *args)
590 int utf16 = args->utf16;
592 //TODO: This format uses quite rich (sub/super)set of xhtml
593 // I couldn't check it since DTD is not included.
594 // WARNING: full XML parses can be required for proper parsing
595 char line[LINE_LEN+1];
596 int a1,a2,a3,a4,b1,b2,b3,b4;
597 char *p=NULL,*next=NULL;
598 int i,len,plen;
600 while (!current->text[0]) {
601 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
602 //TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
603 //to describe the same moment in time. Maybe there are even more formats in use.
604 //if ((len=sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8)
605 plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
606 if (
607 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d.%d\" %*[Ee]nd=\"%d.%d\"%*[^<]<clear/>%n",&a3,&a4,&b3,&b4,&plen)) < 4) &&
608 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a3,&a4,&b2,&b3,&b4,&plen)) < 5) &&
609 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&plen)) < 4) &&
610 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&b4,&plen)) < 5) &&
611 // ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen)) < 5) &&
612 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&b4,&plen)) < 6) &&
613 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen)) < 8) &&
614 //now try it without end time
615 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d.%d\"%*[^<]<clear/>%n",&a3,&a4,&plen)) < 2) &&
616 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&plen)) < 2) &&
617 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&plen)) < 3) &&
618 ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&plen)) < 4)
620 continue;
621 current->start = a1*360000+a2*6000+a3*100+a4/10;
622 current->end = b1*360000+b2*6000+b3*100+b4/10;
623 if (b1 == 0 && b2 == 0 && b3 == 0 && b4 == 0)
624 current->end = current->start+200;
625 p=line; p+=plen;i=0;
626 // TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml?
627 next = strstr(line,"<clear/>");
628 if(next && strlen(next)>8){
629 next+=8;i=0;
630 while ((next =sub_readtext (next, &(current->text[i])))) {
631 if (current->text[i]==ERR) {return ERR;}
632 i++;
633 if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
636 current->lines=i+1;
638 return current;
641 static subtitle *sub_read_line_ssa(stream_t *st,subtitle *current,
642 struct readline_args *args)
645 * Sub Station Alpha v4 (and v2?) scripts have 9 commas before subtitle
646 * other Sub Station Alpha scripts have only 8 commas before subtitle
647 * Reading the "ScriptType:" field is not reliable since many scripts appear
648 * w/o it
650 * http://www.scriptclub.org is a good place to find more examples
651 * http://www.eswat.demon.co.uk is where the SSA specs can be found
653 int utf16 = args->utf16;
654 int comma;
655 static int max_comma = 32; /* let's use 32 for the case that the */
656 /* amount of commas increase with newer SSA versions */
658 int hour1, min1, sec1, hunsec1,
659 hour2, min2, sec2, hunsec2, nothing;
660 int num;
662 char line[LINE_LEN+1],
663 line3[LINE_LEN+1],
664 *line2;
665 char *tmp;
667 do {
668 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
669 } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d"
670 "%[^\n\r]", &nothing,
671 &hour1, &min1, &sec1, &hunsec1,
672 &hour2, &min2, &sec2, &hunsec2,
673 line3) < 9
675 sscanf (line, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d"
676 "%[^\n\r]", &nothing,
677 &hour1, &min1, &sec1, &hunsec1,
678 &hour2, &min2, &sec2, &hunsec2,
679 line3) < 9 );
681 line2=strchr(line3, ',');
682 if (!line2) return NULL;
684 for (comma = 4; comma < max_comma; comma ++)
686 tmp = line2;
687 if(!(tmp=strchr(++tmp, ','))) break;
688 if(*(++tmp) == ' ') break;
689 /* a space after a comma means we're already in a sentence */
690 line2 = tmp;
693 if(comma < max_comma)max_comma = comma;
694 /* eliminate the trailing comma */
695 if(*line2 == ',') line2++;
697 current->lines=0;num=0;
698 current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
699 current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
701 while (((tmp=strstr(line2, "\\n")) != NULL) || ((tmp=strstr(line2, "\\N")) != NULL) ){
702 current->text[num]=malloc(tmp-line2+1);
703 strncpy (current->text[num], line2, tmp-line2);
704 current->text[num][tmp-line2]='\0';
705 line2=tmp+2;
706 num++;
707 current->lines++;
708 if (current->lines >= SUB_MAX_TEXT) return current;
711 current->text[num]=strdup(line2);
712 current->lines++;
714 return current;
717 static void sub_pp_ssa(subtitle *sub) {
718 int l=sub->lines;
719 char *so,*de,*start;
721 while (l){
722 /* eliminate any text enclosed with {}, they are font and color settings */
723 so=de=sub->text[--l];
724 while (*so) {
725 if(*so == '{' && so[1]=='\\') {
726 for (start=so; *so && *so!='}'; so++);
727 if(*so) so++; else so=start;
729 if(*so) {
730 *de=*so;
731 so++; de++;
734 *de=*so;
739 * PJS subtitles reader.
740 * That's the "Phoenix Japanimation Society" format.
741 * I found some of them in http://www.scriptsclub.org/ (used for anime).
742 * The time is in tenths of second.
744 * by set, based on code by szabi (dunnowhat sub format ;-)
746 static subtitle *sub_read_line_pjs(stream_t *st,subtitle *current,
747 struct readline_args *args)
749 int utf16 = args->utf16;
750 char line[LINE_LEN+1];
751 char text[LINE_LEN+1], *s, *d;
753 if (!stream_read_line (st, line, LINE_LEN, utf16))
754 return NULL;
755 /* skip spaces */
756 for (s=line; *s && isspace(*s); s++);
757 /* allow empty lines at the end of the file */
758 if (*s==0)
759 return NULL;
760 /* get the time */
761 if (sscanf (s, "%ld,%ld,", &(current->start),
762 &(current->end)) <2) {
763 return ERR;
765 /* the files I have are in tenths of second */
766 current->start *= 10;
767 current->end *= 10;
768 /* walk to the beggining of the string */
769 for (; *s; s++) if (*s==',') break;
770 if (*s) {
771 for (s++; *s; s++) if (*s==',') break;
772 if (*s) s++;
774 if (*s!='"') {
775 return ERR;
777 /* copy the string to the text buffer */
778 for (s++, d=text; *s && *s!='"'; s++, d++)
779 *d=*s;
780 *d=0;
781 current->text[0] = strdup(text);
782 current->lines = 1;
784 return current;
787 static subtitle *sub_read_line_mpsub(stream_t *st, subtitle *current,
788 struct readline_args *args)
790 int utf16 = args->utf16;
791 char line[LINE_LEN+1];
792 float a,b;
793 int num=0;
794 char *p, *q;
798 if (!stream_read_line(st, line, LINE_LEN, utf16)) return NULL;
799 } while (sscanf (line, "%f %f", &a, &b) !=2);
801 mpsub_position += a*mpsub_multiplier;
802 current->start=(int) mpsub_position;
803 mpsub_position += b*mpsub_multiplier;
804 current->end=(int) mpsub_position;
806 while (num < SUB_MAX_TEXT) {
807 if (!stream_read_line (st, line, LINE_LEN, utf16)) {
808 if (num == 0) return NULL;
809 else return current;
811 p=line;
812 while (isspace(*p)) p++;
813 if (eol(*p) && num > 0) return current;
814 if (eol(*p)) return NULL;
816 for (q=p; !eol(*q); q++);
817 *q='\0';
818 if (strlen(p)) {
819 current->text[num]=strdup(p);
820 // printf (">%s<\n",p);
821 current->lines = ++num;
822 } else {
823 if (num) return current;
824 else return NULL;
827 return NULL; // we should have returned before if it's OK
830 #ifndef CONFIG_SORTSUB
831 //we don't need this if we use previous_sub_end
832 subtitle *previous_aqt_sub = NULL;
833 #endif
835 static subtitle *sub_read_line_aqt(stream_t *st,subtitle *current,
836 struct readline_args *args)
838 int utf16 = args->utf16;
839 char line[LINE_LEN+1];
840 char *next;
841 int i;
843 while (1) {
844 // try to locate next subtitle
845 if (!stream_read_line (st, line, LINE_LEN, utf16))
846 return NULL;
847 if (!(sscanf (line, "-->> %ld", &(current->start)) <1))
848 break;
851 #ifdef CONFIG_SORTSUB
852 previous_sub_end = (current->start) ? current->start - 1 : 0;
853 #else
854 if (previous_aqt_sub != NULL)
855 previous_aqt_sub->end = current->start-1;
857 previous_aqt_sub = current;
858 #endif
860 if (!stream_read_line (st, line, LINE_LEN, utf16))
861 return NULL;
863 sub_readtext((char *) &line,&current->text[0]);
864 current->lines = 1;
865 current->end = current->start; // will be corrected by next subtitle
867 if (!stream_read_line (st, line, LINE_LEN, utf16))
868 return current;
870 next = line,i=1;
871 while ((next =sub_readtext (next, &(current->text[i])))) {
872 if (current->text[i]==ERR) {return ERR;}
873 i++;
874 if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
876 current->lines=i+1;
878 if (!strlen(current->text[0]) && !strlen(current->text[1])) {
879 #ifdef CONFIG_SORTSUB
880 previous_sub_end = 0;
881 #else
882 // void subtitle -> end of previous marked and exit
883 previous_aqt_sub = NULL;
884 #endif
885 return NULL;
888 return current;
891 #ifndef CONFIG_SORTSUB
892 subtitle *previous_subrip09_sub = NULL;
893 #endif
895 static subtitle *sub_read_line_subrip09(stream_t *st,subtitle *current,
896 struct readline_args *args)
898 int utf16 = args->utf16;
899 char line[LINE_LEN+1];
900 int a1,a2,a3;
901 char * next=NULL;
902 int i,len;
904 while (1) {
905 // try to locate next subtitle
906 if (!stream_read_line (st, line, LINE_LEN, utf16))
907 return NULL;
908 if (!((len=sscanf (line, "[%d:%d:%d]",&a1,&a2,&a3)) < 3))
909 break;
912 current->start = a1*360000+a2*6000+a3*100;
914 #ifdef CONFIG_SORTSUB
915 previous_sub_end = (current->start) ? current->start - 1 : 0;
916 #else
917 if (previous_subrip09_sub != NULL)
918 previous_subrip09_sub->end = current->start-1;
920 previous_subrip09_sub = current;
921 #endif
923 if (!stream_read_line (st, line, LINE_LEN, utf16))
924 return NULL;
926 next = line,i=0;
928 current->text[0]=""; // just to be sure that string is clear
930 while ((next =sub_readtext (next, &(current->text[i])))) {
931 if (current->text[i]==ERR) {return ERR;}
932 i++;
933 if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
935 current->lines=i+1;
937 if (!strlen(current->text[0]) && (i==0)) {
938 #ifdef CONFIG_SORTSUB
939 previous_sub_end = 0;
940 #else
941 // void subtitle -> end of previous marked and exit
942 previous_subrip09_sub = NULL;
943 #endif
944 return NULL;
947 return current;
950 static subtitle *sub_read_line_jacosub(stream_t* st, subtitle * current,
951 struct readline_args *args)
953 int utf16 = args->utf16;
954 char line1[LINE_LEN], line2[LINE_LEN], directive[LINE_LEN], *p, *q;
955 unsigned a1, a2, a3, a4, b1, b2, b3, b4, comment = 0;
956 static unsigned jacoTimeres = 30;
957 static int jacoShift = 0;
959 memset(current, 0, sizeof(subtitle));
960 memset(line1, 0, LINE_LEN);
961 memset(line2, 0, LINE_LEN);
962 memset(directive, 0, LINE_LEN);
963 while (!current->text[0]) {
964 if (!stream_read_line(st, line1, LINE_LEN, utf16)) {
965 return NULL;
967 if (sscanf
968 (line1, "%u:%u:%u.%u %u:%u:%u.%u %[^\n\r]", &a1, &a2, &a3, &a4,
969 &b1, &b2, &b3, &b4, line2) < 9) {
970 if (sscanf(line1, "@%u @%u %[^\n\r]", &a4, &b4, line2) < 3) {
971 if (line1[0] == '#') {
972 int hours = 0, minutes = 0, seconds, delta, inverter =
974 unsigned units = jacoShift;
975 switch (toupper(line1[1])) {
976 case 'S':
977 if (isalpha(line1[2])) {
978 delta = 6;
979 } else {
980 delta = 2;
982 if (sscanf(&line1[delta], "%d", &hours)) {
983 if (hours < 0) {
984 hours *= -1;
985 inverter = -1;
987 if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
988 if (sscanf
989 (&line1[delta], "%*d:%*d:%d",
990 &seconds)) {
991 sscanf(&line1[delta], "%*d:%*d:%*d.%d",
992 &units);
993 } else {
994 hours = 0;
995 sscanf(&line1[delta], "%d:%d.%d",
996 &minutes, &seconds, &units);
997 minutes *= inverter;
999 } else {
1000 hours = minutes = 0;
1001 sscanf(&line1[delta], "%d.%d", &seconds,
1002 &units);
1003 seconds *= inverter;
1005 jacoShift =
1006 ((hours * 3600 + minutes * 60 +
1007 seconds) * jacoTimeres +
1008 units) * inverter;
1010 break;
1011 case 'T':
1012 if (isalpha(line1[2])) {
1013 delta = 8;
1014 } else {
1015 delta = 2;
1017 sscanf(&line1[delta], "%u", &jacoTimeres);
1018 break;
1021 continue;
1022 } else {
1023 current->start =
1024 (unsigned long) ((a4 + jacoShift) * 100.0 /
1025 jacoTimeres);
1026 current->end =
1027 (unsigned long) ((b4 + jacoShift) * 100.0 /
1028 jacoTimeres);
1030 } else {
1031 current->start =
1032 (unsigned
1033 long) (((a1 * 3600 + a2 * 60 + a3) * jacoTimeres + a4 +
1034 jacoShift) * 100.0 / jacoTimeres);
1035 current->end =
1036 (unsigned
1037 long) (((b1 * 3600 + b2 * 60 + b3) * jacoTimeres + b4 +
1038 jacoShift) * 100.0 / jacoTimeres);
1040 current->lines = 0;
1041 p = line2;
1042 while ((*p == ' ') || (*p == '\t')) {
1043 ++p;
1045 if (isalpha(*p)||*p == '[') {
1046 int cont, jLength;
1048 if (sscanf(p, "%s %[^\n\r]", directive, line1) < 2)
1049 return (subtitle *) ERR;
1050 jLength = strlen(directive);
1051 for (cont = 0; cont < jLength; ++cont) {
1052 if (isalpha(*(directive + cont)))
1053 *(directive + cont) = toupper(*(directive + cont));
1055 if ((strstr(directive, "RDB") != NULL)
1056 || (strstr(directive, "RDC") != NULL)
1057 || (strstr(directive, "RLB") != NULL)
1058 || (strstr(directive, "RLG") != NULL)) {
1059 continue;
1061 if (strstr(directive, "JL") != NULL) {
1062 current->alignment = SUB_ALIGNMENT_BOTTOMLEFT;
1063 } else if (strstr(directive, "JR") != NULL) {
1064 current->alignment = SUB_ALIGNMENT_BOTTOMRIGHT;
1065 } else {
1066 current->alignment = SUB_ALIGNMENT_BOTTOMCENTER;
1068 strcpy(line2, line1);
1069 p = line2;
1071 for (q = line1; (!eol(*p)) && (current->lines < SUB_MAX_TEXT); ++p) {
1072 switch (*p) {
1073 case '{':
1074 comment++;
1075 break;
1076 case '}':
1077 if (comment) {
1078 --comment;
1079 //the next line to get rid of a blank after the comment
1080 if ((*(p + 1)) == ' ')
1081 p++;
1083 break;
1084 case '~':
1085 if (!comment) {
1086 *q = ' ';
1087 ++q;
1089 break;
1090 case ' ':
1091 case '\t':
1092 if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
1093 break;
1094 if (!comment) {
1095 *q = ' ';
1096 ++q;
1098 break;
1099 case '\\':
1100 if (*(p + 1) == 'n') {
1101 *q = '\0';
1102 q = line1;
1103 current->text[current->lines++] = strdup(line1);
1104 ++p;
1105 break;
1107 if ((toupper(*(p + 1)) == 'C')
1108 || (toupper(*(p + 1)) == 'F')) {
1109 ++p,++p;
1110 break;
1112 if ((*(p + 1) == 'B') || (*(p + 1) == 'b') || (*(p + 1) == 'D') || //actually this means "insert current date here"
1113 (*(p + 1) == 'I') || (*(p + 1) == 'i') || (*(p + 1) == 'N') || (*(p + 1) == 'T') || //actually this means "insert current time here"
1114 (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
1115 ++p;
1116 break;
1118 if ((*(p + 1) == '\\') ||
1119 (*(p + 1) == '~') || (*(p + 1) == '{')) {
1120 ++p;
1121 } else if (eol(*(p + 1))) {
1122 if (!stream_read_line(st, directive, LINE_LEN, utf16))
1123 return NULL;
1124 trail_space(directive);
1125 av_strlcat(line2, directive, LINE_LEN);
1126 break;
1128 default:
1129 if (!comment) {
1130 *q = *p;
1131 ++q;
1133 } //-- switch
1134 } //-- for
1135 *q = '\0';
1136 current->text[current->lines] = strdup(line1);
1137 } //-- while
1138 current->lines++;
1139 return current;
1142 static int sub_autodetect (stream_t* st, int *uses_time, int utf16) {
1143 char line[LINE_LEN+1];
1144 int i,j=0;
1146 while (j < 100) {
1147 j++;
1148 if (!stream_read_line (st, line, LINE_LEN, utf16))
1149 return SUB_INVALID;
1151 if (sscanf (line, "{%d}{%d}", &i, &i)==2)
1152 {*uses_time=0;return SUB_MICRODVD;}
1153 if (sscanf (line, "{%d}{}", &i)==1)
1154 {*uses_time=0;return SUB_MICRODVD;}
1155 if (sscanf (line, "[%d][%d]", &i, &i)==2)
1156 {*uses_time=1;return SUB_MPL2;}
1157 if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8)
1158 {*uses_time=1;return SUB_SUBRIP;}
1159 if (sscanf (line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d", &i, &i, &i, (char *)&i, &i, &i, &i, &i, (char *)&i, &i)==10)
1160 {*uses_time=1;return SUB_SUBVIEWER;}
1161 if (sscanf (line, "{T %d:%d:%d:%d",&i, &i, &i, &i)==4)
1162 {*uses_time=1;return SUB_SUBVIEWER2;}
1163 if (strstr (line, "<SAMI>"))
1164 {*uses_time=1; return SUB_SAMI;}
1165 if (sscanf(line, "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8)
1166 {*uses_time = 1; return SUB_JACOSUB;}
1167 if (sscanf(line, "@%d @%d", &i, &i) == 2)
1168 {*uses_time = 1; return SUB_JACOSUB;}
1169 if (sscanf (line, "%d:%d:%d:", &i, &i, &i )==3)
1170 {*uses_time=1;return SUB_VPLAYER;}
1171 if (sscanf (line, "%d:%d:%d ", &i, &i, &i )==3)
1172 {*uses_time=1;return SUB_VPLAYER;}
1173 if (!strncasecmp(line, "<window", 7))
1174 {*uses_time=1;return SUB_RT;}
1175 if (!memcmp(line, "Dialogue: Marked", 16))
1176 {*uses_time=1; return SUB_SSA;}
1177 if (!memcmp(line, "Dialogue: ", 10))
1178 {*uses_time=1; return SUB_SSA;}
1179 if (sscanf (line, "%d,%d,\"%c", &i, &i, (char *) &i) == 3)
1180 {*uses_time=1;return SUB_PJS;}
1181 if (sscanf (line, "FORMAT=%d", &i) == 1)
1182 {*uses_time=0; return SUB_MPSUB;}
1183 if (!memcmp(line, "FORMAT=TIME", 11))
1184 {*uses_time=1; return SUB_MPSUB;}
1185 if (strstr (line, "-->>"))
1186 {*uses_time=0; return SUB_AQTITLE;}
1187 if (sscanf (line, "[%d:%d:%d]", &i, &i, &i)==3)
1188 {*uses_time=1;return SUB_SUBRIP09;}
1191 return SUB_INVALID; // too many bad lines
1194 extern int sub_utf8;
1195 int sub_utf8_prev=0;
1197 extern float sub_delay;
1198 extern float sub_fps;
1200 #ifdef CONFIG_ICONV
1201 static iconv_t icdsc = (iconv_t)(-1);
1203 void subcp_open (stream_t *st)
1205 char *tocp = "UTF-8";
1207 if (sub_cp){
1208 const char *cp_tmp = sub_cp;
1209 #ifdef CONFIG_ENCA
1210 char enca_lang[3], enca_fallback[100];
1211 if (sscanf(sub_cp, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
1212 || sscanf(sub_cp, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2) {
1213 if (st && st->flags & MP_STREAM_SEEK ) {
1214 cp_tmp = guess_cp(st, enca_lang, enca_fallback);
1215 } else {
1216 cp_tmp = enca_fallback;
1217 if (st)
1218 mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: enca failed, stream must be seekable.\n");
1221 #endif
1222 if ((icdsc = iconv_open (tocp, cp_tmp)) != (iconv_t)(-1)){
1223 mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n");
1224 sub_utf8 = 2;
1225 } else
1226 mp_msg(MSGT_SUBREADER,MSGL_ERR,"SUB: error opening iconv descriptor.\n");
1230 void subcp_close (void)
1232 if (icdsc != (iconv_t)(-1)){
1233 (void) iconv_close (icdsc);
1234 icdsc = (iconv_t)(-1);
1235 mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: closed iconv descriptor.\n");
1239 subtitle* subcp_recode (subtitle *sub)
1241 int l=sub->lines;
1242 size_t ileft, oleft;
1243 char *op, *ip, *ot;
1244 if(icdsc == (iconv_t)(-1)) return sub;
1246 while (l){
1247 ip = sub->text[--l];
1248 ileft = strlen(ip);
1249 oleft = 4 * ileft;
1251 if (!(ot = malloc(oleft + 1))){
1252 mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error allocating mem.\n");
1253 continue;
1255 op = ot;
1256 if (iconv(icdsc, &ip, &ileft,
1257 &op, &oleft) == (size_t)(-1)) {
1258 mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error recoding line.\n");
1259 free(ot);
1260 continue;
1262 // In some stateful encodings, we must clear the state to handle the last character
1263 if (iconv(icdsc, NULL, NULL,
1264 &op, &oleft) == (size_t)(-1)) {
1265 mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error recoding line, can't clear encoding state.\n");
1267 *op='\0' ;
1268 free (sub->text[l]);
1269 sub->text[l] = ot;
1271 return sub;
1273 #endif
#ifdef CONFIG_FRIBIDI
/**
 * Do conversion necessary for right-to-left language support via fribidi.
 * @param sub subtitle to convert
 * @param sub_utf8 whether the subtitle is encoded in UTF-8
 * @param from first new subtitle, all lines before this are assumed to be already converted
 * @return sub, or ERR when from is 0 and a line could not be processed; in
 *         that case every text line of sub has been freed
 */
static subtitle* sub_fribidi (subtitle *sub, int sub_utf8, int from)
{
    FriBidiChar logical[LINE_LEN+1], visual[LINE_LEN+1]; // Hopefully these two won't smash the stack
    char *charset_name;
    int charset;
    int idx = sub->lines;

    if (!flip_hebrew)
        return sub;
    fribidi_set_mirroring(1);
    fribidi_set_reorder_nsm(0);

    if (sub_utf8 != 0)
        charset_name = "UTF-8";
    else
        charset_name = fribidi_charset ? fribidi_charset : "ISO8859-8";
    charset = fribidi_parse_charset (charset_name);

    // Walk the new lines from last to first.
    while (idx > from) {
        char *src = sub->text[--idx];
        // We assume that we don't use full unicode, only UTF-8 or ISO8859-x
        size_t src_len = strlen(src);
        size_t len = src_len;
        char *dst;
        fribidi_boolean reordered;

        if (len > LINE_LEN) {
            mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: sub->text is longer than LINE_LEN.\n");
            idx++;  // a non-zero index marks the failure below
            break;
        }
        len = fribidi_charset_to_unicode (charset, src, len, logical);
#if FRIBIDI_INTERFACE_VERSION < 3
        FriBidiCharType base = fribidi_flip_commas?FRIBIDI_TYPE_ON:FRIBIDI_TYPE_L;
#else
        FriBidiParType base = fribidi_flip_commas?FRIBIDI_TYPE_ON:FRIBIDI_TYPE_L;
#endif
        reordered = fribidi_log2vis (logical, len, &base,
                                     /* output */
                                     visual, NULL, NULL, NULL);
        if (!reordered)
            continue;  // keep the line as it is

        len = fribidi_remove_bidi_marks (visual, len, NULL, NULL, NULL);
        dst = malloc(FFMAX(2*src_len,2*len) + 1);
        if (dst == NULL) {
            mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error allocating mem.\n");
            idx++;
            break;
        }
        fribidi_unicode_to_charset (charset, visual, len, dst);
        free (src);
        sub->text[idx] = dst;
    }

    // A failure while converting a whole new subtitle discards it.
    if (!from && idx) {
        for (idx = sub->lines; idx;)
            free (sub->text[--idx]);
        return ERR;
    }
    return sub;
}

#endif
1340 static void adjust_subs_time(subtitle* sub, float subtime, float fps, int block,
1341 int sub_num, int sub_uses_time) {
1342 int n,m;
1343 subtitle* nextsub;
1344 int i = sub_num;
1345 unsigned long subfms = (sub_uses_time ? 100 : fps) * subtime;
1346 unsigned long overlap = (sub_uses_time ? 100 : fps) / 5; // 0.2s
1348 n=m=0;
1349 if (i) for (;;){
1350 if (sub->end <= sub->start){
1351 sub->end = sub->start + subfms;
1352 m++;
1353 n++;
1355 if (!--i) break;
1356 nextsub = sub + 1;
1357 if(block){
1358 if ((sub->end > nextsub->start) && (sub->end <= nextsub->start + overlap)) {
1359 // these subtitles overlap for less than 0.2 seconds
1360 // and would result in very short overlapping subtitle
1361 // so let's fix the problem here, before overlapping code
1362 // get its hands on them
1363 unsigned delta = sub->end - nextsub->start, half = delta / 2;
1364 sub->end -= half + 1;
1365 nextsub->start += delta - half;
1367 if (sub->end >= nextsub->start){
1368 sub->end = nextsub->start - 1;
1369 if (sub->end - sub->start > subfms)
1370 sub->end = sub->start + subfms;
1371 if (!m)
1372 n++;
1376 /* Theory:
1377 * Movies are often converted from FILM (24 fps)
1378 * to PAL (25) by simply speeding it up, so we
1379 * to multiply the original timestmaps by
1380 * (Movie's FPS / Subtitle's (guessed) FPS)
1381 * so eg. for 23.98 fps movie and PAL time based
1382 * subtitles we say -subfps 25 and we're fine!
1385 /* timed sub fps correction ::atmos */
1386 /* the frame-based case is handled in mpcommon.c
1387 * where find_sub is called */
1388 if(sub_uses_time && sub_fps) {
1389 sub->start *= sub_fps/fps;
1390 sub->end *= sub_fps/fps;
1393 sub = nextsub;
1394 m = 0;
1396 if (n) mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: Adjusted %d subtitle(s).\n", n);
1399 struct subreader {
1400 subtitle * (*read)(stream_t *st, subtitle *dest,
1401 struct readline_args *args);
1402 void (*post)(subtitle *dest);
1403 const char *name;
1406 #ifdef CONFIG_ENCA
1407 const char* guess_buffer_cp(unsigned char* buffer, int buflen, const char *preferred_language, const char *fallback)
1409 const char **languages;
1410 size_t langcnt;
1411 EncaAnalyser analyser;
1412 EncaEncoding encoding;
1413 const char *detected_sub_cp = NULL;
1414 int i;
1416 languages = enca_get_languages(&langcnt);
1417 mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: ");
1418 for (i = 0; i < langcnt; i++) {
1419 mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]);
1421 mp_msg(MSGT_SUBREADER, MSGL_V, "\n");
1423 for (i = 0; i < langcnt; i++) {
1424 if (strcasecmp(languages[i], preferred_language) != 0) continue;
1425 analyser = enca_analyser_alloc(languages[i]);
1426 encoding = enca_analyse_const(analyser, buffer, buflen);
1427 enca_analyser_free(analyser);
1428 if (encoding.charset != ENCA_CS_UNKNOWN) {
1429 detected_sub_cp = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
1430 break;
1434 free(languages);
1436 if (!detected_sub_cp) {
1437 detected_sub_cp = fallback;
1438 mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detection failed: fallback to %s\n", fallback);
1439 }else{
1440 mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", detected_sub_cp);
1443 return detected_sub_cp;
1446 #define MAX_GUESS_BUFFER_SIZE (256*1024)
1447 const char* guess_cp(stream_t *st, const char *preferred_language, const char *fallback)
1449 size_t buflen;
1450 unsigned char *buffer;
1451 const char *detected_sub_cp = NULL;
1453 buffer = malloc(MAX_GUESS_BUFFER_SIZE);
1454 buflen = stream_read(st,buffer, MAX_GUESS_BUFFER_SIZE);
1456 detected_sub_cp = guess_buffer_cp(buffer, buflen, preferred_language, fallback);
1458 free(buffer);
1459 stream_reset(st);
1460 stream_seek(st,0);
1462 return detected_sub_cp;
1464 #undef MAX_GUESS_BUFFER_SIZE
1465 #endif
1467 sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts)
1469 int utf16;
1470 stream_t* fd;
1471 int n_max, n_first, i, j, sub_first, sub_orig;
1472 subtitle *first, *second, *sub, *return_sub, *alloced_sub = NULL;
1473 sub_data *subt_data;
1474 int uses_time = 0, sub_num = 0, sub_errs = 0;
1475 static const struct subreader sr[]=
1477 { sub_read_line_microdvd, NULL, "microdvd" },
1478 { sub_read_line_subrip, NULL, "subrip" },
1479 { sub_read_line_subviewer, NULL, "subviewer" },
1480 { sub_read_line_sami, NULL, "sami" },
1481 { sub_read_line_vplayer, NULL, "vplayer" },
1482 { sub_read_line_rt, NULL, "rt" },
1483 { sub_read_line_ssa, sub_pp_ssa, "ssa" },
1484 { sub_read_line_pjs, NULL, "pjs" },
1485 { sub_read_line_mpsub, NULL, "mpsub" },
1486 { sub_read_line_aqt, NULL, "aqt" },
1487 { sub_read_line_subviewer2, NULL, "subviewer 2.0" },
1488 { sub_read_line_subrip09, NULL, "subrip 0.9" },
1489 { sub_read_line_jacosub, NULL, "jacosub" },
1490 { sub_read_line_mpl2, NULL, "mpl2" }
1492 const struct subreader *srp;
1494 if(filename==NULL) return NULL; //qnx segfault
1495 fd=open_stream (filename, NULL, NULL); if (!fd) return NULL;
1497 sub_format = SUB_INVALID;
1498 for (utf16 = 0; sub_format == SUB_INVALID && utf16 < 3; utf16++) {
1499 sub_format=sub_autodetect (fd, &uses_time, utf16);
1500 stream_reset(fd);
1501 stream_seek(fd,0);
1503 utf16--;
1505 mpsub_multiplier = (uses_time ? 100.0 : 1.0);
1506 if (sub_format==SUB_INVALID) {mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: Could not determine file format\n");return NULL;}
1507 srp=sr+sub_format;
1508 mp_msg(MSGT_SUBREADER, MSGL_V, "SUB: Detected subtitle file format: %s\n", srp->name);
1510 #ifdef CONFIG_ICONV
1511 sub_utf8_prev=sub_utf8;
1513 int l,k;
1514 k = -1;
1515 if ((l=strlen(filename))>4){
1516 char *exts[] = {".utf", ".utf8", ".utf-8" };
1517 for (k=3;--k>=0;)
1518 if (l >= strlen(exts[k]) && !strcasecmp(filename+(l - strlen(exts[k])), exts[k])){
1519 sub_utf8 = 1;
1520 break;
1523 if (k<0) subcp_open(fd);
1525 #endif
1527 sub_num=0;n_max=32;
1528 first=malloc(n_max*sizeof(subtitle));
1529 if(!first){
1530 #ifdef CONFIG_ICONV
1531 subcp_close();
1532 sub_utf8=sub_utf8_prev;
1533 #endif
1534 return NULL;
1537 #ifdef CONFIG_SORTSUB
1538 alloced_sub =
1539 sub = malloc(sizeof(subtitle));
1540 //This is to deal with those formats (AQT & Subrip) which define the end of a subtitle
1541 //as the beginning of the following
1542 previous_sub_end = 0;
1543 #endif
1544 while(1){
1545 if(sub_num>=n_max){
1546 n_max+=16;
1547 first=realloc(first,n_max*sizeof(subtitle));
1549 #ifndef CONFIG_SORTSUB
1550 sub = &first[sub_num];
1551 #endif
1552 memset(sub, '\0', sizeof(subtitle));
1553 sub=srp->read(fd, sub, &(struct readline_args){utf16, opts});
1554 if(!sub) break; // EOF
1555 #ifdef CONFIG_ICONV
1556 if ((sub!=ERR) && sub_utf8 == 2) sub=subcp_recode(sub);
1557 #endif
1558 #ifdef CONFIG_FRIBIDI
1559 if (sub!=ERR) sub=sub_fribidi(sub,sub_utf8,0);
1560 #endif
1561 if ( sub == ERR )
1563 #ifdef CONFIG_ICONV
1564 subcp_close();
1565 #endif
1566 free(first);
1567 free(alloced_sub);
1568 return NULL;
1570 // Apply any post processing that needs recoding first
1571 if ((sub!=ERR) && !sub_no_text_pp && srp->post) srp->post(sub);
1572 #ifdef CONFIG_SORTSUB
1573 if(!sub_num || (first[sub_num - 1].start <= sub->start)){
1574 first[sub_num].start = sub->start;
1575 first[sub_num].end = sub->end;
1576 first[sub_num].lines = sub->lines;
1577 first[sub_num].alignment = sub->alignment;
1578 for(i = 0; i < sub->lines; ++i){
1579 first[sub_num].text[i] = sub->text[i];
1581 if (previous_sub_end){
1582 first[sub_num - 1].end = previous_sub_end;
1583 previous_sub_end = 0;
1585 } else {
1586 for(j = sub_num - 1; j >= 0; --j){
1587 first[j + 1].start = first[j].start;
1588 first[j + 1].end = first[j].end;
1589 first[j + 1].lines = first[j].lines;
1590 first[j + 1].alignment = first[j].alignment;
1591 for(i = 0; i < first[j].lines; ++i){
1592 first[j + 1].text[i] = first[j].text[i];
1594 if(!j || (first[j - 1].start <= sub->start)){
1595 first[j].start = sub->start;
1596 first[j].end = sub->end;
1597 first[j].lines = sub->lines;
1598 first[j].alignment = sub->alignment;
1599 for(i = 0; i < SUB_MAX_TEXT; ++i){
1600 first[j].text[i] = sub->text[i];
1602 if (previous_sub_end){
1603 first[j].end = first[j - 1].end;
1604 first[j - 1].end = previous_sub_end;
1605 previous_sub_end = 0;
1607 break;
1611 #endif
1612 if(sub==ERR) ++sub_errs; else ++sub_num; // Error vs. Valid
1615 free_stream(fd);
1617 #ifdef CONFIG_ICONV
1618 subcp_close();
1619 #endif
1620 free(alloced_sub);
1622 // printf ("SUB: Subtitle format %s time.\n", uses_time?"uses":"doesn't use");
1623 mp_msg(MSGT_SUBREADER, MSGL_V,"SUB: Read %i subtitles, %i bad line(s).\n",
1624 sub_num, sub_errs);
1626 if(sub_num<=0){
1627 free(first);
1628 return NULL;
1631 // we do overlap if the user forced it (suboverlap_enable == 2) or
1632 // the user didn't forced no-overlapsub and the format is Jacosub or Ssa.
1633 // this is because usually overlapping subtitles are found in these formats,
1634 // while in others they are probably result of bad timing
1635 if ((suboverlap_enabled == 2) ||
1636 ((suboverlap_enabled) && ((sub_format == SUB_JACOSUB) || (sub_format == SUB_SSA)))) {
1637 adjust_subs_time(first, 6.0, fps, 0, sub_num, uses_time);/*~6 secs AST*/
1638 // here we manage overlapping subtitles
1639 sub_orig = sub_num;
1640 n_first = sub_num;
1641 sub_num = 0;
1642 second = NULL;
1643 // for each subtitle in first[] we deal with its 'block' of
1644 // bonded subtitles
1645 for (sub_first = 0; sub_first < n_first; ++sub_first) {
1646 unsigned long global_start = first[sub_first].start,
1647 global_end = first[sub_first].end, local_start, local_end;
1648 int lines_to_add = first[sub_first].lines, sub_to_add = 0,
1649 **placeholder = NULL, higher_line = 0, counter, start_block_sub = sub_num;
1650 char real_block = 1;
1652 // here we find the number of subtitles inside the 'block'
1653 // and its span interval. this works well only with sorted
1654 // subtitles
1655 while ((sub_first + sub_to_add + 1 < n_first) && (first[sub_first + sub_to_add + 1].start < global_end)) {
1656 ++sub_to_add;
1657 lines_to_add += first[sub_first + sub_to_add].lines;
1658 if (first[sub_first + sub_to_add].start < global_start) {
1659 global_start = first[sub_first + sub_to_add].start;
1661 if (first[sub_first + sub_to_add].end > global_end) {
1662 global_end = first[sub_first + sub_to_add].end;
1666 /* Avoid n^2 memory use for the "placeholder" data structure
1667 * below with subtitles that have a huge number of
1668 * consecutive overlapping lines. */
1669 lines_to_add = FFMIN(lines_to_add, SUB_MAX_TEXT);
1671 // we need a structure to keep trace of the screen lines
1672 // used by the subs, a 'placeholder'
1673 counter = 2 * sub_to_add + 1; // the maximum number of subs derived
1674 // from a block of sub_to_add+1 subs
1675 placeholder = malloc(sizeof(int *) * counter);
1676 for (i = 0; i < counter; ++i) {
1677 placeholder[i] = malloc(sizeof(int) * lines_to_add);
1678 for (j = 0; j < lines_to_add; ++j) {
1679 placeholder[i][j] = -1;
1683 counter = 0;
1684 local_end = global_start - 1;
1685 do {
1686 int ls;
1688 // here we find the beginning and the end of a new
1689 // subtitle in the block
1690 local_start = local_end + 1;
1691 local_end = global_end;
1692 for (j = 0; j <= sub_to_add; ++j) {
1693 if ((first[sub_first + j].start - 1 > local_start) && (first[sub_first + j].start - 1 < local_end)) {
1694 local_end = first[sub_first + j].start - 1;
1695 } else if ((first[sub_first + j].end > local_start) && (first[sub_first + j].end < local_end)) {
1696 local_end = first[sub_first + j].end;
1699 // here we allocate the screen lines to subs we must
1700 // display in current local_start-local_end interval.
1701 // if the subs were yet presents in the previous interval
1702 // they keep the same lines, otherside they get unused lines
1703 for (j = 0; j <= sub_to_add; ++j) {
1704 if ((first[sub_first + j].start <= local_end) && (first[sub_first + j].end > local_start)) {
1705 unsigned long sub_lines = first[sub_first + j].lines, fragment_length = lines_to_add + 1,
1706 tmp = 0;
1707 char boolean = 0;
1708 int fragment_position = -1;
1710 // if this is not the first new sub of the block
1711 // we find if this sub was present in the previous
1712 // new sub
1713 if (counter)
1714 for (i = 0; i < lines_to_add; ++i) {
1715 if (placeholder[counter - 1][i] == sub_first + j) {
1716 placeholder[counter][i] = sub_first + j;
1717 boolean = 1;
1720 if (boolean)
1721 continue;
1723 // we are looking for the shortest among all groups of
1724 // sequential blank lines whose length is greater than or
1725 // equal to sub_lines. we store in fragment_position the
1726 // position of the shortest group, in fragment_length its
1727 // length, and in tmp the length of the group currently
1728 // examinated
1729 for (i = 0; i < lines_to_add; ++i) {
1730 if (placeholder[counter][i] == -1) {
1731 // placeholder[counter][i] is part of the current group
1732 // of blank lines
1733 ++tmp;
1734 } else {
1735 if (tmp == sub_lines) {
1736 // current group's size fits exactly the one we
1737 // need, so we stop looking
1738 fragment_position = i - tmp;
1739 tmp = 0;
1740 break;
1742 if ((tmp) && (tmp > sub_lines) && (tmp < fragment_length)) {
1743 // current group is the best we found till here,
1744 // but is still bigger than the one we are looking
1745 // for, so we keep on looking
1746 fragment_length = tmp;
1747 fragment_position = i - tmp;
1748 tmp = 0;
1749 } else {
1750 // current group doesn't fit at all, so we forget it
1751 tmp = 0;
1755 if (tmp) {
1756 // last screen line is blank, a group ends with it
1757 if ((tmp >= sub_lines) && (tmp < fragment_length)) {
1758 fragment_position = i - tmp;
1761 if (fragment_position == -1) {
1762 // it was not possible to find free screen line(s) for a subtitle,
1763 // usually this means a bug in the code; however we do not overlap
1764 mp_msg(MSGT_SUBREADER, MSGL_WARN, "SUB: we could not find a suitable position for an overlapping subtitle\n");
1765 higher_line = SUB_MAX_TEXT + 1;
1766 break;
1767 } else {
1768 for (tmp = 0; tmp < sub_lines; ++tmp) {
1769 placeholder[counter][fragment_position + tmp] = sub_first + j;
1774 for (j = higher_line + 1; j < lines_to_add; ++j) {
1775 if (placeholder[counter][j] != -1)
1776 higher_line = j;
1777 else
1778 break;
1780 if (higher_line >= SUB_MAX_TEXT) {
1781 // the 'block' has too much lines, so we don't overlap the
1782 // subtitles
1783 second = realloc(second, (sub_num + sub_to_add + 1) * sizeof(subtitle));
1784 for (j = 0; j <= sub_to_add; ++j) {
1785 int ls;
1786 memset(&second[sub_num + j], '\0', sizeof(subtitle));
1787 second[sub_num + j].start = first[sub_first + j].start;
1788 second[sub_num + j].end = first[sub_first + j].end;
1789 second[sub_num + j].lines = first[sub_first + j].lines;
1790 second[sub_num + j].alignment = first[sub_first + j].alignment;
1791 for (ls = 0; ls < second[sub_num + j].lines; ls++) {
1792 second[sub_num + j].text[ls] = strdup(first[sub_first + j].text[ls]);
1795 sub_num += sub_to_add + 1;
1796 sub_first += sub_to_add;
1797 real_block = 0;
1798 break;
1801 // we read the placeholder structure and create the new
1802 // subs.
1803 second = realloc(second, (sub_num + 1) * sizeof(subtitle));
1804 memset(&second[sub_num], '\0', sizeof(subtitle));
1805 second[sub_num].start = local_start;
1806 second[sub_num].end = local_end;
1807 second[sub_num].alignment = first[sub_first].alignment;
1808 n_max = (lines_to_add < SUB_MAX_TEXT) ? lines_to_add : SUB_MAX_TEXT;
1809 for (i = 0, j = 0; j < n_max; ++j) {
1810 if (placeholder[counter][j] != -1) {
1811 int lines = first[placeholder[counter][j]].lines;
1812 for (ls = 0; ls < lines; ++ls) {
1813 second[sub_num].text[i++] = strdup(first[placeholder[counter][j]].text[ls]);
1815 j += lines - 1;
1816 } else {
1817 second[sub_num].text[i++] = strdup(" ");
1820 ++sub_num;
1821 ++counter;
1822 } while (local_end < global_end);
1823 if (real_block)
1824 for (i = 0; i < counter; ++i)
1825 second[start_block_sub + i].lines = higher_line + 1;
1827 counter = 2 * sub_to_add + 1;
1828 for (i = 0; i < counter; ++i) {
1829 free(placeholder[i]);
1831 free(placeholder);
1832 sub_first += sub_to_add;
1835 for (j = sub_orig - 1; j >= 0; --j) {
1836 for (i = first[j].lines - 1; i >= 0; --i) {
1837 free(first[j].text[i]);
1840 free(first);
1842 return_sub = second;
1843 } else { //if(suboverlap_enabled)
1844 adjust_subs_time(first, 6.0, fps, 1, sub_num, uses_time);/*~6 secs AST*/
1845 return_sub = first;
1847 if (return_sub == NULL) return NULL;
1848 subt_data = malloc(sizeof(sub_data));
1849 subt_data->filename = strdup(filename);
1850 subt_data->sub_uses_time = uses_time;
1851 subt_data->sub_num = sub_num;
1852 subt_data->sub_errs = sub_errs;
1853 subt_data->subtitles = return_sub;
1854 return subt_data;
1857 void list_sub_file(sub_data* subd){
1858 int i,j;
1859 subtitle *subs = subd->subtitles;
1861 for(j=0; j < subd->sub_num; j++){
1862 subtitle* egysub=&subs[j];
1863 mp_msg(MSGT_SUBREADER,MSGL_INFO,"%i line%c (%li-%li)\n",
1864 egysub->lines,
1865 (1==egysub->lines)?' ':'s',
1866 egysub->start,
1867 egysub->end);
1868 for (i=0; i<egysub->lines; i++) {
1869 mp_msg(MSGT_SUBREADER,MSGL_INFO,"\t\t%d: %s%s", i,egysub->text[i], i==egysub->lines-1?"":" \n ");
1871 mp_msg(MSGT_SUBREADER,MSGL_INFO,"\n");
1874 mp_msg(MSGT_SUBREADER,MSGL_INFO,"Subtitle format %s time.\n",
1875 subd->sub_uses_time ? "uses":"doesn't use");
1876 mp_msg(MSGT_SUBREADER,MSGL_INFO,"Read %i subtitles, %i errors.\n", subd->sub_num, subd->sub_errs);
1879 void dump_srt(sub_data* subd, float fps){
1880 int i,j;
1881 int h,m,s,ms;
1882 FILE * fd;
1883 subtitle * onesub;
1884 unsigned long temp;
1885 subtitle *subs = subd->subtitles;
1887 if (!subd->sub_uses_time && sub_fps == 0)
1888 sub_fps = fps;
1889 fd=fopen("dumpsub.srt","w");
1890 if(!fd)
1892 perror("dump_srt: fopen");
1893 return;
1895 for(i=0; i < subd->sub_num; i++)
1897 onesub=subs+i; //=&subs[i];
1898 fprintf(fd,"%d\n",i+1);//line number
1900 temp=onesub->start;
1901 if (!subd->sub_uses_time)
1902 temp = temp * 100 / sub_fps;
1903 temp -= sub_delay * 100;
1904 h=temp/360000;temp%=360000; //h =1*100*60*60
1905 m=temp/6000; temp%=6000; //m =1*100*60
1906 s=temp/100; temp%=100; //s =1*100
1907 ms=temp*10; //ms=1*10
1908 fprintf(fd,"%02d:%02d:%02d,%03d --> ",h,m,s,ms);
1910 temp=onesub->end;
1911 if (!subd->sub_uses_time)
1912 temp = temp * 100 / sub_fps;
1913 temp -= sub_delay * 100;
1914 h=temp/360000;temp%=360000;
1915 m=temp/6000; temp%=6000;
1916 s=temp/100; temp%=100;
1917 ms=temp*10;
1918 fprintf(fd,"%02d:%02d:%02d,%03d\n",h,m,s,ms);
1920 for(j=0;j<onesub->lines;j++)
1921 fprintf(fd,"%s\n",onesub->text[j]);
1923 fprintf(fd,"\n");
1925 fclose(fd);
1926 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.srt\'.\n");
1929 void dump_mpsub(sub_data* subd, float fps){
1930 int i,j;
1931 FILE *fd;
1932 float a,b;
1933 subtitle *subs = subd->subtitles;
1935 mpsub_position = subd->sub_uses_time? (sub_delay*100) : (sub_delay*fps);
1936 if (sub_fps==0) sub_fps=fps;
1938 fd=fopen ("dump.mpsub", "w");
1939 if (!fd) {
1940 perror ("dump_mpsub: fopen");
1941 return;
1945 if (subd->sub_uses_time) fprintf (fd,"FORMAT=TIME\n\n");
1946 else fprintf (fd, "FORMAT=%5.2f\n\n", fps);
1948 for(j=0; j < subd->sub_num; j++){
1949 subtitle* egysub=&subs[j];
1950 if (subd->sub_uses_time) {
1951 a=((egysub->start-mpsub_position)/100.0);
1952 b=((egysub->end-egysub->start)/100.0);
1953 if ( (float)((int)a) == a)
1954 fprintf (fd, "%.0f",a);
1955 else
1956 fprintf (fd, "%.2f",a);
1958 if ( (float)((int)b) == b)
1959 fprintf (fd, " %.0f\n",b);
1960 else
1961 fprintf (fd, " %.2f\n",b);
1962 } else {
1963 fprintf (fd, "%ld %ld\n", (long)((egysub->start*(fps/sub_fps))-((mpsub_position*(fps/sub_fps)))),
1964 (long)(((egysub->end)-(egysub->start))*(fps/sub_fps)));
1967 mpsub_position = egysub->end;
1968 for (i=0; i<egysub->lines; i++) {
1969 fprintf (fd, "%s\n",egysub->text[i]);
1971 fprintf (fd, "\n");
1973 fclose (fd);
1974 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dump.mpsub\'.\n");
1977 void dump_microdvd(sub_data* subd, float fps) {
1978 int i, delay;
1979 FILE *fd;
1980 subtitle *subs = subd->subtitles;
1981 if (sub_fps == 0)
1982 sub_fps = fps;
1983 fd = fopen("dumpsub.sub", "w");
1984 if (!fd) {
1985 perror("dumpsub.sub: fopen");
1986 return;
1988 delay = sub_delay * sub_fps;
1989 for (i = 0; i < subd->sub_num; ++i) {
1990 int j, start, end;
1991 start = subs[i].start;
1992 end = subs[i].end;
1993 if (subd->sub_uses_time) {
1994 start = start * sub_fps / 100 ;
1995 end = end * sub_fps / 100;
1997 else {
1998 start = start * sub_fps / fps;
1999 end = end * sub_fps / fps;
2001 start -= delay;
2002 end -= delay;
2003 fprintf(fd, "{%d}{%d}", start, end);
2004 for (j = 0; j < subs[i].lines; ++j)
2005 fprintf(fd, "%s%s", j ? "|" : "", subs[i].text[j]);
2006 fprintf(fd, "\n");
2008 fclose(fd);
2009 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.sub\'.\n");
2012 void dump_jacosub(sub_data* subd, float fps) {
2013 int i,j;
2014 int h,m,s,cs;
2015 FILE * fd;
2016 subtitle * onesub;
2017 unsigned long temp;
2018 subtitle *subs = subd->subtitles;
2020 if (!subd->sub_uses_time && sub_fps == 0)
2021 sub_fps = fps;
2022 fd=fopen("dumpsub.jss","w");
2023 if(!fd)
2025 perror("dump_jacosub: fopen");
2026 return;
2028 fprintf(fd, "#TIMERES %d\n", (subd->sub_uses_time) ? 100 : (int)sub_fps);
2029 for(i=0; i < subd->sub_num; i++)
2031 onesub=subs+i; //=&subs[i];
2033 temp=onesub->start;
2034 if (!subd->sub_uses_time)
2035 temp = temp * 100 / sub_fps;
2036 temp -= sub_delay * 100;
2037 h=temp/360000;temp%=360000; //h =1*100*60*60
2038 m=temp/6000; temp%=6000; //m =1*100*60
2039 s=temp/100; temp%=100; //s =1*100
2040 cs=temp; //cs=1*10
2041 fprintf(fd,"%02d:%02d:%02d.%02d ",h,m,s,cs);
2043 temp=onesub->end;
2044 if (!subd->sub_uses_time)
2045 temp = temp * 100 / sub_fps;
2046 temp -= sub_delay * 100;
2047 h=temp/360000;temp%=360000;
2048 m=temp/6000; temp%=6000;
2049 s=temp/100; temp%=100;
2050 cs=temp;
2051 fprintf(fd,"%02d:%02d:%02d.%02d {~} ",h,m,s,cs);
2053 for(j=0;j<onesub->lines;j++)
2054 fprintf(fd,"%s%s",j ? "\\n" : "", onesub->text[j]);
2056 fprintf(fd,"\n");
2058 fclose(fd);
2059 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.js\'.\n");
2062 void dump_sami(sub_data* subd, float fps) {
2063 int i,j;
2064 FILE * fd;
2065 subtitle * onesub;
2066 unsigned long temp;
2067 subtitle *subs = subd->subtitles;
2069 if (!subd->sub_uses_time && sub_fps == 0)
2070 sub_fps = fps;
2071 fd=fopen("dumpsub.smi","w");
2072 if(!fd)
2074 perror("dump_jacosub: fopen");
2075 return;
2077 fprintf(fd, "<SAMI>\n"
2078 "<HEAD>\n"
2079 " <STYLE TYPE=\"Text/css\">\n"
2080 " <!--\n"
2081 " P {margin-left: 29pt; margin-right: 29pt; font-size: 24pt; text-align: center; font-family: Tahoma; font-weight: bold; color: #FCDD03; background-color: #000000;}\n"
2082 " .SUBTTL {Name: 'Subtitles'; Lang: en-US; SAMIType: CC;}\n"
2083 " -->\n"
2084 " </STYLE>\n"
2085 "</HEAD>\n"
2086 "<BODY>\n");
2087 for(i=0; i < subd->sub_num; i++)
2089 onesub=subs+i; //=&subs[i];
2091 temp=onesub->start;
2092 if (!subd->sub_uses_time)
2093 temp = temp * 100 / sub_fps;
2094 temp -= sub_delay * 100;
2095 fprintf(fd,"\t<SYNC Start=%lu>\n"
2096 "\t <P>", temp * 10);
2098 for(j=0;j<onesub->lines;j++)
2099 fprintf(fd,"%s%s",j ? "<br>" : "", onesub->text[j]);
2101 fprintf(fd,"\n");
2103 temp=onesub->end;
2104 if (!subd->sub_uses_time)
2105 temp = temp * 100 / sub_fps;
2106 temp -= sub_delay * 100;
2107 fprintf(fd,"\t<SYNC Start=%lu>\n"
2108 "\t <P>&nbsp;\n", temp * 10);
2110 fprintf(fd, "</BODY>\n"
2111 "</SAMI>\n");
2112 fclose(fd);
2113 mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.smi\'.\n");
2116 void sub_free( sub_data * subd )
2118 int i, j;
2120 if ( !subd ) return;
2122 for (i = 0; i < subd->sub_num; i++)
2123 for (j = 0; j < subd->subtitles[i].lines; j++)
2124 free( subd->subtitles[i].text[j] );
2125 free( subd->subtitles );
2126 free( subd->filename );
2127 free( subd );
2130 #define MAX_SUBLINE 512
2132 * \brief parse text and append it to subtitle in sub
2133 * \param sub subtitle struct to add text to
2134 * \param txt text to parse
2135 * \param len length of text in txt
2136 * \param endpts pts at which this subtitle text should be removed again
2138 * <> and {} are interpreted as comment delimiters, "\n", "\N", '\n', '\r'
2139 * and '\0' are interpreted as newlines, duplicate, leading and trailing
2140 * newlines are ignored.
2142 void sub_add_text(subtitle *sub, const char *txt, int len, double endpts) {
2143 int comment = 0;
2144 int double_newline = 1; // ignore newlines at the beginning
2145 int i, pos;
2146 char *buf;
2147 #ifdef CONFIG_FRIBIDI
2148 int orig_lines = sub->lines;
2149 #endif
2150 if (sub->lines >= SUB_MAX_TEXT) return;
2151 pos = 0;
2152 buf = malloc(MAX_SUBLINE + 1);
2153 sub->text[sub->lines] = buf;
2154 sub->endpts[sub->lines] = endpts;
2155 for (i = 0; i < len && pos < MAX_SUBLINE; i++) {
2156 char c = txt[i];
2157 if (c == '<') comment |= 1;
2158 if (c == '{') comment |= 2;
2159 if (comment) {
2160 if (c == '}') comment &= ~2;
2161 if (c == '>') comment &= ~1;
2162 continue;
2164 if (pos == MAX_SUBLINE - 1) {
2165 i--;
2166 c = 0;
2168 if (c == '\\' && i + 1 < len) {
2169 c = txt[++i];
2170 if (c == 'n' || c == 'N') c = 0;
2172 if (c == '\n' || c == '\r') c = 0;
2173 if (c) {
2174 double_newline = 0;
2175 buf[pos++] = c;
2176 } else if (!double_newline) {
2177 if (sub->lines >= SUB_MAX_TEXT - 1) {
2178 mp_msg(MSGT_VO, MSGL_WARN, "Too many subtitle lines\n");
2179 break;
2181 double_newline = 1;
2182 buf[pos] = 0;
2183 sub->lines++;
2184 pos = 0;
2185 buf = malloc(MAX_SUBLINE + 1);
2186 sub->text[sub->lines] = buf;
2187 sub->endpts[sub->lines] = endpts;
2190 buf[pos] = 0;
2191 if (sub->lines < SUB_MAX_TEXT &&
2192 strlen(sub->text[sub->lines]))
2193 sub->lines++;
2194 #ifdef CONFIG_FRIBIDI
2195 sub = sub_fribidi(sub, sub_utf8, orig_lines);
2196 #endif
2200 * \brief remove outdated subtitle lines.
2201 * \param sub subtitle struct to modify
2202 * \param pts current pts. All lines with endpts <= this will be removed.
2203 * Use MP_NOPTS_VALUE to remove all lines
2204 * \return 1 if sub was modified, 0 otherwise.
2206 int sub_clear_text(subtitle *sub, double pts) {
2207 int i = 0;
2208 int changed = 0;
2209 while (i < sub->lines) {
2210 double endpts = sub->endpts[i];
2211 if (pts == MP_NOPTS_VALUE || (endpts != MP_NOPTS_VALUE && pts >= endpts)) {
2212 int j;
2213 free(sub->text[i]);
2214 for (j = i + 1; j < sub->lines; j++) {
2215 sub->text[j - 1] = sub->text[j];
2216 sub->endpts[j - 1] = sub->endpts[j];
2218 sub->lines--;
2219 changed = 1;
2220 } else
2221 i++;
2223 return changed;