2 * Subtitle reader with format autodetection
4 * Copyright (c) 2001 laaz
5 * Some code cleanup & realloc() by A'rpi/ESP-team
7 * This file is part of MPlayer.
9 * MPlayer is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * MPlayer is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include <sys/types.h>
33 #include "subreader.h"
35 #include "subassconvert.h"
37 #include "stream/stream.h"
38 #include "libavutil/common.h"
39 #include "libavutil/avstring.h"
45 #define ERR ((void *) -1)
52 int suboverlap_enabled
= 1;
54 // Parameter struct for the format-specific readline functions
55 struct readline_args
{
60 /* Maximal length of line of a subtitle */
62 static float mpsub_position
=0;
63 static float mpsub_multiplier
=1.;
64 static int sub_slacktime
= 20000; //20 sec
66 int sub_no_text_pp
=0; // 1 => do not apply text post-processing
67 // like {\...} elimination in SSA format.
69 int sub_match_fuzziness
=0; // level of sub name matching fuzziness
71 /* Use the SUB_* constant defined in the header file */
72 int sub_format
=SUB_INVALID
;
74 Some subtitling formats, namely AQT and Subrip09, define the end of a
75 subtitle as the beginning of the following one. Since we currently read one
76 subtitle at a time, previous_sub_end is used to communicate the end of the
77 previous subtitle from the code reading the next. The previous subtitle
78 is then modified to have the correct end time.
80 unsigned long previous_sub_end
;
/*
 * Tell whether a character ends a line.
 *
 * Returns 1 for carriage return, line feed or the terminating NUL,
 * and 0 for every other character.
 */
static int eol(char p) {
    switch (p) {
    case '\r':
    case '\n':
    case '\0':
        return 1;
    default:
        return 0;
    }
}
/*
 * Remove leading and trailing whitespace from s, in place.
 *
 * s must be a writable, NUL-terminated string. The remaining text is
 * shifted to the start of the buffer and the trailing whitespace is
 * overwritten with NUL bytes.
 */
static void trail_space(char *s) {
    size_t lead = 0;
    size_t len;

    /* isspace() is only defined for unsigned char values (and EOF). */
    while (isspace((unsigned char)s[lead]))
        ++lead;

    len = strlen(s + lead);

    /* Source and destination overlap, so strcpy() must not be used here. */
    if (lead)
        memmove(s, s + lead, len + 1);

    while (len > 0 && isspace((unsigned char)s[len - 1]))
        s[--len] = '\0';
}
/*
 * Case-insensitive substring search.
 *
 * Returns a pointer to the first position in haystack at which needle
 * matches when compared with strncasecmp(), or NULL when there is no
 * match or when either argument is NULL.
 */
static char *stristr(const char *haystack, const char *needle) {
    if (!haystack || !needle)
        return NULL;

    const size_t len = strlen(needle);

    /* Try every starting offset until the end of haystack. */
    for (const char *p = haystack; *p != '\0'; p++) {
        if (strncasecmp(p, needle, len) == 0)
            return (char *)p;
    }

    return NULL;
}
110 static void sami_add_line(subtitle
*current
, char *buffer
, char **pos
) {
114 if (*buffer
&& current
->lines
< SUB_MAX_TEXT
)
115 current
->text
[current
->lines
++] = strdup(buffer
);
119 static subtitle
*sub_read_line_sami(stream_t
* st
, subtitle
*current
,
120 struct readline_args
*args
)
122 int utf16
= args
->utf16
;
123 static char line
[LINE_LEN
+1];
124 static char *s
= NULL
, *slacktime_s
;
125 char text
[LINE_LEN
+1], *p
=NULL
, *q
;
128 current
->lines
= current
->start
= current
->end
= 0;
129 current
->alignment
= SUB_ALIGNMENT_BOTTOMCENTER
;
132 /* read the first line */
134 if (!(s
= stream_read_line(st
, line
, LINE_LEN
, utf16
))) return 0;
139 case 0: /* find "START=" or "Slacktime:" */
140 slacktime_s
= stristr (s
, "Slacktime:");
142 sub_slacktime
= strtol (slacktime_s
+10, NULL
, 0) / 10;
144 s
= stristr (s
, "Start=");
146 current
->start
= strtol (s
+ 6, &s
, 0) / 10;
148 for (; *s
!= '>' && *s
!= '\0'; s
++);
154 case 1: /* find (optional) "<P", skip other TAGs */
155 for (; *s
== ' ' || *s
== '\t'; s
++); /* strip blanks, if any */
156 if (*s
== '\0') break;
157 if (*s
!= '<') { state
= 3; p
= text
; continue; } /* not a TAG */
159 if (*s
== 'P' || *s
== 'p') { s
++; state
= 2; continue; } /* found '<P' */
160 for (; *s
!= '>' && *s
!= '\0'; s
++); /* skip remains of non-<P> TAG */
166 case 2: /* find ">" */
167 if ((s
= strchr (s
, '>'))) { s
++; state
= 3; p
= text
; continue; }
170 case 3: /* get all text until '<' appears */
171 if (p
- text
>= LINE_LEN
)
172 sami_add_line(current
, text
, &p
);
173 if (*s
== '\0') break;
174 else if (!strncasecmp (s
, "<br>", 4)) {
175 sami_add_line(current
, text
, &p
);
178 else if ((*s
== '{') && !sub_no_text_pp
) { state
= 5; ++s
; continue; }
179 else if (*s
== '<') { state
= 4; }
180 else if (!strncasecmp (s
, " ", 6)) { *p
++ = ' '; s
+= 6; }
181 else if (*s
== '\t') { *p
++ = ' '; s
++; }
182 else if (*s
== '\r' || *s
== '\n') { s
++; }
185 /* skip duplicated space */
186 if (p
> text
+ 2) if (*(p
-1) == ' ' && *(p
-2) == ' ') p
--;
190 case 4: /* get current->end or skip <TAG> */
191 q
= stristr (s
, "Start=");
193 current
->end
= strtol (q
+ 6, &q
, 0) / 10 - 1;
194 *p
= '\0'; trail_space (text
);
196 current
->text
[current
->lines
++] = strdup (text
);
197 if (current
->lines
> 0) { state
= 99; break; }
201 if (s
) { s
++; state
= 3; continue; }
203 case 5: /* get rid of {...} text, but read the alignment code */
204 if ((*s
== '\\') && (*(s
+ 1) == 'a') && !sub_no_text_pp
) {
205 if (stristr(s
, "\\a1") != NULL
) {
206 current
->alignment
= SUB_ALIGNMENT_BOTTOMLEFT
;
209 if (stristr(s
, "\\a2") != NULL
) {
210 current
->alignment
= SUB_ALIGNMENT_BOTTOMCENTER
;
212 } else if (stristr(s
, "\\a3") != NULL
) {
213 current
->alignment
= SUB_ALIGNMENT_BOTTOMRIGHT
;
215 } else if ((stristr(s
, "\\a4") != NULL
) || (stristr(s
, "\\a5") != NULL
) || (stristr(s
, "\\a8") != NULL
)) {
216 current
->alignment
= SUB_ALIGNMENT_TOPLEFT
;
218 } else if (stristr(s
, "\\a6") != NULL
) {
219 current
->alignment
= SUB_ALIGNMENT_TOPCENTER
;
221 } else if (stristr(s
, "\\a7") != NULL
) {
222 current
->alignment
= SUB_ALIGNMENT_TOPRIGHT
;
224 } else if (stristr(s
, "\\a9") != NULL
) {
225 current
->alignment
= SUB_ALIGNMENT_MIDDLELEFT
;
227 } else if (stristr(s
, "\\a10") != NULL
) {
228 current
->alignment
= SUB_ALIGNMENT_MIDDLECENTER
;
230 } else if (stristr(s
, "\\a11") != NULL
) {
231 current
->alignment
= SUB_ALIGNMENT_MIDDLERIGHT
;
235 if (*s
== '}') state
= 3;
241 if (state
!= 99 && !(s
= stream_read_line (st
, line
, LINE_LEN
, utf16
))) {
242 if (current
->start
> 0) {
243 break; // if it is the last subtitle
249 } while (state
!= 99);
251 // For the last subtitle
252 if (current
->end
<= 0) {
253 current
->end
= current
->start
+ sub_slacktime
;
254 sami_add_line(current
, text
, &p
);
261 static char *sub_readtext(char *source
, char **dest
) {
265 // printf("src=%p dest=%p \n",source,dest);
267 while ( !eol(*p
) && *p
!= '|' ) {
271 *dest
= malloc (len
+1);
272 if (!dest
) {return ERR
;}
274 strncpy(*dest
, source
, len
);
277 while (*p
=='\r' || *p
=='\n' || *p
=='|') p
++;
279 if (*p
) return p
; // not-last text field
280 else return NULL
; // last text field
283 static subtitle
*sub_read_line_microdvd(stream_t
*st
,subtitle
*current
,
284 struct readline_args
*args
)
286 int utf16
= args
->utf16
;
287 char line
[LINE_LEN
+1];
288 char line2
[LINE_LEN
+1];
293 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
294 } while ((sscanf (line
,
296 &(current
->start
), line2
) < 2) &&
298 "{%ld}{%ld}%[^\r\n]",
299 &(current
->start
), &(current
->end
), line2
) < 3));
301 if (args
->opts
->ass_enabled
) {
302 subassconvert_microdvd(line2
, line
, LINE_LEN
+ 1);
308 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
309 if (current
->text
[i
]==ERR
) {return ERR
;}
311 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
318 static subtitle
*sub_read_line_mpl2(stream_t
*st
,subtitle
*current
,
319 struct readline_args
*args
)
321 int utf16
= args
->utf16
;
322 char line
[LINE_LEN
+1];
323 char line2
[LINE_LEN
+1];
328 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
329 } while ((sscanf (line
,
330 "[%ld][%ld]%[^\r\n]",
331 &(current
->start
), &(current
->end
), line2
) < 3));
332 current
->start
*= 10;
337 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
338 if (current
->text
[i
]==ERR
) {return ERR
;}
340 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
347 static subtitle
*sub_read_line_subrip(stream_t
* st
, subtitle
*current
,
348 struct readline_args
*args
)
350 int utf16
= args
->utf16
;
351 char line
[LINE_LEN
+1];
352 int a1
,a2
,a3
,a4
,b1
,b2
,b3
,b4
;
353 char *p
=NULL
, *q
=NULL
;
357 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
358 if (sscanf (line
, "%d:%d:%d.%d,%d:%d:%d.%d",&a1
,&a2
,&a3
,&a4
,&b1
,&b2
,&b3
,&b4
) < 8) continue;
359 current
->start
= a1
*360000+a2
*6000+a3
*100+a4
;
360 current
->end
= b1
*360000+b2
*6000+b3
*100+b4
;
362 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
365 for (current
->lines
=1; current
->lines
< SUB_MAX_TEXT
; current
->lines
++) {
366 for (q
=p
,len
=0; *p
&& *p
!='\r' && *p
!='\n' && *p
!='|' && strncmp(p
,"[br]",4); p
++,len
++);
367 current
->text
[current
->lines
-1]=malloc (len
+1);
368 if (!current
->text
[current
->lines
-1]) return ERR
;
369 strncpy (current
->text
[current
->lines
-1], q
, len
);
370 current
->text
[current
->lines
-1][len
]='\0';
371 if (!*p
|| *p
=='\r' || *p
=='\n') break;
373 else while (*p
++!=']');
380 static subtitle
*sub_ass_read_line_subviewer(stream_t
*st
, subtitle
*current
,
381 struct readline_args
*args
)
383 int utf16
= args
->utf16
;
384 int a1
, a2
, a3
, a4
, b1
, b2
, b3
, b4
, j
= 0;
386 while (!current
->text
[0]) {
387 char line
[LINE_LEN
+ 1], full_line
[LINE_LEN
+ 1];
390 /* Parse SubRip header */
391 if (!stream_read_line(st
, line
, LINE_LEN
, utf16
))
393 if (sscanf(line
, "%d:%d:%d%*1[,.:]%d --> %d:%d:%d%*1[,.:]%d",
394 &a1
, &a2
, &a3
, &a4
, &b1
, &b2
, &b3
, &b4
) < 8)
397 current
->start
= a1
* 360000 + a2
* 6000 + a3
* 100 + a4
/ 10;
398 current
->end
= b1
* 360000 + b2
* 6000 + b3
* 100 + b4
/ 10;
402 for (i
= 0; i
< SUB_MAX_TEXT
; i
++) {
403 int blank
= 1, len
= 0;
406 if (!stream_read_line(st
, line
, LINE_LEN
, utf16
))
409 for (p
= line
; *p
!= '\n' && *p
!= '\r' && *p
; p
++, len
++)
410 if (*p
!= ' ' && *p
!= '\t')
418 if (!(j
+ 1 + len
< sizeof(full_line
) - 1))
422 full_line
[j
++] = '\n';
423 strcpy(&full_line
[j
], line
);
427 /* Use the ASS/SSA converter to transform the whole lines */
429 char converted_line
[LINE_LEN
+ 1];
430 subassconvert_subrip(full_line
, converted_line
, LINE_LEN
+ 1);
431 current
->text
[0] = strdup(converted_line
);
438 static subtitle
*sub_read_line_subviewer(stream_t
*st
,subtitle
*current
,
439 struct readline_args
*args
)
441 int utf16
= args
->utf16
;
442 char line
[LINE_LEN
+1];
443 int a1
,a2
,a3
,a4
,b1
,b2
,b3
,b4
;
447 if (args
->opts
->ass_enabled
)
448 return sub_ass_read_line_subviewer(st
, current
, args
);
449 while (!current
->text
[0]) {
450 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
451 if ((len
=sscanf (line
, "%d:%d:%d%*1[,.:]%d --> %d:%d:%d%*1[,.:]%d",&a1
,&a2
,&a3
,&a4
,&b1
,&b2
,&b3
,&b4
)) < 8)
453 current
->start
= a1
*360000+a2
*6000+a3
*100+a4
/10;
454 current
->end
= b1
*360000+b2
*6000+b3
*100+b4
/10;
455 for (i
=0; i
<SUB_MAX_TEXT
;) {
457 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) break;
459 for (p
=line
; *p
!='\n' && *p
!='\r' && *p
; p
++,len
++)
460 if (*p
!= ' ' && *p
!= '\t')
464 char *curptr
=current
->text
[i
]=malloc (len
+1);
465 if (!current
->text
[i
]) return ERR
;
466 //strncpy (current->text[i], line, len); current->text[i][len]='\0';
468 /* let's filter html tags ::atmos */
495 static subtitle
*sub_read_line_subviewer2(stream_t
*st
,subtitle
*current
,
496 struct readline_args
*args
)
498 int utf16
= args
->utf16
;
499 char line
[LINE_LEN
+1];
504 while (!current
->text
[0]) {
505 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
508 if ((len
=sscanf (line
, "{T %d:%d:%d:%d",&a1
,&a2
,&a3
,&a4
)) < 4)
510 current
->start
= a1
*360000+a2
*6000+a3
*100+a4
/10;
511 for (i
=0; i
<SUB_MAX_TEXT
;) {
512 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) break;
513 if (line
[0]=='}') break;
515 for (p
=line
; *p
!='\n' && *p
!='\r' && *p
; ++p
,++len
);
517 current
->text
[i
]=malloc (len
+1);
518 if (!current
->text
[i
]) return ERR
;
519 strncpy (current
->text
[i
], line
, len
); current
->text
[i
][len
]='\0';
531 static subtitle
*sub_read_line_vplayer(stream_t
*st
,subtitle
*current
,
532 struct readline_args
*args
)
534 int utf16
= args
->utf16
;
535 char line
[LINE_LEN
+1];
537 char *p
=NULL
, *next
,separator
;
540 while (!current
->text
[0]) {
541 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
542 if ((len
=sscanf (line
, "%d:%d:%d%c%n",&a1
,&a2
,&a3
,&separator
,&plen
)) < 4)
545 if (!(current
->start
= a1
*360000+a2
*6000+a3
*100))
549 // finds the body of the subtitle
556 printf("SUB: Skipping incorrect subtitle line!\n");
560 // by wodzu: hey! this time we know what length it has! what is
561 // that magic for? it can't deal with space instead of third
562 // colon! look, what simple it can be:
569 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
570 if (current
->text
[i
]==ERR
) {return ERR
;}
572 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
580 static subtitle
*sub_read_line_rt(stream_t
*st
,subtitle
*current
,
581 struct readline_args
*args
)
583 int utf16
= args
->utf16
;
585 //TODO: This format uses quite rich (sub/super)set of xhtml
586 // I couldn't check it since DTD is not included.
587 // WARNING: a full XML parser may be required for proper parsing
588 char line
[LINE_LEN
+1];
589 int a1
,a2
,a3
,a4
,b1
,b2
,b3
,b4
;
590 char *p
=NULL
,*next
=NULL
;
593 while (!current
->text
[0]) {
594 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
595 //TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
596 //to describe the same moment in time. Maybe there are even more formats in use.
597 //if ((len=sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8)
598 plen
=a1
=a2
=a3
=a4
=b1
=b2
=b3
=b4
=0;
600 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d.%d\" %*[Ee]nd=\"%d.%d\"%*[^<]<clear/>%n",&a3
,&a4
,&b3
,&b4
,&plen
)) < 4) &&
601 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a3
,&a4
,&b2
,&b3
,&b4
,&plen
)) < 5) &&
602 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2
,&a3
,&b2
,&b3
,&plen
)) < 4) &&
603 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2
,&a3
,&b2
,&b3
,&b4
,&plen
)) < 5) &&
604 // ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen)) < 5) &&
605 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2
,&a3
,&a4
,&b2
,&b3
,&b4
,&plen
)) < 6) &&
606 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1
,&a2
,&a3
,&a4
,&b1
,&b2
,&b3
,&b4
,&plen
)) < 8) &&
607 //now try it without end time
608 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d.%d\"%*[^<]<clear/>%n",&a3
,&a4
,&plen
)) < 2) &&
609 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d\"%*[^<]<clear/>%n",&a2
,&a3
,&plen
)) < 2) &&
610 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2
,&a3
,&a4
,&plen
)) < 3) &&
611 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1
,&a2
,&a3
,&a4
,&plen
)) < 4)
614 current
->start
= a1
*360000+a2
*6000+a3
*100+a4
/10;
615 current
->end
= b1
*360000+b2
*6000+b3
*100+b4
/10;
616 if (b1
== 0 && b2
== 0 && b3
== 0 && b4
== 0)
617 current
->end
= current
->start
+200;
619 // TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml?
620 next
= strstr(line
,"<clear/>");
621 if(next
&& strlen(next
)>8){
623 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
624 if (current
->text
[i
]==ERR
) {return ERR
;}
626 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
634 static subtitle
*sub_read_line_ssa(stream_t
*st
,subtitle
*current
,
635 struct readline_args
*args
)
637 /* Instead of hardcoding the expected fields and their order on
638 * each dialogue line, this code should parse the "Format: " line
639 * which lists the fields used in the script. As is, this may not
640 * work correctly with all scripts.
643 int utf16
= args
->utf16
;
646 int hour1
, min1
, sec1
, hunsec1
,
647 hour2
, min2
, sec2
, hunsec2
, nothing
;
650 char line
[LINE_LEN
+1],
656 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
657 } while (sscanf (line
, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d"
658 "%[^\n\r]", ¬hing
,
659 &hour1
, &min1
, &sec1
, &hunsec1
,
660 &hour2
, &min2
, &sec2
, &hunsec2
,
663 sscanf (line
, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d"
664 "%[^\n\r]", ¬hing
,
665 &hour1
, &min1
, &sec1
, &hunsec1
,
666 &hour2
, &min2
, &sec2
, &hunsec2
,
669 line2
=strchr(line3
, ',');
670 if (!line2
) return NULL
;
672 for (comma
= 3; comma
< 9; comma
++)
673 if (!(line2
= strchr(++line2
, ',')))
677 current
->lines
=0;num
=0;
678 current
->start
= 360000*hour1
+ 6000*min1
+ 100*sec1
+ hunsec1
;
679 current
->end
= 360000*hour2
+ 6000*min2
+ 100*sec2
+ hunsec2
;
681 while (((tmp
=strstr(line2
, "\\n")) != NULL
) || ((tmp
=strstr(line2
, "\\N")) != NULL
) ){
682 current
->text
[num
]=malloc(tmp
-line2
+1);
683 strncpy (current
->text
[num
], line2
, tmp
-line2
);
684 current
->text
[num
][tmp
-line2
]='\0';
688 if (current
->lines
>= SUB_MAX_TEXT
) return current
;
691 current
->text
[num
]=strdup(line2
);
697 static void sub_pp_ssa(subtitle
*sub
)
699 for (int i
= 0; i
< sub
->lines
; i
++) {
701 s
= d
= sub
->text
[i
];
704 while (*s
&& *s
++ != '}');
712 * PJS subtitles reader.
713 * That's the "Phoenix Japanimation Society" format.
714 * I found some of them in http://www.scriptsclub.org/ (used for anime).
715 * The time is in tenths of second.
717 * by set, based on code by szabi (dunnowhat sub format ;-)
719 static subtitle
*sub_read_line_pjs(stream_t
*st
,subtitle
*current
,
720 struct readline_args
*args
)
722 int utf16
= args
->utf16
;
723 char line
[LINE_LEN
+1];
724 char text
[LINE_LEN
+1], *s
, *d
;
726 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
729 for (s
=line
; *s
&& isspace(*s
); s
++);
730 /* allow empty lines at the end of the file */
734 if (sscanf (s
, "%ld,%ld,", &(current
->start
),
735 &(current
->end
)) <2) {
738 /* the files I have are in tenths of second */
739 current
->start
*= 10;
741 /* walk to the beginning of the string */
742 for (; *s
; s
++) if (*s
==',') break;
744 for (s
++; *s
; s
++) if (*s
==',') break;
750 /* copy the string to the text buffer */
751 for (s
++, d
=text
; *s
&& *s
!='"'; s
++, d
++)
754 current
->text
[0] = strdup(text
);
760 static subtitle
*sub_read_line_mpsub(stream_t
*st
, subtitle
*current
,
761 struct readline_args
*args
)
763 int utf16
= args
->utf16
;
764 char line
[LINE_LEN
+1];
771 if (!stream_read_line(st
, line
, LINE_LEN
, utf16
)) return NULL
;
772 } while (sscanf (line
, "%f %f", &a
, &b
) !=2);
774 mpsub_position
+= a
*mpsub_multiplier
;
775 current
->start
=(int) mpsub_position
;
776 mpsub_position
+= b
*mpsub_multiplier
;
777 current
->end
=(int) mpsub_position
;
779 while (num
< SUB_MAX_TEXT
) {
780 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) {
781 if (num
== 0) return NULL
;
785 while (isspace(*p
)) p
++;
786 if (eol(*p
) && num
> 0) return current
;
787 if (eol(*p
)) return NULL
;
789 for (q
=p
; !eol(*q
); q
++);
792 current
->text
[num
]=strdup(p
);
793 // printf (">%s<\n",p);
794 current
->lines
= ++num
;
796 if (num
) return current
;
800 return NULL
; // we should have returned before if it's OK
803 static subtitle
*sub_read_line_aqt(stream_t
*st
,subtitle
*current
,
804 struct readline_args
*args
)
806 int utf16
= args
->utf16
;
807 char line
[LINE_LEN
+1];
812 // try to locate next subtitle
813 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
815 if (!(sscanf (line
, "-->> %ld", &(current
->start
)) <1))
819 previous_sub_end
= (current
->start
) ? current
->start
- 1 : 0;
821 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
824 sub_readtext((char *) &line
,¤t
->text
[0]);
826 current
->end
= current
->start
; // will be corrected by next subtitle
828 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
832 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
833 if (current
->text
[i
]==ERR
) {return ERR
;}
835 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
839 if (!strlen(current
->text
[0]) && !strlen(current
->text
[1])) {
840 previous_sub_end
= 0;
847 static subtitle
*sub_read_line_subrip09(stream_t
*st
,subtitle
*current
,
848 struct readline_args
*args
)
850 int utf16
= args
->utf16
;
851 char line
[LINE_LEN
+1];
857 // try to locate next subtitle
858 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
860 if (!((len
=sscanf (line
, "[%d:%d:%d]",&a1
,&a2
,&a3
)) < 3))
864 current
->start
= a1
*360000+a2
*6000+a3
*100;
866 previous_sub_end
= (current
->start
) ? current
->start
- 1 : 0;
868 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
873 current
->text
[0]=""; // just to be sure that string is clear
875 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
876 if (current
->text
[i
]==ERR
) {return ERR
;}
878 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
882 if (!strlen(current
->text
[0]) && (i
==0)) {
883 previous_sub_end
= 0;
890 static subtitle
*sub_read_line_jacosub(stream_t
* st
, subtitle
* current
,
891 struct readline_args
*args
)
893 int utf16
= args
->utf16
;
894 char line1
[LINE_LEN
], line2
[LINE_LEN
], directive
[LINE_LEN
], *p
, *q
;
895 unsigned a1
, a2
, a3
, a4
, b1
, b2
, b3
, b4
, comment
= 0;
896 static unsigned jacoTimeres
= 30;
897 static int jacoShift
= 0;
899 memset(current
, 0, sizeof(subtitle
));
900 memset(line1
, 0, LINE_LEN
);
901 memset(line2
, 0, LINE_LEN
);
902 memset(directive
, 0, LINE_LEN
);
903 while (!current
->text
[0]) {
904 if (!stream_read_line(st
, line1
, LINE_LEN
, utf16
)) {
908 (line1
, "%u:%u:%u.%u %u:%u:%u.%u %[^\n\r]", &a1
, &a2
, &a3
, &a4
,
909 &b1
, &b2
, &b3
, &b4
, line2
) < 9) {
910 if (sscanf(line1
, "@%u @%u %[^\n\r]", &a4
, &b4
, line2
) < 3) {
911 if (line1
[0] == '#') {
912 int hours
= 0, minutes
= 0, seconds
, delta
, inverter
=
914 unsigned units
= jacoShift
;
915 switch (toupper(line1
[1])) {
917 if (isalpha(line1
[2])) {
922 if (sscanf(&line1
[delta
], "%d", &hours
)) {
927 if (sscanf(&line1
[delta
], "%*d:%d", &minutes
)) {
929 (&line1
[delta
], "%*d:%*d:%d",
931 sscanf(&line1
[delta
], "%*d:%*d:%*d.%d",
935 sscanf(&line1
[delta
], "%d:%d.%d",
936 &minutes
, &seconds
, &units
);
941 sscanf(&line1
[delta
], "%d.%d", &seconds
,
946 ((hours
* 3600 + minutes
* 60 +
947 seconds
) * jacoTimeres
+
952 if (isalpha(line1
[2])) {
957 sscanf(&line1
[delta
], "%u", &jacoTimeres
);
964 (unsigned long) ((a4
+ jacoShift
) * 100.0 /
967 (unsigned long) ((b4
+ jacoShift
) * 100.0 /
973 long) (((a1
* 3600 + a2
* 60 + a3
) * jacoTimeres
+ a4
+
974 jacoShift
) * 100.0 / jacoTimeres
);
977 long) (((b1
* 3600 + b2
* 60 + b3
) * jacoTimeres
+ b4
+
978 jacoShift
) * 100.0 / jacoTimeres
);
982 while ((*p
== ' ') || (*p
== '\t')) {
985 if (isalpha(*p
)||*p
== '[') {
988 if (sscanf(p
, "%s %[^\n\r]", directive
, line1
) < 2)
989 return (subtitle
*) ERR
;
990 jLength
= strlen(directive
);
991 for (cont
= 0; cont
< jLength
; ++cont
) {
992 if (isalpha(*(directive
+ cont
)))
993 *(directive
+ cont
) = toupper(*(directive
+ cont
));
995 if ((strstr(directive
, "RDB") != NULL
)
996 || (strstr(directive
, "RDC") != NULL
)
997 || (strstr(directive
, "RLB") != NULL
)
998 || (strstr(directive
, "RLG") != NULL
)) {
1001 if (strstr(directive
, "JL") != NULL
) {
1002 current
->alignment
= SUB_ALIGNMENT_BOTTOMLEFT
;
1003 } else if (strstr(directive
, "JR") != NULL
) {
1004 current
->alignment
= SUB_ALIGNMENT_BOTTOMRIGHT
;
1006 current
->alignment
= SUB_ALIGNMENT_BOTTOMCENTER
;
1008 strcpy(line2
, line1
);
1011 for (q
= line1
; (!eol(*p
)) && (current
->lines
< SUB_MAX_TEXT
); ++p
) {
1019 //the next line to get rid of a blank after the comment
1020 if ((*(p
+ 1)) == ' ')
1032 if ((*(p
+ 1) == ' ') || (*(p
+ 1) == '\t'))
1040 if (*(p
+ 1) == 'n') {
1043 current
->text
[current
->lines
++] = strdup(line1
);
1047 if ((toupper(*(p
+ 1)) == 'C')
1048 || (toupper(*(p
+ 1)) == 'F')) {
1052 if ((*(p
+ 1) == 'B') || (*(p
+ 1) == 'b') || (*(p
+ 1) == 'D') || //actually this means "insert current date here"
1053 (*(p
+ 1) == 'I') || (*(p
+ 1) == 'i') || (*(p
+ 1) == 'N') || (*(p
+ 1) == 'T') || //actually this means "insert current time here"
1054 (*(p
+ 1) == 'U') || (*(p
+ 1) == 'u')) {
1058 if ((*(p
+ 1) == '\\') ||
1059 (*(p
+ 1) == '~') || (*(p
+ 1) == '{')) {
1061 } else if (eol(*(p
+ 1))) {
1062 if (!stream_read_line(st
, directive
, LINE_LEN
, utf16
))
1064 trail_space(directive
);
1065 av_strlcat(line2
, directive
, LINE_LEN
);
1076 if (current
->lines
< SUB_MAX_TEXT
)
1077 current
->text
[current
->lines
] = strdup(line1
);
1079 if (current
->lines
< SUB_MAX_TEXT
)
1084 static int sub_autodetect (stream_t
* st
, int *uses_time
, int utf16
) {
1085 char line
[LINE_LEN
+1];
1090 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
1093 if (sscanf (line
, "{%d}{%d}", &i
, &i
)==2)
1094 {*uses_time
=0;return SUB_MICRODVD
;}
1095 if (sscanf (line
, "{%d}{}", &i
)==1)
1096 {*uses_time
=0;return SUB_MICRODVD
;}
1097 if (sscanf (line
, "[%d][%d]", &i
, &i
)==2)
1098 {*uses_time
=1;return SUB_MPL2
;}
1099 if (sscanf (line
, "%d:%d:%d.%d,%d:%d:%d.%d", &i
, &i
, &i
, &i
, &i
, &i
, &i
, &i
)==8)
1100 {*uses_time
=1;return SUB_SUBRIP
;}
1101 if (sscanf (line
, "%d:%d:%d%*1[,.:]%d --> %d:%d:%d%*1[,.:]%d", &i
, &i
, &i
, &i
, &i
, &i
, &i
, &i
) == 8)
1102 {*uses_time
=1;return SUB_SUBVIEWER
;}
1103 if (sscanf (line
, "{T %d:%d:%d:%d",&i
, &i
, &i
, &i
)==4)
1104 {*uses_time
=1;return SUB_SUBVIEWER2
;}
1105 if (strstr (line
, "<SAMI>"))
1106 {*uses_time
=1; return SUB_SAMI
;}
1107 if (sscanf(line
, "%d:%d:%d.%d %d:%d:%d.%d", &i
, &i
, &i
, &i
, &i
, &i
, &i
, &i
) == 8)
1108 {*uses_time
= 1; return SUB_JACOSUB
;}
1109 if (sscanf(line
, "@%d @%d", &i
, &i
) == 2)
1110 {*uses_time
= 1; return SUB_JACOSUB
;}
1111 if (sscanf (line
, "%d:%d:%d:", &i
, &i
, &i
)==3)
1112 {*uses_time
=1;return SUB_VPLAYER
;}
1113 if (sscanf (line
, "%d:%d:%d ", &i
, &i
, &i
)==3)
1114 {*uses_time
=1;return SUB_VPLAYER
;}
1115 if (!strncasecmp(line
, "<window", 7))
1116 {*uses_time
=1;return SUB_RT
;}
1117 if (!memcmp(line
, "Dialogue: Marked", 16))
1118 {*uses_time
=1; return SUB_SSA
;}
1119 if (!memcmp(line
, "Dialogue: ", 10))
1120 {*uses_time
=1; return SUB_SSA
;}
1121 if (sscanf (line
, "%d,%d,\"%c", &i
, &i
, (char *) &i
) == 3)
1122 {*uses_time
=1;return SUB_PJS
;}
1123 if (sscanf (line
, "FORMAT=%d", &i
) == 1)
1124 {*uses_time
=0; return SUB_MPSUB
;}
1125 if (!memcmp(line
, "FORMAT=TIME", 11))
1126 {*uses_time
=1; return SUB_MPSUB
;}
1127 if (strstr (line
, "-->>"))
1128 {*uses_time
=0; return SUB_AQTITLE
;}
1129 if (sscanf (line
, "[%d:%d:%d]", &i
, &i
, &i
)==3)
1130 {*uses_time
=1;return SUB_SUBRIP09
;}
1133 return SUB_INVALID
; // too many bad lines
1136 extern float sub_delay
;
1137 extern float sub_fps
;
1140 static iconv_t icdsc
= (iconv_t
)(-1);
1142 void subcp_open (stream_t
*st
)
1144 char *tocp
= "UTF-8";
1147 const char *cp_tmp
= sub_cp
;
1149 char enca_lang
[3], enca_fallback
[100];
1150 if (sscanf(sub_cp
, "enca:%2s:%99s", enca_lang
, enca_fallback
) == 2
1151 || sscanf(sub_cp
, "ENCA:%2s:%99s", enca_lang
, enca_fallback
) == 2) {
1152 if (st
&& st
->flags
& MP_STREAM_SEEK
) {
1153 cp_tmp
= guess_cp(st
, enca_lang
, enca_fallback
);
1155 cp_tmp
= enca_fallback
;
1157 mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"SUB: enca failed, stream must be seekable.\n");
1161 if ((icdsc
= iconv_open (tocp
, cp_tmp
)) != (iconv_t
)(-1)){
1162 mp_msg(MSGT_SUBREADER
,MSGL_V
,"SUB: opened iconv descriptor.\n");
1164 mp_msg(MSGT_SUBREADER
,MSGL_ERR
,"SUB: error opening iconv descriptor.\n");
1168 void subcp_close (void)
1170 if (icdsc
!= (iconv_t
)(-1)){
1171 (void) iconv_close (icdsc
);
1172 icdsc
= (iconv_t
)(-1);
1173 mp_msg(MSGT_SUBREADER
,MSGL_V
,"SUB: closed iconv descriptor.\n");
1177 subtitle
* subcp_recode (subtitle
*sub
)
1180 size_t ileft
, oleft
;
1182 if(icdsc
== (iconv_t
)(-1)) return sub
;
1185 ip
= sub
->text
[--l
];
1189 if (!(ot
= malloc(oleft
+ 1)))
1192 if (iconv(icdsc
, &ip
, &ileft
,
1193 &op
, &oleft
) == (size_t)(-1)) {
1194 mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"SUB: error recoding line.\n");
1198 // In some stateful encodings, we must clear the state to handle the last character
1199 if (iconv(icdsc
, NULL
, NULL
,
1200 &op
, &oleft
) == (size_t)(-1)) {
1201 mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"SUB: error recoding line, can't clear encoding state.\n");
1204 free (sub
->text
[l
]);
1211 static void adjust_subs_time(subtitle
* sub
, float subtime
, float fps
, int block
,
1212 int sub_num
, int sub_uses_time
) {
1216 unsigned long subfms
= (sub_uses_time
? 100 : fps
) * subtime
;
1217 unsigned long overlap
= (sub_uses_time
? 100 : fps
) / 5; // 0.2s
1221 if (sub
->end
<= sub
->start
){
1222 sub
->end
= sub
->start
+ subfms
;
1229 if ((sub
->end
> nextsub
->start
) && (sub
->end
<= nextsub
->start
+ overlap
)) {
1230 // these subtitles overlap for less than 0.2 seconds
1231 // and would result in very short overlapping subtitle
1232 // so let's fix the problem here, before overlapping code
1233 // get its hands on them
1234 unsigned delta
= sub
->end
- nextsub
->start
, half
= delta
/ 2;
1235 sub
->end
-= half
+ 1;
1236 nextsub
->start
+= delta
- half
;
1238 if (sub
->end
>= nextsub
->start
){
1239 sub
->end
= nextsub
->start
- 1;
1240 if (sub
->end
- sub
->start
> subfms
)
1241 sub
->end
= sub
->start
+ subfms
;
1248 * Movies are often converted from FILM (24 fps)
1249 * to PAL (25) by simply speeding them up, so we
1250 * have to multiply the original timestamps by
1251 * (Movie's FPS / Subtitle's (guessed) FPS)
1252 * so e.g. for a 23.98 fps movie and PAL time-based
1253 * subtitles we say -subfps 25 and we're fine!
1256 /* timed sub fps correction ::atmos */
1257 /* the frame-based case is handled in mpcommon.c
1258 * where find_sub is called */
1259 if(sub_uses_time
&& sub_fps
) {
1260 sub
->start
*= sub_fps
/fps
;
1261 sub
->end
*= sub_fps
/fps
;
1267 if (n
) mp_msg(MSGT_SUBREADER
,MSGL_V
,"SUB: Adjusted %d subtitle(s).\n", n
);
1271 subtitle
* (*read
)(stream_t
*st
, subtitle
*dest
,
1272 struct readline_args
*args
);
1273 void (*post
)(subtitle
*dest
);
/*
 * Guess the charset of a raw text buffer with the ENCA library.
 *
 * buffer, buflen:     bytes handed to enca_analyse_const().
 * preferred_language: only the ENCA language whose name equals this string
 *                     (compared case-insensitively with strcasecmp) is analysed.
 * fallback:           value returned, and logged, when nothing was detected.
 *
 * Returns the iconv-style name from enca_charset_name(), or fallback.
 *
 * NOTE(review): preferred_language reaches strcasecmp() without a NULL check in
 * the visible lines - confirm callers never pass NULL.
 */
1278 const char* guess_buffer_cp(unsigned char* buffer
, int buflen
, const char *preferred_language
, const char *fallback
)
1280 const char **languages
;
1282 EncaAnalyser analyser
;
1283 EncaEncoding encoding
;
1284 const char *detected_sub_cp
= NULL
;
1287 languages
= enca_get_languages(&langcnt
);
/* Verbose log of every language ENCA reports as supported. */
1288 mp_msg(MSGT_SUBREADER
, MSGL_V
, "ENCA supported languages: ");
1289 for (i
= 0; i
< langcnt
; i
++) {
1290 mp_msg(MSGT_SUBREADER
, MSGL_V
, "%s ", languages
[i
]);
1292 mp_msg(MSGT_SUBREADER
, MSGL_V
, "\n");
/* Analyse the buffer with the analyser of the preferred language only. */
1294 for (i
= 0; i
< langcnt
; i
++) {
1295 if (strcasecmp(languages
[i
], preferred_language
) != 0) continue;
1296 analyser
= enca_analyser_alloc(languages
[i
]);
1297 encoding
= enca_analyse_const(analyser
, buffer
, buflen
);
1298 enca_analyser_free(analyser
);
1299 if (encoding
.charset
!= ENCA_CS_UNKNOWN
) {
1300 detected_sub_cp
= enca_charset_name(encoding
.charset
, ENCA_NAME_STYLE_ICONV
);
/* Nothing detected: use the caller supplied fallback and say so. */
1307 if (!detected_sub_cp
) {
1308 detected_sub_cp
= fallback
;
1309 mp_msg(MSGT_SUBREADER
, MSGL_INFO
, "ENCA detection failed: fallback to %s\n", fallback
);
1311 mp_msg(MSGT_SUBREADER
, MSGL_INFO
, "ENCA detected charset: %s\n", detected_sub_cp
);
1314 return detected_sub_cp
;
1317 #define MAX_GUESS_BUFFER_SIZE (256*1024)
1318 const char* guess_cp(stream_t
*st
, const char *preferred_language
, const char *fallback
)
1321 unsigned char *buffer
;
1322 const char *detected_sub_cp
= NULL
;
1324 buffer
= malloc(MAX_GUESS_BUFFER_SIZE
);
1325 buflen
= stream_read(st
,buffer
, MAX_GUESS_BUFFER_SIZE
);
1327 detected_sub_cp
= guess_buffer_cp(buffer
, buflen
, preferred_language
, fallback
);
1333 return detected_sub_cp
;
1335 #undef MAX_GUESS_BUFFER_SIZE
/*
 * Read a subtitle file and return its parsed contents.
 *
 * filename: path of the subtitle file; NULL yields NULL.
 * fps:      forwarded to adjust_subs_time().
 * opts:     forwarded to open_stream() and to the format specific reader.
 *
 * Steps visible in this listing:
 *  - open the stream and autodetect the format (sub_autodetect is tried with
 *    utf16 = 0, 1, 2 until a format is recognised);
 *  - read entries one at a time through the selected reader (srp->read) into
 *    first[], keeping first[] ordered by start time;
 *  - either merge overlapping subtitles into a second array (second[]) or only
 *    call adjust_subs_time();
 *  - wrap the resulting array in a malloc'ed sub_data.
 *
 * Returns NULL when filename is NULL, the stream cannot be opened, the format
 * is not recognised, or return_sub is NULL.
 *
 * NOTE(review): only part of the function's lines appear in this listing;
 * statements between the visible ones (for example where the stream is
 * released) cannot be checked from here.
 */
1338 sub_data
* sub_read_file(char *filename
, float fps
, struct MPOpts
*opts
)
1342 int n_max
, n_first
, i
, j
, sub_first
, sub_orig
;
1343 subtitle
*first
, *second
, *sub
, *return_sub
, *alloced_sub
= NULL
;
1344 sub_data
*subt_data
;
1345 int uses_time
= 0, sub_num
= 0, sub_errs
= 0;
/* One entry per supported format: reader, optional post-processor, name. */
1346 static const struct subreader sr
[]=
1348 { sub_read_line_microdvd
, NULL
, "microdvd" },
1349 { sub_read_line_subrip
, NULL
, "subrip" },
1350 { sub_read_line_subviewer
, NULL
, "subviewer" },
1351 { sub_read_line_sami
, NULL
, "sami" },
1352 { sub_read_line_vplayer
, NULL
, "vplayer" },
1353 { sub_read_line_rt
, NULL
, "rt" },
1354 { sub_read_line_ssa
, sub_pp_ssa
, "ssa" },
1355 { sub_read_line_pjs
, NULL
, "pjs" },
1356 { sub_read_line_mpsub
, NULL
, "mpsub" },
1357 { sub_read_line_aqt
, NULL
, "aqt" },
1358 { sub_read_line_subviewer2
, NULL
, "subviewer 2.0" },
1359 { sub_read_line_subrip09
, NULL
, "subrip 0.9" },
1360 { sub_read_line_jacosub
, NULL
, "jacosub" },
1361 { sub_read_line_mpl2
, NULL
, "mpl2" }
1363 const struct subreader
*srp
;
1365 if(filename
==NULL
) return NULL
; //qnx segfault
1366 fd
=open_stream (filename
, opts
, NULL
); if (!fd
) return NULL
;
/* Retry autodetection with utf16 = 0, 1, 2 until a format is recognised. */
1368 sub_format
= SUB_INVALID
;
1369 for (utf16
= 0; sub_format
== SUB_INVALID
&& utf16
< 3; utf16
++) {
1370 sub_format
=sub_autodetect (fd
, &uses_time
, utf16
);
1376 mpsub_multiplier
= (uses_time
? 100.0 : 1.0);
/* NOTE(review): no release of fd is visible on this early return - confirm
 * the stream is not leaked when the format cannot be determined. */
1377 if (sub_format
==SUB_INVALID
) {mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"SUB: Could not determine file format\n");return NULL
;}
1379 mp_msg(MSGT_SUBREADER
, MSGL_V
, "SUB: Detected subtitle file format: %s\n", srp
->name
);
/* The end of filename is compared case-insensitively against the UTF
 * extensions below; subcp_open() is called only when k < 0. */
1385 if ((l
=strlen(filename
))>4){
1386 char *exts
[] = {".utf", ".utf8", ".utf-8" };
1388 if (l
>= strlen(exts
[k
]) && !strcasecmp(filename
+(l
- strlen(exts
[k
])), exts
[k
])){
1392 if (k
<0) subcp_open(fd
);
1397 first
=malloc(n_max
*sizeof(subtitle
));
1402 sub
= malloc(sizeof(subtitle
));
1403 //This is to deal with those formats (AQT & Subrip) which define the end of a subtitle
1404 //as the beginning of the following
1405 previous_sub_end
= 0;
1409 first
=realloc(first
,n_max
*sizeof(subtitle
));
1411 memset(sub
, '\0', sizeof(subtitle
));
1412 sub
=srp
->read(fd
, sub
, &(struct readline_args
){utf16
, opts
});
1413 if(!sub
) break; // EOF
1415 if (sub
!=ERR
) sub
=subcp_recode(sub
);
1426 // Apply any post processing that needs recoding first
1427 if ((sub
!=ERR
) && !sub_no_text_pp
&& srp
->post
) srp
->post(sub
);
/* Common case: the new entry does not start before the last stored one, so
 * it is appended at first[sub_num]. */
1428 if(!sub_num
|| (first
[sub_num
- 1].start
<= sub
->start
)){
1429 first
[sub_num
].start
= sub
->start
;
1430 first
[sub_num
].end
= sub
->end
;
1431 first
[sub_num
].lines
= sub
->lines
;
1432 first
[sub_num
].alignment
= sub
->alignment
;
1433 for(i
= 0; i
< sub
->lines
; ++i
){
1434 first
[sub_num
].text
[i
] = sub
->text
[i
];
1436 if (previous_sub_end
){
1437 first
[sub_num
- 1].end
= previous_sub_end
;
1438 previous_sub_end
= 0;
/* Out-of-order entry: move stored entries up one slot, walking backwards,
 * until the slot where the new entry belongs is reached. */
1441 for(j
= sub_num
- 1; j
>= 0; --j
){
1442 first
[j
+ 1].start
= first
[j
].start
;
1443 first
[j
+ 1].end
= first
[j
].end
;
1444 first
[j
+ 1].lines
= first
[j
].lines
;
1445 first
[j
+ 1].alignment
= first
[j
].alignment
;
1446 for(i
= 0; i
< first
[j
].lines
; ++i
){
1447 first
[j
+ 1].text
[i
] = first
[j
].text
[i
];
1449 if(!j
|| (first
[j
- 1].start
<= sub
->start
)){
1450 first
[j
].start
= sub
->start
;
1451 first
[j
].end
= sub
->end
;
1452 first
[j
].lines
= sub
->lines
;
1453 first
[j
].alignment
= sub
->alignment
;
/* NOTE(review): this copy walks all SUB_MAX_TEXT slots, whereas the append
 * path above copies only sub->lines entries - confirm the difference is
 * intentional. */
1454 for(i
= 0; i
< SUB_MAX_TEXT
; ++i
){
1455 first
[j
].text
[i
] = sub
->text
[i
];
1457 if (previous_sub_end
){
1458 first
[j
].end
= first
[j
- 1].end
;
1459 first
[j
- 1].end
= previous_sub_end
;
1460 previous_sub_end
= 0;
/* ERR results are counted as bad lines, everything else as a stored subtitle. */
1466 if(sub
==ERR
) ++sub_errs
; else ++sub_num
; // Error vs. Valid
1476 // printf ("SUB: Subtitle format %s time.\n", uses_time?"uses":"doesn't use");
1477 mp_msg(MSGT_SUBREADER
, MSGL_V
,"SUB: Read %i subtitles, %i bad line(s).\n",
1485 // we do overlap if the user forced it (suboverlap_enable == 2) or
1486 // the user didn't forced no-overlapsub and the format is Jacosub or Ssa.
1487 // this is because usually overlapping subtitles are found in these formats,
1488 // while in others they are probably result of bad timing
1489 if ((suboverlap_enabled
== 2) ||
1490 ((suboverlap_enabled
) && ((sub_format
== SUB_JACOSUB
) || (sub_format
== SUB_SSA
)))) {
1491 adjust_subs_time(first
, 6.0, fps
, 0, sub_num
, uses_time
);/*~6 secs AST*/
1492 // here we manage overlapping subtitles
1497 // for each subtitle in first[] we deal with its 'block' of
1499 for (sub_first
= 0; sub_first
< n_first
; ++sub_first
) {
1500 unsigned long global_start
= first
[sub_first
].start
,
1501 global_end
= first
[sub_first
].end
, local_start
, local_end
;
1502 int lines_to_add
= first
[sub_first
].lines
, sub_to_add
= 0,
1503 **placeholder
= NULL
, higher_line
= 0, counter
, start_block_sub
= sub_num
;
1504 char real_block
= 1;
1506 // here we find the number of subtitles inside the 'block'
1507 // and its span interval. this works well only with sorted
1509 while ((sub_first
+ sub_to_add
+ 1 < n_first
) && (first
[sub_first
+ sub_to_add
+ 1].start
< global_end
)) {
1511 lines_to_add
+= first
[sub_first
+ sub_to_add
].lines
;
1512 if (first
[sub_first
+ sub_to_add
].start
< global_start
) {
1513 global_start
= first
[sub_first
+ sub_to_add
].start
;
1515 if (first
[sub_first
+ sub_to_add
].end
> global_end
) {
1516 global_end
= first
[sub_first
+ sub_to_add
].end
;
1520 /* Avoid n^2 memory use for the "placeholder" data structure
1521 * below with subtitles that have a huge number of
1522 * consecutive overlapping lines. */
1523 lines_to_add
= FFMIN(lines_to_add
, SUB_MAX_TEXT
);
1525 // we need a structure to keep trace of the screen lines
1526 // used by the subs, a 'placeholder'
1527 counter
= 2 * sub_to_add
+ 1; // the maximum number of subs derived
1528 // from a block of sub_to_add+1 subs
1529 placeholder
= malloc(sizeof(int *) * counter
);
1530 for (i
= 0; i
< counter
; ++i
) {
1531 placeholder
[i
] = malloc(sizeof(int) * lines_to_add
);
1532 for (j
= 0; j
< lines_to_add
; ++j
) {
1533 placeholder
[i
][j
] = -1;
1538 local_end
= global_start
- 1;
1542 // here we find the beginning and the end of a new
1543 // subtitle in the block
1544 local_start
= local_end
+ 1;
1545 local_end
= global_end
;
1546 for (j
= 0; j
<= sub_to_add
; ++j
) {
1547 if ((first
[sub_first
+ j
].start
- 1 > local_start
) && (first
[sub_first
+ j
].start
- 1 < local_end
)) {
1548 local_end
= first
[sub_first
+ j
].start
- 1;
1549 } else if ((first
[sub_first
+ j
].end
> local_start
) && (first
[sub_first
+ j
].end
< local_end
)) {
1550 local_end
= first
[sub_first
+ j
].end
;
1553 // here we allocate the screen lines to subs we must
1554 // display in current local_start-local_end interval.
1555 // if the subs were yet presents in the previous interval
1556 // they keep the same lines, otherside they get unused lines
1557 for (j
= 0; j
<= sub_to_add
; ++j
) {
1558 if ((first
[sub_first
+ j
].start
<= local_end
) && (first
[sub_first
+ j
].end
> local_start
)) {
1559 unsigned long sub_lines
= first
[sub_first
+ j
].lines
, fragment_length
= lines_to_add
+ 1,
1562 int fragment_position
= -1;
1564 // if this is not the first new sub of the block
1565 // we find if this sub was present in the previous
1568 for (i
= 0; i
< lines_to_add
; ++i
) {
1569 if (placeholder
[counter
- 1][i
] == sub_first
+ j
) {
1570 placeholder
[counter
][i
] = sub_first
+ j
;
1577 // we are looking for the shortest among all groups of
1578 // sequential blank lines whose length is greater than or
1579 // equal to sub_lines. we store in fragment_position the
1580 // position of the shortest group, in fragment_length its
1581 // length, and in tmp the length of the group currently
1583 for (i
= 0; i
< lines_to_add
; ++i
) {
1584 if (placeholder
[counter
][i
] == -1) {
1585 // placeholder[counter][i] is part of the current group
1589 if (tmp
== sub_lines
) {
1590 // current group's size fits exactly the one we
1591 // need, so we stop looking
1592 fragment_position
= i
- tmp
;
1596 if ((tmp
) && (tmp
> sub_lines
) && (tmp
< fragment_length
)) {
1597 // current group is the best we found till here,
1598 // but is still bigger than the one we are looking
1599 // for, so we keep on looking
1600 fragment_length
= tmp
;
1601 fragment_position
= i
- tmp
;
1604 // current group doesn't fit at all, so we forget it
1610 // last screen line is blank, a group ends with it
1611 if ((tmp
>= sub_lines
) && (tmp
< fragment_length
)) {
1612 fragment_position
= i
- tmp
;
1615 if (fragment_position
== -1) {
1616 // it was not possible to find free screen line(s) for a subtitle,
1617 // usually this means a bug in the code; however we do not overlap
1618 mp_msg(MSGT_SUBREADER
, MSGL_WARN
, "SUB: we could not find a suitable position for an overlapping subtitle\n");
1619 higher_line
= SUB_MAX_TEXT
+ 1;
1622 for (tmp
= 0; tmp
< sub_lines
; ++tmp
) {
1623 placeholder
[counter
][fragment_position
+ tmp
] = sub_first
+ j
;
1628 for (j
= higher_line
+ 1; j
< lines_to_add
; ++j
) {
1629 if (placeholder
[counter
][j
] != -1)
1634 if (higher_line
>= SUB_MAX_TEXT
) {
1635 // the 'block' has too much lines, so we don't overlap the
1637 second
= realloc(second
, (sub_num
+ sub_to_add
+ 1) * sizeof(subtitle
));
1638 for (j
= 0; j
<= sub_to_add
; ++j
) {
1640 memset(&second
[sub_num
+ j
], '\0', sizeof(subtitle
));
1641 second
[sub_num
+ j
].start
= first
[sub_first
+ j
].start
;
1642 second
[sub_num
+ j
].end
= first
[sub_first
+ j
].end
;
1643 second
[sub_num
+ j
].lines
= first
[sub_first
+ j
].lines
;
1644 second
[sub_num
+ j
].alignment
= first
[sub_first
+ j
].alignment
;
1645 for (ls
= 0; ls
< second
[sub_num
+ j
].lines
; ls
++) {
1646 second
[sub_num
+ j
].text
[ls
] = strdup(first
[sub_first
+ j
].text
[ls
]);
1649 sub_num
+= sub_to_add
+ 1;
1650 sub_first
+= sub_to_add
;
1655 // we read the placeholder structure and create the new
1657 second
= realloc(second
, (sub_num
+ 1) * sizeof(subtitle
));
1658 memset(&second
[sub_num
], '\0', sizeof(subtitle
));
1659 second
[sub_num
].start
= local_start
;
1660 second
[sub_num
].end
= local_end
;
1661 second
[sub_num
].alignment
= first
[sub_first
].alignment
;
1662 n_max
= (lines_to_add
< SUB_MAX_TEXT
) ? lines_to_add
: SUB_MAX_TEXT
;
1663 for (i
= 0, j
= 0; j
< n_max
; ++j
) {
1664 if (placeholder
[counter
][j
] != -1) {
1665 int lines
= first
[placeholder
[counter
][j
]].lines
;
1666 for (ls
= 0; ls
< lines
; ++ls
) {
1667 second
[sub_num
].text
[i
++] = strdup(first
[placeholder
[counter
][j
]].text
[ls
]);
1671 second
[sub_num
].text
[i
++] = strdup(" ");
1676 } while (local_end
< global_end
);
1678 for (i
= 0; i
< counter
; ++i
)
1679 second
[start_block_sub
+ i
].lines
= higher_line
+ 1;
1681 counter
= 2 * sub_to_add
+ 1;
1682 for (i
= 0; i
< counter
; ++i
) {
1683 free(placeholder
[i
]);
1686 sub_first
+= sub_to_add
;
/* second[] holds strdup'ed copies of the text, so the text of the original
 * entries in first[] is freed here. */
1689 for (j
= sub_orig
- 1; j
>= 0; --j
) {
1690 for (i
= first
[j
].lines
- 1; i
>= 0; --i
) {
1691 free(first
[j
].text
[i
]);
1696 return_sub
= second
;
1697 } else { //if(suboverlap_enabled)
1698 adjust_subs_time(first
, 6.0, fps
, 1, sub_num
, uses_time
);/*~6 secs AST*/
/* Wrap the resulting array in a heap-allocated sub_data.
 * NOTE(review): no NULL check on this malloc() is visible in the listing. */
1701 if (return_sub
== NULL
) return NULL
;
1702 subt_data
= malloc(sizeof(sub_data
));
1703 subt_data
->filename
= strdup(filename
);
1704 subt_data
->sub_uses_time
= uses_time
;
1705 subt_data
->sub_num
= sub_num
;
1706 subt_data
->sub_errs
= sub_errs
;
1707 subt_data
->subtitles
= return_sub
;
/*
 * Print the contents of subd through mp_msg() at MSGL_INFO level.
 *
 * For every subtitle a header is printed with the format
 * "%i line%c (%li-%li)" (the 's' suffix is dropped when the subtitle has
 * exactly one line), followed by each text line prefixed with its index.
 * A summary follows: whether the format uses time, and the sub_num and
 * sub_errs counters of subd.
 */
1711 void list_sub_file(sub_data
* subd
){
1713 subtitle
*subs
= subd
->subtitles
;
1715 for(j
=0; j
< subd
->sub_num
; j
++){
1716 subtitle
* egysub
=&subs
[j
];
1717 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"%i line%c (%li-%li)\n",
1719 (1==egysub
->lines
)?' ':'s',
/* Every text line but the last is followed by the " \n " separator. */
1722 for (i
=0; i
<egysub
->lines
; i
++) {
1723 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"\t\t%d: %s%s", i
,egysub
->text
[i
], i
==egysub
->lines
-1?"":" \n ");
1725 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"\n");
1728 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"Subtitle format %s time.\n",
1729 subd
->sub_uses_time
? "uses":"doesn't use");
1730 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"Read %i subtitles, %i errors.\n", subd
->sub_num
, subd
->sub_errs
);
/*
 * Dump the subtitles in subd to the file "dumpsub.srt" in SubRip format.
 *
 * Each entry is written as its 1-based number, a
 * "HH:MM:SS,mmm --> HH:MM:SS,mmm" line and the text lines.
 * Frame-based timestamps (sub_uses_time == 0) are converted with
 * temp * 100 / sub_fps; sub_delay * 100 is subtracted from every timestamp.
 * Failure to open the file is reported with perror().
 *
 * fps: NOTE(review): presumably the fallback for sub_fps when it is 0 - the
 * statement guarded by that test is not visible in this listing.
 */
1733 void dump_srt(sub_data
* subd
, float fps
){
1739 subtitle
*subs
= subd
->subtitles
;
1741 if (!subd
->sub_uses_time
&& sub_fps
== 0)
1743 fd
=fopen("dumpsub.srt","w");
1746 perror("dump_srt: fopen");
1749 for(i
=0; i
< subd
->sub_num
; i
++)
1751 onesub
=subs
+i
; //=&subs[i];
1752 fprintf(fd
,"%d\n",i
+1);//line number
/* Start time: split the value (hundredths of a second) into h, m, s, ms. */
1755 if (!subd
->sub_uses_time
)
1756 temp
= temp
* 100 / sub_fps
;
1757 temp
-= sub_delay
* 100;
1758 h
=temp
/360000;temp
%=360000; //h =1*100*60*60
1759 m
=temp
/6000; temp
%=6000; //m =1*100*60
1760 s
=temp
/100; temp
%=100; //s =1*100
1761 ms
=temp
*10; //ms=1*10
1762 fprintf(fd
,"%02d:%02d:%02d,%03d --> ",h
,m
,s
,ms
);
/* End time: same conversion as for the start time. */
1765 if (!subd
->sub_uses_time
)
1766 temp
= temp
* 100 / sub_fps
;
1767 temp
-= sub_delay
* 100;
1768 h
=temp
/360000;temp
%=360000;
1769 m
=temp
/6000; temp
%=6000;
1770 s
=temp
/100; temp
%=100;
1772 fprintf(fd
,"%02d:%02d:%02d,%03d\n",h
,m
,s
,ms
);
1774 for(j
=0;j
<onesub
->lines
;j
++)
1775 fprintf(fd
,"%s\n",onesub
->text
[j
]);
1780 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dumpsub.srt\'.\n");
/*
 * Dump the subtitles in subd to the file "dump.mpsub" in MPsub format.
 *
 * The header is "FORMAT=TIME" for time-based subtitles, otherwise
 * "FORMAT=<fps>". Each entry is written as two numbers followed by its text
 * lines: the distance of the start from mpsub_position and the duration
 * (end - start). mpsub_position starts at the subtitle delay
 * (sub_delay*100 or sub_delay*fps) and is moved to the end of each subtitle.
 * Time-based values are divided by 100.0 and printed without decimals when
 * they are integral; frame-based values are scaled by fps/sub_fps.
 *
 * Side effects visible here: overwrites the file-level mpsub_position and
 * sets sub_fps to fps when sub_fps is 0.
 */
1783 void dump_mpsub(sub_data
* subd
, float fps
){
1787 subtitle
*subs
= subd
->subtitles
;
1789 mpsub_position
= subd
->sub_uses_time
? (sub_delay
*100) : (sub_delay
*fps
);
1790 if (sub_fps
==0) sub_fps
=fps
;
1792 fd
=fopen ("dump.mpsub", "w");
1794 perror ("dump_mpsub: fopen");
1799 if (subd
->sub_uses_time
) fprintf (fd
,"FORMAT=TIME\n\n");
1800 else fprintf (fd
, "FORMAT=%5.2f\n\n", fps
);
1802 for(j
=0; j
< subd
->sub_num
; j
++){
1803 subtitle
* egysub
=&subs
[j
];
1804 if (subd
->sub_uses_time
) {
/* a: offset of the start from the previous end, b: duration (seconds). */
1805 a
=((egysub
->start
-mpsub_position
)/100.0);
1806 b
=((egysub
->end
-egysub
->start
)/100.0);
1807 if ( (float)((int)a
) == a
)
1808 fprintf (fd
, "%.0f",a
);
1810 fprintf (fd
, "%.2f",a
);
1812 if ( (float)((int)b
) == b
)
1813 fprintf (fd
, " %.0f\n",b
);
1815 fprintf (fd
, " %.2f\n",b
);
1817 fprintf (fd
, "%ld %ld\n", (long)((egysub
->start
*(fps
/sub_fps
))-((mpsub_position
*(fps
/sub_fps
)))),
1818 (long)(((egysub
->end
)-(egysub
->start
))*(fps
/sub_fps
)));
1821 mpsub_position
= egysub
->end
;
1822 for (i
=0; i
<egysub
->lines
; i
++) {
1823 fprintf (fd
, "%s\n",egysub
->text
[i
]);
1828 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dump.mpsub\'.\n");
/*
 * Dump the subtitles in subd to the file "dumpsub.sub" in MicroDVD format:
 * "{start}{end}" followed by the text lines separated by "|".
 *
 * Time-based subtitles are converted with value * sub_fps / 100, the others
 * with value * sub_fps / fps. delay is computed as sub_delay * sub_fps;
 * NOTE(review): where delay is applied is not visible in this listing.
 * Failure to open the file is reported with perror().
 */
1831 void dump_microdvd(sub_data
* subd
, float fps
) {
1834 subtitle
*subs
= subd
->subtitles
;
1837 fd
= fopen("dumpsub.sub", "w");
1839 perror("dumpsub.sub: fopen");
1842 delay
= sub_delay
* sub_fps
;
1843 for (i
= 0; i
< subd
->sub_num
; ++i
) {
1845 start
= subs
[i
].start
;
1847 if (subd
->sub_uses_time
) {
1848 start
= start
* sub_fps
/ 100 ;
1849 end
= end
* sub_fps
/ 100;
1852 start
= start
* sub_fps
/ fps
;
1853 end
= end
* sub_fps
/ fps
;
1857 fprintf(fd
, "{%d}{%d}", start
, end
);
/* The first text line has no separator, later ones are prefixed with "|". */
1858 for (j
= 0; j
< subs
[i
].lines
; ++j
)
1859 fprintf(fd
, "%s%s", j
? "|" : "", subs
[i
].text
[j
]);
1863 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dumpsub.sub\'.\n");
/*
 * Dump the subtitles in subd to the file "dumpsub.jss" in JACOsub format.
 *
 * Writes a "#TIMERES" header (100 for time-based subtitles, otherwise
 * (int)sub_fps), then one line per subtitle: start and end as HH:MM:SS.cc,
 * the "{~}" marker and the text lines joined by a literal "\n" sequence.
 * Frame-based timestamps are converted with temp * 100 / sub_fps and
 * sub_delay * 100 is subtracted from every timestamp.
 * Failure to open the file is reported with perror().
 *
 * fps: NOTE(review): presumably the fallback for sub_fps when it is 0 - the
 * statement guarded by that test is not visible in this listing.
 *
 * The final log message names the file that is actually written
 * ("dumpsub.jss").
 */
1866 void dump_jacosub(sub_data
* subd
, float fps
) {
1872 subtitle
*subs
= subd
->subtitles
;
1874 if (!subd
->sub_uses_time
&& sub_fps
== 0)
1876 fd
=fopen("dumpsub.jss","w");
1879 perror("dump_jacosub: fopen");
1882 fprintf(fd
, "#TIMERES %d\n", (subd
->sub_uses_time
) ? 100 : (int)sub_fps
);
1883 for(i
=0; i
< subd
->sub_num
; i
++)
1885 onesub
=subs
+i
; //=&subs[i];
/* Start time: split the value (hundredths of a second) into h, m, s, cs. */
1888 if (!subd
->sub_uses_time
)
1889 temp
= temp
* 100 / sub_fps
;
1890 temp
-= sub_delay
* 100;
1891 h
=temp
/360000;temp
%=360000; //h =1*100*60*60
1892 m
=temp
/6000; temp
%=6000; //m =1*100*60
1893 s
=temp
/100; temp
%=100; //s =1*100
1895 fprintf(fd
,"%02d:%02d:%02d.%02d ",h
,m
,s
,cs
);
/* End time: same conversion as for the start time. */
1898 if (!subd
->sub_uses_time
)
1899 temp
= temp
* 100 / sub_fps
;
1900 temp
-= sub_delay
* 100;
1901 h
=temp
/360000;temp
%=360000;
1902 m
=temp
/6000; temp
%=6000;
1903 s
=temp
/100; temp
%=100;
1905 fprintf(fd
,"%02d:%02d:%02d.%02d {~} ",h
,m
,s
,cs
);
1907 for(j
=0;j
<onesub
->lines
;j
++)
1908 fprintf(fd
,"%s%s",j
? "\\n" : "", onesub
->text
[j
]);
1913 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dumpsub.jss\'.\n");
/*
 * Dump the subtitles in subd to the file "dumpsub.smi" in SAMI format.
 *
 * After the SAMI header, each subtitle produces a "<SYNC Start=...>" element
 * holding its text lines joined by "<br>", followed by a second
 * "<SYNC Start=...>" element with an empty paragraph.
 * Frame-based timestamps are converted with temp * 100 / sub_fps,
 * sub_delay * 100 is subtracted, and the result is printed multiplied by 10.
 * Failure to open the file is reported with perror(), labelled with this
 * function's own name.
 *
 * fps: NOTE(review): presumably the fallback for sub_fps when it is 0 - the
 * statement guarded by that test is not visible in this listing.
 */
1916 void dump_sami(sub_data
* subd
, float fps
) {
1921 subtitle
*subs
= subd
->subtitles
;
1923 if (!subd
->sub_uses_time
&& sub_fps
== 0)
1925 fd
=fopen("dumpsub.smi","w");
1928 perror("dump_sami: fopen");
1931 fprintf(fd
, "<SAMI>\n"
1933 " <STYLE TYPE=\"Text/css\">\n"
1935 " P {margin-left: 29pt; margin-right: 29pt; font-size: 24pt; text-align: center; font-family: Tahoma; font-weight: bold; color: #FCDD03; background-color: #000000;}\n"
1936 " .SUBTTL {Name: 'Subtitles'; Lang: en-US; SAMIType: CC;}\n"
1941 for(i
=0; i
< subd
->sub_num
; i
++)
1943 onesub
=subs
+i
; //=&subs[i];
1946 if (!subd
->sub_uses_time
)
1947 temp
= temp
* 100 / sub_fps
;
1948 temp
-= sub_delay
* 100;
1949 fprintf(fd
,"\t<SYNC Start=%lu>\n"
1950 "\t <P>", temp
* 10);
1952 for(j
=0;j
<onesub
->lines
;j
++)
1953 fprintf(fd
,"%s%s",j
? "<br>" : "", onesub
->text
[j
]);
1958 if (!subd
->sub_uses_time
)
1959 temp
= temp
* 100 / sub_fps
;
1960 temp
-= sub_delay
* 100;
1961 fprintf(fd
,"\t<SYNC Start=%lu>\n"
1962 "\t <P> \n", temp
* 10);
1964 fprintf(fd
, "</BODY>\n"
1967 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dumpsub.smi\'.\n");
/*
 * Release the memory owned by a sub_data.
 *
 * Frees every text line of every subtitle, then the subtitles array and the
 * filename string. A NULL subd is accepted and ignored.
 *
 * NOTE(review): freeing subd itself is not visible in this listing - confirm
 * against the full source.
 */
1970 void sub_free( sub_data
* subd
)
1974 if ( !subd
) return;
1976 for (i
= 0; i
< subd
->sub_num
; i
++)
1977 for (j
= 0; j
< subd
->subtitles
[i
].lines
; j
++)
1978 free( subd
->subtitles
[i
].text
[j
] );
1979 free( subd
->subtitles
);
1980 free( subd
->filename
);
/* Size bound for a single subtitle line; each line buffer is allocated with
 * MAX_SUBLINE + 1 bytes. */
1984 #define MAX_SUBLINE 512
/*
 * sub_add_text: append the text in txt to sub, starting a new line buffer
 * (malloc'ed, with endpts recorded for it) for every line that is begun.
 * Returns immediately when sub already holds SUB_MAX_TEXT lines, and logs
 * "Too many subtitle lines" when that limit is reached while splitting.
 *
 * NOTE(review): no NULL check on the malloc() results is visible in this
 * listing.
 */
1986 * \brief parse text and append it to subtitle in sub
1987 * \param sub subtitle struct to add text to
1988 * \param txt text to parse
1989 * \param len length of text in txt
1990 * \param endpts pts at which this subtitle text should be removed again
1992 * <> and {} are interpreted as comment delimiters, "\n", "\N", '\n', '\r'
1993 * and '\0' are interpreted as newlines, duplicate, leading and trailing
1994 * newlines are ignored.
1996 void sub_add_text(subtitle
*sub
, const char *txt
, int len
, double endpts
) {
1998 int double_newline
= 1; // ignore newlines at the beginning
2001 if (sub
->lines
>= SUB_MAX_TEXT
) return;
2003 buf
= malloc(MAX_SUBLINE
+ 1);
2004 sub
->text
[sub
->lines
] = buf
;
2005 sub
->endpts
[sub
->lines
] = endpts
;
2006 for (i
= 0; i
< len
&& pos
< MAX_SUBLINE
; i
++) {
/* comment bit 0 tracks an open '<', bit 1 an open '{'. */
2008 if (c
== '<') comment
|= 1;
2009 if (c
== '{') comment
|= 2;
2011 if (c
== '}') comment
&= ~2;
2012 if (c
== '>') comment
&= ~1;
2015 if (pos
== MAX_SUBLINE
- 1) {
/* A backslash followed by 'n' or 'N' is turned into a line break (c = 0). */
2019 if (c
== '\\' && i
+ 1 < len
) {
2021 if (c
== 'n' || c
== 'N') c
= 0;
2023 if (c
== '\n' || c
== '\r') c
= 0;
2027 } else if (!double_newline
) {
2028 if (sub
->lines
>= SUB_MAX_TEXT
- 1) {
2029 mp_msg(MSGT_VO
, MSGL_WARN
, "Too many subtitle lines\n");
2036 buf
= malloc(MAX_SUBLINE
+ 1);
2037 sub
->text
[sub
->lines
] = buf
;
2038 sub
->endpts
[sub
->lines
] = endpts
;
2042 if (sub
->lines
< SUB_MAX_TEXT
&&
2043 strlen(sub
->text
[sub
->lines
]))
2048 * \brief remove outdated subtitle lines.
2049 * \param sub subtitle struct to modify
2050 * \param pts current pts. All lines with endpts <= this will be removed.
2051 * Use MP_NOPTS_VALUE to remove all lines
2052 * \return 1 if sub was modified, 0 otherwise.
2054 int sub_clear_text(subtitle
*sub
, double pts
) {
2057 while (i
< sub
->lines
) {
2058 double endpts
= sub
->endpts
[i
];
2059 if (pts
== MP_NOPTS_VALUE
|| (endpts
!= MP_NOPTS_VALUE
&& pts
>= endpts
)) {
2062 for (j
= i
+ 1; j
< sub
->lines
; j
++) {
2063 sub
->text
[j
- 1] = sub
->text
[j
];
2064 sub
->endpts
[j
- 1] = sub
->endpts
[j
];