2 * Subtitle reader with format autodetection
4 * Copyright (c) 2001 laaz
5 * Some code cleanup & realloc() by A'rpi/ESP-team
7 * This file is part of MPlayer.
9 * MPlayer is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * MPlayer is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 #include <sys/types.h>
32 #include "subreader.h"
34 #include "subassconvert.h"
36 #include "stream/stream.h"
37 #include "libavutil/common.h"
38 #include "libavutil/avstring.h"
44 #define ERR ((void *) -1)
51 int suboverlap_enabled
= 1;
53 // Parameter struct for the format-specific readline functions
54 struct readline_args
{
59 /* Maximal length of line of a subtitle */
61 static float mpsub_position
=0;
62 static float mpsub_multiplier
=1.;
63 static int sub_slacktime
= 20000; //20 sec
65 int sub_no_text_pp
=0; // 1 => do not apply text post-processing
66 // like {\...} elimination in SSA format.
68 int sub_match_fuzziness
=0; // level of sub name matching fuzziness
70 /* Use the SUB_* constant defined in the header file */
71 int sub_format
=SUB_INVALID
;
73 Some subtitling formats, namely AQT and Subrip09, define the end of a
74 subtitle as the beginning of the following. Since currently we read one
75 subtitle at a time, previous_sub_end is used to communicate the end of the
76 previous subtitle from the code reading the next. The previous subtitle
77 is then modified to have the correct end time.
79 unsigned long previous_sub_end
;
/* Return 1 when p ends a line (CR, LF or the NUL terminator), 0 otherwise. */
static int eol(char p) {
    switch (p) {
    case '\r':
    case '\n':
    case '\0':
        return 1;
    default:
        return 0;
    }
}
/*
 * Remove leading and trailing whitespace from s, in place.
 *
 * s must be a writable, NUL-terminated string. The result is never longer
 * than the input, so no reallocation is needed.
 */
static void trail_space(char *s) {
    size_t lead = 0;
    size_t len;

    /* isspace() is only defined for unsigned char values (or EOF); a plain
     * char holding a byte >= 0x80 may be negative, hence the cast. */
    while (isspace((unsigned char)s[lead]))
        lead++;

    /* Source and destination overlap, so memmove() is required; strcpy()
     * on overlapping buffers is undefined behaviour. */
    if (lead)
        memmove(s, s + lead, strlen(s + lead) + 1);

    /* Trim from the end but never touch index 0: after the step above it is
     * either a non-space character or the terminator. */
    len = strlen(s);
    while (len > 1 && isspace((unsigned char)s[len - 1]))
        s[--len] = '\0';
}
/*
 * Case-insensitive substring search.
 *
 * Returns a pointer to the first position in haystack at which needle
 * matches when compared with strncasecmp(), or NULL when there is no match
 * or either argument is NULL. An empty haystack never matches.
 */
static char *stristr(const char *haystack, const char *needle) {
    size_t needle_len;
    const char *cursor;

    if (haystack == NULL || needle == NULL)
        return NULL;

    needle_len = strlen(needle);
    for (cursor = haystack; *cursor != '\0'; cursor++) {
        if (strncasecmp(cursor, needle, needle_len) == 0)
            return (char *)cursor;
    }

    return NULL;
}
109 static void sami_add_line(subtitle
*current
, char *buffer
, char **pos
) {
113 if (*buffer
&& current
->lines
< SUB_MAX_TEXT
)
114 current
->text
[current
->lines
++] = strdup(buffer
);
118 static subtitle
*sub_read_line_sami(stream_t
* st
, subtitle
*current
,
119 struct readline_args
*args
)
121 int utf16
= args
->utf16
;
122 static char line
[LINE_LEN
+1];
123 static char *s
= NULL
, *slacktime_s
;
124 char text
[LINE_LEN
+1], *p
=NULL
, *q
;
127 current
->lines
= current
->start
= current
->end
= 0;
128 current
->alignment
= SUB_ALIGNMENT_BOTTOMCENTER
;
131 /* read the first line */
133 if (!(s
= stream_read_line(st
, line
, LINE_LEN
, utf16
))) return 0;
138 case 0: /* find "START=" or "Slacktime:" */
139 slacktime_s
= stristr (s
, "Slacktime:");
141 sub_slacktime
= strtol (slacktime_s
+10, NULL
, 0) / 10;
143 s
= stristr (s
, "Start=");
145 current
->start
= strtol (s
+ 6, &s
, 0) / 10;
147 for (; *s
!= '>' && *s
!= '\0'; s
++);
153 case 1: /* find (optional) "<P", skip other TAGs */
154 for (; *s
== ' ' || *s
== '\t'; s
++); /* strip blanks, if any */
155 if (*s
== '\0') break;
156 if (*s
!= '<') { state
= 3; p
= text
; continue; } /* not a TAG */
158 if (*s
== 'P' || *s
== 'p') { s
++; state
= 2; continue; } /* found '<P' */
159 for (; *s
!= '>' && *s
!= '\0'; s
++); /* skip remains of non-<P> TAG */
165 case 2: /* find ">" */
166 if ((s
= strchr (s
, '>'))) { s
++; state
= 3; p
= text
; continue; }
169 case 3: /* get all text until '<' appears */
170 if (p
- text
>= LINE_LEN
)
171 sami_add_line(current
, text
, &p
);
172 if (*s
== '\0') break;
173 else if (!strncasecmp (s
, "<br>", 4)) {
174 sami_add_line(current
, text
, &p
);
177 else if ((*s
== '{') && !sub_no_text_pp
) { state
= 5; ++s
; continue; }
178 else if (*s
== '<') { state
= 4; }
179 else if (!strncasecmp (s
, " ", 6)) { *p
++ = ' '; s
+= 6; }
180 else if (*s
== '\t') { *p
++ = ' '; s
++; }
181 else if (*s
== '\r' || *s
== '\n') { s
++; }
184 /* skip duplicated space */
185 if (p
> text
+ 2) if (*(p
-1) == ' ' && *(p
-2) == ' ') p
--;
189 case 4: /* get current->end or skip <TAG> */
190 q
= stristr (s
, "Start=");
192 current
->end
= strtol (q
+ 6, &q
, 0) / 10 - 1;
193 *p
= '\0'; trail_space (text
);
195 current
->text
[current
->lines
++] = strdup (text
);
196 if (current
->lines
> 0) { state
= 99; break; }
200 if (s
) { s
++; state
= 3; continue; }
202 case 5: /* get rid of {...} text, but read the alignment code */
203 if ((*s
== '\\') && (*(s
+ 1) == 'a') && !sub_no_text_pp
) {
204 if (stristr(s
, "\\a1") != NULL
) {
205 current
->alignment
= SUB_ALIGNMENT_BOTTOMLEFT
;
208 if (stristr(s
, "\\a2") != NULL
) {
209 current
->alignment
= SUB_ALIGNMENT_BOTTOMCENTER
;
211 } else if (stristr(s
, "\\a3") != NULL
) {
212 current
->alignment
= SUB_ALIGNMENT_BOTTOMRIGHT
;
214 } else if ((stristr(s
, "\\a4") != NULL
) || (stristr(s
, "\\a5") != NULL
) || (stristr(s
, "\\a8") != NULL
)) {
215 current
->alignment
= SUB_ALIGNMENT_TOPLEFT
;
217 } else if (stristr(s
, "\\a6") != NULL
) {
218 current
->alignment
= SUB_ALIGNMENT_TOPCENTER
;
220 } else if (stristr(s
, "\\a7") != NULL
) {
221 current
->alignment
= SUB_ALIGNMENT_TOPRIGHT
;
223 } else if (stristr(s
, "\\a9") != NULL
) {
224 current
->alignment
= SUB_ALIGNMENT_MIDDLELEFT
;
226 } else if (stristr(s
, "\\a10") != NULL
) {
227 current
->alignment
= SUB_ALIGNMENT_MIDDLECENTER
;
229 } else if (stristr(s
, "\\a11") != NULL
) {
230 current
->alignment
= SUB_ALIGNMENT_MIDDLERIGHT
;
234 if (*s
== '}') state
= 3;
240 if (state
!= 99 && !(s
= stream_read_line (st
, line
, LINE_LEN
, utf16
))) {
241 if (current
->start
> 0) {
242 break; // if it is the last subtitle
248 } while (state
!= 99);
250 // For the last subtitle
251 if (current
->end
<= 0) {
252 current
->end
= current
->start
+ sub_slacktime
;
253 sami_add_line(current
, text
, &p
);
260 static char *sub_readtext(char *source
, char **dest
) {
264 // printf("src=%p dest=%p \n",source,dest);
266 while ( !eol(*p
) && *p
!= '|' ) {
270 *dest
= malloc (len
+1);
271 if (!dest
) {return ERR
;}
273 strncpy(*dest
, source
, len
);
276 while (*p
=='\r' || *p
=='\n' || *p
=='|') p
++;
278 if (*p
) return p
; // not-last text field
279 else return NULL
; // last text field
282 static subtitle
*sub_read_line_microdvd(stream_t
*st
,subtitle
*current
,
283 struct readline_args
*args
)
285 int utf16
= args
->utf16
;
286 char line
[LINE_LEN
+1];
287 char line2
[LINE_LEN
+1];
292 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
293 } while ((sscanf (line
,
295 &(current
->start
), line2
) < 2) &&
297 "{%ld}{%ld}%[^\r\n]",
298 &(current
->start
), &(current
->end
), line2
) < 3));
300 if (args
->opts
->ass_enabled
) {
301 subassconvert_microdvd(line2
, line
, LINE_LEN
+ 1);
307 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
308 if (current
->text
[i
]==ERR
) {return ERR
;}
310 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
317 static subtitle
*sub_read_line_mpl2(stream_t
*st
,subtitle
*current
,
318 struct readline_args
*args
)
320 int utf16
= args
->utf16
;
321 char line
[LINE_LEN
+1];
322 char line2
[LINE_LEN
+1];
327 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
328 } while ((sscanf (line
,
329 "[%ld][%ld]%[^\r\n]",
330 &(current
->start
), &(current
->end
), line2
) < 3));
331 current
->start
*= 10;
336 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
337 if (current
->text
[i
]==ERR
) {return ERR
;}
339 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
346 static subtitle
*sub_read_line_subrip(stream_t
* st
, subtitle
*current
,
347 struct readline_args
*args
)
349 int utf16
= args
->utf16
;
350 char line
[LINE_LEN
+1];
351 int a1
,a2
,a3
,a4
,b1
,b2
,b3
,b4
;
352 char *p
=NULL
, *q
=NULL
;
356 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
357 if (sscanf (line
, "%d:%d:%d.%d,%d:%d:%d.%d",&a1
,&a2
,&a3
,&a4
,&b1
,&b2
,&b3
,&b4
) < 8) continue;
358 current
->start
= a1
*360000+a2
*6000+a3
*100+a4
;
359 current
->end
= b1
*360000+b2
*6000+b3
*100+b4
;
361 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
364 for (current
->lines
=1; current
->lines
< SUB_MAX_TEXT
; current
->lines
++) {
365 for (q
=p
,len
=0; *p
&& *p
!='\r' && *p
!='\n' && *p
!='|' && strncmp(p
,"[br]",4); p
++,len
++);
366 current
->text
[current
->lines
-1]=malloc (len
+1);
367 if (!current
->text
[current
->lines
-1]) return ERR
;
368 strncpy (current
->text
[current
->lines
-1], q
, len
);
369 current
->text
[current
->lines
-1][len
]='\0';
370 if (!*p
|| *p
=='\r' || *p
=='\n') break;
372 else while (*p
++!=']');
379 static subtitle
*sub_ass_read_line_subviewer(stream_t
*st
, subtitle
*current
,
380 struct readline_args
*args
)
382 int utf16
= args
->utf16
;
383 int a1
, a2
, a3
, a4
, b1
, b2
, b3
, b4
, j
= 0;
385 while (!current
->text
[0]) {
386 char line
[LINE_LEN
+ 1], full_line
[LINE_LEN
+ 1], sep
;
389 /* Parse SubRip header */
390 if (!stream_read_line(st
, line
, LINE_LEN
, utf16
))
392 if (sscanf(line
, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",
393 &a1
, &a2
, &a3
, &sep
, &a4
, &b1
, &b2
, &b3
, &sep
, &b4
) < 10)
396 current
->start
= a1
* 360000 + a2
* 6000 + a3
* 100 + a4
/ 10;
397 current
->end
= b1
* 360000 + b2
* 6000 + b3
* 100 + b4
/ 10;
401 for (i
= 0; i
< SUB_MAX_TEXT
; i
++) {
402 int blank
= 1, len
= 0;
405 if (!stream_read_line(st
, line
, LINE_LEN
, utf16
))
408 for (p
= line
; *p
!= '\n' && *p
!= '\r' && *p
; p
++, len
++)
409 if (*p
!= ' ' && *p
!= '\t')
417 if (!(j
+ 1 + len
< sizeof(full_line
) - 1))
421 full_line
[j
++] = '\n';
422 strcpy(&full_line
[j
], line
);
426 /* Use the ASS/SSA converter to transform the whole lines */
428 char converted_line
[LINE_LEN
+ 1];
429 subassconvert_subrip(full_line
, converted_line
, LINE_LEN
+ 1);
430 current
->text
[0] = strdup(converted_line
);
437 static subtitle
*sub_read_line_subviewer(stream_t
*st
,subtitle
*current
,
438 struct readline_args
*args
)
440 int utf16
= args
->utf16
;
441 char line
[LINE_LEN
+1];
442 int a1
,a2
,a3
,a4
,b1
,b2
,b3
,b4
;
446 if (args
->opts
->ass_enabled
)
447 return sub_ass_read_line_subviewer(st
, current
, args
);
448 while (!current
->text
[0]) {
449 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
450 if ((len
=sscanf (line
, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",&a1
,&a2
,&a3
,(char *)&i
,&a4
,&b1
,&b2
,&b3
,(char *)&i
,&b4
)) < 10)
452 current
->start
= a1
*360000+a2
*6000+a3
*100+a4
/10;
453 current
->end
= b1
*360000+b2
*6000+b3
*100+b4
/10;
454 for (i
=0; i
<SUB_MAX_TEXT
;) {
456 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) break;
458 for (p
=line
; *p
!='\n' && *p
!='\r' && *p
; p
++,len
++)
459 if (*p
!= ' ' && *p
!= '\t')
463 char *curptr
=current
->text
[i
]=malloc (len
+1);
464 if (!current
->text
[i
]) return ERR
;
465 //strncpy (current->text[i], line, len); current->text[i][len]='\0';
467 /* let's filter html tags ::atmos */
494 static subtitle
*sub_read_line_subviewer2(stream_t
*st
,subtitle
*current
,
495 struct readline_args
*args
)
497 int utf16
= args
->utf16
;
498 char line
[LINE_LEN
+1];
503 while (!current
->text
[0]) {
504 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
507 if ((len
=sscanf (line
, "{T %d:%d:%d:%d",&a1
,&a2
,&a3
,&a4
)) < 4)
509 current
->start
= a1
*360000+a2
*6000+a3
*100+a4
/10;
510 for (i
=0; i
<SUB_MAX_TEXT
;) {
511 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) break;
512 if (line
[0]=='}') break;
514 for (p
=line
; *p
!='\n' && *p
!='\r' && *p
; ++p
,++len
);
516 current
->text
[i
]=malloc (len
+1);
517 if (!current
->text
[i
]) return ERR
;
518 strncpy (current
->text
[i
], line
, len
); current
->text
[i
][len
]='\0';
530 static subtitle
*sub_read_line_vplayer(stream_t
*st
,subtitle
*current
,
531 struct readline_args
*args
)
533 int utf16
= args
->utf16
;
534 char line
[LINE_LEN
+1];
536 char *p
=NULL
, *next
,separator
;
539 while (!current
->text
[0]) {
540 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
541 if ((len
=sscanf (line
, "%d:%d:%d%c%n",&a1
,&a2
,&a3
,&separator
,&plen
)) < 4)
544 if (!(current
->start
= a1
*360000+a2
*6000+a3
*100))
548 // finds the body of the subtitle
555 printf("SUB: Skipping incorrect subtitle line!\n");
559 // by wodzu: hey! this time we know what length it has! what is
560 // that magic for? it can't deal with space instead of third
561 // colon! look, what simple it can be:
568 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
569 if (current
->text
[i
]==ERR
) {return ERR
;}
571 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
579 static subtitle
*sub_read_line_rt(stream_t
*st
,subtitle
*current
,
580 struct readline_args
*args
)
582 int utf16
= args
->utf16
;
584 //TODO: This format uses quite rich (sub/super)set of xhtml
585 // I couldn't check it since DTD is not included.
586 // WARNING: a full XML parser may be required for proper parsing
587 char line
[LINE_LEN
+1];
588 int a1
,a2
,a3
,a4
,b1
,b2
,b3
,b4
;
589 char *p
=NULL
,*next
=NULL
;
592 while (!current
->text
[0]) {
593 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
594 //TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
595 //to describe the same moment in time. Maybe there are even more formats in use.
596 //if ((len=sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8)
597 plen
=a1
=a2
=a3
=a4
=b1
=b2
=b3
=b4
=0;
599 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d.%d\" %*[Ee]nd=\"%d.%d\"%*[^<]<clear/>%n",&a3
,&a4
,&b3
,&b4
,&plen
)) < 4) &&
600 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a3
,&a4
,&b2
,&b3
,&b4
,&plen
)) < 5) &&
601 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2
,&a3
,&b2
,&b3
,&plen
)) < 4) &&
602 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2
,&a3
,&b2
,&b3
,&b4
,&plen
)) < 5) &&
603 // ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen)) < 5) &&
604 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2
,&a3
,&a4
,&b2
,&b3
,&b4
,&plen
)) < 6) &&
605 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1
,&a2
,&a3
,&a4
,&b1
,&b2
,&b3
,&b4
,&plen
)) < 8) &&
606 //now try it without end time
607 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d.%d\"%*[^<]<clear/>%n",&a3
,&a4
,&plen
)) < 2) &&
608 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d\"%*[^<]<clear/>%n",&a2
,&a3
,&plen
)) < 2) &&
609 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2
,&a3
,&a4
,&plen
)) < 3) &&
610 ((len
=sscanf (line
, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1
,&a2
,&a3
,&a4
,&plen
)) < 4)
613 current
->start
= a1
*360000+a2
*6000+a3
*100+a4
/10;
614 current
->end
= b1
*360000+b2
*6000+b3
*100+b4
/10;
615 if (b1
== 0 && b2
== 0 && b3
== 0 && b4
== 0)
616 current
->end
= current
->start
+200;
618 // TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml?
619 next
= strstr(line
,"<clear/>");
620 if(next
&& strlen(next
)>8){
622 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
623 if (current
->text
[i
]==ERR
) {return ERR
;}
625 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
633 static subtitle
*sub_read_line_ssa(stream_t
*st
,subtitle
*current
,
634 struct readline_args
*args
)
636 /* Instead of hardcoding the expected fields and their order on
637 * each dialogue line, this code should parse the "Format: " line
638 * which lists the fields used in the script. As is, this may not
639 * work correctly with all scripts.
642 int utf16
= args
->utf16
;
645 int hour1
, min1
, sec1
, hunsec1
,
646 hour2
, min2
, sec2
, hunsec2
, nothing
;
649 char line
[LINE_LEN
+1],
655 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) return NULL
;
656 } while (sscanf (line
, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d"
657 "%[^\n\r]", ¬hing
,
658 &hour1
, &min1
, &sec1
, &hunsec1
,
659 &hour2
, &min2
, &sec2
, &hunsec2
,
662 sscanf (line
, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d"
663 "%[^\n\r]", ¬hing
,
664 &hour1
, &min1
, &sec1
, &hunsec1
,
665 &hour2
, &min2
, &sec2
, &hunsec2
,
668 line2
=strchr(line3
, ',');
669 if (!line2
) return NULL
;
671 for (comma
= 3; comma
< 9; comma
++)
672 if (!(line2
= strchr(++line2
, ',')))
676 current
->lines
=0;num
=0;
677 current
->start
= 360000*hour1
+ 6000*min1
+ 100*sec1
+ hunsec1
;
678 current
->end
= 360000*hour2
+ 6000*min2
+ 100*sec2
+ hunsec2
;
680 while (((tmp
=strstr(line2
, "\\n")) != NULL
) || ((tmp
=strstr(line2
, "\\N")) != NULL
) ){
681 current
->text
[num
]=malloc(tmp
-line2
+1);
682 strncpy (current
->text
[num
], line2
, tmp
-line2
);
683 current
->text
[num
][tmp
-line2
]='\0';
687 if (current
->lines
>= SUB_MAX_TEXT
) return current
;
690 current
->text
[num
]=strdup(line2
);
696 static void sub_pp_ssa(subtitle
*sub
)
698 for (int i
= 0; i
< sub
->lines
; i
++) {
700 s
= d
= sub
->text
[i
];
703 while (*s
&& *s
++ != '}');
711 * PJS subtitles reader.
712 * That's the "Phoenix Japanimation Society" format.
713 * I found some of them in http://www.scriptsclub.org/ (used for anime).
714 * The time is in tenths of second.
716 * by set, based on code by szabi (dunnowhat sub format ;-)
718 static subtitle
*sub_read_line_pjs(stream_t
*st
,subtitle
*current
,
719 struct readline_args
*args
)
721 int utf16
= args
->utf16
;
722 char line
[LINE_LEN
+1];
723 char text
[LINE_LEN
+1], *s
, *d
;
725 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
728 for (s
=line
; *s
&& isspace(*s
); s
++);
729 /* allow empty lines at the end of the file */
733 if (sscanf (s
, "%ld,%ld,", &(current
->start
),
734 &(current
->end
)) <2) {
737 /* the files I have are in tenths of second */
738 current
->start
*= 10;
740 /* walk to the beginning of the string */
741 for (; *s
; s
++) if (*s
==',') break;
743 for (s
++; *s
; s
++) if (*s
==',') break;
749 /* copy the string to the text buffer */
750 for (s
++, d
=text
; *s
&& *s
!='"'; s
++, d
++)
753 current
->text
[0] = strdup(text
);
759 static subtitle
*sub_read_line_mpsub(stream_t
*st
, subtitle
*current
,
760 struct readline_args
*args
)
762 int utf16
= args
->utf16
;
763 char line
[LINE_LEN
+1];
770 if (!stream_read_line(st
, line
, LINE_LEN
, utf16
)) return NULL
;
771 } while (sscanf (line
, "%f %f", &a
, &b
) !=2);
773 mpsub_position
+= a
*mpsub_multiplier
;
774 current
->start
=(int) mpsub_position
;
775 mpsub_position
+= b
*mpsub_multiplier
;
776 current
->end
=(int) mpsub_position
;
778 while (num
< SUB_MAX_TEXT
) {
779 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
)) {
780 if (num
== 0) return NULL
;
784 while (isspace(*p
)) p
++;
785 if (eol(*p
) && num
> 0) return current
;
786 if (eol(*p
)) return NULL
;
788 for (q
=p
; !eol(*q
); q
++);
791 current
->text
[num
]=strdup(p
);
792 // printf (">%s<\n",p);
793 current
->lines
= ++num
;
795 if (num
) return current
;
799 return NULL
; // we should have returned before if it's OK
802 static subtitle
*sub_read_line_aqt(stream_t
*st
,subtitle
*current
,
803 struct readline_args
*args
)
805 int utf16
= args
->utf16
;
806 char line
[LINE_LEN
+1];
811 // try to locate next subtitle
812 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
814 if (!(sscanf (line
, "-->> %ld", &(current
->start
)) <1))
818 previous_sub_end
= (current
->start
) ? current
->start
- 1 : 0;
820 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
823 sub_readtext((char *) &line
,¤t
->text
[0]);
825 current
->end
= current
->start
; // will be corrected by next subtitle
827 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
831 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
832 if (current
->text
[i
]==ERR
) {return ERR
;}
834 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
838 if (!strlen(current
->text
[0]) && !strlen(current
->text
[1])) {
839 previous_sub_end
= 0;
846 static subtitle
*sub_read_line_subrip09(stream_t
*st
,subtitle
*current
,
847 struct readline_args
*args
)
849 int utf16
= args
->utf16
;
850 char line
[LINE_LEN
+1];
856 // try to locate next subtitle
857 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
859 if (!((len
=sscanf (line
, "[%d:%d:%d]",&a1
,&a2
,&a3
)) < 3))
863 current
->start
= a1
*360000+a2
*6000+a3
*100;
865 previous_sub_end
= (current
->start
) ? current
->start
- 1 : 0;
867 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
872 current
->text
[0]=""; // just to be sure that string is clear
874 while ((next
=sub_readtext (next
, &(current
->text
[i
])))) {
875 if (current
->text
[i
]==ERR
) {return ERR
;}
877 if (i
>=SUB_MAX_TEXT
) { mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"Too many lines in a subtitle\n");current
->lines
=i
;return current
;}
881 if (!strlen(current
->text
[0]) && (i
==0)) {
882 previous_sub_end
= 0;
889 static subtitle
*sub_read_line_jacosub(stream_t
* st
, subtitle
* current
,
890 struct readline_args
*args
)
892 int utf16
= args
->utf16
;
893 char line1
[LINE_LEN
], line2
[LINE_LEN
], directive
[LINE_LEN
], *p
, *q
;
894 unsigned a1
, a2
, a3
, a4
, b1
, b2
, b3
, b4
, comment
= 0;
895 static unsigned jacoTimeres
= 30;
896 static int jacoShift
= 0;
898 memset(current
, 0, sizeof(subtitle
));
899 memset(line1
, 0, LINE_LEN
);
900 memset(line2
, 0, LINE_LEN
);
901 memset(directive
, 0, LINE_LEN
);
902 while (!current
->text
[0]) {
903 if (!stream_read_line(st
, line1
, LINE_LEN
, utf16
)) {
907 (line1
, "%u:%u:%u.%u %u:%u:%u.%u %[^\n\r]", &a1
, &a2
, &a3
, &a4
,
908 &b1
, &b2
, &b3
, &b4
, line2
) < 9) {
909 if (sscanf(line1
, "@%u @%u %[^\n\r]", &a4
, &b4
, line2
) < 3) {
910 if (line1
[0] == '#') {
911 int hours
= 0, minutes
= 0, seconds
, delta
, inverter
=
913 unsigned units
= jacoShift
;
914 switch (toupper(line1
[1])) {
916 if (isalpha(line1
[2])) {
921 if (sscanf(&line1
[delta
], "%d", &hours
)) {
926 if (sscanf(&line1
[delta
], "%*d:%d", &minutes
)) {
928 (&line1
[delta
], "%*d:%*d:%d",
930 sscanf(&line1
[delta
], "%*d:%*d:%*d.%d",
934 sscanf(&line1
[delta
], "%d:%d.%d",
935 &minutes
, &seconds
, &units
);
940 sscanf(&line1
[delta
], "%d.%d", &seconds
,
945 ((hours
* 3600 + minutes
* 60 +
946 seconds
) * jacoTimeres
+
951 if (isalpha(line1
[2])) {
956 sscanf(&line1
[delta
], "%u", &jacoTimeres
);
963 (unsigned long) ((a4
+ jacoShift
) * 100.0 /
966 (unsigned long) ((b4
+ jacoShift
) * 100.0 /
972 long) (((a1
* 3600 + a2
* 60 + a3
) * jacoTimeres
+ a4
+
973 jacoShift
) * 100.0 / jacoTimeres
);
976 long) (((b1
* 3600 + b2
* 60 + b3
) * jacoTimeres
+ b4
+
977 jacoShift
) * 100.0 / jacoTimeres
);
981 while ((*p
== ' ') || (*p
== '\t')) {
984 if (isalpha(*p
)||*p
== '[') {
987 if (sscanf(p
, "%s %[^\n\r]", directive
, line1
) < 2)
988 return (subtitle
*) ERR
;
989 jLength
= strlen(directive
);
990 for (cont
= 0; cont
< jLength
; ++cont
) {
991 if (isalpha(*(directive
+ cont
)))
992 *(directive
+ cont
) = toupper(*(directive
+ cont
));
994 if ((strstr(directive
, "RDB") != NULL
)
995 || (strstr(directive
, "RDC") != NULL
)
996 || (strstr(directive
, "RLB") != NULL
)
997 || (strstr(directive
, "RLG") != NULL
)) {
1000 if (strstr(directive
, "JL") != NULL
) {
1001 current
->alignment
= SUB_ALIGNMENT_BOTTOMLEFT
;
1002 } else if (strstr(directive
, "JR") != NULL
) {
1003 current
->alignment
= SUB_ALIGNMENT_BOTTOMRIGHT
;
1005 current
->alignment
= SUB_ALIGNMENT_BOTTOMCENTER
;
1007 strcpy(line2
, line1
);
1010 for (q
= line1
; (!eol(*p
)) && (current
->lines
< SUB_MAX_TEXT
); ++p
) {
1018 //the next line to get rid of a blank after the comment
1019 if ((*(p
+ 1)) == ' ')
1031 if ((*(p
+ 1) == ' ') || (*(p
+ 1) == '\t'))
1039 if (*(p
+ 1) == 'n') {
1042 current
->text
[current
->lines
++] = strdup(line1
);
1046 if ((toupper(*(p
+ 1)) == 'C')
1047 || (toupper(*(p
+ 1)) == 'F')) {
1051 if ((*(p
+ 1) == 'B') || (*(p
+ 1) == 'b') || (*(p
+ 1) == 'D') || //actually this means "insert current date here"
1052 (*(p
+ 1) == 'I') || (*(p
+ 1) == 'i') || (*(p
+ 1) == 'N') || (*(p
+ 1) == 'T') || //actually this means "insert current time here"
1053 (*(p
+ 1) == 'U') || (*(p
+ 1) == 'u')) {
1057 if ((*(p
+ 1) == '\\') ||
1058 (*(p
+ 1) == '~') || (*(p
+ 1) == '{')) {
1060 } else if (eol(*(p
+ 1))) {
1061 if (!stream_read_line(st
, directive
, LINE_LEN
, utf16
))
1063 trail_space(directive
);
1064 av_strlcat(line2
, directive
, LINE_LEN
);
1075 if (current
->lines
< SUB_MAX_TEXT
)
1076 current
->text
[current
->lines
] = strdup(line1
);
1078 if (current
->lines
< SUB_MAX_TEXT
)
1083 static int sub_autodetect (stream_t
* st
, int *uses_time
, int utf16
) {
1084 char line
[LINE_LEN
+1];
1089 if (!stream_read_line (st
, line
, LINE_LEN
, utf16
))
1092 if (sscanf (line
, "{%d}{%d}", &i
, &i
)==2)
1093 {*uses_time
=0;return SUB_MICRODVD
;}
1094 if (sscanf (line
, "{%d}{}", &i
)==1)
1095 {*uses_time
=0;return SUB_MICRODVD
;}
1096 if (sscanf (line
, "[%d][%d]", &i
, &i
)==2)
1097 {*uses_time
=1;return SUB_MPL2
;}
1098 if (sscanf (line
, "%d:%d:%d.%d,%d:%d:%d.%d", &i
, &i
, &i
, &i
, &i
, &i
, &i
, &i
)==8)
1099 {*uses_time
=1;return SUB_SUBRIP
;}
1100 if (sscanf (line
, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d", &i
, &i
, &i
, (char *)&i
, &i
, &i
, &i
, &i
, (char *)&i
, &i
)==10)
1101 {*uses_time
=1;return SUB_SUBVIEWER
;}
1102 if (sscanf (line
, "{T %d:%d:%d:%d",&i
, &i
, &i
, &i
)==4)
1103 {*uses_time
=1;return SUB_SUBVIEWER2
;}
1104 if (strstr (line
, "<SAMI>"))
1105 {*uses_time
=1; return SUB_SAMI
;}
1106 if (sscanf(line
, "%d:%d:%d.%d %d:%d:%d.%d", &i
, &i
, &i
, &i
, &i
, &i
, &i
, &i
) == 8)
1107 {*uses_time
= 1; return SUB_JACOSUB
;}
1108 if (sscanf(line
, "@%d @%d", &i
, &i
) == 2)
1109 {*uses_time
= 1; return SUB_JACOSUB
;}
1110 if (sscanf (line
, "%d:%d:%d:", &i
, &i
, &i
)==3)
1111 {*uses_time
=1;return SUB_VPLAYER
;}
1112 if (sscanf (line
, "%d:%d:%d ", &i
, &i
, &i
)==3)
1113 {*uses_time
=1;return SUB_VPLAYER
;}
1114 if (!strncasecmp(line
, "<window", 7))
1115 {*uses_time
=1;return SUB_RT
;}
1116 if (!memcmp(line
, "Dialogue: Marked", 16))
1117 {*uses_time
=1; return SUB_SSA
;}
1118 if (!memcmp(line
, "Dialogue: ", 10))
1119 {*uses_time
=1; return SUB_SSA
;}
1120 if (sscanf (line
, "%d,%d,\"%c", &i
, &i
, (char *) &i
) == 3)
1121 {*uses_time
=1;return SUB_PJS
;}
1122 if (sscanf (line
, "FORMAT=%d", &i
) == 1)
1123 {*uses_time
=0; return SUB_MPSUB
;}
1124 if (!memcmp(line
, "FORMAT=TIME", 11))
1125 {*uses_time
=1; return SUB_MPSUB
;}
1126 if (strstr (line
, "-->>"))
1127 {*uses_time
=0; return SUB_AQTITLE
;}
1128 if (sscanf (line
, "[%d:%d:%d]", &i
, &i
, &i
)==3)
1129 {*uses_time
=1;return SUB_SUBRIP09
;}
1132 return SUB_INVALID
; // too many bad lines
1135 extern float sub_delay
;
1136 extern float sub_fps
;
1139 static iconv_t icdsc
= (iconv_t
)(-1);
1141 void subcp_open (stream_t
*st
)
1143 char *tocp
= "UTF-8";
1146 const char *cp_tmp
= sub_cp
;
1148 char enca_lang
[3], enca_fallback
[100];
1149 if (sscanf(sub_cp
, "enca:%2s:%99s", enca_lang
, enca_fallback
) == 2
1150 || sscanf(sub_cp
, "ENCA:%2s:%99s", enca_lang
, enca_fallback
) == 2) {
1151 if (st
&& st
->flags
& MP_STREAM_SEEK
) {
1152 cp_tmp
= guess_cp(st
, enca_lang
, enca_fallback
);
1154 cp_tmp
= enca_fallback
;
1156 mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"SUB: enca failed, stream must be seekable.\n");
1160 if ((icdsc
= iconv_open (tocp
, cp_tmp
)) != (iconv_t
)(-1)){
1161 mp_msg(MSGT_SUBREADER
,MSGL_V
,"SUB: opened iconv descriptor.\n");
1163 mp_msg(MSGT_SUBREADER
,MSGL_ERR
,"SUB: error opening iconv descriptor.\n");
1167 void subcp_close (void)
1169 if (icdsc
!= (iconv_t
)(-1)){
1170 (void) iconv_close (icdsc
);
1171 icdsc
= (iconv_t
)(-1);
1172 mp_msg(MSGT_SUBREADER
,MSGL_V
,"SUB: closed iconv descriptor.\n");
1176 subtitle
* subcp_recode (subtitle
*sub
)
1179 size_t ileft
, oleft
;
1181 if(icdsc
== (iconv_t
)(-1)) return sub
;
1184 ip
= sub
->text
[--l
];
1188 if (!(ot
= malloc(oleft
+ 1)))
1191 if (iconv(icdsc
, &ip
, &ileft
,
1192 &op
, &oleft
) == (size_t)(-1)) {
1193 mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"SUB: error recoding line.\n");
1197 // In some stateful encodings, we must clear the state to handle the last character
1198 if (iconv(icdsc
, NULL
, NULL
,
1199 &op
, &oleft
) == (size_t)(-1)) {
1200 mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"SUB: error recoding line, can't clear encoding state.\n");
1203 free (sub
->text
[l
]);
1210 static void adjust_subs_time(subtitle
* sub
, float subtime
, float fps
, int block
,
1211 int sub_num
, int sub_uses_time
) {
1215 unsigned long subfms
= (sub_uses_time
? 100 : fps
) * subtime
;
1216 unsigned long overlap
= (sub_uses_time
? 100 : fps
) / 5; // 0.2s
1220 if (sub
->end
<= sub
->start
){
1221 sub
->end
= sub
->start
+ subfms
;
1228 if ((sub
->end
> nextsub
->start
) && (sub
->end
<= nextsub
->start
+ overlap
)) {
1229 // these subtitles overlap for less than 0.2 seconds
1230 // and would result in very short overlapping subtitle
1231 // so let's fix the problem here, before overlapping code
1232 // get its hands on them
1233 unsigned delta
= sub
->end
- nextsub
->start
, half
= delta
/ 2;
1234 sub
->end
-= half
+ 1;
1235 nextsub
->start
+= delta
- half
;
1237 if (sub
->end
>= nextsub
->start
){
1238 sub
->end
= nextsub
->start
- 1;
1239 if (sub
->end
- sub
->start
> subfms
)
1240 sub
->end
= sub
->start
+ subfms
;
1247 * Movies are often converted from FILM (24 fps)
1248 * to PAL (25) by simply speeding it up, so we
1249 * have to multiply the original timestamps by
1250 * (Movie's FPS / Subtitle's (guessed) FPS)
1251 * so eg. for 23.98 fps movie and PAL time based
1252 * subtitles we say -subfps 25 and we're fine!
1255 /* timed sub fps correction ::atmos */
1256 /* the frame-based case is handled in mpcommon.c
1257 * where find_sub is called */
1258 if(sub_uses_time
&& sub_fps
) {
1259 sub
->start
*= sub_fps
/fps
;
1260 sub
->end
*= sub_fps
/fps
;
1266 if (n
) mp_msg(MSGT_SUBREADER
,MSGL_V
,"SUB: Adjusted %d subtitle(s).\n", n
);
1270 subtitle
* (*read
)(stream_t
*st
, subtitle
*dest
,
1271 struct readline_args
*args
);
1272 void (*post
)(subtitle
*dest
);
1277 const char* guess_buffer_cp(unsigned char* buffer
, int buflen
, const char *preferred_language
, const char *fallback
)
1279 const char **languages
;
1281 EncaAnalyser analyser
;
1282 EncaEncoding encoding
;
1283 const char *detected_sub_cp
= NULL
;
1286 languages
= enca_get_languages(&langcnt
);
1287 mp_msg(MSGT_SUBREADER
, MSGL_V
, "ENCA supported languages: ");
1288 for (i
= 0; i
< langcnt
; i
++) {
1289 mp_msg(MSGT_SUBREADER
, MSGL_V
, "%s ", languages
[i
]);
1291 mp_msg(MSGT_SUBREADER
, MSGL_V
, "\n");
1293 for (i
= 0; i
< langcnt
; i
++) {
1294 if (strcasecmp(languages
[i
], preferred_language
) != 0) continue;
1295 analyser
= enca_analyser_alloc(languages
[i
]);
1296 encoding
= enca_analyse_const(analyser
, buffer
, buflen
);
1297 enca_analyser_free(analyser
);
1298 if (encoding
.charset
!= ENCA_CS_UNKNOWN
) {
1299 detected_sub_cp
= enca_charset_name(encoding
.charset
, ENCA_NAME_STYLE_ICONV
);
1306 if (!detected_sub_cp
) {
1307 detected_sub_cp
= fallback
;
1308 mp_msg(MSGT_SUBREADER
, MSGL_INFO
, "ENCA detection failed: fallback to %s\n", fallback
);
1310 mp_msg(MSGT_SUBREADER
, MSGL_INFO
, "ENCA detected charset: %s\n", detected_sub_cp
);
1313 return detected_sub_cp
;
1316 #define MAX_GUESS_BUFFER_SIZE (256*1024)
1317 const char* guess_cp(stream_t
*st
, const char *preferred_language
, const char *fallback
)
1320 unsigned char *buffer
;
1321 const char *detected_sub_cp
= NULL
;
1323 buffer
= malloc(MAX_GUESS_BUFFER_SIZE
);
1324 buflen
= stream_read(st
,buffer
, MAX_GUESS_BUFFER_SIZE
);
1326 detected_sub_cp
= guess_buffer_cp(buffer
, buflen
, preferred_language
, fallback
);
1332 return detected_sub_cp
;
1334 #undef MAX_GUESS_BUFFER_SIZE
/*
 * Read a subtitle file into memory.
 *
 * Opens filename as a stream, autodetects the subtitle format, reads the
 * subtitles one by one through the detected format's reader into the
 * array first[] (kept ordered by start time), then either merges
 * overlapping subtitles into a second array or only adjusts their timing,
 * and finally fills a freshly allocated sub_data.
 *
 * filename: path of the subtitle file; NULL makes the function return NULL.
 * fps:      passed on to adjust_subs_time().
 * opts:     passed to open_stream() and to the format reader.
 *
 * Returns NULL when filename is NULL, the stream cannot be opened, the
 * format cannot be determined or return_sub ends up NULL.
 * NOTE(review): the final return statement is not visible in this excerpt;
 * presumably subt_data is returned.
 *
 * Writes the globals sub_format, mpsub_multiplier and previous_sub_end.
 */
1337 sub_data
* sub_read_file(char *filename
, float fps
, struct MPOpts
*opts
)
1341 int n_max
, n_first
, i
, j
, sub_first
, sub_orig
;
1342 subtitle
*first
, *second
, *sub
, *return_sub
, *alloced_sub
= NULL
;
1343 sub_data
*subt_data
;
1344 int uses_time
= 0, sub_num
= 0, sub_errs
= 0;
/* One entry per supported format: { line reader, optional post-processing
 * hook, human readable name }. Only the ssa entry has a post hook.
 * NOTE(review): how srp is selected from this table is not visible here;
 * presumably it is indexed by sub_format - confirm. */
1345 static const struct subreader sr
[]=
1347 { sub_read_line_microdvd
, NULL
, "microdvd" },
1348 { sub_read_line_subrip
, NULL
, "subrip" },
1349 { sub_read_line_subviewer
, NULL
, "subviewer" },
1350 { sub_read_line_sami
, NULL
, "sami" },
1351 { sub_read_line_vplayer
, NULL
, "vplayer" },
1352 { sub_read_line_rt
, NULL
, "rt" },
1353 { sub_read_line_ssa
, sub_pp_ssa
, "ssa" },
1354 { sub_read_line_pjs
, NULL
, "pjs" },
1355 { sub_read_line_mpsub
, NULL
, "mpsub" },
1356 { sub_read_line_aqt
, NULL
, "aqt" },
1357 { sub_read_line_subviewer2
, NULL
, "subviewer 2.0" },
1358 { sub_read_line_subrip09
, NULL
, "subrip 0.9" },
1359 { sub_read_line_jacosub
, NULL
, "jacosub" },
1360 { sub_read_line_mpl2
, NULL
, "mpl2" }
1362 const struct subreader
*srp
;
1364 if(filename
==NULL
) return NULL
; //qnx segfault
1365 fd
=open_stream (filename
, opts
, NULL
); if (!fd
) return NULL
;
/* Run autodetection with utf16 = 0, 1, 2 in turn until a format is
 * recognised. */
1367 sub_format
= SUB_INVALID
;
1368 for (utf16
= 0; sub_format
== SUB_INVALID
&& utf16
< 3; utf16
++) {
1369 sub_format
=sub_autodetect (fd
, &uses_time
, utf16
);
1375 mpsub_multiplier
= (uses_time
? 100.0 : 1.0);
/* NOTE(review): this early return does not visibly release the stream
 * opened above - looks like a leak of fd; confirm. */
1376 if (sub_format
==SUB_INVALID
) {mp_msg(MSGT_SUBREADER
,MSGL_WARN
,"SUB: Could not determine file format\n");return NULL
;}
1378 mp_msg(MSGT_SUBREADER
, MSGL_V
, "SUB: Detected subtitle file format: %s\n", srp
->name
);
/* Compare the end of the file name case-insensitively with the UTF-8
 * extensions below; subcp_open() is only called when k ends up negative.
 * NOTE(review): the loop over k is not visible here - presumably k < 0
 * means that no extension matched. */
1384 if ((l
=strlen(filename
))>4){
1385 char *exts
[] = {".utf", ".utf8", ".utf-8" };
1387 if (l
>= strlen(exts
[k
]) && !strcasecmp(filename
+(l
- strlen(exts
[k
])), exts
[k
])){
1391 if (k
<0) subcp_open(fd
);
/* NOTE(review): the malloc()/realloc() results below are used without a
 * visible NULL check, and realloc() overwrites its own argument. */
1396 first
=malloc(n_max
*sizeof(subtitle
));
1401 sub
= malloc(sizeof(subtitle
));
1402 //This is to deal with those formats (AQT & Subrip) which define the end of a subtitle
1403 //as the beginning of the following
1404 previous_sub_end
= 0;
1408 first
=realloc(first
,n_max
*sizeof(subtitle
));
1410 memset(sub
, '\0', sizeof(subtitle
));
1411 sub
=srp
->read(fd
, sub
, &(struct readline_args
){utf16
, opts
});
1412 if(!sub
) break; // EOF
1414 if (sub
!=ERR
) sub
=subcp_recode(sub
);
1425 // Apply any post processing that needs recoding first
1426 if ((sub
!=ERR
) && !sub_no_text_pp
&& srp
->post
) srp
->post(sub
);
/* Keep first[] ordered by start time: when the new subtitle does not start
 * before the last stored one it is appended at index sub_num ... */
1427 if(!sub_num
|| (first
[sub_num
- 1].start
<= sub
->start
)){
1428 first
[sub_num
].start
= sub
->start
;
1429 first
[sub_num
].end
= sub
->end
;
1430 first
[sub_num
].lines
= sub
->lines
;
1431 first
[sub_num
].alignment
= sub
->alignment
;
1432 for(i
= 0; i
< sub
->lines
; ++i
){
1433 first
[sub_num
].text
[i
] = sub
->text
[i
];
1435 if (previous_sub_end
){
1436 first
[sub_num
- 1].end
= previous_sub_end
;
1437 previous_sub_end
= 0;
/* ... otherwise entries are moved up by one slot, starting from the end,
 * until the position is found where the preceding entry does not start
 * later than the new subtitle; the new subtitle is stored there. */
1440 for(j
= sub_num
- 1; j
>= 0; --j
){
1441 first
[j
+ 1].start
= first
[j
].start
;
1442 first
[j
+ 1].end
= first
[j
].end
;
1443 first
[j
+ 1].lines
= first
[j
].lines
;
1444 first
[j
+ 1].alignment
= first
[j
].alignment
;
1445 for(i
= 0; i
< first
[j
].lines
; ++i
){
1446 first
[j
+ 1].text
[i
] = first
[j
].text
[i
];
1448 if(!j
|| (first
[j
- 1].start
<= sub
->start
)){
1449 first
[j
].start
= sub
->start
;
1450 first
[j
].end
= sub
->end
;
1451 first
[j
].lines
= sub
->lines
;
1452 first
[j
].alignment
= sub
->alignment
;
/* Unlike the append case above, all SUB_MAX_TEXT text pointers are copied
 * here, not just sub->lines of them. */
1453 for(i
= 0; i
< SUB_MAX_TEXT
; ++i
){
1454 first
[j
].text
[i
] = sub
->text
[i
];
1456 if (previous_sub_end
){
1457 first
[j
].end
= first
[j
- 1].end
;
1458 first
[j
- 1].end
= previous_sub_end
;
1459 previous_sub_end
= 0;
1465 if(sub
==ERR
) ++sub_errs
; else ++sub_num
; // Error vs. Valid
1475 // printf ("SUB: Subtitle format %s time.\n", uses_time?"uses":"doesn't use");
1476 mp_msg(MSGT_SUBREADER
, MSGL_V
,"SUB: Read %i subtitles, %i bad line(s).\n",
1484 // we do overlap if the user forced it (suboverlap_enable == 2) or
1485 // the user didn't forced no-overlapsub and the format is Jacosub or Ssa.
1486 // this is because usually overlapping subtitles are found in these formats,
1487 // while in others they are probably result of bad timing
1488 if ((suboverlap_enabled
== 2) ||
1489 ((suboverlap_enabled
) && ((sub_format
== SUB_JACOSUB
) || (sub_format
== SUB_SSA
)))) {
1490 adjust_subs_time(first
, 6.0, fps
, 0, sub_num
, uses_time
);/*~6 secs AST*/
1491 // here we manage overlapping subtitles
1496 // for each subtitle in first[] we deal with its 'block' of
1498 for (sub_first
= 0; sub_first
< n_first
; ++sub_first
) {
1499 unsigned long global_start
= first
[sub_first
].start
,
1500 global_end
= first
[sub_first
].end
, local_start
, local_end
;
1501 int lines_to_add
= first
[sub_first
].lines
, sub_to_add
= 0,
1502 **placeholder
= NULL
, higher_line
= 0, counter
, start_block_sub
= sub_num
;
1503 char real_block
= 1;
1505 // here we find the number of subtitles inside the 'block'
1506 // and its span interval. this works well only with sorted
1508 while ((sub_first
+ sub_to_add
+ 1 < n_first
) && (first
[sub_first
+ sub_to_add
+ 1].start
< global_end
)) {
1510 lines_to_add
+= first
[sub_first
+ sub_to_add
].lines
;
1511 if (first
[sub_first
+ sub_to_add
].start
< global_start
) {
1512 global_start
= first
[sub_first
+ sub_to_add
].start
;
1514 if (first
[sub_first
+ sub_to_add
].end
> global_end
) {
1515 global_end
= first
[sub_first
+ sub_to_add
].end
;
1519 /* Avoid n^2 memory use for the "placeholder" data structure
1520 * below with subtitles that have a huge number of
1521 * consecutive overlapping lines. */
1522 lines_to_add
= FFMIN(lines_to_add
, SUB_MAX_TEXT
);
1524 // we need a structure to keep trace of the screen lines
1525 // used by the subs, a 'placeholder'
1526 counter
= 2 * sub_to_add
+ 1; // the maximum number of subs derived
1527 // from a block of sub_to_add+1 subs
/* placeholder[k][line] holds the index into first[] of the subtitle that
 * occupies that screen line in the k-th derived subtitle, or -1 when the
 * line is free. */
1528 placeholder
= malloc(sizeof(int *) * counter
);
1529 for (i
= 0; i
< counter
; ++i
) {
1530 placeholder
[i
] = malloc(sizeof(int) * lines_to_add
);
1531 for (j
= 0; j
< lines_to_add
; ++j
) {
1532 placeholder
[i
][j
] = -1;
1537 local_end
= global_start
- 1;
1541 // here we find the beginning and the end of a new
1542 // subtitle in the block
1543 local_start
= local_end
+ 1;
1544 local_end
= global_end
;
1545 for (j
= 0; j
<= sub_to_add
; ++j
) {
1546 if ((first
[sub_first
+ j
].start
- 1 > local_start
) && (first
[sub_first
+ j
].start
- 1 < local_end
)) {
1547 local_end
= first
[sub_first
+ j
].start
- 1;
1548 } else if ((first
[sub_first
+ j
].end
> local_start
) && (first
[sub_first
+ j
].end
< local_end
)) {
1549 local_end
= first
[sub_first
+ j
].end
;
1552 // here we allocate the screen lines to subs we must
1553 // display in current local_start-local_end interval.
1554 // if the subs were yet presents in the previous interval
1555 // they keep the same lines, otherside they get unused lines
1556 for (j
= 0; j
<= sub_to_add
; ++j
) {
1557 if ((first
[sub_first
+ j
].start
<= local_end
) && (first
[sub_first
+ j
].end
> local_start
)) {
1558 unsigned long sub_lines
= first
[sub_first
+ j
].lines
, fragment_length
= lines_to_add
+ 1,
1561 int fragment_position
= -1;
1563 // if this is not the first new sub of the block
1564 // we find if this sub was present in the previous
1567 for (i
= 0; i
< lines_to_add
; ++i
) {
1568 if (placeholder
[counter
- 1][i
] == sub_first
+ j
) {
1569 placeholder
[counter
][i
] = sub_first
+ j
;
1576 // we are looking for the shortest among all groups of
1577 // sequential blank lines whose length is greater than or
1578 // equal to sub_lines. we store in fragment_position the
1579 // position of the shortest group, in fragment_length its
1580 // length, and in tmp the length of the group currently
1582 for (i
= 0; i
< lines_to_add
; ++i
) {
1583 if (placeholder
[counter
][i
] == -1) {
1584 // placeholder[counter][i] is part of the current group
1588 if (tmp
== sub_lines
) {
1589 // current group's size fits exactly the one we
1590 // need, so we stop looking
1591 fragment_position
= i
- tmp
;
1595 if ((tmp
) && (tmp
> sub_lines
) && (tmp
< fragment_length
)) {
1596 // current group is the best we found till here,
1597 // but is still bigger than the one we are looking
1598 // for, so we keep on looking
1599 fragment_length
= tmp
;
1600 fragment_position
= i
- tmp
;
1603 // current group doesn't fit at all, so we forget it
1609 // last screen line is blank, a group ends with it
1610 if ((tmp
>= sub_lines
) && (tmp
< fragment_length
)) {
1611 fragment_position
= i
- tmp
;
1614 if (fragment_position
== -1) {
1615 // it was not possible to find free screen line(s) for a subtitle,
1616 // usually this means a bug in the code; however we do not overlap
1617 mp_msg(MSGT_SUBREADER
, MSGL_WARN
, "SUB: we could not find a suitable position for an overlapping subtitle\n");
1618 higher_line
= SUB_MAX_TEXT
+ 1;
1621 for (tmp
= 0; tmp
< sub_lines
; ++tmp
) {
1622 placeholder
[counter
][fragment_position
+ tmp
] = sub_first
+ j
;
1627 for (j
= higher_line
+ 1; j
< lines_to_add
; ++j
) {
1628 if (placeholder
[counter
][j
] != -1)
1633 if (higher_line
>= SUB_MAX_TEXT
) {
1634 // the 'block' has too much lines, so we don't overlap the
/* Fallback: the subtitles of the block are copied unchanged into second[],
 * with their text lines duplicated by strdup(). */
1636 second
= realloc(second
, (sub_num
+ sub_to_add
+ 1) * sizeof(subtitle
));
1637 for (j
= 0; j
<= sub_to_add
; ++j
) {
1639 memset(&second
[sub_num
+ j
], '\0', sizeof(subtitle
));
1640 second
[sub_num
+ j
].start
= first
[sub_first
+ j
].start
;
1641 second
[sub_num
+ j
].end
= first
[sub_first
+ j
].end
;
1642 second
[sub_num
+ j
].lines
= first
[sub_first
+ j
].lines
;
1643 second
[sub_num
+ j
].alignment
= first
[sub_first
+ j
].alignment
;
1644 for (ls
= 0; ls
< second
[sub_num
+ j
].lines
; ls
++) {
1645 second
[sub_num
+ j
].text
[ls
] = strdup(first
[sub_first
+ j
].text
[ls
]);
1648 sub_num
+= sub_to_add
+ 1;
1649 sub_first
+= sub_to_add
;
1654 // we read the placeholder structure and create the new
1656 second
= realloc(second
, (sub_num
+ 1) * sizeof(subtitle
));
1657 memset(&second
[sub_num
], '\0', sizeof(subtitle
));
1658 second
[sub_num
].start
= local_start
;
1659 second
[sub_num
].end
= local_end
;
1660 second
[sub_num
].alignment
= first
[sub_first
].alignment
;
1661 n_max
= (lines_to_add
< SUB_MAX_TEXT
) ? lines_to_add
: SUB_MAX_TEXT
;
1662 for (i
= 0, j
= 0; j
< n_max
; ++j
) {
1663 if (placeholder
[counter
][j
] != -1) {
1664 int lines
= first
[placeholder
[counter
][j
]].lines
;
1665 for (ls
= 0; ls
< lines
; ++ls
) {
1666 second
[sub_num
].text
[i
++] = strdup(first
[placeholder
[counter
][j
]].text
[ls
]);
1670 second
[sub_num
].text
[i
++] = strdup(" ");
1675 } while (local_end
< global_end
);
1677 for (i
= 0; i
< counter
; ++i
)
1678 second
[start_block_sub
+ i
].lines
= higher_line
+ 1;
1680 counter
= 2 * sub_to_add
+ 1;
1681 for (i
= 0; i
< counter
; ++i
) {
1682 free(placeholder
[i
]);
1685 sub_first
+= sub_to_add
;
/* second[] only holds strdup()ed copies, so the text lines of the original
 * sub_orig entries in first[] are released here. */
1688 for (j
= sub_orig
- 1; j
>= 0; --j
) {
1689 for (i
= first
[j
].lines
- 1; i
>= 0; --i
) {
1690 free(first
[j
].text
[i
]);
1695 return_sub
= second
;
1696 } else { //if(suboverlap_enabled)
1697 adjust_subs_time(first
, 6.0, fps
, 1, sub_num
, uses_time
);/*~6 secs AST*/
/* Package the result; the file name is duplicated with strdup(). */
1700 if (return_sub
== NULL
) return NULL
;
1701 subt_data
= malloc(sizeof(sub_data
));
1702 subt_data
->filename
= strdup(filename
);
1703 subt_data
->sub_uses_time
= uses_time
;
1704 subt_data
->sub_num
= sub_num
;
1705 subt_data
->sub_errs
= sub_errs
;
1706 subt_data
->subtitles
= return_sub
;
1710 void list_sub_file(sub_data
* subd
){
1712 subtitle
*subs
= subd
->subtitles
;
1714 for(j
=0; j
< subd
->sub_num
; j
++){
1715 subtitle
* egysub
=&subs
[j
];
1716 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"%i line%c (%li-%li)\n",
1718 (1==egysub
->lines
)?' ':'s',
1721 for (i
=0; i
<egysub
->lines
; i
++) {
1722 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"\t\t%d: %s%s", i
,egysub
->text
[i
], i
==egysub
->lines
-1?"":" \n ");
1724 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"\n");
1727 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"Subtitle format %s time.\n",
1728 subd
->sub_uses_time
? "uses":"doesn't use");
1729 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"Read %i subtitles, %i errors.\n", subd
->sub_num
, subd
->sub_errs
);
1732 void dump_srt(sub_data
* subd
, float fps
){
1738 subtitle
*subs
= subd
->subtitles
;
1740 if (!subd
->sub_uses_time
&& sub_fps
== 0)
1742 fd
=fopen("dumpsub.srt","w");
1745 perror("dump_srt: fopen");
1748 for(i
=0; i
< subd
->sub_num
; i
++)
1750 onesub
=subs
+i
; //=&subs[i];
1751 fprintf(fd
,"%d\n",i
+1);//line number
1754 if (!subd
->sub_uses_time
)
1755 temp
= temp
* 100 / sub_fps
;
1756 temp
-= sub_delay
* 100;
1757 h
=temp
/360000;temp
%=360000; //h =1*100*60*60
1758 m
=temp
/6000; temp
%=6000; //m =1*100*60
1759 s
=temp
/100; temp
%=100; //s =1*100
1760 ms
=temp
*10; //ms=1*10
1761 fprintf(fd
,"%02d:%02d:%02d,%03d --> ",h
,m
,s
,ms
);
1764 if (!subd
->sub_uses_time
)
1765 temp
= temp
* 100 / sub_fps
;
1766 temp
-= sub_delay
* 100;
1767 h
=temp
/360000;temp
%=360000;
1768 m
=temp
/6000; temp
%=6000;
1769 s
=temp
/100; temp
%=100;
1771 fprintf(fd
,"%02d:%02d:%02d,%03d\n",h
,m
,s
,ms
);
1773 for(j
=0;j
<onesub
->lines
;j
++)
1774 fprintf(fd
,"%s\n",onesub
->text
[j
]);
1779 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dumpsub.srt\'.\n");
1782 void dump_mpsub(sub_data
* subd
, float fps
){
1786 subtitle
*subs
= subd
->subtitles
;
1788 mpsub_position
= subd
->sub_uses_time
? (sub_delay
*100) : (sub_delay
*fps
);
1789 if (sub_fps
==0) sub_fps
=fps
;
1791 fd
=fopen ("dump.mpsub", "w");
1793 perror ("dump_mpsub: fopen");
1798 if (subd
->sub_uses_time
) fprintf (fd
,"FORMAT=TIME\n\n");
1799 else fprintf (fd
, "FORMAT=%5.2f\n\n", fps
);
1801 for(j
=0; j
< subd
->sub_num
; j
++){
1802 subtitle
* egysub
=&subs
[j
];
1803 if (subd
->sub_uses_time
) {
1804 a
=((egysub
->start
-mpsub_position
)/100.0);
1805 b
=((egysub
->end
-egysub
->start
)/100.0);
1806 if ( (float)((int)a
) == a
)
1807 fprintf (fd
, "%.0f",a
);
1809 fprintf (fd
, "%.2f",a
);
1811 if ( (float)((int)b
) == b
)
1812 fprintf (fd
, " %.0f\n",b
);
1814 fprintf (fd
, " %.2f\n",b
);
1816 fprintf (fd
, "%ld %ld\n", (long)((egysub
->start
*(fps
/sub_fps
))-((mpsub_position
*(fps
/sub_fps
)))),
1817 (long)(((egysub
->end
)-(egysub
->start
))*(fps
/sub_fps
)));
1820 mpsub_position
= egysub
->end
;
1821 for (i
=0; i
<egysub
->lines
; i
++) {
1822 fprintf (fd
, "%s\n",egysub
->text
[i
]);
1827 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dump.mpsub\'.\n");
1830 void dump_microdvd(sub_data
* subd
, float fps
) {
1833 subtitle
*subs
= subd
->subtitles
;
1836 fd
= fopen("dumpsub.sub", "w");
1838 perror("dumpsub.sub: fopen");
1841 delay
= sub_delay
* sub_fps
;
1842 for (i
= 0; i
< subd
->sub_num
; ++i
) {
1844 start
= subs
[i
].start
;
1846 if (subd
->sub_uses_time
) {
1847 start
= start
* sub_fps
/ 100 ;
1848 end
= end
* sub_fps
/ 100;
1851 start
= start
* sub_fps
/ fps
;
1852 end
= end
* sub_fps
/ fps
;
1856 fprintf(fd
, "{%d}{%d}", start
, end
);
1857 for (j
= 0; j
< subs
[i
].lines
; ++j
)
1858 fprintf(fd
, "%s%s", j
? "|" : "", subs
[i
].text
[j
]);
1862 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dumpsub.sub\'.\n");
1865 void dump_jacosub(sub_data
* subd
, float fps
) {
1871 subtitle
*subs
= subd
->subtitles
;
1873 if (!subd
->sub_uses_time
&& sub_fps
== 0)
1875 fd
=fopen("dumpsub.jss","w");
1878 perror("dump_jacosub: fopen");
1881 fprintf(fd
, "#TIMERES %d\n", (subd
->sub_uses_time
) ? 100 : (int)sub_fps
);
1882 for(i
=0; i
< subd
->sub_num
; i
++)
1884 onesub
=subs
+i
; //=&subs[i];
1887 if (!subd
->sub_uses_time
)
1888 temp
= temp
* 100 / sub_fps
;
1889 temp
-= sub_delay
* 100;
1890 h
=temp
/360000;temp
%=360000; //h =1*100*60*60
1891 m
=temp
/6000; temp
%=6000; //m =1*100*60
1892 s
=temp
/100; temp
%=100; //s =1*100
1894 fprintf(fd
,"%02d:%02d:%02d.%02d ",h
,m
,s
,cs
);
1897 if (!subd
->sub_uses_time
)
1898 temp
= temp
* 100 / sub_fps
;
1899 temp
-= sub_delay
* 100;
1900 h
=temp
/360000;temp
%=360000;
1901 m
=temp
/6000; temp
%=6000;
1902 s
=temp
/100; temp
%=100;
1904 fprintf(fd
,"%02d:%02d:%02d.%02d {~} ",h
,m
,s
,cs
);
1906 for(j
=0;j
<onesub
->lines
;j
++)
1907 fprintf(fd
,"%s%s",j
? "\\n" : "", onesub
->text
[j
]);
1912 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dumpsub.js\'.\n");
/*
 * Write the subtitles of subd to "dumpsub.smi" as SAMI markup.
 * fps is used as sub_fps when the subtitles are frame based and sub_fps is
 * still 0. For every subtitle one SYNC element is written at its start
 * with the text lines joined by <br>, and a second one at its end.
 */
1915 void dump_sami(sub_data
* subd
, float fps
) {
1920 subtitle
*subs
= subd
->subtitles
;
1922 if (!subd
->sub_uses_time
&& sub_fps
== 0)
1924 fd
=fopen("dumpsub.smi","w");
/* NOTE(review): the perror label below says dump_jacosub although this is
 * dump_sami - looks like a copy and paste leftover. */
1927 perror("dump_jacosub: fopen");
1930 fprintf(fd
, "<SAMI>\n"
1932 " <STYLE TYPE=\"Text/css\">\n"
1934 " P {margin-left: 29pt; margin-right: 29pt; font-size: 24pt; text-align: center; font-family: Tahoma; font-weight: bold; color: #FCDD03; background-color: #000000;}\n"
1935 " .SUBTTL {Name: 'Subtitles'; Lang: en-US; SAMIType: CC;}\n"
1940 for(i
=0; i
< subd
->sub_num
; i
++)
1942 onesub
=subs
+i
; //=&subs[i];
/* Frame based values are first scaled by 100 / sub_fps, then sub_delay
 * * 100 is subtracted; the value is printed multiplied by 10.
 * NOTE(review): temp is printed with %lu, so it is presumably unsigned
 * long; a delay larger than the timestamp would then not yield a
 * meaningful value - confirm against the declaration. */
1945 if (!subd
->sub_uses_time
)
1946 temp
= temp
* 100 / sub_fps
;
1947 temp
-= sub_delay
* 100;
1948 fprintf(fd
,"\t<SYNC Start=%lu>\n"
1949 "\t <P>", temp
* 10);
1951 for(j
=0;j
<onesub
->lines
;j
++)
1952 fprintf(fd
,"%s%s",j
? "<br>" : "", onesub
->text
[j
]);
/* Second SYNC element, written for the end of the subtitle. */
1957 if (!subd
->sub_uses_time
)
1958 temp
= temp
* 100 / sub_fps
;
1959 temp
-= sub_delay
* 100;
1960 fprintf(fd
,"\t<SYNC Start=%lu>\n"
1961 "\t <P> \n", temp
* 10);
1963 fprintf(fd
, "</BODY>\n"
1966 mp_msg(MSGT_SUBREADER
,MSGL_INFO
,"SUB: Subtitles dumped in \'dumpsub.smi\'.\n");
1969 void sub_free( sub_data
* subd
)
1973 if ( !subd
) return;
1975 for (i
= 0; i
< subd
->sub_num
; i
++)
1976 for (j
= 0; j
< subd
->subtitles
[i
].lines
; j
++)
1977 free( subd
->subtitles
[i
].text
[j
] );
1978 free( subd
->subtitles
);
1979 free( subd
->filename
);
1983 #define MAX_SUBLINE 512
1985 * \brief parse text and append it to subtitle in sub
1986 * \param sub subtitle struct to add text to
1987 * \param txt text to parse
1988 * \param len length of text in txt
1989 * \param endpts pts at which this subtitle text should be removed again
1991 * <> and {} are interpreted as comment delimiters, "\n", "\N", '\n', '\r'
1992 * and '\0' are interpreted as newlines, duplicate, leading and trailing
1993 * newlines are ignored.
1995 void sub_add_text(subtitle
*sub
, const char *txt
, int len
, double endpts
) {
1997 int double_newline
= 1; // ignore newlines at the beginning
2000 if (sub
->lines
>= SUB_MAX_TEXT
) return;
2002 buf
= malloc(MAX_SUBLINE
+ 1);
2003 sub
->text
[sub
->lines
] = buf
;
2004 sub
->endpts
[sub
->lines
] = endpts
;
2005 for (i
= 0; i
< len
&& pos
< MAX_SUBLINE
; i
++) {
2007 if (c
== '<') comment
|= 1;
2008 if (c
== '{') comment
|= 2;
2010 if (c
== '}') comment
&= ~2;
2011 if (c
== '>') comment
&= ~1;
2014 if (pos
== MAX_SUBLINE
- 1) {
2018 if (c
== '\\' && i
+ 1 < len
) {
2020 if (c
== 'n' || c
== 'N') c
= 0;
2022 if (c
== '\n' || c
== '\r') c
= 0;
2026 } else if (!double_newline
) {
2027 if (sub
->lines
>= SUB_MAX_TEXT
- 1) {
2028 mp_msg(MSGT_VO
, MSGL_WARN
, "Too many subtitle lines\n");
2035 buf
= malloc(MAX_SUBLINE
+ 1);
2036 sub
->text
[sub
->lines
] = buf
;
2037 sub
->endpts
[sub
->lines
] = endpts
;
2041 if (sub
->lines
< SUB_MAX_TEXT
&&
2042 strlen(sub
->text
[sub
->lines
]))
2047 * \brief remove outdated subtitle lines.
2048 * \param sub subtitle struct to modify
2049 * \param pts current pts. All lines with endpts <= this will be removed.
2050 * Use MP_NOPTS_VALUE to remove all lines
2051 * \return 1 if sub was modified, 0 otherwise.
2053 int sub_clear_text(subtitle
*sub
, double pts
) {
2056 while (i
< sub
->lines
) {
2057 double endpts
= sub
->endpts
[i
];
2058 if (pts
== MP_NOPTS_VALUE
|| (endpts
!= MP_NOPTS_VALUE
&& pts
>= endpts
)) {
2061 for (j
= i
+ 1; j
< sub
->lines
; j
++) {
2062 sub
->text
[j
- 1] = sub
->text
[j
];
2063 sub
->endpts
[j
- 1] = sub
->endpts
[j
];