sub/subreader.c

   1 /*
   2  * Subtitle reader with format autodetection
   3  *
   4  * Copyright (c) 2001 laaz
   5  * Some code cleanup & realloc() by A'rpi/ESP-team
   6  *
   7  * This file is part of MPlayer.
   8  *
   9  * MPlayer is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * MPlayer is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License along
  20  * with MPlayer; if not, write to the Free Software Foundation, Inc.,
  21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  22  */
  23
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <ctype.h>
  28
  29 #include <sys/types.h>
  30 #include <dirent.h>
  31
  32 #include "config.h"
  33 #include "mp_msg.h"
  34 #include "subreader.h"
  35 #include "mpcommon.h"
  36 #include "subassconvert.h"
  37 #include "options.h"
  38 #include "stream/stream.h"
  39 #include "libavutil/common.h"
  40 #include "libavutil/avstring.h"
  41
  42 #ifdef CONFIG_ENCA
  43 #include <enca.h>
  44 #endif
  45
  46 #define ERR ((void *) -1)
  47
  48 #ifdef CONFIG_ICONV
  49 #include <iconv.h>
  50 char *sub_cp=NULL;
  51 #endif
  52 #ifdef CONFIG_FRIBIDI
  53 #include <fribidi/fribidi.h>
  54 char *fribidi_charset = NULL;   ///character set that will be passed to FriBiDi
  55 int flip_hebrew = 1;            ///flip subtitles using fribidi
  56 int fribidi_flip_commas = 0;    ///flip comma when fribidi is used
  57 #endif
  58
// Parameter struct for the format-specific readline functions
struct readline_args {
    int utf16;              // forwarded unchanged as the last argument of stream_read_line()
    struct MPOpts *opts;    // player options; the readers below consult opts->ass_enabled
};
  64
/* Maximal length of line of a subtitle */
#define LINE_LEN 1000
/* State of the MPsub reader: a running timestamp that every subtitle advances
 * by its two relative offsets, each scaled by mpsub_multiplier. */
static float mpsub_position=0;
static float mpsub_multiplier=1.;
/* Duration given to the last SAMI subtitle when no following start time closes
 * it; overridden by a "Slacktime:" field in the file (value / 10).
 * NOTE(review): the SAMI reader stores file timestamps divided by 10 and adds
 * this value as is, so if file times are milliseconds the default is 200 s
 * rather than the 20 s the trailing comment states - confirm. */
static int sub_slacktime = 20000; //20 sec

int sub_no_text_pp=0;   // 1 => do not apply text post-processing
                        // like {\...} elimination in SSA format.

int sub_match_fuzziness=0; // level of sub name matching fuzziness

/* Use the SUB_* constant defined in the header file */
int sub_format=SUB_INVALID;
  78 #ifdef CONFIG_SORTSUB
/*
   Some subtitling formats, namely AQT and Subrip09, define the end of a
   subtitle as the beginning of the following one. Since we currently read one
   subtitle at a time, for these formats we keep two global subtitle pointers,
   previous_aqt_sub and previous_subrip09_sub, pointing to the previous
   subtitle, so that we can set its end when we read the starting time of the
   current subtitle.
   When CONFIG_SORTSUB is defined, we instead use a single global unsigned
   long, previous_sub_end, for both (and even future) formats, to store the
   end of the previous sub: it is initialized to 0 in sub_read_file and
   eventually modified by sub_read_line_aqt or by the Subrip09 reader.
 */
  90 unsigned long previous_sub_end;
  91 #endif
  92
  93 static int eol(char p) {
  94         return p=='\r' || p=='\n' || p=='\0';
  95 }
  96
  97 /* Remove leading and trailing space */
  98 static void trail_space(char *s) {
  99         int i = 0;
 100         while (isspace(s[i])) ++i;
 101         if (i) strcpy(s, s + i);
 102         i = strlen(s) - 1;
 103         while (i > 0 && isspace(s[i])) s[i--] = '\0';
 104 }
 105
 106 static char *stristr(const char *haystack, const char *needle) {
 107     int len = 0;
 108     const char *p = haystack;
 109
 110     if (!(haystack && needle)) return NULL;
 111
 112     len=strlen(needle);
 113     while (*p != '\0') {
 114         if (strncasecmp(p, needle, len) == 0) return (char*)p;
 115         p++;
 116     }
 117
 118     return NULL;
 119 }
 120
 121 static void sami_add_line(subtitle *current, char *buffer, char **pos) {
 122     char *p = *pos;
 123     *p = 0;
 124     trail_space(buffer);
 125     if (*buffer && current->lines < SUB_MAX_TEXT)
 126         current->text[current->lines++] = strdup(buffer);
 127     *pos = buffer;
 128 }
 129
 130 static subtitle *sub_read_line_sami(stream_t* st, subtitle *current,
 131                                     struct readline_args *args)
 132 {
 133     int utf16 = args->utf16;
 134     static char line[LINE_LEN+1];
 135     static char *s = NULL, *slacktime_s;
 136     char text[LINE_LEN+1], *p=NULL, *q;
 137     int state;
 138
 139     current->lines = current->start = current->end = 0;
 140     current->alignment = SUB_ALIGNMENT_BOTTOMCENTER;
 141     state = 0;
 142
 143     /* read the first line */
 144     if (!s)
 145             if (!(s = stream_read_line(st, line, LINE_LEN, utf16))) return 0;
 146
 147     do {
 148         switch (state) {
 149
 150         case 0: /* find "START=" or "Slacktime:" */
 151             slacktime_s = stristr (s, "Slacktime:");
 152             if (slacktime_s)
 153                 sub_slacktime = strtol (slacktime_s+10, NULL, 0) / 10;
 154
 155             s = stristr (s, "Start=");
 156             if (s) {
 157                 current->start = strtol (s + 6, &s, 0) / 10;
 158                 /* eat '>' */
 159                 for (; *s != '>' && *s != '\0'; s++);
 160                 s++;
 161                 state = 1; continue;
 162             }
 163             break;
 164
 165         case 1: /* find (optional) "<P", skip other TAGs */
 166             for  (; *s == ' ' || *s == '\t'; s++); /* strip blanks, if any */
 167             if (*s == '\0') break;
 168             if (*s != '<') { state = 3; p = text; continue; } /* not a TAG */
 169             s++;
 170             if (*s == 'P' || *s == 'p') { s++; state = 2; continue; } /* found '<P' */
 171             for (; *s != '>' && *s != '\0'; s++); /* skip remains of non-<P> TAG */
 172             if (s == '\0')
 173               break;
 174             s++;
 175             continue;
 176
 177         case 2: /* find ">" */
 178             if ((s = strchr (s, '>'))) { s++; state = 3; p = text; continue; }
 179             break;
 180
 181         case 3: /* get all text until '<' appears */
 182             if (*s == '\0') break;
 183             else if (!strncasecmp (s, "<br>", 4)) {
 184                 sami_add_line(current, text, &p);
 185                 s += 4;
 186             }
 187             else if ((*s == '{') && !sub_no_text_pp) { state = 5; ++s; continue; }
 188             else if (*s == '<') { state = 4; }
 189             else if (!strncasecmp (s, "&nbsp;", 6)) { *p++ = ' '; s += 6; }
 190             else if (*s == '\t') { *p++ = ' '; s++; }
 191             else if (*s == '\r' || *s == '\n') { s++; }
 192             else *p++ = *s++;
 193
 194             /* skip duplicated space */
 195             if (p > text + 2) if (*(p-1) == ' ' && *(p-2) == ' ') p--;
 196
 197             continue;
 198
 199         case 4: /* get current->end or skip <TAG> */
 200             q = stristr (s, "Start=");
 201             if (q) {
 202                 current->end = strtol (q + 6, &q, 0) / 10 - 1;
 203                 *p = '\0'; trail_space (text);
 204                 if (text[0] != '\0')
 205                     current->text[current->lines++] = strdup (text);
 206                 if (current->lines > 0) { state = 99; break; }
 207                 state = 0; continue;
 208             }
 209             s = strchr (s, '>');
 210             if (s) { s++; state = 3; continue; }
 211             break;
 212        case 5: /* get rid of {...} text, but read the alignment code */
 213             if ((*s == '\\') && (*(s + 1) == 'a') && !sub_no_text_pp) {
 214                if (stristr(s, "\\a1") != NULL) {
 215                    current->alignment = SUB_ALIGNMENT_BOTTOMLEFT;
 216                    s = s + 3;
 217                }
 218                if (stristr(s, "\\a2") != NULL) {
 219                    current->alignment = SUB_ALIGNMENT_BOTTOMCENTER;
 220                    s = s + 3;
 221                } else if (stristr(s, "\\a3") != NULL) {
 222                    current->alignment = SUB_ALIGNMENT_BOTTOMRIGHT;
 223                    s = s + 3;
 224                } else if ((stristr(s, "\\a4") != NULL) || (stristr(s, "\\a5") != NULL) || (stristr(s, "\\a8") != NULL)) {
 225                    current->alignment = SUB_ALIGNMENT_TOPLEFT;
 226                    s = s + 3;
 227                } else if (stristr(s, "\\a6") != NULL) {
 228                    current->alignment = SUB_ALIGNMENT_TOPCENTER;
 229                    s = s + 3;
 230                } else if (stristr(s, "\\a7") != NULL) {
 231                    current->alignment = SUB_ALIGNMENT_TOPRIGHT;
 232                    s = s + 3;
 233                } else if (stristr(s, "\\a9") != NULL) {
 234                    current->alignment = SUB_ALIGNMENT_MIDDLELEFT;
 235                    s = s + 3;
 236                } else if (stristr(s, "\\a10") != NULL) {
 237                    current->alignment = SUB_ALIGNMENT_MIDDLECENTER;
 238                    s = s + 4;
 239                } else if (stristr(s, "\\a11") != NULL) {
 240                    current->alignment = SUB_ALIGNMENT_MIDDLERIGHT;
 241                    s = s + 4;
 242                }
 243             }
 244             if (*s == '}') state = 3;
 245             ++s;
 246             continue;
 247         }
 248
 249         /* read next line */
 250         if (state != 99 && !(s = stream_read_line (st, line, LINE_LEN, utf16))) {
 251             if (current->start > 0) {
 252                 break; // if it is the last subtitle
 253             } else {
 254                 return 0;
 255             }
 256         }
 257
 258     } while (state != 99);
 259
 260     // For the last subtitle
 261     if (current->end <= 0) {
 262         current->end = current->start + sub_slacktime;
 263         sami_add_line(current, text, &p);
 264     }
 265
 266     return current;
 267 }
 268
 269
 270 static char *sub_readtext(char *source, char **dest) {
 271     int len=0;
 272     char *p=source;
 273
 274 //    printf("src=%p  dest=%p  \n",source,dest);
 275
 276     while ( !eol(*p) && *p!= '|' ) {
 277         p++,len++;
 278     }
 279
 280     *dest= malloc (len+1);
 281     if (!dest) {return ERR;}
 282
 283     strncpy(*dest, source, len);
 284     (*dest)[len]=0;
 285
 286     while (*p=='\r' || *p=='\n' || *p=='|') p++;
 287
 288     if (*p) return p;  // not-last text field
 289     else return NULL;  // last text field
 290 }
 291
 292 static subtitle *sub_read_line_microdvd(stream_t *st,subtitle *current,
 293                                         struct readline_args *args)
 294 {
 295     int utf16 = args->utf16;
 296     char line[LINE_LEN+1];
 297     char line2[LINE_LEN+1];
 298     char *p, *next;
 299     int i;
 300
 301     do {
 302         if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
 303     } while ((sscanf (line,
 304                       "{%ld}{}%[^\r\n]",
 305                       &(current->start), line2) < 2) &&
 306              (sscanf (line,
 307                       "{%ld}{%ld}%[^\r\n]",
 308                       &(current->start), &(current->end), line2) < 3));
 309
 310     if (args->opts->ass_enabled) {
 311         subassconvert_microdvd(line2, line, LINE_LEN + 1);
 312         p = line;
 313     } else
 314         p = line2;
 315
 316     next=p, i=0;
 317     while ((next =sub_readtext (next, &(current->text[i])))) {
 318         if (current->text[i]==ERR) {return ERR;}
 319         i++;
 320         if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
 321     }
 322     current->lines= ++i;
 323
 324     return current;
 325 }
 326
 327 static subtitle *sub_read_line_mpl2(stream_t *st,subtitle *current,
 328                                     struct readline_args *args)
 329 {
 330     int utf16 = args->utf16;
 331     char line[LINE_LEN+1];
 332     char line2[LINE_LEN+1];
 333     char *p, *next;
 334     int i;
 335
 336     do {
 337         if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
 338     } while ((sscanf (line,
 339                       "[%ld][%ld]%[^\r\n]",
 340                       &(current->start), &(current->end), line2) < 3));
 341     current->start *= 10;
 342     current->end *= 10;
 343     p=line2;
 344
 345     next=p, i=0;
 346     while ((next =sub_readtext (next, &(current->text[i])))) {
 347         if (current->text[i]==ERR) {return ERR;}
 348         i++;
 349         if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
 350     }
 351     current->lines= ++i;
 352
 353     return current;
 354 }
 355
 356 static subtitle *sub_read_line_subrip(stream_t* st, subtitle *current,
 357                                     struct readline_args *args)
 358 {
 359     int utf16 = args->utf16;
 360     char line[LINE_LEN+1];
 361     int a1,a2,a3,a4,b1,b2,b3,b4;
 362     char *p=NULL, *q=NULL;
 363     int len;
 364
 365     while (1) {
 366         if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
 367         if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) continue;
 368         current->start = a1*360000+a2*6000+a3*100+a4;
 369         current->end   = b1*360000+b2*6000+b3*100+b4;
 370
 371         if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
 372
 373         p=q=line;
 374         for (current->lines=1; current->lines < SUB_MAX_TEXT; current->lines++) {
 375             for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' && strncmp(p,"[br]",4); p++,len++);
 376             current->text[current->lines-1]=malloc (len+1);
 377             if (!current->text[current->lines-1]) return ERR;
 378             strncpy (current->text[current->lines-1], q, len);
 379             current->text[current->lines-1][len]='\0';
 380             if (!*p || *p=='\r' || *p=='\n') break;
 381             if (*p=='|') p++;
 382             else while (*p++!=']');
 383         }
 384         break;
 385     }
 386     return current;
 387 }
 388
 389 static subtitle *sub_ass_read_line_subviewer(stream_t *st, subtitle *current,
 390                                              struct readline_args *args)
 391 {
 392     int utf16 = args->utf16;
 393     int a1, a2, a3, a4, b1, b2, b3, b4, j = 0;
 394
 395     while (!current->text[0]) {
 396         char line[LINE_LEN + 1], full_line[LINE_LEN + 1], sep;
 397         int i;
 398
 399         /* Parse SubRip header */
 400         if (!stream_read_line(st, line, LINE_LEN, utf16))
 401             return NULL;
 402         if (sscanf(line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",
 403                      &a1, &a2, &a3, &sep, &a4, &b1, &b2, &b3, &sep, &b4) < 10)
 404             continue;
 405
 406         current->start = a1 * 360000 + a2 * 6000 + a3 * 100 + a4 / 10;
 407         current->end   = b1 * 360000 + b2 * 6000 + b3 * 100 + b4 / 10;
 408
 409         /* Concat lines */
 410         full_line[0] = 0;
 411         for (i = 0; i < SUB_MAX_TEXT; i++) {
 412             int blank = 1, len = 0;
 413             char *p;
 414
 415             if (!stream_read_line(st, line, LINE_LEN, utf16))
 416                 break;
 417
 418             for (p = line; *p != '\n' && *p != '\r' && *p; p++, len++)
 419                 if (*p != ' ' && *p != '\t')
 420                     blank = 0;
 421
 422             if (blank)
 423                 break;
 424
 425             *p = 0;
 426
 427             if (!(j + 1 + len < sizeof(full_line) - 1))
 428                 break;
 429
 430             if (j != 0)
 431                 full_line[j++] = '\n';
 432             strcpy(&full_line[j], line);
 433             j += len;
 434         }
 435
 436         /* Use the ASS/SSA converter to transform the whole lines */
 437         if (full_line[0]) {
 438             char converted_line[LINE_LEN + 1];
 439             subassconvert_subrip(full_line, converted_line, LINE_LEN + 1);
 440             current->text[0] = strdup(converted_line);
 441             current->lines = 1;
 442         }
 443     }
 444     return current;
 445 }
 446
 447 static subtitle *sub_read_line_subviewer(stream_t *st,subtitle *current,
 448                                          struct readline_args *args)
 449 {
 450     int utf16 = args->utf16;
 451     char line[LINE_LEN+1];
 452     int a1,a2,a3,a4,b1,b2,b3,b4;
 453     char *p=NULL;
 454     int i,len;
 455
 456     if (args->opts->ass_enabled)
 457         return sub_ass_read_line_subviewer(st, current, args);
 458     while (!current->text[0]) {
 459         if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
 460         if ((len=sscanf (line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d",&a1,&a2,&a3,(char *)&i,&a4,&b1,&b2,&b3,(char *)&i,&b4)) < 10)
 461             continue;
 462         current->start = a1*360000+a2*6000+a3*100+a4/10;
 463         current->end   = b1*360000+b2*6000+b3*100+b4/10;
 464         for (i=0; i<SUB_MAX_TEXT;) {
 465             int blank = 1;
 466             if (!stream_read_line (st, line, LINE_LEN, utf16)) break;
 467             len=0;
 468             for (p=line; *p!='\n' && *p!='\r' && *p; p++,len++)
 469                 if (*p != ' ' && *p != '\t')
 470                     blank = 0;
 471             if (len && !blank) {
 472                 int j=0,skip=0;
 473                 char *curptr=current->text[i]=malloc (len+1);
 474                 if (!current->text[i]) return ERR;
 475                 //strncpy (current->text[i], line, len); current->text[i][len]='\0';
 476                 for(; j<len; j++) {
 477                     /* let's filter html tags ::atmos */
 478                     if(line[j]=='>') {
 479                         skip=0;
 480                         continue;
 481                     }
 482                     if(line[j]=='<') {
 483                         skip=1;
 484                         continue;
 485                     }
 486                     if(skip) {
 487                         continue;
 488                     }
 489                     *curptr=line[j];
 490                     curptr++;
 491                 }
 492                 *curptr='\0';
 493
 494                 i++;
 495             } else {
 496                 break;
 497             }
 498         }
 499         current->lines=i;
 500     }
 501     return current;
 502 }
 503
 504 static subtitle *sub_read_line_subviewer2(stream_t *st,subtitle *current,
 505                                           struct readline_args *args)
 506 {
 507     int utf16 = args->utf16;
 508     char line[LINE_LEN+1];
 509     int a1,a2,a3,a4;
 510     char *p=NULL;
 511     int i,len;
 512
 513     while (!current->text[0]) {
 514         if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
 515         if (line[0]!='{')
 516             continue;
 517         if ((len=sscanf (line, "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4)) < 4)
 518             continue;
 519         current->start = a1*360000+a2*6000+a3*100+a4/10;
 520         for (i=0; i<SUB_MAX_TEXT;) {
 521             if (!stream_read_line (st, line, LINE_LEN, utf16)) break;
 522             if (line[0]=='}') break;
 523             len=0;
 524             for (p=line; *p!='\n' && *p!='\r' && *p; ++p,++len);
 525             if (len) {
 526                 current->text[i]=malloc (len+1);
 527                 if (!current->text[i]) return ERR;
 528                 strncpy (current->text[i], line, len); current->text[i][len]='\0';
 529                 ++i;
 530             } else {
 531                 break;
 532             }
 533         }
 534         current->lines=i;
 535     }
 536     return current;
 537 }
 538
 539
 540 static subtitle *sub_read_line_vplayer(stream_t *st,subtitle *current,
 541                                        struct readline_args *args)
 542 {
 543         int utf16 = args->utf16;
 544         char line[LINE_LEN+1];
 545         int a1,a2,a3;
 546         char *p=NULL, *next,separator;
 547         int i,len,plen;
 548
 549         while (!current->text[0]) {
 550                 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
 551                 if ((len=sscanf (line, "%d:%d:%d%c%n",&a1,&a2,&a3,&separator,&plen)) < 4)
 552                         continue;
 553
 554                 if (!(current->start = a1*360000+a2*6000+a3*100))
 555                         continue;
 556                 /* removed by wodzu
 557                 p=line;
 558                 // finds the body of the subtitle
 559                 for (i=0; i<3; i++){
 560                    p=strchr(p,':');
 561                    if (p==NULL) break;
 562                    ++p;
 563                 }
 564                 if (p==NULL) {
 565                     printf("SUB: Skipping incorrect subtitle line!\n");
 566                     continue;
 567                 }
 568                 */
 569                 // by wodzu: hey! this time we know what length it has! what is
 570                 // that magic for? it can't deal with space instead of third
 571                 // colon! look, what simple it can be:
 572                 p = &line[ plen ];
 573
 574                 i=0;
 575                 if (*p!='|') {
 576                         //
 577                         next = p,i=0;
 578                         while ((next =sub_readtext (next, &(current->text[i])))) {
 579                                 if (current->text[i]==ERR) {return ERR;}
 580                                 i++;
 581                                 if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
 582                         }
 583                         current->lines=i+1;
 584                 }
 585         }
 586         return current;
 587 }
 588
 589 static subtitle *sub_read_line_rt(stream_t *st,subtitle *current,
 590                                     struct readline_args *args)
 591 {
 592     int utf16 = args->utf16;
 593
 594         //TODO: This format uses quite rich (sub/super)set of xhtml
 595         // I couldn't check it since DTD is not included.
 596         // WARNING: full XML parses can be required for proper parsing
 597     char line[LINE_LEN+1];
 598     int a1,a2,a3,a4,b1,b2,b3,b4;
 599     char *p=NULL,*next=NULL;
 600     int i,len,plen;
 601
 602     while (!current->text[0]) {
 603         if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
 604         //TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
 605         //to describe the same moment in time. Maybe there are even more formats in use.
 606         //if ((len=sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8)
 607         plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
 608         if (
 609         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d.%d\" %*[Ee]nd=\"%d.%d\"%*[^<]<clear/>%n",&a3,&a4,&b3,&b4,&plen)) < 4) &&
 610         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a3,&a4,&b2,&b3,&b4,&plen)) < 5) &&
 611         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&plen)) < 4) &&
 612         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&b4,&plen)) < 5) &&
 613 //      ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen)) < 5) &&
 614         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&b4,&plen)) < 6) &&
 615         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen)) < 8) &&
 616         //now try it without end time
 617         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d.%d\"%*[^<]<clear/>%n",&a3,&a4,&plen)) < 2) &&
 618         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&plen)) < 2) &&
 619         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&plen)) < 3) &&
 620         ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&plen)) < 4)
 621         )
 622             continue;
 623         current->start = a1*360000+a2*6000+a3*100+a4/10;
 624         current->end   = b1*360000+b2*6000+b3*100+b4/10;
 625         if (b1 == 0 && b2 == 0 && b3 == 0 && b4 == 0)
 626           current->end = current->start+200;
 627         p=line; p+=plen;i=0;
 628         // TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml?
 629         next = strstr(line,"<clear/>");
 630         if(next && strlen(next)>8){
 631           next+=8;i=0;
 632           while ((next =sub_readtext (next, &(current->text[i])))) {
 633                 if (current->text[i]==ERR) {return ERR;}
 634                 i++;
 635                 if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
 636           }
 637         }
 638                         current->lines=i+1;
 639     }
 640     return current;
 641 }
 642
 643 static subtitle *sub_read_line_ssa(stream_t *st,subtitle *current,
 644                                     struct readline_args *args)
 645 {
 646 /*
 647  * Sub Station Alpha v4 (and v2?) scripts have 9 commas before subtitle
 648  * other Sub Station Alpha scripts have only 8 commas before subtitle
 649  * Reading the "ScriptType:" field is not reliable since many scripts appear
 650  * w/o it
 651  *
 652  * http://www.scriptclub.org is a good place to find more examples
 653  * http://www.eswat.demon.co.uk is where the SSA specs can be found
 654  */
 655         int utf16 = args->utf16;
 656         int comma;
 657         static int max_comma = 32; /* let's use 32 for the case that the */
 658                     /*  amount of commas increase with newer SSA versions */
 659
 660         int hour1, min1, sec1, hunsec1,
 661             hour2, min2, sec2, hunsec2, nothing;
 662         int num;
 663
 664         char line[LINE_LEN+1],
 665              line3[LINE_LEN+1],
 666              *line2;
 667         char *tmp;
 668
 669         do {
 670                 if (!stream_read_line (st, line, LINE_LEN, utf16)) return NULL;
 671         } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d"
 672                         "%[^\n\r]", &nothing,
 673                         &hour1, &min1, &sec1, &hunsec1,
 674                         &hour2, &min2, &sec2, &hunsec2,
 675                         line3) < 9
 676                  &&
 677                  sscanf (line, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d"
 678                          "%[^\n\r]", &nothing,
 679                          &hour1, &min1, &sec1, &hunsec1,
 680                          &hour2, &min2, &sec2, &hunsec2,
 681                          line3) < 9         );
 682
 683         line2=strchr(line3, ',');
 684         if (!line2) return NULL;
 685
 686         for (comma = 4; comma < max_comma; comma ++)
 687           {
 688             tmp = line2;
 689             if(!(tmp=strchr(++tmp, ','))) break;
 690             if(*(++tmp) == ' ') break;
 691                   /* a space after a comma means we're already in a sentence */
 692             line2 = tmp;
 693           }
 694
 695         if(comma < max_comma)max_comma = comma;
 696         /* eliminate the trailing comma */
 697         if(*line2 == ',') line2++;
 698
 699         current->lines=0;num=0;
 700         current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
 701         current->end   = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
 702
 703         while (((tmp=strstr(line2, "\\n")) != NULL) || ((tmp=strstr(line2, "\\N")) != NULL) ){
 704                 current->text[num]=malloc(tmp-line2+1);
 705                 strncpy (current->text[num], line2, tmp-line2);
 706                 current->text[num][tmp-line2]='\0';
 707                 line2=tmp+2;
 708                 num++;
 709                 current->lines++;
 710                 if (current->lines >=  SUB_MAX_TEXT) return current;
 711         }
 712
 713         current->text[num]=strdup(line2);
 714         current->lines++;
 715
 716         return current;
 717 }
 718
 719 static void sub_pp_ssa(subtitle *sub) {
 720         int l=sub->lines;
 721         char *so,*de,*start;
 722
 723         while (l){
 724                 /* eliminate any text enclosed with {}, they are font and color settings */
 725                 so=de=sub->text[--l];
 726                 while (*so) {
 727                         if(*so == '{' && so[1]=='\\') {
 728                                 for (start=so; *so && *so!='}'; so++);
 729                                 if(*so) so++; else so=start;
 730                         }
 731                         if(*so) {
 732                                 *de=*so;
 733                                 so++; de++;
 734                         }
 735                 }
 736                 *de=*so;
 737         }
 738 }
 739
 740 /*
 741  * PJS subtitles reader.
 742  * That's the "Phoenix Japanimation Society" format.
 743  * I found some of them in http://www.scriptsclub.org/ (used for anime).
 744  * The time is in tenths of second.
 745  *
 746  * by set, based on code by szabi (dunnowhat sub format ;-)
 747  */
 748 static subtitle *sub_read_line_pjs(stream_t *st,subtitle *current,
 749                                    struct readline_args *args)
 750 {
 751     int utf16 = args->utf16;
 752     char line[LINE_LEN+1];
 753     char text[LINE_LEN+1], *s, *d;
 754
 755     if (!stream_read_line (st, line, LINE_LEN, utf16))
 756         return NULL;
 757     /* skip spaces */
 758     for (s=line; *s && isspace(*s); s++);
 759     /* allow empty lines at the end of the file */
 760     if (*s==0)
 761         return NULL;
 762     /* get the time */
 763     if (sscanf (s, "%ld,%ld,", &(current->start),
 764                 &(current->end)) <2) {
 765         return ERR;
 766     }
 767     /* the files I have are in tenths of second */
 768     current->start *= 10;
 769     current->end *= 10;
 770     /* walk to the beggining of the string */
 771     for (; *s; s++) if (*s==',') break;
 772     if (*s) {
 773         for (s++; *s; s++) if (*s==',') break;
 774         if (*s) s++;
 775     }
 776     if (*s!='"') {
 777         return ERR;
 778     }
 779     /* copy the string to the text buffer */
 780     for (s++, d=text; *s && *s!='"'; s++, d++)
 781         *d=*s;
 782     *d=0;
 783     current->text[0] = strdup(text);
 784     current->lines = 1;
 785
 786     return current;
 787 }
 788
 789 static subtitle *sub_read_line_mpsub(stream_t *st, subtitle *current,
 790                                      struct readline_args *args)
 791 {
 792         int utf16 = args->utf16;
 793         char line[LINE_LEN+1];
 794         float a,b;
 795         int num=0;
 796         char *p, *q;
 797
 798         do
 799         {
 800                 if (!stream_read_line(st, line, LINE_LEN, utf16)) return NULL;
 801         } while (sscanf (line, "%f %f", &a, &b) !=2);
 802
 803         mpsub_position += a*mpsub_multiplier;
 804         current->start=(int) mpsub_position;
 805         mpsub_position += b*mpsub_multiplier;
 806         current->end=(int) mpsub_position;
 807
 808         while (num < SUB_MAX_TEXT) {
 809                 if (!stream_read_line (st, line, LINE_LEN, utf16)) {
 810                         if (num == 0) return NULL;
 811                         else return current;
 812                 }
 813                 p=line;
 814                 while (isspace(*p)) p++;
 815                 if (eol(*p) && num > 0) return current;
 816                 if (eol(*p)) return NULL;
 817
 818                 for (q=p; !eol(*q); q++);
 819                 *q='\0';
 820                 if (strlen(p)) {
 821                         current->text[num]=strdup(p);
 822 //                      printf (">%s<\n",p);
 823                         current->lines = ++num;
 824                 } else {
 825                         if (num) return current;
 826                         else return NULL;
 827                 }
 828         }
 829         return NULL; // we should have returned before if it's OK
 830 }
 831
 832 #ifndef CONFIG_SORTSUB
 833 //we don't need this if we use previous_sub_end
 834 subtitle *previous_aqt_sub = NULL;
 835 #endif
 836
 837 static subtitle *sub_read_line_aqt(stream_t *st,subtitle *current,
 838                                    struct readline_args *args)
 839 {
 840     int utf16 = args->utf16;
 841     char line[LINE_LEN+1];
 842     char *next;
 843     int i;
 844
 845     while (1) {
 846     // try to locate next subtitle
 847         if (!stream_read_line (st, line, LINE_LEN, utf16))
 848                 return NULL;
 849         if (!(sscanf (line, "-->> %ld", &(current->start)) <1))
 850                 break;
 851     }
 852
 853 #ifdef CONFIG_SORTSUB
 854     previous_sub_end = (current->start) ? current->start - 1 : 0;
 855 #else
 856     if (previous_aqt_sub != NULL)
 857         previous_aqt_sub->end = current->start-1;
 858
 859     previous_aqt_sub = current;
 860 #endif
 861
 862     if (!stream_read_line (st, line, LINE_LEN, utf16))
 863         return NULL;
 864
 865     sub_readtext((char *) &line,&current->text[0]);
 866     current->lines = 1;
 867     current->end = current->start; // will be corrected by next subtitle
 868
 869     if (!stream_read_line (st, line, LINE_LEN, utf16))
 870         return current;
 871
 872     next = line,i=1;
 873     while ((next =sub_readtext (next, &(current->text[i])))) {
 874         if (current->text[i]==ERR) {return ERR;}
 875         i++;
 876         if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
 877         }
 878     current->lines=i+1;
 879
 880     if (!strlen(current->text[0]) && !strlen(current->text[1])) {
 881 #ifdef CONFIG_SORTSUB
 882         previous_sub_end = 0;
 883 #else
 884         // void subtitle -> end of previous marked and exit
 885         previous_aqt_sub = NULL;
 886 #endif
 887         return NULL;
 888         }
 889
 890     return current;
 891 }
 892
 893 #ifndef CONFIG_SORTSUB
 894 subtitle *previous_subrip09_sub = NULL;
 895 #endif
 896
 897 static subtitle *sub_read_line_subrip09(stream_t *st,subtitle *current,
 898                                     struct readline_args *args)
 899 {
 900     int utf16 = args->utf16;
 901     char line[LINE_LEN+1];
 902     int a1,a2,a3;
 903     char * next=NULL;
 904     int i,len;
 905
 906     while (1) {
 907     // try to locate next subtitle
 908         if (!stream_read_line (st, line, LINE_LEN, utf16))
 909                 return NULL;
 910         if (!((len=sscanf (line, "[%d:%d:%d]",&a1,&a2,&a3)) < 3))
 911                 break;
 912     }
 913
 914     current->start = a1*360000+a2*6000+a3*100;
 915
 916 #ifdef CONFIG_SORTSUB
 917     previous_sub_end = (current->start) ? current->start - 1 : 0;
 918 #else
 919     if (previous_subrip09_sub != NULL)
 920         previous_subrip09_sub->end = current->start-1;
 921
 922     previous_subrip09_sub = current;
 923 #endif
 924
 925     if (!stream_read_line (st, line, LINE_LEN, utf16))
 926         return NULL;
 927
 928     next = line,i=0;
 929
 930     current->text[0]=""; // just to be sure that string is clear
 931
 932     while ((next =sub_readtext (next, &(current->text[i])))) {
 933         if (current->text[i]==ERR) {return ERR;}
 934         i++;
 935         if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
 936         }
 937     current->lines=i+1;
 938
 939     if (!strlen(current->text[0]) && (i==0)) {
 940 #ifdef CONFIG_SORTSUB
 941         previous_sub_end = 0;
 942 #else
 943         // void subtitle -> end of previous marked and exit
 944         previous_subrip09_sub = NULL;
 945 #endif
 946         return NULL;
 947         }
 948
 949     return current;
 950 }
 951
 952 static subtitle *sub_read_line_jacosub(stream_t* st, subtitle * current,
 953                                        struct readline_args *args)
 954 {
 955     int utf16 = args->utf16;
 956     char line1[LINE_LEN], line2[LINE_LEN], directive[LINE_LEN], *p, *q;
 957     unsigned a1, a2, a3, a4, b1, b2, b3, b4, comment = 0;
 958     static unsigned jacoTimeres = 30;
 959     static int jacoShift = 0;
 960
 961     memset(current, 0, sizeof(subtitle));
 962     memset(line1, 0, LINE_LEN);
 963     memset(line2, 0, LINE_LEN);
 964     memset(directive, 0, LINE_LEN);
 965     while (!current->text[0]) {
 966         if (!stream_read_line(st, line1, LINE_LEN, utf16)) {
 967             return NULL;
 968         }
 969         if (sscanf
 970             (line1, "%u:%u:%u.%u %u:%u:%u.%u %[^\n\r]", &a1, &a2, &a3, &a4,
 971              &b1, &b2, &b3, &b4, line2) < 9) {
 972             if (sscanf(line1, "@%u @%u %[^\n\r]", &a4, &b4, line2) < 3) {
 973                 if (line1[0] == '#') {
 974                     int hours = 0, minutes = 0, seconds, delta, inverter =
 975                         1;
 976                     unsigned units = jacoShift;
 977                     switch (toupper(line1[1])) {
 978                     case 'S':
 979                         if (isalpha(line1[2])) {
 980                             delta = 6;
 981                         } else {
 982                             delta = 2;
 983                         }
 984                         if (sscanf(&line1[delta], "%d", &hours)) {
 985                             if (hours < 0) {
 986                                 hours *= -1;
 987                                 inverter = -1;
 988                             }
 989                             if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
 990                                 if (sscanf
 991                                     (&line1[delta], "%*d:%*d:%d",
 992                                      &seconds)) {
 993                                     sscanf(&line1[delta], "%*d:%*d:%*d.%d",
 994                                            &units);
 995                                 } else {
 996                                     hours = 0;
 997                                     sscanf(&line1[delta], "%d:%d.%d",
 998                                            &minutes, &seconds, &units);
 999                                     minutes *= inverter;
1000                                 }
1001                             } else {
1002                                 hours = minutes = 0;
1003                                 sscanf(&line1[delta], "%d.%d", &seconds,
1004                                        &units);
1005                                 seconds *= inverter;
1006                             }
1007                             jacoShift =
1008                                 ((hours * 3600 + minutes * 60 +
1009                                   seconds) * jacoTimeres +
1010                                  units) * inverter;
1011                         }
1012                         break;
1013                     case 'T':
1014                         if (isalpha(line1[2])) {
1015                             delta = 8;
1016                         } else {
1017                             delta = 2;
1018                         }
1019                         sscanf(&line1[delta], "%u", &jacoTimeres);
1020                         break;
1021                     }
1022                 }
1023                 continue;
1024             } else {
1025                 current->start =
1026                     (unsigned long) ((a4 + jacoShift) * 100.0 /
1027                                      jacoTimeres);
1028                 current->end =
1029                     (unsigned long) ((b4 + jacoShift) * 100.0 /
1030                                      jacoTimeres);
1031             }
1032         } else {
1033             current->start =
1034                 (unsigned
1035                  long) (((a1 * 3600 + a2 * 60 + a3) * jacoTimeres + a4 +
1036                          jacoShift) * 100.0 / jacoTimeres);
1037             current->end =
1038                 (unsigned
1039                  long) (((b1 * 3600 + b2 * 60 + b3) * jacoTimeres + b4 +
1040                          jacoShift) * 100.0 / jacoTimeres);
1041         }
1042         current->lines = 0;
1043         p = line2;
1044         while ((*p == ' ') || (*p == '\t')) {
1045             ++p;
1046         }
1047         if (isalpha(*p)||*p == '[') {
1048             int cont, jLength;
1049
1050             if (sscanf(p, "%s %[^\n\r]", directive, line1) < 2)
1051                 return (subtitle *) ERR;
1052             jLength = strlen(directive);
1053             for (cont = 0; cont < jLength; ++cont) {
1054                 if (isalpha(*(directive + cont)))
1055                     *(directive + cont) = toupper(*(directive + cont));
1056             }
1057             if ((strstr(directive, "RDB") != NULL)
1058                 || (strstr(directive, "RDC") != NULL)
1059                 || (strstr(directive, "RLB") != NULL)
1060                 || (strstr(directive, "RLG") != NULL)) {
1061                 continue;
1062             }
1063             if (strstr(directive, "JL") != NULL) {
1064                 current->alignment = SUB_ALIGNMENT_BOTTOMLEFT;
1065             } else if (strstr(directive, "JR") != NULL) {
1066                 current->alignment = SUB_ALIGNMENT_BOTTOMRIGHT;
1067             } else {
1068                 current->alignment = SUB_ALIGNMENT_BOTTOMCENTER;
1069             }
1070             strcpy(line2, line1);
1071             p = line2;
1072         }
1073         for (q = line1; (!eol(*p)) && (current->lines < SUB_MAX_TEXT); ++p) {
1074             switch (*p) {
1075             case '{':
1076                 comment++;
1077                 break;
1078             case '}':
1079                 if (comment) {
1080                     --comment;
1081                     //the next line to get rid of a blank after the comment
1082                     if ((*(p + 1)) == ' ')
1083                         p++;
1084                 }
1085                 break;
1086             case '~':
1087                 if (!comment) {
1088                     *q = ' ';
1089                     ++q;
1090                 }
1091                 break;
1092             case ' ':
1093             case '\t':
1094                 if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
1095                     break;
1096                 if (!comment) {
1097                     *q = ' ';
1098                     ++q;
1099                 }
1100                 break;
1101             case '\\':
1102                 if (*(p + 1) == 'n') {
1103                     *q = '\0';
1104                     q = line1;
1105                     current->text[current->lines++] = strdup(line1);
1106                     ++p;
1107                     break;
1108                 }
1109                 if ((toupper(*(p + 1)) == 'C')
1110                     || (toupper(*(p + 1)) == 'F')) {
1111                     ++p,++p;
1112                     break;
1113                 }
1114                 if ((*(p + 1) == 'B') || (*(p + 1) == 'b') || (*(p + 1) == 'D') ||      //actually this means "insert current date here"
1115                     (*(p + 1) == 'I') || (*(p + 1) == 'i') || (*(p + 1) == 'N') || (*(p + 1) == 'T') || //actually this means "insert current time here"
1116                     (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
1117                     ++p;
1118                     break;
1119                 }
1120                 if ((*(p + 1) == '\\') ||
1121                     (*(p + 1) == '~') || (*(p + 1) == '{')) {
1122                     ++p;
1123                 } else if (eol(*(p + 1))) {
1124                     if (!stream_read_line(st, directive, LINE_LEN, utf16))
1125                         return NULL;
1126                     trail_space(directive);
1127                     av_strlcat(line2, directive, LINE_LEN);
1128                     break;
1129                 }
1130             default:
1131                 if (!comment) {
1132                     *q = *p;
1133                     ++q;
1134                 }
1135             }                   //-- switch
1136         }                       //-- for
1137         *q = '\0';
1138         current->text[current->lines] = strdup(line1);
1139     }                           //-- while
1140     current->lines++;
1141     return current;
1142 }
1143
1144 static int sub_autodetect (stream_t* st, int *uses_time, int utf16) {
1145     char line[LINE_LEN+1];
1146     int i,j=0;
1147
1148     while (j < 100) {
1149         j++;
1150         if (!stream_read_line (st, line, LINE_LEN, utf16))
1151             return SUB_INVALID;
1152
1153         if (sscanf (line, "{%d}{%d}", &i, &i)==2)
1154                 {*uses_time=0;return SUB_MICRODVD;}
1155         if (sscanf (line, "{%d}{}", &i)==1)
1156                 {*uses_time=0;return SUB_MICRODVD;}
1157         if (sscanf (line, "[%d][%d]", &i, &i)==2)
1158                 {*uses_time=1;return SUB_MPL2;}
1159         if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",     &i, &i, &i, &i, &i, &i, &i, &i)==8)
1160                 {*uses_time=1;return SUB_SUBRIP;}
1161         if (sscanf (line, "%d:%d:%d%[,.:]%d --> %d:%d:%d%[,.:]%d", &i, &i, &i, (char *)&i, &i, &i, &i, &i, (char *)&i, &i)==10)
1162                 {*uses_time=1;return SUB_SUBVIEWER;}
1163         if (sscanf (line, "{T %d:%d:%d:%d",&i, &i, &i, &i)==4)
1164                 {*uses_time=1;return SUB_SUBVIEWER2;}
1165         if (strstr (line, "<SAMI>"))
1166                 {*uses_time=1; return SUB_SAMI;}
1167         if (sscanf(line, "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8)
1168                 {*uses_time = 1; return SUB_JACOSUB;}
1169         if (sscanf(line, "@%d @%d", &i, &i) == 2)
1170                 {*uses_time = 1; return SUB_JACOSUB;}
1171         if (sscanf (line, "%d:%d:%d:",     &i, &i, &i )==3)
1172                 {*uses_time=1;return SUB_VPLAYER;}
1173         if (sscanf (line, "%d:%d:%d ",     &i, &i, &i )==3)
1174                 {*uses_time=1;return SUB_VPLAYER;}
1175         if (!strncasecmp(line, "<window", 7))
1176                 {*uses_time=1;return SUB_RT;}
1177         if (!memcmp(line, "Dialogue: Marked", 16))
1178                 {*uses_time=1; return SUB_SSA;}
1179         if (!memcmp(line, "Dialogue: ", 10))
1180                 {*uses_time=1; return SUB_SSA;}
1181         if (sscanf (line, "%d,%d,\"%c", &i, &i, (char *) &i) == 3)
1182                 {*uses_time=1;return SUB_PJS;}
1183         if (sscanf (line, "FORMAT=%d", &i) == 1)
1184                 {*uses_time=0; return SUB_MPSUB;}
1185         if (!memcmp(line, "FORMAT=TIME", 11))
1186                 {*uses_time=1; return SUB_MPSUB;}
1187         if (strstr (line, "-->>"))
1188                 {*uses_time=0; return SUB_AQTITLE;}
1189         if (sscanf (line, "[%d:%d:%d]", &i, &i, &i)==3)
1190                 {*uses_time=1;return SUB_SUBRIP09;}
1191     }
1192
1193     return SUB_INVALID;  // too many bad lines
1194 }
1195
// Encoding state of the subtitle text, defined elsewhere.  In this file it
// is set to 1 when the file name ends in .utf/.utf8/.utf-8 and to 2 when an
// iconv descriptor has been opened; lines are recoded only while it is 2.
extern int sub_utf8;
// Copy of sub_utf8 taken by sub_read_file() before it changes the setting,
// so that the previous value can be restored.
int sub_utf8_prev=0;

extern float sub_delay;
// When non-zero, adjust_subs_time() rescales time based subtitles by
// sub_fps / fps.
extern float sub_fps;
1201
1202 #ifdef CONFIG_ICONV
1203 static iconv_t icdsc = (iconv_t)(-1);
1204
1205 void    subcp_open (stream_t *st)
1206 {
1207         char *tocp = "UTF-8";
1208
1209         if (sub_cp){
1210                 const char *cp_tmp = sub_cp;
1211 #ifdef CONFIG_ENCA
1212                 char enca_lang[3], enca_fallback[100];
1213                 if (sscanf(sub_cp, "enca:%2s:%99s", enca_lang, enca_fallback) == 2
1214                      || sscanf(sub_cp, "ENCA:%2s:%99s", enca_lang, enca_fallback) == 2) {
1215                   if (st && st->flags & MP_STREAM_SEEK ) {
1216                     cp_tmp = guess_cp(st, enca_lang, enca_fallback);
1217                   } else {
1218                     cp_tmp = enca_fallback;
1219                     if (st)
1220                       mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: enca failed, stream must be seekable.\n");
1221                   }
1222                 }
1223 #endif
1224                 if ((icdsc = iconv_open (tocp, cp_tmp)) != (iconv_t)(-1)){
1225                         mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n");
1226                         sub_utf8 = 2;
1227                 } else
1228                         mp_msg(MSGT_SUBREADER,MSGL_ERR,"SUB: error opening iconv descriptor.\n");
1229         }
1230 }
1231
1232 void    subcp_close (void)
1233 {
1234         if (icdsc != (iconv_t)(-1)){
1235                 (void) iconv_close (icdsc);
1236                 icdsc = (iconv_t)(-1);
1237                 mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: closed iconv descriptor.\n");
1238         }
1239 }
1240
1241 subtitle* subcp_recode (subtitle *sub)
1242 {
1243         int l=sub->lines;
1244         size_t ileft, oleft;
1245         char *op, *ip, *ot;
1246         if(icdsc == (iconv_t)(-1)) return sub;
1247
1248         while (l){
1249                 ip = sub->text[--l];
1250                 ileft = strlen(ip);
1251                 oleft = 4 * ileft;
1252
1253                 if (!(ot = malloc(oleft + 1))){
1254                         mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error allocating mem.\n");
1255                         continue;
1256                 }
1257                 op = ot;
1258                 if (iconv(icdsc, &ip, &ileft,
1259                           &op, &oleft) == (size_t)(-1)) {
1260                         mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error recoding line.\n");
1261                         free(ot);
1262                         continue;
1263                 }
1264                 // In some stateful encodings, we must clear the state to handle the last character
1265                 if (iconv(icdsc, NULL, NULL,
1266                           &op, &oleft) == (size_t)(-1)) {
1267                         mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error recoding line, can't clear encoding state.\n");
1268                 }
1269                 *op='\0' ;
1270                 free (sub->text[l]);
1271                 sub->text[l] = ot;
1272         }
1273         return sub;
1274 }
1275 #endif
1276
1277 #ifdef CONFIG_FRIBIDI
1278 /**
1279  * Do conversion necessary for right-to-left language support via fribidi.
1280  * @param sub subtitle to convert
1281  * @param sub_utf8 whether the subtitle is encoded in UTF-8
1282  * @param from first new subtitle, all lines before this are assumed to be already converted
1283  */
1284 static subtitle* sub_fribidi (subtitle *sub, int sub_utf8, int from)
1285 {
1286   FriBidiChar logical[LINE_LEN+1], visual[LINE_LEN+1]; // Hopefully these two won't smash the stack
1287   char        *ip      = NULL, *op     = NULL;
1288   size_t len,orig_len;
1289   int l=sub->lines;
1290   int char_set_num;
1291   fribidi_boolean log2vis;
1292   if (!flip_hebrew)
1293     return sub;
1294   fribidi_set_mirroring(1);
1295   fribidi_set_reorder_nsm(0);
1296
1297   if( sub_utf8 == 0 ) {
1298     char_set_num = fribidi_parse_charset (fribidi_charset?fribidi_charset:"ISO8859-8");
1299   }else {
1300     char_set_num = fribidi_parse_charset ("UTF-8");
1301   }
1302   while (l > from) {
1303     ip = sub->text[--l];
1304     orig_len = len = strlen( ip ); // We assume that we don't use full unicode, only UTF-8 or ISO8859-x
1305     if(len > LINE_LEN) {
1306       mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: sub->text is longer than LINE_LEN.\n");
1307       l++;
1308       break;
1309     }
1310     len = fribidi_charset_to_unicode (char_set_num, ip, len, logical);
1311 #if FRIBIDI_INTERFACE_VERSION < 3
1312     FriBidiCharType base = fribidi_flip_commas?FRIBIDI_TYPE_ON:FRIBIDI_TYPE_L;
1313 #else
1314     FriBidiParType base = fribidi_flip_commas?FRIBIDI_TYPE_ON:FRIBIDI_TYPE_L;
1315 #endif
1316     log2vis = fribidi_log2vis (logical, len, &base,
1317                                /* output */
1318                                visual, NULL, NULL, NULL);
1319     if(log2vis) {
1320       len = fribidi_remove_bidi_marks (visual, len, NULL, NULL,
1321                                        NULL);
1322       if((op = malloc((FFMAX(2*orig_len,2*len) + 1))) == NULL) {
1323         mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error allocating mem.\n");
1324         l++;
1325         break;
1326       }
1327       fribidi_unicode_to_charset ( char_set_num, visual, len,op);
1328       free (ip);
1329       sub->text[l] = op;
1330     }
1331   }
1332   if (!from && l){
1333     for (l = sub->lines; l;)
1334       free (sub->text[--l]);
1335     return ERR;
1336   }
1337   return sub;
1338 }
1339
1340 #endif
1341
1342 static void adjust_subs_time(subtitle* sub, float subtime, float fps, int block,
1343                              int sub_num, int sub_uses_time) {
1344         int n,m;
1345         subtitle* nextsub;
1346         int i = sub_num;
1347         unsigned long subfms = (sub_uses_time ? 100 : fps) * subtime;
1348         unsigned long overlap = (sub_uses_time ? 100 : fps) / 5; // 0.2s
1349
1350         n=m=0;
1351         if (i)  for (;;){
1352                 if (sub->end <= sub->start){
1353                         sub->end = sub->start + subfms;
1354                         m++;
1355                         n++;
1356                 }
1357                 if (!--i) break;
1358                 nextsub = sub + 1;
1359             if(block){
1360                 if ((sub->end > nextsub->start) && (sub->end <= nextsub->start + overlap)) {
1361                     // these subtitles overlap for less than 0.2 seconds
1362                     // and would result in very short overlapping subtitle
1363                     // so let's fix the problem here, before overlapping code
1364                     // get its hands on them
1365                     unsigned delta = sub->end - nextsub->start, half = delta / 2;
1366                     sub->end -= half + 1;
1367                     nextsub->start += delta - half;
1368                 }
1369                 if (sub->end >= nextsub->start){
1370                         sub->end = nextsub->start - 1;
1371                         if (sub->end - sub->start > subfms)
1372                                 sub->end = sub->start + subfms;
1373                         if (!m)
1374                                 n++;
1375                 }
1376             }
1377
1378                 /* Theory:
1379                  * Movies are often converted from FILM (24 fps)
1380                  * to PAL (25) by simply speeding it up, so we
1381                  * to multiply the original timestmaps by
1382                  * (Movie's FPS / Subtitle's (guessed) FPS)
1383                  * so eg. for 23.98 fps movie and PAL time based
1384                  * subtitles we say -subfps 25 and we're fine!
1385                  */
1386
1387                 /* timed sub fps correction ::atmos */
1388                 /* the frame-based case is handled in mpcommon.c
1389                  * where find_sub is called */
1390                 if(sub_uses_time && sub_fps) {
1391                         sub->start *= sub_fps/fps;
1392                         sub->end   *= sub_fps/fps;
1393                 }
1394
1395                 sub = nextsub;
1396                 m = 0;
1397         }
1398         if (n) mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: Adjusted %d subtitle(s).\n", n);
1399 }
1400
// Describes one supported subtitle format; sub_read_file() keeps a table of
// these and picks the entry at index sub_format.
struct subreader {
    // Reads the next subtitle from st into dest.  sub_read_file() treats a
    // NULL result as end of file and ERR as a failure.
    subtitle * (*read)(stream_t *st, subtitle *dest,
                       struct readline_args *args);
    // Optional post-processing applied to each subtitle that was read;
    // NULL if the format needs none.
    void       (*post)(subtitle *dest);
    // Format name, printed when the format has been detected.
    const char *name;
};
1407
1408 #ifdef CONFIG_ENCA
1409 const char* guess_buffer_cp(unsigned char* buffer, int buflen, const char *preferred_language, const char *fallback)
1410 {
1411     const char **languages;
1412     size_t langcnt;
1413     EncaAnalyser analyser;
1414     EncaEncoding encoding;
1415     const char *detected_sub_cp = NULL;
1416     int i;
1417
1418     languages = enca_get_languages(&langcnt);
1419     mp_msg(MSGT_SUBREADER, MSGL_V, "ENCA supported languages: ");
1420     for (i = 0; i < langcnt; i++) {
1421         mp_msg(MSGT_SUBREADER, MSGL_V, "%s ", languages[i]);
1422     }
1423     mp_msg(MSGT_SUBREADER, MSGL_V, "\n");
1424
1425     for (i = 0; i < langcnt; i++) {
1426         if (strcasecmp(languages[i], preferred_language) != 0) continue;
1427         analyser = enca_analyser_alloc(languages[i]);
1428         encoding = enca_analyse_const(analyser, buffer, buflen);
1429         enca_analyser_free(analyser);
1430         if (encoding.charset != ENCA_CS_UNKNOWN) {
1431             detected_sub_cp = enca_charset_name(encoding.charset, ENCA_NAME_STYLE_ICONV);
1432             break;
1433         }
1434     }
1435
1436     free(languages);
1437
1438     if (!detected_sub_cp) {
1439         detected_sub_cp = fallback;
1440         mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detection failed: fallback to %s\n", fallback);
1441     }else{
1442         mp_msg(MSGT_SUBREADER, MSGL_INFO, "ENCA detected charset: %s\n", detected_sub_cp);
1443     }
1444
1445     return detected_sub_cp;
1446 }
1447
1448 #define MAX_GUESS_BUFFER_SIZE (256*1024)
1449 const char* guess_cp(stream_t *st, const char *preferred_language, const char *fallback)
1450 {
1451     size_t buflen;
1452     unsigned char *buffer;
1453     const char *detected_sub_cp = NULL;
1454
1455     buffer = malloc(MAX_GUESS_BUFFER_SIZE);
1456     buflen = stream_read(st,buffer, MAX_GUESS_BUFFER_SIZE);
1457
1458     detected_sub_cp = guess_buffer_cp(buffer, buflen, preferred_language, fallback);
1459
1460     free(buffer);
1461     stream_reset(st);
1462     stream_seek(st,0);
1463
1464     return detected_sub_cp;
1465 }
1466 #undef MAX_GUESS_BUFFER_SIZE
1467 #endif
1468
1469 sub_data* sub_read_file(char *filename, float fps, struct MPOpts *opts)
1470 {
1471     int utf16;
1472     stream_t* fd;
1473     int n_max, n_first, i, j, sub_first, sub_orig;
1474     subtitle *first, *second, *sub, *return_sub, *alloced_sub = NULL;
1475     sub_data *subt_data;
1476     int uses_time = 0, sub_num = 0, sub_errs = 0;
1477     static const struct subreader sr[]=
1478     {
1479             { sub_read_line_microdvd, NULL, "microdvd" },
1480             { sub_read_line_subrip, NULL, "subrip" },
1481             { sub_read_line_subviewer, NULL, "subviewer" },
1482             { sub_read_line_sami, NULL, "sami" },
1483             { sub_read_line_vplayer, NULL, "vplayer" },
1484             { sub_read_line_rt, NULL, "rt" },
1485             { sub_read_line_ssa, sub_pp_ssa, "ssa" },
1486             { sub_read_line_pjs, NULL, "pjs" },
1487             { sub_read_line_mpsub, NULL, "mpsub" },
1488             { sub_read_line_aqt, NULL, "aqt" },
1489             { sub_read_line_subviewer2, NULL, "subviewer 2.0" },
1490             { sub_read_line_subrip09, NULL, "subrip 0.9" },
1491             { sub_read_line_jacosub, NULL, "jacosub" },
1492             { sub_read_line_mpl2, NULL, "mpl2" }
1493     };
1494     const struct subreader *srp;
1495
1496     if(filename==NULL) return NULL; //qnx segfault
1497     fd=open_stream (filename, NULL, NULL); if (!fd) return NULL;
1498
1499     sub_format = SUB_INVALID;
1500     for (utf16 = 0; sub_format == SUB_INVALID && utf16 < 3; utf16++) {
1501         sub_format=sub_autodetect (fd, &uses_time, utf16);
1502         stream_reset(fd);
1503         stream_seek(fd,0);
1504     }
1505     utf16--;
1506
1507     mpsub_multiplier = (uses_time ? 100.0 : 1.0);
1508     if (sub_format==SUB_INVALID) {mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: Could not determine file format\n");return NULL;}
1509     srp=sr+sub_format;
1510     mp_msg(MSGT_SUBREADER, MSGL_V, "SUB: Detected subtitle file format: %s\n", srp->name);
1511
1512 #ifdef CONFIG_ICONV
1513     sub_utf8_prev=sub_utf8;
1514     {
1515             int l,k;
1516             k = -1;
1517             if ((l=strlen(filename))>4){
1518                     char *exts[] = {".utf", ".utf8", ".utf-8" };
1519                     for (k=3;--k>=0;)
1520                         if (l >= strlen(exts[k]) && !strcasecmp(filename+(l - strlen(exts[k])), exts[k])){
1521                             sub_utf8 = 1;
1522                             break;
1523                         }
1524             }
1525             if (k<0) subcp_open(fd);
1526     }
1527 #endif
1528
1529     sub_num=0;n_max=32;
1530     first=malloc(n_max*sizeof(subtitle));
1531     if(!first){
1532 #ifdef CONFIG_ICONV
1533           subcp_close();
1534           sub_utf8=sub_utf8_prev;
1535 #endif
1536             return NULL;
1537     }
1538
1539 #ifdef CONFIG_SORTSUB
1540     alloced_sub =
1541     sub = malloc(sizeof(subtitle));
1542     //This is to deal with those formats (AQT & Subrip) which define the end of a subtitle
1543     //as the beginning of the following
1544     previous_sub_end = 0;
1545 #endif
1546     while(1){
1547         if(sub_num>=n_max){
1548             n_max+=16;
1549             first=realloc(first,n_max*sizeof(subtitle));
1550         }
1551 #ifndef CONFIG_SORTSUB
1552         sub = &first[sub_num];
1553 #endif
1554         memset(sub, '\0', sizeof(subtitle));
1555         sub=srp->read(fd, sub, &(struct readline_args){utf16, opts});
1556         if(!sub) break;   // EOF
1557 #ifdef CONFIG_ICONV
1558         if ((sub!=ERR) && sub_utf8 == 2) sub=subcp_recode(sub);
1559 #endif
1560 #ifdef CONFIG_FRIBIDI
1561         if (sub!=ERR) sub=sub_fribidi(sub,sub_utf8,0);
1562 #endif
1563         if ( sub == ERR )
1564          {
1565 #ifdef CONFIG_ICONV
1566           subcp_close();
1567 #endif
1568           free(first);
1569           free(alloced_sub);
1570           return NULL;
1571          }
1572         // Apply any post processing that needs recoding first
1573         if ((sub!=ERR) && !sub_no_text_pp && srp->post) srp->post(sub);
1574 #ifdef CONFIG_SORTSUB
1575         if(!sub_num || (first[sub_num - 1].start <= sub->start)){
1576             first[sub_num].start = sub->start;
1577             first[sub_num].end   = sub->end;
1578             first[sub_num].lines = sub->lines;
1579             first[sub_num].alignment = sub->alignment;
1580             for(i = 0; i < sub->lines; ++i){
1581                 first[sub_num].text[i] = sub->text[i];
1582             }
1583             if (previous_sub_end){
1584                 first[sub_num - 1].end = previous_sub_end;
1585                 previous_sub_end = 0;
1586             }
1587         } else {
1588             for(j = sub_num - 1; j >= 0; --j){
1589                 first[j + 1].start = first[j].start;
1590                 first[j + 1].end   = first[j].end;
1591                 first[j + 1].lines = first[j].lines;
1592                 first[j + 1].alignment = first[j].alignment;
1593                 for(i = 0; i < first[j].lines; ++i){
1594                     first[j + 1].text[i] = first[j].text[i];
1595                 }
1596                 if(!j || (first[j - 1].start <= sub->start)){
1597                     first[j].start = sub->start;
1598                     first[j].end   = sub->end;
1599                     first[j].lines = sub->lines;
1600                     first[j].alignment = sub->alignment;
1601                     for(i = 0; i < SUB_MAX_TEXT; ++i){
1602                         first[j].text[i] = sub->text[i];
1603                     }
1604                     if (previous_sub_end){
1605                         first[j].end = first[j - 1].end;
1606                         first[j - 1].end = previous_sub_end;
1607                         previous_sub_end = 0;
1608                     }
1609                     break;
1610                 }
1611             }
1612         }
1613 #endif
1614         if(sub==ERR) ++sub_errs; else ++sub_num; // Error vs. Valid
1615     }
1616
1617     free_stream(fd);
1618
1619 #ifdef CONFIG_ICONV
1620     subcp_close();
1621 #endif
1622     free(alloced_sub);
1623
1624 //    printf ("SUB: Subtitle format %s time.\n", uses_time?"uses":"doesn't use");
1625     mp_msg(MSGT_SUBREADER, MSGL_V,"SUB: Read %i subtitles, %i bad line(s).\n",
1626            sub_num, sub_errs);
1627
1628     if(sub_num<=0){
1629         free(first);
1630         return NULL;
1631     }
1632
1633     // we do overlap if the user forced it (suboverlap_enable == 2) or
1634     // the user didn't forced no-overlapsub and the format is Jacosub or Ssa.
1635     // this is because usually overlapping subtitles are found in these formats,
1636     // while in others they are probably result of bad timing
1637 if ((suboverlap_enabled == 2) ||
1638     ((suboverlap_enabled) && ((sub_format == SUB_JACOSUB) || (sub_format == SUB_SSA)))) {
1639     adjust_subs_time(first, 6.0, fps, 0, sub_num, uses_time);/*~6 secs AST*/
1640 // here we manage overlapping subtitles
1641     sub_orig = sub_num;
1642     n_first = sub_num;
1643     sub_num = 0;
1644     second = NULL;
1645     // for each subtitle in first[] we deal with its 'block' of
1646     // bonded subtitles
1647     for (sub_first = 0; sub_first < n_first; ++sub_first) {
1648         unsigned long global_start = first[sub_first].start,
1649                 global_end = first[sub_first].end, local_start, local_end;
1650         int lines_to_add = first[sub_first].lines, sub_to_add = 0,
1651                 **placeholder = NULL, higher_line = 0, counter, start_block_sub = sub_num;
1652         char real_block = 1;
1653
1654         // here we find the number of subtitles inside the 'block'
1655         // and its span interval. this works well only with sorted
1656         // subtitles
1657         while ((sub_first + sub_to_add + 1 < n_first) && (first[sub_first + sub_to_add + 1].start < global_end)) {
1658             ++sub_to_add;
1659             lines_to_add += first[sub_first + sub_to_add].lines;
1660             if (first[sub_first + sub_to_add].start < global_start) {
1661                 global_start = first[sub_first + sub_to_add].start;
1662             }
1663             if (first[sub_first + sub_to_add].end > global_end) {
1664                 global_end = first[sub_first + sub_to_add].end;
1665             }
1666         }
1667
1668         /* Avoid n^2 memory use for the "placeholder" data structure
1669          * below with subtitles that have a huge number of
1670          * consecutive overlapping lines. */
1671         lines_to_add = FFMIN(lines_to_add, SUB_MAX_TEXT);
1672
1673         // we need a structure to keep trace of the screen lines
1674         // used by the subs, a 'placeholder'
1675         counter = 2 * sub_to_add + 1;  // the maximum number of subs derived
1676                                        // from a block of sub_to_add+1 subs
1677         placeholder = malloc(sizeof(int *) * counter);
1678         for (i = 0; i < counter; ++i) {
1679             placeholder[i] = malloc(sizeof(int) * lines_to_add);
1680             for (j = 0; j < lines_to_add; ++j) {
1681                 placeholder[i][j] = -1;
1682             }
1683         }
1684
1685         counter = 0;
1686         local_end = global_start - 1;
1687         do {
1688             int ls;
1689
1690             // here we find the beginning and the end of a new
1691             // subtitle in the block
1692             local_start = local_end + 1;
1693             local_end   = global_end;
1694             for (j = 0; j <= sub_to_add; ++j) {
1695                 if ((first[sub_first + j].start - 1 > local_start) && (first[sub_first + j].start - 1 < local_end)) {
1696                     local_end = first[sub_first + j].start - 1;
1697                 } else if ((first[sub_first + j].end > local_start) && (first[sub_first + j].end < local_end)) {
1698                     local_end = first[sub_first + j].end;
1699                 }
1700             }
1701             // here we allocate the screen lines to subs we must
1702             // display in current local_start-local_end interval.
1703             // if the subs were yet presents in the previous interval
1704             // they keep the same lines, otherside they get unused lines
1705             for (j = 0; j <= sub_to_add; ++j) {
1706                 if ((first[sub_first + j].start <= local_end) && (first[sub_first + j].end > local_start)) {
1707                     unsigned long sub_lines = first[sub_first + j].lines, fragment_length = lines_to_add + 1,
1708                         tmp = 0;
1709                     char boolean = 0;
1710                     int fragment_position = -1;
1711
1712                     // if this is not the first new sub of the block
1713                     // we find if this sub was present in the previous
1714                     // new sub
1715                     if (counter)
1716                         for (i = 0; i < lines_to_add; ++i) {
1717                             if (placeholder[counter - 1][i] == sub_first + j) {
1718                                 placeholder[counter][i] = sub_first + j;
1719                                 boolean = 1;
1720                             }
1721                         }
1722                     if (boolean)
1723                         continue;
1724
1725                     // we are looking for the shortest among all groups of
1726                     // sequential blank lines whose length is greater than or
1727                     // equal to sub_lines. we store in fragment_position the
1728                     // position of the shortest group, in fragment_length its
1729                     // length, and in tmp the length of the group currently
1730                     // examinated
1731                     for (i = 0; i < lines_to_add; ++i) {
1732                         if (placeholder[counter][i] == -1) {
1733                             // placeholder[counter][i] is part of the current group
1734                             // of blank lines
1735                             ++tmp;
1736                         } else {
1737                             if (tmp == sub_lines) {
1738                                 // current group's size fits exactly the one we
1739                                 // need, so we stop looking
1740                                 fragment_position = i - tmp;
1741                                 tmp = 0;
1742                                 break;
1743                             }
1744                             if ((tmp) && (tmp > sub_lines) && (tmp < fragment_length)) {
1745                                 // current group is the best we found till here,
1746                                 // but is still bigger than the one we are looking
1747                                 // for, so we keep on looking
1748                                 fragment_length = tmp;
1749                                 fragment_position = i - tmp;
1750                                 tmp = 0;
1751                             } else {
1752                                 // current group doesn't fit at all, so we forget it
1753                                 tmp = 0;
1754                             }
1755                         }
1756                     }
1757                     if (tmp) {
1758                         // last screen line is blank, a group ends with it
1759                         if ((tmp >= sub_lines) && (tmp < fragment_length)) {
1760                             fragment_position = i - tmp;
1761                         }
1762                     }
1763                     if (fragment_position == -1) {
1764                         // it was not possible to find free screen line(s) for a subtitle,
1765                         // usually this means a bug in the code; however we do not overlap
1766                         mp_msg(MSGT_SUBREADER, MSGL_WARN, "SUB: we could not find a suitable position for an overlapping subtitle\n");
1767                         higher_line = SUB_MAX_TEXT + 1;
1768                         break;
1769                     } else {
1770                         for (tmp = 0; tmp < sub_lines; ++tmp) {
1771                             placeholder[counter][fragment_position + tmp] = sub_first + j;
1772                         }
1773                     }
1774                 }
1775             }
1776             for (j = higher_line + 1; j < lines_to_add; ++j) {
1777                 if (placeholder[counter][j] != -1)
1778                     higher_line = j;
1779                 else
1780                     break;
1781             }
1782             if (higher_line >= SUB_MAX_TEXT) {
1783                 // the 'block' has too much lines, so we don't overlap the
1784                 // subtitles
1785                 second = realloc(second, (sub_num + sub_to_add + 1) * sizeof(subtitle));
1786                 for (j = 0; j <= sub_to_add; ++j) {
1787                     int ls;
1788                     memset(&second[sub_num + j], '\0', sizeof(subtitle));
1789                     second[sub_num + j].start = first[sub_first + j].start;
1790                     second[sub_num + j].end   = first[sub_first + j].end;
1791                     second[sub_num + j].lines = first[sub_first + j].lines;
1792                     second[sub_num + j].alignment = first[sub_first + j].alignment;
1793                     for (ls = 0; ls < second[sub_num + j].lines; ls++) {
1794                         second[sub_num + j].text[ls] = strdup(first[sub_first + j].text[ls]);
1795                     }
1796                 }
1797                 sub_num += sub_to_add + 1;
1798                 sub_first += sub_to_add;
1799                 real_block = 0;
1800                 break;
1801             }
1802
1803             // we read the placeholder structure and create the new
1804             // subs.
1805             second = realloc(second, (sub_num + 1) * sizeof(subtitle));
1806             memset(&second[sub_num], '\0', sizeof(subtitle));
1807             second[sub_num].start = local_start;
1808             second[sub_num].end   = local_end;
1809             second[sub_num].alignment = first[sub_first].alignment;
1810             n_max = (lines_to_add < SUB_MAX_TEXT) ? lines_to_add : SUB_MAX_TEXT;
1811             for (i = 0, j = 0; j < n_max; ++j) {
1812                 if (placeholder[counter][j] != -1) {
1813                     int lines = first[placeholder[counter][j]].lines;
1814                     for (ls = 0; ls < lines; ++ls) {
1815                         second[sub_num].text[i++] = strdup(first[placeholder[counter][j]].text[ls]);
1816                     }
1817                     j += lines - 1;
1818                 } else {
1819                     second[sub_num].text[i++] = strdup(" ");
1820                 }
1821             }
1822             ++sub_num;
1823             ++counter;
1824         } while (local_end < global_end);
1825         if (real_block)
1826             for (i = 0; i < counter; ++i)
1827                 second[start_block_sub + i].lines = higher_line + 1;
1828
1829         counter = 2 * sub_to_add + 1;
1830         for (i = 0; i < counter; ++i) {
1831             free(placeholder[i]);
1832         }
1833         free(placeholder);
1834         sub_first += sub_to_add;
1835     }
1836
1837     for (j = sub_orig - 1; j >= 0; --j) {
1838         for (i = first[j].lines - 1; i >= 0; --i) {
1839             free(first[j].text[i]);
1840         }
1841     }
1842     free(first);
1843
1844     return_sub = second;
1845 } else { //if(suboverlap_enabled)
1846     adjust_subs_time(first, 6.0, fps, 1, sub_num, uses_time);/*~6 secs AST*/
1847     return_sub = first;
1848 }
1849     if (return_sub == NULL) return NULL;
1850     subt_data = malloc(sizeof(sub_data));
1851     subt_data->filename = strdup(filename);
1852     subt_data->sub_uses_time = uses_time;
1853     subt_data->sub_num = sub_num;
1854     subt_data->sub_errs = sub_errs;
1855     subt_data->subtitles = return_sub;
1856     return subt_data;
1857 }
1858
1859 #if 0
/*
 * Replace the first occurrence of 'what' inside 'in' by 'whereof', in place.
 *
 * The buffer behind 'in' cannot grow, so the replacement is only done when
 * 'whereof' is not longer than 'what'; the rest of the string is moved down
 * to close the gap instead of being cut off.
 *
 * Returns 'in' on success. Returns NULL (leaving 'in' untouched) when an
 * argument is NULL, 'what' does not occur in 'in', or 'whereof' is longer
 * than 'what'.
 */
char * strreplace( char * in,char * what,char * whereof )
{
 size_t what_len, whereof_len;
 char * hit;

 if ( ( in == NULL )||( what == NULL )||( whereof == NULL ) ) return NULL;
 hit=strstr( in,what );
 if ( hit == NULL ) return NULL;

 what_len=strlen( what );
 whereof_len=strlen( whereof );
 // writing a longer replacement would run over the text that follows
 if ( whereof_len > what_len ) return NULL;

 memcpy( hit,whereof,whereof_len );
 // regions overlap, and the +1 carries the terminating NUL along
 memmove( hit + whereof_len,hit + what_len,strlen( hit + what_len ) + 1 );
 return in;
}
1870 #endif
1871
1872
/**
 * \brief copy a string lowercased and with normalized whitespace
 * \param d destination, must have room for strlen(s) + 1 bytes
 * \param s NUL-terminated source string
 *
 * Leading and trailing whitespace is dropped and every run of whitespace
 * between two words is replaced by a single space.
 */
static void strcpy_trim(char *d, char *s)
{
    // <ctype.h> functions take an unsigned char value; a plain char can be
    // negative for bytes >= 0x80, which would be undefined behaviour
    const unsigned char *src = (const unsigned char *)s;

    // skip leading whitespace (isspace(0) is false, so this stops at the end)
    while (isspace(*src)) {
        src++;
    }
    while (*src) {
        if (isspace(*src)) {
            // swallow the whole run, emit a separator only if a word follows
            while (isspace(*src)) {
                src++;
            }
            if (*src) {
                *d++ = ' ';
            }
        } else {
            *d++ = tolower(*src);
            src++;
        }
    }
    *d = 0;
}
1895
/**
 * \brief copy a file name without its extension, lowercased
 * \param d destination, must have room for strlen(s) + 1 bytes
 * \param s NUL-terminated file name
 *
 * Everything from the last '.' on is dropped. The result is lowercased in
 * all cases, including names that contain no '.' at all.
 */
static void strcpy_strip_ext(char *d, char *s)
{
    const char *dot = strrchr(s, '.');
    const size_t stem_len = dot ? (size_t)(dot - s) : strlen(s);
    size_t k;

    for (k = 0; k < stem_len; k++) {
        // cast: tolower() is only defined for unsigned char values and EOF
        d[k] = tolower((unsigned char)s[k]);
    }
    d[stem_len] = 0;
}
1911
/**
 * \brief copy the extension of a file name
 * \param d destination buffer
 * \param s NUL-terminated file name
 *
 * The extension is whatever follows the last '.'; d becomes the empty
 * string when s contains no '.'.
 */
static void strcpy_get_ext(char *d, char *s)
{
    const char *last_dot = strrchr(s, '.');

    strcpy(d, last_dot ? last_dot + 1 : "");
}
1922
/**
 * \brief check whether a string consists of whitespace only
 * \param s NUL-terminated string
 * \return 1 if s is empty or contains only whitespace, 0 otherwise
 */
static int whiteonly(char *s)
{
    // isspace() must be given an unsigned char value, not a possibly
    // negative plain char
    const unsigned char *p = (const unsigned char *)s;

    for (; *p; p++) {
        if (!isspace(*p)) {
            return 0;
        }
    }
    return 1;
}
1931
// A file name together with the priority it is sorted by
typedef struct subfn
{
    int priority;   // larger values sort first
    char *fname;    // file name, tie-breaker for equal priorities
} subfn;

/**
 * \brief qsort() comparator for subfn entries
 * \return -1 if a has the higher priority, 1 if b has; for equal
 *         priorities the strcoll() ordering of the two file names
 */
static int compare_sub_priority(const void *a, const void *b)
{
    const subfn *lhs = a;
    const subfn *rhs = b;

    if (lhs->priority != rhs->priority) {
        return lhs->priority > rhs->priority ? -1 : 1;
    }
    return strcoll(lhs->fname, rhs->fname);
}
1948
1949 char** sub_filenames(const char* path, char *fname)
1950 {
1951     char *f_dir, *f_fname, *f_fname_noext, *f_fname_trim, *tmp, *tmp_sub_id;
1952     char *tmp_fname_noext, *tmp_fname_trim, *tmp_fname_ext, *tmpresult;
1953
1954     int len, pos, found, i, j;
1955     char * sub_exts[] = {  "utf", "utf8", "utf-8", "sub", "srt", "smi", "rt", "txt", "ssa", "aqt", "jss", "js", "ass", NULL};
1956     subfn *result;
1957     char **result2;
1958
1959     int subcnt;
1960
1961     FILE *f;
1962
1963     DIR *d;
1964     struct dirent *de;
1965
1966     len = (strlen(fname) > 256 ? strlen(fname) : 256)
1967         +(strlen(path) > 256 ? strlen(path) : 256)+2;
1968
1969     f_dir = malloc(len);
1970     f_fname = malloc(len);
1971     f_fname_noext = malloc(len);
1972     f_fname_trim = malloc(len);
1973
1974     tmp_fname_noext = malloc(len);
1975     tmp_fname_trim = malloc(len);
1976     tmp_fname_ext = malloc(len);
1977
1978     tmpresult = malloc(len);
1979
1980     result = calloc(MAX_SUBTITLE_FILES, sizeof(*result));
1981
1982     subcnt = 0;
1983
1984     tmp = strrchr(fname,'/');
1985 #if HAVE_DOS_PATHS
1986     if(!tmp)tmp = strrchr(fname,'\\');
1987     if(!tmp)tmp = strrchr(fname,':');
1988 #endif
1989
1990     // extract filename & dirname from fname
1991     if (tmp) {
1992         strcpy(f_fname, tmp+1);
1993         pos = tmp - fname;
1994         strncpy(f_dir, fname, pos+1);
1995         f_dir[pos+1] = 0;
1996     } else {
1997         strcpy(f_fname, fname);
1998         strcpy(f_dir, "./");
1999     }
2000
2001     strcpy_strip_ext(f_fname_noext, f_fname);
2002     strcpy_trim(f_fname_trim, f_fname_noext);
2003
2004     /* The code using sub language here is broken - it assumes strict
2005      * "videoname languagename" syntax for the subtitle file, which is
2006      * very unlikely to match especially if language name uses "en,de"
2007      * syntax... */
2008     tmp_sub_id = NULL;
2009 #if 0
2010     if (dvdsub_lang && !whiteonly(dvdsub_lang)) {
2011         tmp_sub_id = malloc(strlen(dvdsub_lang)+1);
2012         strcpy_trim(tmp_sub_id, dvdsub_lang);
2013     }
2014 #endif
2015
2016     // 0 = nothing
2017     // 1 = any subtitle file
2018     // 2 = any sub file containing movie name
2019     // 3 = sub file containing movie name and the lang extension
2020     for (j = 0; j <= 1; j++) {
2021         d = opendir(j == 0 ? f_dir : path);
2022         if (d) {
2023             while ((de = readdir(d))) {
2024                 // retrieve various parts of the filename
2025                 strcpy_strip_ext(tmp_fname_noext, de->d_name);
2026                 strcpy_get_ext(tmp_fname_ext, de->d_name);
2027                 strcpy_trim(tmp_fname_trim, tmp_fname_noext);
2028
2029                 // does it end with a subtitle extension?
2030                 found = 0;
2031 #ifdef CONFIG_ICONV
2032 #ifdef CONFIG_ENCA
2033                 for (i = ((sub_cp && strncasecmp(sub_cp, "enca", 4) != 0) ? 3 : 0); sub_exts[i]; i++) {
2034 #else
2035                 for (i = (sub_cp ? 3 : 0); sub_exts[i]; i++) {
2036 #endif
2037 #else
2038                 for (i = 0; sub_exts[i]; i++) {
2039 #endif
2040                     if (strcasecmp(sub_exts[i], tmp_fname_ext) == 0) {
2041                         found = 1;
2042                         break;
2043                     }
2044                 }
2045
2046                 // we have a (likely) subtitle file
2047                 if (found) {
2048                     int prio = 0;
2049                     if (!prio && tmp_sub_id)
2050                     {
2051                         sprintf(tmpresult, "%s %s", f_fname_trim, tmp_sub_id);
2052                         if (strcmp(tmp_fname_trim, tmpresult) == 0 && sub_match_fuzziness >= 1) {
2053                             // matches the movie name + lang extension
2054                             prio = 5;
2055                         }
2056                     }
2057                     if (!prio && strcmp(tmp_fname_trim, f_fname_trim) == 0) {
2058                         // matches the movie name
2059                         prio = 4;
2060                     }
2061                     if (!prio && (tmp = strstr(tmp_fname_trim, f_fname_trim)) && (sub_match_fuzziness >= 1)) {
2062                         // contains the movie name
2063                         tmp += strlen(f_fname_trim);
2064                         if (tmp_sub_id && strstr(tmp, tmp_sub_id)) {
2065                             // with sub_id specified prefer localized subtitles
2066                             prio = 3;
2067                         } else if ((tmp_sub_id == NULL) && whiteonly(tmp)) {
2068                             // without sub_id prefer "plain" name
2069                             prio = 3;
2070                         } else {
2071                             // with no localized subs found, try any else instead
2072                             prio = 2;
2073                         }
2074                     }
2075                     if (!prio) {
2076                         // doesn't contain the movie name
2077                         // don't try in the mplayer subtitle directory
2078                         if ((j == 0) && (sub_match_fuzziness >= 2)) {
2079                             prio = 1;
2080                         }
2081                     }
2082
2083                     mp_msg(MSGT_SUBREADER, MSGL_DBG2, "Potential sub file: "
2084                            "\"%s\"  Priority: %d\n", de->d_name, prio);
2085                     if (prio) {
2086                         prio += prio;
2087 #ifdef CONFIG_ICONV
2088                         if (i<3){ // prefer UTF-8 coded
2089                             prio++;
2090                         }
2091 #endif
2092                         sprintf(tmpresult, "%s%s", j == 0 ? f_dir : path, de->d_name);
2093 //                      fprintf(stderr, "%s priority %d\n", tmpresult, prio);
2094                         if ((f = fopen(tmpresult, "rt"))) {
2095                             fclose(f);
2096                             result[subcnt].priority = prio;
2097                             result[subcnt].fname = strdup(tmpresult);
2098                             subcnt++;
2099                         }
2100                     }
2101
2102                 }
2103                 if (subcnt >= MAX_SUBTITLE_FILES) break;
2104             }
2105             closedir(d);
2106         }
2107
2108     }
2109
2110     free(tmp_sub_id);
2111
2112     free(f_dir);
2113     free(f_fname);
2114     free(f_fname_noext);
2115     free(f_fname_trim);
2116
2117     free(tmp_fname_noext);
2118     free(tmp_fname_trim);
2119     free(tmp_fname_ext);
2120
2121     free(tmpresult);
2122
2123     qsort(result, subcnt, sizeof(subfn), compare_sub_priority);
2124
2125     result2 = calloc(subcnt + 1, sizeof(*result2));
2126
2127     for (i = 0; i < subcnt; i++) {
2128         result2[i] = result[i].fname;
2129     }
2130     result2[subcnt] = NULL;
2131
2132     free(result);
2133
2134     return result2;
2135 }
2136
2137 void list_sub_file(sub_data* subd){
2138     int i,j;
2139     subtitle *subs = subd->subtitles;
2140
2141     for(j=0; j < subd->sub_num; j++){
2142         subtitle* egysub=&subs[j];
2143         mp_msg(MSGT_SUBREADER,MSGL_INFO,"%i line%c (%li-%li)\n",
2144                     egysub->lines,
2145                     (1==egysub->lines)?' ':'s',
2146                     egysub->start,
2147                     egysub->end);
2148         for (i=0; i<egysub->lines; i++) {
2149             mp_msg(MSGT_SUBREADER,MSGL_INFO,"\t\t%d: %s%s", i,egysub->text[i], i==egysub->lines-1?"":" \n ");
2150         }
2151         mp_msg(MSGT_SUBREADER,MSGL_INFO,"\n");
2152     }
2153
2154     mp_msg(MSGT_SUBREADER,MSGL_INFO,"Subtitle format %s time.\n",
2155                                   subd->sub_uses_time ? "uses":"doesn't use");
2156     mp_msg(MSGT_SUBREADER,MSGL_INFO,"Read %i subtitles, %i errors.\n", subd->sub_num, subd->sub_errs);
2157 }
2158
2159 void dump_srt(sub_data* subd, float fps){
2160     int i,j;
2161     int h,m,s,ms;
2162     FILE * fd;
2163     subtitle * onesub;
2164     unsigned long temp;
2165     subtitle *subs = subd->subtitles;
2166
2167     if (!subd->sub_uses_time && sub_fps == 0)
2168         sub_fps = fps;
2169     fd=fopen("dumpsub.srt","w");
2170     if(!fd)
2171     {
2172         perror("dump_srt: fopen");
2173         return;
2174     }
2175     for(i=0; i < subd->sub_num; i++)
2176     {
2177         onesub=subs+i;    //=&subs[i];
2178         fprintf(fd,"%d\n",i+1);//line number
2179
2180         temp=onesub->start;
2181         if (!subd->sub_uses_time)
2182             temp = temp * 100 / sub_fps;
2183         temp -= sub_delay * 100;
2184         h=temp/360000;temp%=360000;     //h =1*100*60*60
2185         m=temp/6000;  temp%=6000;       //m =1*100*60
2186         s=temp/100;   temp%=100;        //s =1*100
2187         ms=temp*10;                     //ms=1*10
2188         fprintf(fd,"%02d:%02d:%02d,%03d --> ",h,m,s,ms);
2189
2190         temp=onesub->end;
2191         if (!subd->sub_uses_time)
2192             temp = temp * 100 / sub_fps;
2193         temp -= sub_delay * 100;
2194         h=temp/360000;temp%=360000;
2195         m=temp/6000;  temp%=6000;
2196         s=temp/100;   temp%=100;
2197         ms=temp*10;
2198         fprintf(fd,"%02d:%02d:%02d,%03d\n",h,m,s,ms);
2199
2200         for(j=0;j<onesub->lines;j++)
2201             fprintf(fd,"%s\n",onesub->text[j]);
2202
2203         fprintf(fd,"\n");
2204     }
2205     fclose(fd);
2206     mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.srt\'.\n");
2207 }
2208
2209 void dump_mpsub(sub_data* subd, float fps){
2210         int i,j;
2211         FILE *fd;
2212         float a,b;
2213         subtitle *subs = subd->subtitles;
2214
2215         mpsub_position = subd->sub_uses_time? (sub_delay*100) : (sub_delay*fps);
2216         if (sub_fps==0) sub_fps=fps;
2217
2218         fd=fopen ("dump.mpsub", "w");
2219         if (!fd) {
2220                 perror ("dump_mpsub: fopen");
2221                 return;
2222         }
2223
2224
2225         if (subd->sub_uses_time) fprintf (fd,"FORMAT=TIME\n\n");
2226         else fprintf (fd, "FORMAT=%5.2f\n\n", fps);
2227
2228         for(j=0; j < subd->sub_num; j++){
2229                 subtitle* egysub=&subs[j];
2230                 if (subd->sub_uses_time) {
2231                         a=((egysub->start-mpsub_position)/100.0);
2232                         b=((egysub->end-egysub->start)/100.0);
2233                         if ( (float)((int)a) == a)
2234                         fprintf (fd, "%.0f",a);
2235                         else
2236                         fprintf (fd, "%.2f",a);
2237
2238                         if ( (float)((int)b) == b)
2239                         fprintf (fd, " %.0f\n",b);
2240                         else
2241                         fprintf (fd, " %.2f\n",b);
2242                 } else {
2243                         fprintf (fd, "%ld %ld\n", (long)((egysub->start*(fps/sub_fps))-((mpsub_position*(fps/sub_fps)))),
2244                                         (long)(((egysub->end)-(egysub->start))*(fps/sub_fps)));
2245                 }
2246
2247                 mpsub_position = egysub->end;
2248                 for (i=0; i<egysub->lines; i++) {
2249                         fprintf (fd, "%s\n",egysub->text[i]);
2250                 }
2251                 fprintf (fd, "\n");
2252         }
2253         fclose (fd);
2254         mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dump.mpsub\'.\n");
2255 }
2256
2257 void dump_microdvd(sub_data* subd, float fps) {
2258     int i, delay;
2259     FILE *fd;
2260     subtitle *subs = subd->subtitles;
2261     if (sub_fps == 0)
2262         sub_fps = fps;
2263     fd = fopen("dumpsub.sub", "w");
2264     if (!fd) {
2265         perror("dumpsub.sub: fopen");
2266         return;
2267     }
2268     delay = sub_delay * sub_fps;
2269     for (i = 0; i < subd->sub_num; ++i) {
2270         int j, start, end;
2271         start = subs[i].start;
2272         end = subs[i].end;
2273         if (subd->sub_uses_time) {
2274             start = start * sub_fps / 100 ;
2275             end = end * sub_fps / 100;
2276         }
2277         else {
2278             start = start * sub_fps / fps;
2279             end = end * sub_fps / fps;
2280         }
2281         start -= delay;
2282         end -= delay;
2283         fprintf(fd, "{%d}{%d}", start, end);
2284         for (j = 0; j < subs[i].lines; ++j)
2285             fprintf(fd, "%s%s", j ? "|" : "", subs[i].text[j]);
2286         fprintf(fd, "\n");
2287     }
2288     fclose(fd);
2289     mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.sub\'.\n");
2290 }
2291
2292 void dump_jacosub(sub_data* subd, float fps) {
2293     int i,j;
2294     int h,m,s,cs;
2295     FILE * fd;
2296     subtitle * onesub;
2297     unsigned long temp;
2298     subtitle *subs = subd->subtitles;
2299
2300     if (!subd->sub_uses_time && sub_fps == 0)
2301         sub_fps = fps;
2302     fd=fopen("dumpsub.jss","w");
2303     if(!fd)
2304     {
2305         perror("dump_jacosub: fopen");
2306         return;
2307     }
2308     fprintf(fd, "#TIMERES %d\n", (subd->sub_uses_time) ? 100 : (int)sub_fps);
2309     for(i=0; i < subd->sub_num; i++)
2310     {
2311         onesub=subs+i;    //=&subs[i];
2312
2313         temp=onesub->start;
2314         if (!subd->sub_uses_time)
2315             temp = temp * 100 / sub_fps;
2316         temp -= sub_delay * 100;
2317         h=temp/360000;temp%=360000;     //h =1*100*60*60
2318         m=temp/6000;  temp%=6000;       //m =1*100*60
2319         s=temp/100;   temp%=100;        //s =1*100
2320         cs=temp;                        //cs=1*10
2321         fprintf(fd,"%02d:%02d:%02d.%02d ",h,m,s,cs);
2322
2323         temp=onesub->end;
2324         if (!subd->sub_uses_time)
2325             temp = temp * 100 / sub_fps;
2326         temp -= sub_delay * 100;
2327         h=temp/360000;temp%=360000;
2328         m=temp/6000;  temp%=6000;
2329         s=temp/100;   temp%=100;
2330         cs=temp;
2331         fprintf(fd,"%02d:%02d:%02d.%02d {~} ",h,m,s,cs);
2332
2333         for(j=0;j<onesub->lines;j++)
2334             fprintf(fd,"%s%s",j ? "\\n" : "", onesub->text[j]);
2335
2336         fprintf(fd,"\n");
2337     }
2338     fclose(fd);
2339     mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.js\'.\n");
2340 }
2341
2342 void dump_sami(sub_data* subd, float fps) {
2343     int i,j;
2344     FILE * fd;
2345     subtitle * onesub;
2346     unsigned long temp;
2347     subtitle *subs = subd->subtitles;
2348
2349     if (!subd->sub_uses_time && sub_fps == 0)
2350         sub_fps = fps;
2351     fd=fopen("dumpsub.smi","w");
2352     if(!fd)
2353     {
2354         perror("dump_jacosub: fopen");
2355         return;
2356     }
2357     fprintf(fd, "<SAMI>\n"
2358                 "<HEAD>\n"
2359                 "       <STYLE TYPE=\"Text/css\">\n"
2360                 "       <!--\n"
2361                 "         P {margin-left: 29pt; margin-right: 29pt; font-size: 24pt; text-align: center; font-family: Tahoma; font-weight: bold; color: #FCDD03; background-color: #000000;}\n"
2362                 "         .SUBTTL {Name: 'Subtitles'; Lang: en-US; SAMIType: CC;}\n"
2363                 "       -->\n"
2364                 "       </STYLE>\n"
2365                 "</HEAD>\n"
2366                 "<BODY>\n");
2367     for(i=0; i < subd->sub_num; i++)
2368     {
2369         onesub=subs+i;    //=&subs[i];
2370
2371         temp=onesub->start;
2372         if (!subd->sub_uses_time)
2373             temp = temp * 100 / sub_fps;
2374         temp -= sub_delay * 100;
2375         fprintf(fd,"\t<SYNC Start=%lu>\n"
2376                     "\t  <P>", temp * 10);
2377
2378         for(j=0;j<onesub->lines;j++)
2379             fprintf(fd,"%s%s",j ? "<br>" : "", onesub->text[j]);
2380
2381         fprintf(fd,"\n");
2382
2383         temp=onesub->end;
2384         if (!subd->sub_uses_time)
2385             temp = temp * 100 / sub_fps;
2386         temp -= sub_delay * 100;
2387         fprintf(fd,"\t<SYNC Start=%lu>\n"
2388                     "\t  <P>&nbsp;\n", temp * 10);
2389     }
2390     fprintf(fd, "</BODY>\n"
2391                 "</SAMI>\n");
2392     fclose(fd);
2393     mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.smi\'.\n");
2394 }
2395
2396 void sub_free( sub_data * subd )
2397 {
2398     int i, j;
2399
2400     if ( !subd ) return;
2401
2402     for (i = 0; i < subd->sub_num; i++)
2403         for (j = 0; j < subd->subtitles[i].lines; j++)
2404             free( subd->subtitles[i].text[j] );
2405     free( subd->subtitles );
2406     free( subd->filename );
2407     free( subd );
2408 }
2409
2410 #define MAX_SUBLINE 512
2411 /**
2412  * \brief parse text and append it to subtitle in sub
2413  * \param sub subtitle struct to add text to
2414  * \param txt text to parse
2415  * \param len length of text in txt
2416  * \param endpts pts at which this subtitle text should be removed again
2417  *
2418  * <> and {} are interpreted as comment delimiters, "\n", "\N", '\n', '\r'
2419  * and '\0' are interpreted as newlines, duplicate, leading and trailing
2420  * newlines are ignored.
2421  */
2422 void sub_add_text(subtitle *sub, const char *txt, int len, double endpts) {
2423   int comment = 0;
2424   int double_newline = 1; // ignore newlines at the beginning
2425   int i, pos;
2426   char *buf;
2427 #ifdef CONFIG_FRIBIDI
2428   int orig_lines = sub->lines;
2429 #endif
2430   if (sub->lines >= SUB_MAX_TEXT) return;
2431   pos = 0;
2432   buf = malloc(MAX_SUBLINE + 1);
2433   sub->text[sub->lines] = buf;
2434   sub->endpts[sub->lines] = endpts;
2435   for (i = 0; i < len && pos < MAX_SUBLINE; i++) {
2436     char c = txt[i];
2437     if (c == '<') comment |= 1;
2438     if (c == '{') comment |= 2;
2439     if (comment) {
2440       if (c == '}') comment &= ~2;
2441       if (c == '>') comment &= ~1;
2442       continue;
2443     }
2444     if (pos == MAX_SUBLINE - 1) {
2445       i--;
2446       c = 0;
2447     }
2448     if (c == '\\' && i + 1 < len) {
2449       c = txt[++i];
2450       if (c == 'n' || c == 'N') c = 0;
2451     }
2452     if (c == '\n' || c == '\r') c = 0;
2453     if (c) {
2454       double_newline = 0;
2455       buf[pos++] = c;
2456     } else if (!double_newline) {
2457       if (sub->lines >= SUB_MAX_TEXT - 1) {
2458         mp_msg(MSGT_VO, MSGL_WARN, "Too many subtitle lines\n");
2459         break;
2460       }
2461       double_newline = 1;
2462       buf[pos] = 0;
2463       sub->lines++;
2464       pos = 0;
2465       buf = malloc(MAX_SUBLINE + 1);
2466       sub->text[sub->lines] = buf;
2467       sub->endpts[sub->lines] = endpts;
2468     }
2469   }
2470   buf[pos] = 0;
2471   if (sub->lines < SUB_MAX_TEXT &&
2472       strlen(sub->text[sub->lines]))
2473     sub->lines++;
2474 #ifdef CONFIG_FRIBIDI
2475   sub = sub_fribidi(sub, sub_utf8, orig_lines);
2476 #endif
2477 }
2478
2479 /**
2480  * \brief remove outdated subtitle lines.
2481  * \param sub subtitle struct to modify
2482  * \param pts current pts. All lines with endpts <= this will be removed.
2483  *            Use MP_NOPTS_VALUE to remove all lines
2484  * \return 1 if sub was modified, 0 otherwise.
2485  */
2486 int sub_clear_text(subtitle *sub, double pts) {
2487   int i = 0;
2488   int changed = 0;
2489   while (i < sub->lines) {
2490     double endpts = sub->endpts[i];
2491     if (pts == MP_NOPTS_VALUE || (endpts != MP_NOPTS_VALUE && pts >= endpts)) {
2492       int j;
2493       free(sub->text[i]);
2494       for (j = i + 1; j < sub->lines; j++) {
2495         sub->text[j - 1] = sub->text[j];
2496         sub->endpts[j - 1] = sub->endpts[j];
2497       }
2498       sub->lines--;
2499       changed = 1;
2500     } else
2501       i++;
2502   }
2503   return changed;
2504 }