WinGui: Fix another instance of the Caliburn vs Json.net sillyness where objects...
[HandBrake.git] / libhb / decsrtsub.c
blob0712a03ce5bfa33b90004b7b094e23b7eda25cec
1 /* decsrtsub.c
3 Copyright (c) 2003-2015 HandBrake Team
4 This file is part of the HandBrake source code
5 Homepage: <http://handbrake.fr/>.
6 It may be used under the terms of the GNU General Public License v2.
7 For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
8 */
10 #include <stdlib.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <iconv.h>
14 #include <errno.h>
15 #include "hb.h"
16 #include "colormap.h"
17 #include "decsrtsub.h"
/*
 * Start and end time of one subtitle cue, in milliseconds, as filled in by
 * read_time_from_string().
 */
struct start_and_end {
    unsigned long start;
    unsigned long end;
};
/*
 * States of the SRT line parser; the active one is kept in
 * hb_work_private_s.current_state.
 */
enum
{
    k_state_inEntry = 0,            /* collecting the text lines of a cue */
    k_state_inEntry_or_new,         /* after a blank line inside a cue */
    k_state_potential_new_entry,    /* expecting a cue number */
    k_state_timecode,               /* expecting the "start --> end" line */
};
/* One SRT cue while it is being assembled by the reader. */
typedef struct srt_entry_s {
    long offset;        /* cue start minus the end of the previous cue */
    long duration;      /* cue end minus cue start */
    long start;         /* cue start as read from the timing line */
    long stop;          /* cue end as read from the timing line */
    char text[1024];    /* accumulated text lines, NUL-terminated */
    int  pos;           /* write position inside text */
} srt_entry_t;
39 * Store all context in the work private struct,
41 struct hb_work_private_s
43 hb_job_t * job;
44 FILE * file;
45 char buf[1024];
46 int pos;
47 int end;
48 char utf8_buf[2048];
49 int utf8_pos;
50 int utf8_end;
51 int utf8_bom_skipped;
52 unsigned long current_time;
53 unsigned long number_of_entries;
54 unsigned long last_entry_number;
55 unsigned long current_state;
56 srt_entry_t current_entry;
57 iconv_t *iconv_context;
58 hb_subtitle_t *subtitle;
59 uint64_t start_time; // In HB time
60 uint64_t stop_time; // In HB time
62 int line; // SSA line number
/*
 * Translate one SRT markup tag at the start of 'srt' into SSA override text.
 *
 * Recognised tags: i, b and u with their closing forms (written either as
 * <x> or {x}), <font color="..."> and </font>.
 *
 * srt: NUL-terminated text positioned at the candidate tag.
 * len: receives the number of input characters occupied by the tag;
 *      left at 0 when nothing is recognised.
 *
 * Returns a string produced by hb_strdup_printf() that the caller must
 * release, or NULL when 'srt' does not start with a recognised,
 * properly terminated tag.
 */
static char* srt_markup_to_ssa(char *srt, int *len)
{
    char terminator;
    char color[40];
    uint32_t rgb;

    *len = 0;
    if (srt[0] != '<' && srt[0] != '{')
        return NULL;

    if (srt[0] == '<')
        terminator = '>';
    else
        terminator = '}';

    if (srt[1] == 'i' && srt[2] == terminator)
    {
        *len = 3;
        return hb_strdup_printf("{\\i1}");
    }
    else if (srt[1] == 'b' && srt[2] == terminator)
    {
        *len = 3;
        return hb_strdup_printf("{\\b1}");
    }
    else if (srt[1] == 'u' && srt[2] == terminator)
    {
        *len = 3;
        return hb_strdup_printf("{\\u1}");
    }
    else if (srt[1] == '/' && srt[2] == 'i' && srt[3] == terminator)
    {
        *len = 4;
        return hb_strdup_printf("{\\i0}");
    }
    else if (srt[1] == '/' && srt[2] == 'b' && srt[3] == terminator)
    {
        *len = 4;
        return hb_strdup_printf("{\\b0}");
    }
    else if (srt[1] == '/' && srt[2] == 'u' && srt[3] == terminator)
    {
        *len = 4;
        return hb_strdup_printf("{\\u0}");
    }
    else if (srt[0] == '<' && !strncmp(srt + 1, "font", 4))
    {
        int match;
        char *tag_end;

        match = sscanf(srt + 1, "font color=\"%39[^\"]\">", color);
        if (match != 1)
        {
            return NULL;
        }

        // sscanf() reports 1 as soon as the colour is read, whether or not
        // the closing "> follows, so the end of the tag has to be located
        // with a search that stops at the terminating NUL.
        tag_end = strchr(srt, '>');
        if (tag_end == NULL)
        {
            // Unterminated tag: treat it as ordinary text.
            return NULL;
        }
        *len = (int)(tag_end - srt) + 1;

        if (color[0] == '#')
            rgb = strtol(color + 1, NULL, 16);
        else
            rgb = hb_rgb_lookup_by_name(color);
        return hb_strdup_printf("{\\1c&H%X&}", HB_RGB_TO_BGR(rgb));
    }
    else if (srt[0] == '<' && srt[1] == '/' && !strncmp(srt + 2, "font", 4) &&
             srt[6] == '>')
    {
        *len = 7;
        return hb_strdup_printf("{\\1c&HFFFFFF&}");
    }

    return NULL;
}
136 void hb_srt_to_ssa(hb_buffer_t *sub_in, int line)
138 if (sub_in->size == 0)
139 return;
141 // null terminate input if not already terminated
142 if (sub_in->data[sub_in->size-1] != 0)
144 hb_buffer_realloc(sub_in, ++sub_in->size);
145 sub_in->data[sub_in->size - 1] = 0;
147 char * srt = (char*)sub_in->data;
148 // SSA markup expands a little over SRT, so allocate a bit of extra
149 // space. More will be realloc'd if needed.
150 hb_buffer_t * sub = hb_buffer_init(sub_in->size + 80);
151 char * ssa, *ssa_markup;
152 int skip, len, pos, ii;
154 // Exchange data between input sub and new ssa_sub
155 // After this, sub_in contains ssa data
156 hb_buffer_swap_copy(sub_in, sub);
157 ssa = (char*)sub_in->data;
159 sprintf((char*)sub_in->data, "%d,,Default,,0,0,0,,", line);
160 pos = strlen((char*)sub_in->data);
162 ii = 0;
163 while (srt[ii] != '\0')
165 if ((ssa_markup = srt_markup_to_ssa(srt + ii, &skip)) != NULL)
167 len = strlen(ssa_markup);
168 hb_buffer_realloc(sub_in, pos + len + 1);
169 // After realloc, sub_in->data may change
170 ssa = (char*)sub_in->data;
171 sprintf(ssa + pos, "%s", ssa_markup);
172 free(ssa_markup);
173 pos += len;
174 ii += skip;
176 else
178 hb_buffer_realloc(sub_in, pos + 4);
179 // After realloc, sub_in->data may change
180 ssa = (char*)sub_in->data;
181 if (srt[ii] == '\r')
183 ssa[pos++] = '\\';
184 ssa[pos++] = 'N';
185 ii++;
186 if (srt[ii] == '\n')
188 ii++;
191 else if (srt[ii] == '\n')
193 ssa[pos++] = '\\';
194 ssa[pos++] = 'N';
195 ii++;
197 else
199 ssa[pos++] = srt[ii++];
203 ssa[pos] = '\0';
204 sub_in->size = pos + 1;
205 hb_buffer_close(&sub);
208 static int
209 read_time_from_string( const char* timeString, struct start_and_end *result )
211 // for ex. 00:00:15,248 --> 00:00:16,545
213 long houres1, minutes1, seconds1, milliseconds1,
214 houres2, minutes2, seconds2, milliseconds2;
215 int scanned;
217 scanned = sscanf(timeString, "%ld:%ld:%ld,%ld --> %ld:%ld:%ld,%ld\n",
218 &houres1, &minutes1, &seconds1, &milliseconds1,
219 &houres2, &minutes2, &seconds2, &milliseconds2);
220 if (scanned != 8)
222 return 0;
224 result->start =
225 milliseconds1 + seconds1*1000 + minutes1*60*1000 + houres1*60*60*1000;
226 result->end =
227 milliseconds2 + seconds2*1000 + minutes2*60*1000 + houres2*60*60*1000;
228 return 1;
231 static int utf8_fill( hb_work_private_t * pv )
233 int bytes, conversion = 0;
234 size_t out_size;
236 /* Align utf8 data to beginning of the buffer so that we can
237 * fill the buffer to its maximum */
238 memmove( pv->utf8_buf, pv->utf8_buf + pv->utf8_pos, pv->utf8_end - pv->utf8_pos );
239 pv->utf8_end -= pv->utf8_pos;
240 pv->utf8_pos = 0;
241 out_size = 2048 - pv->utf8_end;
242 while( out_size )
244 char *p, *q;
245 size_t in_size, retval;
247 if( pv->end == pv->pos )
249 bytes = fread( pv->buf, 1, 1024, pv->file );
250 pv->pos = 0;
251 pv->end = bytes;
252 if( bytes == 0 )
254 if( conversion )
255 return 1;
256 else
257 return 0;
261 p = pv->buf + pv->pos;
262 q = pv->utf8_buf + pv->utf8_end;
263 in_size = pv->end - pv->pos;
265 retval = iconv( pv->iconv_context, &p, &in_size, &q, &out_size);
266 if( q != pv->utf8_buf + pv->utf8_pos )
267 conversion = 1;
269 pv->utf8_end = q - pv->utf8_buf;
270 pv->pos = p - pv->buf;
272 if ( !pv->utf8_bom_skipped )
274 uint8_t *buf = (uint8_t*)pv->utf8_buf;
275 if (buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
277 pv->utf8_pos = 3;
279 pv->utf8_bom_skipped = 1;
282 if( ( retval == -1 ) && ( errno == EINVAL ) )
284 /* Incomplete multibyte sequence, read more data */
285 memmove( pv->buf, p, pv->end - pv->pos );
286 pv->end -= pv->pos;
287 pv->pos = 0;
288 bytes = fread( pv->buf + pv->end, 1, 1024 - pv->end, pv->file );
289 if( bytes == 0 )
291 if( !conversion )
292 return 0;
293 else
294 return 1;
296 pv->end += bytes;
297 } else if ( ( retval == -1 ) && ( errno == EILSEQ ) )
299 hb_error( "Invalid byte for codeset in input, discard byte" );
300 /* Try the next byte of the input */
301 pv->pos++;
302 } else if ( ( retval == -1 ) && ( errno == E2BIG ) )
304 /* buffer full */
305 return conversion;
308 return 1;
311 static int get_line( hb_work_private_t * pv, char *buf, int size )
313 int i;
314 char c;
316 // clear remnants of the previous line before progessing a new one
317 memset(buf, '\0', size);
319 /* Find newline in converted UTF-8 buffer */
320 for( i = 0; i < size - 1; i++ )
322 if( pv->utf8_pos >= pv->utf8_end )
324 if( !utf8_fill( pv ) )
326 if( i )
327 return 1;
328 else
329 return 0;
332 c = pv->utf8_buf[pv->utf8_pos++];
333 if( c == '\n' )
335 buf[i] = '\n';
336 buf[i+1] = '\0';
337 return 1;
339 buf[i] = c;
341 buf[0] = '\0';
342 return 1;
346 * Read the SRT file and put the entries into the subtitle fifo for all to read
348 static hb_buffer_t *srt_read( hb_work_private_t *pv )
350 char line_buffer[1024];
351 int reprocess = 0, resync = 0;
353 if( !pv->file )
355 return NULL;
358 while( reprocess || get_line( pv, line_buffer, sizeof( line_buffer ) ) )
360 reprocess = 0;
361 switch (pv->current_state)
363 case k_state_timecode:
365 struct start_and_end timing;
366 int result;
368 result = read_time_from_string( line_buffer, &timing );
369 if (!result)
371 resync = 1;
372 pv->current_state = k_state_potential_new_entry;
373 continue;
375 pv->current_entry.duration = timing.end - timing.start;
376 pv->current_entry.offset = timing.start - pv->current_time;
378 pv->current_time = timing.end;
380 pv->current_entry.start = timing.start;
381 pv->current_entry.stop = timing.end;
383 pv->current_state = k_state_inEntry;
384 continue;
387 case k_state_inEntry_or_new:
389 char *endpoint;
391 * Is this really new next entry begin?
392 * Look for entry number.
394 strtol(line_buffer, &endpoint, 10);
395 if (endpoint == line_buffer ||
396 (endpoint && *endpoint != '\n' && *endpoint != '\r'))
399 * Doesn't resemble an entry number
400 * must still be in an entry
402 if (!resync)
404 reprocess = 1;
405 pv->current_state = k_state_inEntry;
407 continue;
409 reprocess = 1;
410 pv->current_state = k_state_potential_new_entry;
411 break;
414 case k_state_inEntry:
416 char *q;
417 int size, len;
419 // If the current line is empty, we assume this is the
420 // seperation betwene two entries. In case we are wrong,
421 // the mistake is corrected in the next state.
422 if (strcmp(line_buffer, "\n") == 0 || strcmp(line_buffer, "\r\n") == 0) {
423 pv->current_state = k_state_potential_new_entry;
424 continue;
427 q = pv->current_entry.text + pv->current_entry.pos;
428 len = strlen( line_buffer );
429 size = MIN(1024 - pv->current_entry.pos - 1, len );
430 memcpy(q, line_buffer, size);
431 pv->current_entry.pos += size;
432 pv->current_entry.text[pv->current_entry.pos] = '\0';
433 break;
436 case k_state_potential_new_entry:
438 char *endpoint;
439 long entry_number;
440 hb_buffer_t *buffer = NULL;
442 * Is this really new next entry begin?
444 entry_number = strtol(line_buffer, &endpoint, 10);
445 if (!resync && (*line_buffer == '\n' || *line_buffer == '\r'))
448 * Well.. looks like we are in the wrong mode.. lets add the
449 * newline we misinterpreted...
451 strncat(pv->current_entry.text, " ", sizeof(pv->current_entry.text) - strlen(pv->current_entry.text) - 1);
452 pv->current_state = k_state_inEntry_or_new;
453 continue;
455 if (endpoint == line_buffer ||
456 (endpoint && *endpoint != '\n' && *endpoint != '\r'))
459 * Well.. looks like we are in the wrong mode.. lets add the
460 * line we misinterpreted...
462 if (!resync)
464 reprocess = 1;
465 pv->current_state = k_state_inEntry;
467 continue;
470 * We found the next entry - or a really rare error condition
472 pv->last_entry_number = entry_number;
473 resync = 0;
474 if (*pv->current_entry.text != '\0')
476 long length;
477 char *p, *q;
478 int line = 1;
479 uint64_t start_time = ( pv->current_entry.start +
480 pv->subtitle->config.offset ) * 90;
481 uint64_t stop_time = ( pv->current_entry.stop +
482 pv->subtitle->config.offset ) * 90;
484 if( !( start_time > pv->start_time && stop_time < pv->stop_time ) )
486 hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time);
487 memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
488 ++(pv->number_of_entries);
489 pv->current_state = k_state_timecode;
490 continue;
493 length = strlen( pv->current_entry.text );
495 for (q = p = pv->current_entry.text; *p != '\0'; p++)
497 if (*p == '\n' || *p == '\r')
499 if (*(p + 1) == '\n' || *(p + 1) == '\r' ||
500 *(p + 1) == '\0')
502 // followed by line break or last character, skip it
503 length--;
504 continue;
506 else if (line == 1)
508 // replace '\r' with '\n'
509 *q = '\n';
510 line = 2;
512 else
514 // all subtitles on two lines tops
515 // replace line breaks with spaces
516 *q = ' ';
518 q++;
520 else
522 *q = *p;
523 q++;
526 *q = '\0';
528 buffer = hb_buffer_init( length + 1 );
530 if( buffer )
532 buffer->s.start = start_time - pv->start_time;
533 buffer->s.stop = stop_time - pv->start_time;
535 memcpy( buffer->data, pv->current_entry.text, length + 1 );
538 memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
539 ++(pv->number_of_entries);
540 pv->current_state = k_state_timecode;
541 if( buffer )
543 return buffer;
545 continue;
550 hb_buffer_t *buffer = NULL;
551 if (*pv->current_entry.text != '\0')
553 long length;
554 char *p, *q;
555 int line = 1;
556 uint64_t start_time = ( pv->current_entry.start +
557 pv->subtitle->config.offset ) * 90;
558 uint64_t stop_time = ( pv->current_entry.stop +
559 pv->subtitle->config.offset ) * 90;
561 if( !( start_time > pv->start_time && stop_time < pv->stop_time ) )
563 hb_deep_log( 3, "Discarding SRT at time start %"PRId64", stop %"PRId64, start_time, stop_time);
564 memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
565 return NULL;
568 length = strlen( pv->current_entry.text );
570 for (q = p = pv->current_entry.text; *p != '\0'; p++)
572 if (*p == '\n' || *p == '\r')
574 if (*(p + 1) == '\n' || *(p + 1) == '\r' || *(p + 1) == '\0')
576 // followed by line break or last character, skip it
577 length--;
578 continue;
580 else if (line == 1)
582 // replace '\r' with '\n'
583 *q = '\n';
584 line = 2;
586 else
588 // all subtitles on two lines tops
589 // replace line breaks with spaces
590 *q = ' ';
592 q++;
594 else
596 *q = *p;
597 q++;
600 *q = '\0';
602 buffer = hb_buffer_init( length + 1 );
604 if( buffer )
606 buffer->s.start = start_time - pv->start_time;
607 buffer->s.stop = stop_time - pv->start_time;
609 memcpy( buffer->data, pv->current_entry.text, length + 1 );
612 memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
613 if( buffer )
615 return buffer;
618 return NULL;
621 static int decsrtInit( hb_work_object_t * w, hb_job_t * job )
623 int retval = 1;
624 hb_work_private_t * pv;
625 hb_buffer_t *buffer;
626 int i;
627 hb_chapter_t * chapter;
629 pv = calloc( 1, sizeof( hb_work_private_t ) );
630 if( pv )
632 w->private_data = pv;
634 pv->job = job;
636 buffer = hb_buffer_init( 0 );
637 hb_fifo_push( w->fifo_in, buffer);
639 pv->current_state = k_state_potential_new_entry;
640 pv->number_of_entries = 0;
641 pv->last_entry_number = 0;
642 pv->current_time = 0;
643 pv->subtitle = w->subtitle;
646 * Figure out the start and stop times from teh chapters being
647 * encoded - drop subtitle not in this range.
649 pv->start_time = 0;
650 for( i = 1; i < job->chapter_start; ++i )
652 chapter = hb_list_item( job->list_chapter, i - 1 );
653 if( chapter )
655 pv->start_time += chapter->duration;
656 } else {
657 hb_error( "Could not locate chapter %d for SRT start time", i );
658 retval = 0;
661 pv->stop_time = pv->start_time;
662 for( i = job->chapter_start; i <= job->chapter_end; ++i )
664 chapter = hb_list_item( job->list_chapter, i - 1 );
665 if( chapter )
667 pv->stop_time += chapter->duration;
668 } else {
669 hb_error( "Could not locate chapter %d for SRT start time", i );
670 retval = 0;
674 hb_deep_log( 3, "SRT Start time %"PRId64", stop time %"PRId64, pv->start_time, pv->stop_time);
676 pv->iconv_context = iconv_open( "utf-8", pv->subtitle->config.src_codeset );
679 if( pv->iconv_context == (iconv_t) -1 )
681 hb_error("Could not open the iconv library with those file formats\n");
683 } else {
684 memset( &pv->current_entry, 0, sizeof( srt_entry_t ) );
686 pv->file = hb_fopen(w->subtitle->config.src_filename, "r");
688 if( !pv->file )
690 hb_error("Could not open the SRT subtitle file '%s'\n",
691 w->subtitle->config.src_filename);
692 } else {
693 retval = 0;
697 if (!retval)
699 // Generate generic SSA Script Info.
700 int height = job->title->geometry.height - job->crop[0] - job->crop[1];
701 int width = job->title->geometry.width - job->crop[2] - job->crop[3];
702 hb_subtitle_add_ssa_header(w->subtitle, "Arial",
703 .066 * job->title->geometry.height,
704 width, height);
706 return retval;
709 static int decsrtWork( hb_work_object_t * w, hb_buffer_t ** buf_in,
710 hb_buffer_t ** buf_out )
712 hb_work_private_t * pv = w->private_data;
713 hb_buffer_t * in = *buf_in;
714 hb_buffer_t * out = NULL;
716 out = srt_read( pv );
717 if( out )
719 hb_srt_to_ssa(out, ++pv->line);
722 * Keep a buffer in our input fifo so that we get run.
724 hb_fifo_push( w->fifo_in, in);
725 *buf_in = NULL;
726 *buf_out = out;
727 } else {
728 *buf_out = NULL;
729 return HB_WORK_OK;
732 return HB_WORK_OK;
735 static void decsrtClose( hb_work_object_t * w )
737 hb_work_private_t * pv = w->private_data;
738 fclose( pv->file );
739 iconv_close(pv->iconv_context);
740 free( w->private_data );
743 hb_work_object_t hb_decsrtsub =
745 WORK_DECSRTSUB,
746 "SRT Subtitle Decoder",
747 decsrtInit,
748 decsrtWork,
749 decsrtClose