3 Copyright (c) 2003-2015 HandBrake Team
4 This file is part of the HandBrake source code
5 Homepage: <http://handbrake.fr/>.
6 It may be used under the terms of the GNU General Public License v2.
7 For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
17 #include "decsrtsub.h"
19 struct start_and_end
{
20 unsigned long start
, end
;
26 k_state_inEntry_or_new
,
27 k_state_potential_new_entry
,
31 typedef struct srt_entry_s
{
32 long offset
, duration
;
39 * Store all context in the work private struct,
41 struct hb_work_private_s
52 unsigned long current_time
;
53 unsigned long number_of_entries
;
54 unsigned long last_entry_number
;
55 unsigned long current_state
;
56 srt_entry_t current_entry
;
57 iconv_t
*iconv_context
;
58 hb_subtitle_t
*subtitle
;
59 uint64_t start_time
; // In HB time
60 uint64_t stop_time
; // In HB time
62 int line
; // SSA line number
65 static char* srt_markup_to_ssa(char *srt
, int *len
)
72 if (srt
[0] != '<' && srt
[0] != '{')
80 if (srt
[1] == 'i' && srt
[2] == terminator
)
83 return hb_strdup_printf("{\\i1}");
85 else if (srt
[1] == 'b' && srt
[2] == terminator
)
88 return hb_strdup_printf("{\\b1}");
90 else if (srt
[1] == 'u' && srt
[2] == terminator
)
93 return hb_strdup_printf("{\\u1}");
95 else if (srt
[1] == '/' && srt
[2] == 'i' && srt
[3] == terminator
)
98 return hb_strdup_printf("{\\i0}");
100 else if (srt
[1] == '/' && srt
[2] == 'b' && srt
[3] == terminator
)
103 return hb_strdup_printf("{\\b0}");
105 else if (srt
[1] == '/' && srt
[2] == 'u' && srt
[3] == terminator
)
108 return hb_strdup_printf("{\\u0}");
110 else if (srt
[0] == '<' && !strncmp(srt
+ 1, "font", 4))
113 match
= sscanf(srt
+ 1, "font color=\"%39[^\"]\">", color
);
118 while (srt
[*len
] != '>') (*len
)++;
121 rgb
= strtol(color
+ 1, NULL
, 16);
123 rgb
= hb_rgb_lookup_by_name(color
);
124 return hb_strdup_printf("{\\1c&H%X&}", HB_RGB_TO_BGR(rgb
));
126 else if (srt
[0] == '<' && srt
[1] == '/' && !strncmp(srt
+ 2, "font", 4) &&
130 return hb_strdup_printf("{\\1c&HFFFFFF&}");
136 void hb_srt_to_ssa(hb_buffer_t
*sub_in
, int line
)
138 if (sub_in
->size
== 0)
141 // null terminate input if not already terminated
142 if (sub_in
->data
[sub_in
->size
-1] != 0)
144 hb_buffer_realloc(sub_in
, ++sub_in
->size
);
145 sub_in
->data
[sub_in
->size
- 1] = 0;
147 char * srt
= (char*)sub_in
->data
;
148 // SSA markup expands a little over SRT, so allocate a bit of extra
149 // space. More will be realloc'd if needed.
150 hb_buffer_t
* sub
= hb_buffer_init(sub_in
->size
+ 80);
151 char * ssa
, *ssa_markup
;
152 int skip
, len
, pos
, ii
;
154 // Exchange data between input sub and new ssa_sub
155 // After this, sub_in contains ssa data
156 hb_buffer_swap_copy(sub_in
, sub
);
157 ssa
= (char*)sub_in
->data
;
159 sprintf((char*)sub_in
->data
, "%d,,Default,,0,0,0,,", line
);
160 pos
= strlen((char*)sub_in
->data
);
163 while (srt
[ii
] != '\0')
165 if ((ssa_markup
= srt_markup_to_ssa(srt
+ ii
, &skip
)) != NULL
)
167 len
= strlen(ssa_markup
);
168 hb_buffer_realloc(sub_in
, pos
+ len
+ 1);
169 // After realloc, sub_in->data may change
170 ssa
= (char*)sub_in
->data
;
171 sprintf(ssa
+ pos
, "%s", ssa_markup
);
178 hb_buffer_realloc(sub_in
, pos
+ 4);
179 // After realloc, sub_in->data may change
180 ssa
= (char*)sub_in
->data
;
191 else if (srt
[ii
] == '\n')
199 ssa
[pos
++] = srt
[ii
++];
204 sub_in
->size
= pos
+ 1;
205 hb_buffer_close(&sub
);
209 read_time_from_string( const char* timeString
, struct start_and_end
*result
)
211 // for ex. 00:00:15,248 --> 00:00:16,545
213 long houres1
, minutes1
, seconds1
, milliseconds1
,
214 houres2
, minutes2
, seconds2
, milliseconds2
;
217 scanned
= sscanf(timeString
, "%ld:%ld:%ld,%ld --> %ld:%ld:%ld,%ld\n",
218 &houres1
, &minutes1
, &seconds1
, &milliseconds1
,
219 &houres2
, &minutes2
, &seconds2
, &milliseconds2
);
225 milliseconds1
+ seconds1
*1000 + minutes1
*60*1000 + houres1
*60*60*1000;
227 milliseconds2
+ seconds2
*1000 + minutes2
*60*1000 + houres2
*60*60*1000;
231 static int utf8_fill( hb_work_private_t
* pv
)
233 int bytes
, conversion
= 0;
236 /* Align utf8 data to beginning of the buffer so that we can
237 * fill the buffer to its maximum */
238 memmove( pv
->utf8_buf
, pv
->utf8_buf
+ pv
->utf8_pos
, pv
->utf8_end
- pv
->utf8_pos
);
239 pv
->utf8_end
-= pv
->utf8_pos
;
241 out_size
= 2048 - pv
->utf8_end
;
245 size_t in_size
, retval
;
247 if( pv
->end
== pv
->pos
)
249 bytes
= fread( pv
->buf
, 1, 1024, pv
->file
);
261 p
= pv
->buf
+ pv
->pos
;
262 q
= pv
->utf8_buf
+ pv
->utf8_end
;
263 in_size
= pv
->end
- pv
->pos
;
265 retval
= iconv( pv
->iconv_context
, &p
, &in_size
, &q
, &out_size
);
266 if( q
!= pv
->utf8_buf
+ pv
->utf8_pos
)
269 pv
->utf8_end
= q
- pv
->utf8_buf
;
270 pv
->pos
= p
- pv
->buf
;
272 if ( !pv
->utf8_bom_skipped
)
274 uint8_t *buf
= (uint8_t*)pv
->utf8_buf
;
275 if (buf
[0] == 0xef && buf
[1] == 0xbb && buf
[2] == 0xbf)
279 pv
->utf8_bom_skipped
= 1;
282 if( ( retval
== -1 ) && ( errno
== EINVAL
) )
284 /* Incomplete multibyte sequence, read more data */
285 memmove( pv
->buf
, p
, pv
->end
- pv
->pos
);
288 bytes
= fread( pv
->buf
+ pv
->end
, 1, 1024 - pv
->end
, pv
->file
);
297 } else if ( ( retval
== -1 ) && ( errno
== EILSEQ
) )
299 hb_error( "Invalid byte for codeset in input, discard byte" );
300 /* Try the next byte of the input */
302 } else if ( ( retval
== -1 ) && ( errno
== E2BIG
) )
311 static int get_line( hb_work_private_t
* pv
, char *buf
, int size
)
316 // clear remnants of the previous line before processing a new one
317 memset(buf
, '\0', size
);
319 /* Find newline in converted UTF-8 buffer */
320 for( i
= 0; i
< size
- 1; i
++ )
322 if( pv
->utf8_pos
>= pv
->utf8_end
)
324 if( !utf8_fill( pv
) )
332 c
= pv
->utf8_buf
[pv
->utf8_pos
++];
346 * Read the SRT file and put the entries into the subtitle fifo for all to read
348 static hb_buffer_t
*srt_read( hb_work_private_t
*pv
)
350 char line_buffer
[1024];
351 int reprocess
= 0, resync
= 0;
358 while( reprocess
|| get_line( pv
, line_buffer
, sizeof( line_buffer
) ) )
361 switch (pv
->current_state
)
363 case k_state_timecode
:
365 struct start_and_end timing
;
368 result
= read_time_from_string( line_buffer
, &timing
);
372 pv
->current_state
= k_state_potential_new_entry
;
375 pv
->current_entry
.duration
= timing
.end
- timing
.start
;
376 pv
->current_entry
.offset
= timing
.start
- pv
->current_time
;
378 pv
->current_time
= timing
.end
;
380 pv
->current_entry
.start
= timing
.start
;
381 pv
->current_entry
.stop
= timing
.end
;
383 pv
->current_state
= k_state_inEntry
;
387 case k_state_inEntry_or_new
:
391 * Is this really new next entry begin?
392 * Look for entry number.
394 strtol(line_buffer
, &endpoint
, 10);
395 if (endpoint
== line_buffer
||
396 (endpoint
&& *endpoint
!= '\n' && *endpoint
!= '\r'))
399 * Doesn't resemble an entry number
400 * must still be in an entry
405 pv
->current_state
= k_state_inEntry
;
410 pv
->current_state
= k_state_potential_new_entry
;
414 case k_state_inEntry
:
419 // If the current line is empty, we assume this is the
420 // separation between two entries. In case we are wrong,
421 // the mistake is corrected in the next state.
422 if (strcmp(line_buffer
, "\n") == 0 || strcmp(line_buffer
, "\r\n") == 0) {
423 pv
->current_state
= k_state_potential_new_entry
;
427 q
= pv
->current_entry
.text
+ pv
->current_entry
.pos
;
428 len
= strlen( line_buffer
);
429 size
= MIN(1024 - pv
->current_entry
.pos
- 1, len
);
430 memcpy(q
, line_buffer
, size
);
431 pv
->current_entry
.pos
+= size
;
432 pv
->current_entry
.text
[pv
->current_entry
.pos
] = '\0';
436 case k_state_potential_new_entry
:
440 hb_buffer_t
*buffer
= NULL
;
442 * Is this really new next entry begin?
444 entry_number
= strtol(line_buffer
, &endpoint
, 10);
445 if (!resync
&& (*line_buffer
== '\n' || *line_buffer
== '\r'))
448 * Well.. looks like we are in the wrong mode.. let's add the
449 * newline we misinterpreted...
451 strncat(pv
->current_entry
.text
, " ", sizeof(pv
->current_entry
.text
) - strlen(pv
->current_entry
.text
) - 1);
452 pv
->current_state
= k_state_inEntry_or_new
;
455 if (endpoint
== line_buffer
||
456 (endpoint
&& *endpoint
!= '\n' && *endpoint
!= '\r'))
459 * Well.. looks like we are in the wrong mode.. let's add the
460 * line we misinterpreted...
465 pv
->current_state
= k_state_inEntry
;
470 * We found the next entry - or a really rare error condition
472 pv
->last_entry_number
= entry_number
;
474 if (*pv
->current_entry
.text
!= '\0')
479 uint64_t start_time
= ( pv
->current_entry
.start
+
480 pv
->subtitle
->config
.offset
) * 90;
481 uint64_t stop_time
= ( pv
->current_entry
.stop
+
482 pv
->subtitle
->config
.offset
) * 90;
484 if( !( start_time
> pv
->start_time
&& stop_time
< pv
->stop_time
) )
486 hb_deep_log( 3, "Discarding SRT at time start %"PRId64
", stop %"PRId64
, start_time
, stop_time
);
487 memset( &pv
->current_entry
, 0, sizeof( srt_entry_t
) );
488 ++(pv
->number_of_entries
);
489 pv
->current_state
= k_state_timecode
;
493 length
= strlen( pv
->current_entry
.text
);
495 for (q
= p
= pv
->current_entry
.text
; *p
!= '\0'; p
++)
497 if (*p
== '\n' || *p
== '\r')
499 if (*(p
+ 1) == '\n' || *(p
+ 1) == '\r' ||
502 // followed by line break or last character, skip it
508 // replace '\r' with '\n'
514 // all subtitles on two lines tops
515 // replace line breaks with spaces
528 buffer
= hb_buffer_init( length
+ 1 );
532 buffer
->s
.start
= start_time
- pv
->start_time
;
533 buffer
->s
.stop
= stop_time
- pv
->start_time
;
535 memcpy( buffer
->data
, pv
->current_entry
.text
, length
+ 1 );
538 memset( &pv
->current_entry
, 0, sizeof( srt_entry_t
) );
539 ++(pv
->number_of_entries
);
540 pv
->current_state
= k_state_timecode
;
550 hb_buffer_t
*buffer
= NULL
;
551 if (*pv
->current_entry
.text
!= '\0')
556 uint64_t start_time
= ( pv
->current_entry
.start
+
557 pv
->subtitle
->config
.offset
) * 90;
558 uint64_t stop_time
= ( pv
->current_entry
.stop
+
559 pv
->subtitle
->config
.offset
) * 90;
561 if( !( start_time
> pv
->start_time
&& stop_time
< pv
->stop_time
) )
563 hb_deep_log( 3, "Discarding SRT at time start %"PRId64
", stop %"PRId64
, start_time
, stop_time
);
564 memset( &pv
->current_entry
, 0, sizeof( srt_entry_t
) );
568 length
= strlen( pv
->current_entry
.text
);
570 for (q
= p
= pv
->current_entry
.text
; *p
!= '\0'; p
++)
572 if (*p
== '\n' || *p
== '\r')
574 if (*(p
+ 1) == '\n' || *(p
+ 1) == '\r' || *(p
+ 1) == '\0')
576 // followed by line break or last character, skip it
582 // replace '\r' with '\n'
588 // all subtitles on two lines tops
589 // replace line breaks with spaces
602 buffer
= hb_buffer_init( length
+ 1 );
606 buffer
->s
.start
= start_time
- pv
->start_time
;
607 buffer
->s
.stop
= stop_time
- pv
->start_time
;
609 memcpy( buffer
->data
, pv
->current_entry
.text
, length
+ 1 );
612 memset( &pv
->current_entry
, 0, sizeof( srt_entry_t
) );
621 static int decsrtInit( hb_work_object_t
* w
, hb_job_t
* job
)
624 hb_work_private_t
* pv
;
627 hb_chapter_t
* chapter
;
629 pv
= calloc( 1, sizeof( hb_work_private_t
) );
632 w
->private_data
= pv
;
636 buffer
= hb_buffer_init( 0 );
637 hb_fifo_push( w
->fifo_in
, buffer
);
639 pv
->current_state
= k_state_potential_new_entry
;
640 pv
->number_of_entries
= 0;
641 pv
->last_entry_number
= 0;
642 pv
->current_time
= 0;
643 pv
->subtitle
= w
->subtitle
;
646 * Figure out the start and stop times from the chapters being
647 * encoded - drop subtitle not in this range.
650 for( i
= 1; i
< job
->chapter_start
; ++i
)
652 chapter
= hb_list_item( job
->list_chapter
, i
- 1 );
655 pv
->start_time
+= chapter
->duration
;
657 hb_error( "Could not locate chapter %d for SRT start time", i
);
661 pv
->stop_time
= pv
->start_time
;
662 for( i
= job
->chapter_start
; i
<= job
->chapter_end
; ++i
)
664 chapter
= hb_list_item( job
->list_chapter
, i
- 1 );
667 pv
->stop_time
+= chapter
->duration
;
669 hb_error( "Could not locate chapter %d for SRT start time", i
);
674 hb_deep_log( 3, "SRT Start time %"PRId64
", stop time %"PRId64
, pv
->start_time
, pv
->stop_time
);
676 pv
->iconv_context
= iconv_open( "utf-8", pv
->subtitle
->config
.src_codeset
);
679 if( pv
->iconv_context
== (iconv_t
) -1 )
681 hb_error("Could not open the iconv library with those file formats\n");
684 memset( &pv
->current_entry
, 0, sizeof( srt_entry_t
) );
686 pv
->file
= hb_fopen(w
->subtitle
->config
.src_filename
, "r");
690 hb_error("Could not open the SRT subtitle file '%s'\n",
691 w
->subtitle
->config
.src_filename
);
699 // Generate generic SSA Script Info.
700 int height
= job
->title
->geometry
.height
- job
->crop
[0] - job
->crop
[1];
701 int width
= job
->title
->geometry
.width
- job
->crop
[2] - job
->crop
[3];
702 hb_subtitle_add_ssa_header(w
->subtitle
, "Arial",
703 .066 * job
->title
->geometry
.height
,
709 static int decsrtWork( hb_work_object_t
* w
, hb_buffer_t
** buf_in
,
710 hb_buffer_t
** buf_out
)
712 hb_work_private_t
* pv
= w
->private_data
;
713 hb_buffer_t
* in
= *buf_in
;
714 hb_buffer_t
* out
= NULL
;
716 out
= srt_read( pv
);
719 hb_srt_to_ssa(out
, ++pv
->line
);
722 * Keep a buffer in our input fifo so that we get run.
724 hb_fifo_push( w
->fifo_in
, in
);
735 static void decsrtClose( hb_work_object_t
* w
)
737 hb_work_private_t
* pv
= w
->private_data
;
739 iconv_close(pv
->iconv_context
);
740 free( w
->private_data
);
743 hb_work_object_t hb_decsrtsub
=
746 "SRT Subtitle Decoder",