3 Copyright (c) 2003-2015 HandBrake Team
4 This file is part of the HandBrake source code
5 Homepage: <http://handbrake.fr/>.
6 It may be used under the terms of the GNU General Public License v2.
7 For full terms see the COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html
11 * Converts TX3G subtitles to UTF-8 SSA subtitle events with limited override-tag markup (bold, italic, underline, text color, alpha).
13 * TX3G == MPEG 4, Part 17 (ISO/IEC 14496-17) == 3GPP Timed Text (26.245)
14 * A full reference to the format can be found here:
15 * http://www.3gpp.org/ftp/Specs/html-info/26245.htm
17 * @author David Foster (davidfstr)
/*
 * Private per-instance state of the TX3G subtitle decoder.
 */
struct hb_work_private_s
{
    /* SSA line number; written as the first field of every emitted event. */
    int line;
};
/* Output bytes reserved per style record for the SSA markup it may produce. */
#define MAX_MARKUP_LEN 40
/* Output bytes reserved for the fixed SSA event preamble. */
#define SSA_PREAMBLE_LEN 24

/*
 * One style run of a TX3G StyleBox ('styl'), in the order the fields are
 * read from the packet.
 */
typedef struct StyleRecord {
    uint16_t startChar;       /* NOTE: indices in terms of *character* (not: byte) positions */
    uint16_t endChar;         /* character index at which this style stops applying */
    uint16_t fontID;          /* parsed from the packet; not used for the SSA output */
    uint8_t  faceStyleFlags;  /* FaceStyleFlag bits (BOLD / ITALIC / UNDERLINE) */
    uint8_t  fontSize;        /* parsed from the packet; not used for the SSA output */
    uint32_t textColorRGBA;   /* 0xRRGGBBAA: color in the upper 24 bits, alpha in the low byte */
} StyleRecord;
/*
 * Big-endian read helpers. They operate on a local cursor named `pos`
 * (uint8_t *) and advance it past the bytes consumed.
 *
 * NOTE: None of these macros check for buffer overflow; the caller must
 *       verify that enough bytes remain before using them.
 *
 * READ_U8/READ_U16/READ_U32 are single expressions, so they can be used both
 * as `x = READ_U16();` and inside larger expressions. Each byte is widened
 * to an unsigned type before shifting so that a high byte >= 0x80 is never
 * shifted into the sign bit of an int.
 */
#define READ_U8()       (*pos++)
#define READ_U16()      (pos += 2, (uint16_t)(((uint16_t)pos[-2] << 8) | (uint16_t)pos[-1]))
#define READ_U32()      (pos += 4, (((uint32_t)pos[-4] << 24) | \
                                    ((uint32_t)pos[-3] << 16) | \
                                    ((uint32_t)pos[-2] <<  8) | \
                                    ((uint32_t)pos[-1] <<  0)))

/*
 * READ_ARRAY yields the current cursor and then advances it by n bytes;
 * SKIP_ARRAY only advances. Both deliberately keep their statement form
 * (trailing semicolon included) so existing call sites keep compiling and
 * `n` is evaluated exactly once.
 */
#define READ_ARRAY(n)   pos; pos += (n);
#define SKIP_ARRAY(n)   pos += (n);

/* Appends one byte to the local output cursor `dst` and advances it. */
#define WRITE_CHAR(c)   {dst[0] = (c); dst += 1;}

/*
 * Packs the first four bytes of `str` into a big-endian 32-bit box type code.
 * Going through uint8_t avoids sign extension where plain char is signed.
 */
#define FOURCC(str)    ((((uint32_t)(uint8_t)(str)[0]) << 24) | \
                        (((uint32_t)(uint8_t)(str)[1]) << 16) | \
                        (((uint32_t)(uint8_t)(str)[2]) <<  8) | \
                        (((uint32_t)(uint8_t)(str)[3]) <<  0))

/* True for a UTF-8 continuation byte (bit pattern 10xxxxxx). */
#define IS_10xxxxxx(c)  (((c) & 0xC0) == 0x80)
63 static int write_ssa_markup(char *dst
, StyleRecord
*style
)
67 sprintf(dst
, "{\\r}");
70 sprintf(dst
, "{\\i%d\\b%d\\u%d\\1c&H%X&\\1a&H%02X&}",
71 !!(style
->faceStyleFlags
& ITALIC
),
72 !!(style
->faceStyleFlags
& BOLD
),
73 !!(style
->faceStyleFlags
& UNDERLINE
),
74 HB_RGB_TO_BGR(style
->textColorRGBA
>> 8),
75 255 - (style
->textColorRGBA
& 0xFF)); // SSA alpha is inverted 0==opaque
80 static hb_buffer_t
*tx3g_decode_to_ssa(hb_work_private_t
*pv
, hb_buffer_t
*in
)
82 uint8_t *pos
= in
->data
;
83 uint8_t *end
= in
->data
+ in
->size
;
85 uint16_t numStyleRecords
= 0;
86 StyleRecord
*styleRecords
= NULL
;
89 * Parse the packet as a TX3G TextSample.
91 * Look for a single StyleBox ('styl') and read all contained StyleRecords.
92 * Ignore all other box types.
94 * NOTE: Buffer overflows on read are not checked.
96 uint16_t textLength
= READ_U16();
97 uint8_t *text
= READ_ARRAY(textLength
);
101 * Read TextSampleModifierBox
103 uint32_t size
= READ_U32();
106 size
= pos
- end
; // extends to end of packet
110 hb_log( "dectx3gsub: TextSampleModifierBox has unsupported large size" );
113 uint32_t type
= READ_U32();
114 if (type
== FOURCC("uuid"))
116 hb_log( "dectx3gsub: TextSampleModifierBox has unsupported extended type" );
120 if (type
== FOURCC("styl"))
122 // Found a StyleBox. Parse the contained StyleRecords
124 if ( numStyleRecords
!= 0 )
126 hb_log( "dectx3gsub: found additional StyleBoxes on subtitle; skipping" );
131 numStyleRecords
= READ_U16();
132 if (numStyleRecords
> 0)
133 styleRecords
= calloc(numStyleRecords
, sizeof(StyleRecord
));
136 for (i
= 0; i
< numStyleRecords
; i
++)
138 styleRecords
[i
].startChar
= READ_U16();
139 styleRecords
[i
].endChar
= READ_U16();
140 styleRecords
[i
].fontID
= READ_U16();
141 styleRecords
[i
].faceStyleFlags
= READ_U8();
142 styleRecords
[i
].fontSize
= READ_U8();
143 styleRecords
[i
].textColorRGBA
= READ_U32();
148 // Found some other kind of TextSampleModifierBox. Skip it.
154 * Copy text to output buffer, and add HTML markup for the style records
156 int maxOutputSize
= textLength
+ SSA_PREAMBLE_LEN
+ (numStyleRecords
* MAX_MARKUP_LEN
);
157 hb_buffer_t
*out
= hb_buffer_init( maxOutputSize
);
160 uint8_t *dst
= out
->data
;
165 sprintf((char*)dst
, "%d,,Default,,0,0,0,,", pv
->line
);
166 dst
+= strlen((char*)dst
);
168 for (pos
= text
, end
= text
+ textLength
; pos
< end
; pos
++)
170 if (IS_10xxxxxx(*pos
))
172 // Is a non-first byte of a multi-byte UTF-8 character
174 continue; // ...without incrementing 'charIndex'
177 if (styleIndex
< numStyleRecords
)
179 if (styleRecords
[styleIndex
].endChar
== charIndex
)
181 if (styleIndex
+ 1 >= numStyleRecords
||
182 styleRecords
[styleIndex
+1].startChar
> charIndex
)
184 dst
+= write_ssa_markup((char*)dst
, NULL
);
188 if (styleRecords
[styleIndex
].startChar
== charIndex
)
190 dst
+= write_ssa_markup((char*)dst
, &styleRecords
[styleIndex
]);
207 // No text in the subtitle. This sub is just filler, drop it.
209 hb_buffer_close(&out
);
215 // Trim output buffer to the actual amount of data written
216 out
->size
= dst
- out
->data
;
218 // Copy metadata from the input packet to the output packet
219 out
->s
.frametype
= HB_FRAME_SUBTITLE
;
220 out
->s
.start
= in
->s
.start
;
221 out
->s
.stop
= in
->s
.stop
;
236 #undef WRITE_START_TAG
239 static int dectx3gInit( hb_work_object_t
* w
, hb_job_t
* job
)
241 hb_work_private_t
* pv
;
242 pv
= calloc( 1, sizeof( hb_work_private_t
) );
245 w
->private_data
= pv
;
248 // parse w->subtitle->extradata txg3 sample description into
249 // SSA format and replace extradata.
250 // For now we just create a generic SSA Script Info.
251 int height
= job
->title
->geometry
.height
- job
->crop
[0] - job
->crop
[1];
252 int width
= job
->title
->geometry
.width
- job
->crop
[2] - job
->crop
[3];
253 hb_subtitle_add_ssa_header(w
->subtitle
, "Arial",
254 .066 * job
->title
->geometry
.height
,
260 static int dectx3gWork( hb_work_object_t
* w
, hb_buffer_t
** buf_in
,
261 hb_buffer_t
** buf_out
)
263 hb_work_private_t
* pv
= w
->private_data
;
264 hb_buffer_t
* in
= *buf_in
;
266 if ( in
->s
.stop
== 0 ) {
267 hb_log( "dectx3gsub: subtitle packet lacks duration" );
270 if (in
->s
.flags
& HB_BUF_FLAG_EOF
)
277 *buf_out
= tx3g_decode_to_ssa(pv
, in
);
282 static void dectx3gClose( hb_work_object_t
* w
)
284 free(w
->private_data
);
287 hb_work_object_t hb_dectx3gsub
=
290 "TX3G Subtitle Decoder",