codec: atsc a65: fix truncated conversions
[vlc.git] / modules / codec / atsc_a65.c
blob7fdd2959e638491a4a84cc74ed0cab1816131064
1 /*****************************************************************************
2 * atsc_a65.c : ATSC A65 decoding helpers
3 *****************************************************************************
4 * Copyright (C) 2016 - VideoLAN Authors
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *****************************************************************************/
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
24 #include <vlc_common.h>
25 #include <vlc_charset.h>
27 #include "atsc_a65.h"
/* Values of the per-segment compression_type byte. */
enum
{
    ATSC_A65_COMPRESSION_NONE           = 0x00, /* the only type decoded in this file */
    ATSC_A65_COMPRESSION_HUFFMAN_C4C5   = 0x01,
    ATSC_A65_COMPRESSION_HUFFMAN_C6C7   = 0x02,
    /* 0x03..0xAF */
    ATSC_A65_COMPRESSION_RESERVED_FIRST = 0x03,
    ATSC_A65_COMPRESSION_RESERVED_LAST  = 0xAF,
    /* 0xB0..0xFF */
    ATSC_A65_COMPRESSION_OTHER_FIRST    = 0xB0,
    ATSC_A65_COMPRESSION_OTHER_LAST     = 0xFF,
};
/* Values of the per-segment mode byte (text encoding selector). */
enum
{
    /* 0x00..0x33: the mode byte is used as the high byte of each 16-bit
     * character; some values inside this span are reserved, see
     * ATSC_A65_MODE_RESERVED_RANGES */
    ATSC_A65_MODE_UNICODE_RANGE_START = 0x00,
    ATSC_A65_MODE_UNICODE_RANGE_END   = 0x33,
    ATSC_A65_MODE_SCSU                = 0x3E,
    ATSC_A65_MODE_UNICODE_UTF16       = 0x3F,
    ATSC_A65_MODE_TAIWAN_FIRST        = 0x40,
    ATSC_A65_MODE_TAIWAN_LAST         = 0x41,
    ATSC_A65_MODE_SOUTH_KOREA         = 0x48,
    ATSC_A65_MODE_OTHER_FIRST         = 0xE0,
    ATSC_A65_MODE_OTHER_LAST          = 0xFE,
    ATSC_A65_MODE_NOT_APPLICABLE      = 0xFF,
};
/* Reserved mode values, stored as six inclusive (first, last) pairs.
 * A segment whose mode falls inside any pair is not converted. */
const uint8_t ATSC_A65_MODE_RESERVED_RANGES[12] = {
    /* first, last */
    0x07, 0x08,
    0x11, 0x1F,
    0x28, 0x2F,
    0x34, 0x3D,
    0x42, 0x47,
    0x49, 0xDF,
};
64 struct atsc_a65_handle_t
66 char *psz_lang;
67 vlc_iconv_t iconv_u16be;
70 atsc_a65_handle_t *atsc_a65_handle_New( const char *psz_lang )
72 atsc_a65_handle_t *p_handle = malloc( sizeof(*p_handle) );
73 if( p_handle )
75 if( psz_lang && strlen(psz_lang) > 2 )
76 p_handle->psz_lang = strdup( psz_lang );
77 else
78 p_handle->psz_lang = NULL;
80 p_handle->iconv_u16be = NULL;
82 return p_handle;
85 void atsc_a65_handle_Release( atsc_a65_handle_t *p_handle )
87 if( p_handle->iconv_u16be )
88 vlc_iconv_close( p_handle->iconv_u16be );
89 free( p_handle->psz_lang );
90 free( p_handle );
/**
 * Widens a byte string into 2-byte units: each source byte becomes the
 * second byte of a pair whose first byte is i_prefix.
 *
 * \param p_src    source bytes
 * \param i_src    number of source bytes
 * \param i_prefix value stored as the first byte of every pair
 * \return a malloc()ed buffer of i_src * 2 bytes followed by one zero byte,
 *         or NULL when i_src is 0 or the allocation fails
 */
static char *enlarge_to16( const uint8_t *p_src, size_t i_src, uint8_t i_prefix )
{
    if( i_src == 0 )
        return NULL;

    const size_t i_wide = i_src * 2;
    char *psz_wide = malloc( i_wide + 1 );
    if( psz_wide == NULL )
        return NULL;

    for( size_t i = 0; i < i_src; i++ )
    {
        psz_wide[2 * i]     = (char) i_prefix;
        psz_wide[2 * i + 1] = (char) p_src[i];
    }
    psz_wide[i_wide] = 0;

    return psz_wide;
}
115 static bool convert_encoding_set( atsc_a65_handle_t *p_handle,
116 const uint8_t *p_src, size_t i_src,
117 char **ppsz_merg, size_t *pi_mergmin1,
118 uint8_t i_mode )
120 char *psz_dest = *ppsz_merg;
121 size_t i_mergmin1 = *pi_mergmin1;
122 bool b_ret = true;
124 if( i_src == 0 )
125 return NULL;
127 /* First exclude reserved ranges */
128 for( unsigned i=0; i<12; i+=2 )
130 if( i_mode >= ATSC_A65_MODE_RESERVED_RANGES[i] &&
131 i_mode <= ATSC_A65_MODE_RESERVED_RANGES[i+1] )
132 return false;
135 if( i_mode <= ATSC_A65_MODE_UNICODE_RANGE_END ) /* 8 range prefix + 8 */
137 if( !p_handle->iconv_u16be )
139 if ( !(p_handle->iconv_u16be = vlc_iconv_open("UTF-8", "UTF-16BE")) )
140 return false;
142 else if ( VLC_ICONV_ERR == vlc_iconv( p_handle->iconv_u16be, NULL, NULL, NULL, NULL ) ) /* reset */
144 return false;
147 char *psz16 = enlarge_to16( p_src, i_src, i_mode ); /* Maybe we can skip and feed iconv 2 by 2 */
148 if( psz16 )
150 char *psz_realloc = realloc( psz_dest, i_mergmin1 + (4 * i_src) + 1 );
151 if( psz_realloc )
153 const char *p_inbuf = psz16;
154 char *p_outbuf = &psz_realloc[i_mergmin1];
155 const size_t i_outbuf_size = i_src * 4;
156 size_t i_inbuf_remain = i_src * 2;
157 size_t i_outbuf_remain = i_outbuf_size;
158 b_ret = ( VLC_ICONV_ERR != vlc_iconv( p_handle->iconv_u16be, &p_inbuf, &i_inbuf_remain,
159 &p_outbuf, &i_outbuf_remain ) );
160 psz_dest = psz_realloc;
161 i_mergmin1 += (i_outbuf_size - i_outbuf_remain);
162 *p_outbuf = '\0';
164 free( psz16 );
166 else return false;
168 else
170 /* Unsupported encodings */
171 return false;
174 *ppsz_merg = psz_dest;
175 *pi_mergmin1 = i_mergmin1;
176 return b_ret;
179 #define BUF_ADVANCE(n) p_buffer += n; i_buffer -= n;
181 char * atsc_a65_Decode_multiple_string( atsc_a65_handle_t *p_handle, const uint8_t *p_buffer, size_t i_buffer )
183 char *psz_res = NULL;
184 size_t i_resmin1 = 0;
186 if( i_buffer < 1 )
187 return NULL;
189 uint8_t i_nb = p_buffer[0];
190 BUF_ADVANCE(1);
192 for( ; i_nb > 0; i_nb-- )
194 if( i_buffer < 4 )
195 goto error;
197 bool b_skip = ( p_handle->psz_lang && memcmp(p_buffer, p_handle->psz_lang, 3) );
198 BUF_ADVANCE(3);
200 uint8_t i_seg = p_buffer[0];
201 BUF_ADVANCE(1);
202 for( ; i_seg > 0; i_seg-- )
204 if( i_buffer < 3 )
205 goto error;
207 const uint8_t i_compression = p_buffer[0];
208 const uint8_t i_mode = p_buffer[1];
209 const uint8_t i_bytes = p_buffer[2];
210 BUF_ADVANCE(3);
212 if( i_buffer < i_bytes )
213 goto error;
215 if( i_compression != ATSC_A65_COMPRESSION_NONE ) // TBD
217 b_skip = true;
220 if( !b_skip )
222 (void) convert_encoding_set( p_handle, p_buffer, i_bytes,
223 &psz_res, &i_resmin1, i_mode );
226 BUF_ADVANCE(i_bytes);
230 return psz_res;
232 error:
233 free( psz_res );
234 return NULL;
237 #undef BUF_ADVANCE
239 char * atsc_a65_Decode_simple_UTF16_string( atsc_a65_handle_t *p_handle, const uint8_t *p_buffer, size_t i_buffer )
241 if( i_buffer < 1 )
242 return NULL;
244 if( !p_handle->iconv_u16be )
246 if ( !(p_handle->iconv_u16be = vlc_iconv_open("UTF-8", "UTF-16BE")) )
247 return NULL;
249 else if ( VLC_ICONV_ERR == vlc_iconv( p_handle->iconv_u16be, NULL, NULL, NULL, NULL ) ) /* reset */
251 return NULL;
254 const size_t i_target_buffer = i_buffer * 3 / 2;
255 size_t i_target_remaining = i_target_buffer;
256 const char *psz_toconvert = (const char *) p_buffer;
257 char *psz_converted_end;
258 char *psz_converted = psz_converted_end = malloc( i_target_buffer );
260 if( unlikely(!psz_converted) )
261 return NULL;
263 if( VLC_ICONV_ERR == vlc_iconv( p_handle->iconv_u16be, &psz_toconvert, &i_buffer,
264 &psz_converted_end, &i_target_remaining ) )
266 free( psz_converted );
267 psz_converted = NULL;
269 else
270 psz_converted[ i_target_buffer - i_target_remaining - 1 ] = 0;
272 return psz_converted;