re_search_internal: Avoid overflow in computing re_malloc buffer size
[glibc.git] / libio / iofwide.c
bloba9936687ddb78b3615f5f95e6dfe672eb2fd6658
/* Copyright (C) 1999-2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.

   As a special exception, if you link the code in this file with
   files compiled with a GNU compiler to produce an executable,
   that does not cause the resulting executable to be covered by
   the GNU Lesser General Public License. This exception does not
   however invalidate any other reasons why the executable file
   might be covered by the GNU Lesser General Public License.
   This exception applies to code released by its copyright holders
   in files containing the exception. */
28 #include <libioP.h>
29 #ifdef _LIBC
30 # include <dlfcn.h>
31 # include <wchar.h>
32 #endif
33 #include <assert.h>
34 #include <stdlib.h>
35 #include <string.h>
37 #ifdef _LIBC
38 # include <langinfo.h>
39 # include <locale/localeinfo.h>
40 # include <wcsmbs/wcsmbsload.h>
41 # include <iconv/gconv_int.h>
42 # include <shlib-compat.h>
43 # include <sysdep.h>
44 #endif
47 /* Prototypes of libio's codecvt functions. */
48 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
49 __mbstate_t *statep,
50 const wchar_t *from_start,
51 const wchar_t *from_end,
52 const wchar_t **from_stop, char *to_start,
53 char *to_end, char **to_stop);
54 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
55 __mbstate_t *statep, char *to_start,
56 char *to_end, char **to_stop);
57 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
58 __mbstate_t *statep,
59 const char *from_start,
60 const char *from_end,
61 const char **from_stop, wchar_t *to_start,
62 wchar_t *to_end, wchar_t **to_stop);
63 static int do_encoding (struct _IO_codecvt *codecvt);
64 static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
65 const char *from_start,
66 const char *from_end, _IO_size_t max);
67 static int do_max_length (struct _IO_codecvt *codecvt);
68 static int do_always_noconv (struct _IO_codecvt *codecvt);
71 /* The functions used in `codecvt' for libio are always the same. */
72 const struct _IO_codecvt __libio_codecvt =
74 .__codecvt_destr = NULL, /* Destructor, never used. */
75 .__codecvt_do_out = do_out,
76 .__codecvt_do_unshift = do_unshift,
77 .__codecvt_do_in = do_in,
78 .__codecvt_do_encoding = do_encoding,
79 .__codecvt_do_always_noconv = do_always_noconv,
80 .__codecvt_do_length = do_length,
81 .__codecvt_do_max_length = do_max_length
#ifdef _LIBC
/* Transliteration hook; _IO_fwide attaches it to the output
   (wide -> multibyte) conversion step only.  */
const struct __gconv_trans_data __libio_translit attribute_hidden =
{
  .__trans_fct = __gconv_transliterate
};
#endif
93 /* Return orientation of stream. If mode is nonzero try to change
94 the orientation first. */
95 #undef _IO_fwide
96 int
97 _IO_fwide (fp, mode)
98 _IO_FILE *fp;
99 int mode;
101 /* Normalize the value. */
102 mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
104 #if defined SHARED && defined _LIBC \
105 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
106 if (__builtin_expect (&_IO_stdin_used == NULL, 0)
107 && (fp == _IO_stdin || fp == _IO_stdout || fp == _IO_stderr))
108 /* This is for a stream in the glibc 2.0 format. */
109 return -1;
110 #endif
112 /* The orientation already has been determined. */
113 if (fp->_mode != 0
114 /* Or the caller simply wants to know about the current orientation. */
115 || mode == 0)
116 return fp->_mode;
118 /* Set the orientation appropriately. */
119 if (mode > 0)
121 struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt;
123 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
124 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
126 /* Get the character conversion functions based on the currently
127 selected locale for LC_CTYPE. */
128 #ifdef _LIBC
130 /* Clear the state. We start all over again. */
131 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
132 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
134 struct gconv_fcts fcts;
135 __wcsmbs_clone_conv (&fcts);
136 assert (fcts.towc_nsteps == 1);
137 assert (fcts.tomb_nsteps == 1);
139 /* The functions are always the same. */
140 *cc = __libio_codecvt;
142 cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
143 cc->__cd_in.__cd.__steps = fcts.towc;
145 cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
146 cc->__cd_in.__cd.__data[0].__internal_use = 1;
147 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
148 cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
150 /* XXX For now no transliteration. */
151 cc->__cd_in.__cd.__data[0].__trans = NULL;
153 cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
154 cc->__cd_out.__cd.__steps = fcts.tomb;
156 cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
157 cc->__cd_out.__cd.__data[0].__internal_use = 1;
158 cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
159 cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
161 /* And now the transliteration. */
162 cc->__cd_out.__cd.__data[0].__trans
163 = (struct __gconv_trans_data *) &__libio_translit;
165 #else
166 # ifdef _GLIBCPP_USE_WCHAR_T
168 /* Determine internal and external character sets.
170 XXX For now we make our life easy: we assume a fixed internal
171 encoding (as most sane systems have; hi HP/UX!). If somebody
172 cares about systems which changing internal charsets they
173 should come up with a solution for the determination of the
174 currently used internal character set. */
175 const char *internal_ccs = _G_INTERNAL_CCS;
176 const char *external_ccs = NULL;
178 # ifdef HAVE_NL_LANGINFO
179 external_ccs = nl_langinfo (CODESET);
180 # endif
181 if (external_ccs == NULL)
182 external_ccs = "ISO-8859-1";
184 cc->__cd_in = iconv_open (internal_ccs, external_ccs);
185 if (cc->__cd_in != (iconv_t) -1)
186 cc->__cd_out = iconv_open (external_ccs, internal_ccs);
188 if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
190 if (cc->__cd_in != (iconv_t) -1)
191 iconv_close (cc->__cd_in);
192 /* XXX */
193 abort ();
196 # else
197 # error "somehow determine this from LC_CTYPE"
198 # endif
199 #endif
201 /* From now on use the wide character callback functions. */
202 ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
204 /* One last twist: we get the current stream position. The wide
205 char streams have much more problems with not knowing the
206 current position and so we should disable the optimization
207 which allows the functions without knowing the position. */
208 fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur);
211 /* Set the mode now. */
212 fp->_mode = mode;
214 return mode;
218 static enum __codecvt_result
219 do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep,
220 const wchar_t *from_start, const wchar_t *from_end,
221 const wchar_t **from_stop, char *to_start, char *to_end,
222 char **to_stop)
224 enum __codecvt_result result;
226 #ifdef _LIBC
227 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
228 int status;
229 size_t dummy;
230 const unsigned char *from_start_copy = (unsigned char *) from_start;
232 codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
233 codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
234 codecvt->__cd_out.__cd.__data[0].__statep = statep;
236 __gconv_fct fct = gs->__fct;
237 #ifdef PTR_DEMANGLE
238 if (gs->__shlib_handle != NULL)
239 PTR_DEMANGLE (fct);
240 #endif
242 status = DL_CALL_FCT (fct,
243 (gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
244 (const unsigned char *) from_end, NULL,
245 &dummy, 0, 0));
247 *from_stop = (wchar_t *) from_start_copy;
248 *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
250 switch (status)
252 case __GCONV_OK:
253 case __GCONV_EMPTY_INPUT:
254 result = __codecvt_ok;
255 break;
257 case __GCONV_FULL_OUTPUT:
258 case __GCONV_INCOMPLETE_INPUT:
259 result = __codecvt_partial;
260 break;
262 default:
263 result = __codecvt_error;
264 break;
266 #else
267 # ifdef _GLIBCPP_USE_WCHAR_T
268 size_t res;
269 const char *from_start_copy = (const char *) from_start;
270 size_t from_len = from_end - from_start;
271 char *to_start_copy = to_start;
272 size_t to_len = to_end - to_start;
273 res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
274 &to_start_copy, &to_len);
276 if (res == 0 || from_len == 0)
277 result = __codecvt_ok;
278 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
279 result = __codecvt_partial;
280 else
281 result = __codecvt_error;
283 # else
284 /* Decide what to do. */
285 result = __codecvt_error;
286 # endif
287 #endif
289 return result;
293 static enum __codecvt_result
294 do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep,
295 char *to_start, char *to_end, char **to_stop)
297 enum __codecvt_result result;
299 #ifdef _LIBC
300 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
301 int status;
302 size_t dummy;
304 codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
305 codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
306 codecvt->__cd_out.__cd.__data[0].__statep = statep;
308 __gconv_fct fct = gs->__fct;
309 #ifdef PTR_DEMANGLE
310 if (gs->__shlib_handle != NULL)
311 PTR_DEMANGLE (fct);
312 #endif
314 status = DL_CALL_FCT (fct,
315 (gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
316 NULL, &dummy, 1, 0));
318 *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
320 switch (status)
322 case __GCONV_OK:
323 case __GCONV_EMPTY_INPUT:
324 result = __codecvt_ok;
325 break;
327 case __GCONV_FULL_OUTPUT:
328 case __GCONV_INCOMPLETE_INPUT:
329 result = __codecvt_partial;
330 break;
332 default:
333 result = __codecvt_error;
334 break;
336 #else
337 # ifdef _GLIBCPP_USE_WCHAR_T
338 size_t res;
339 char *to_start_copy = (char *) to_start;
340 size_t to_len = to_end - to_start;
342 res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
344 if (res == 0)
345 result = __codecvt_ok;
346 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
347 result = __codecvt_partial;
348 else
349 result = __codecvt_error;
350 # else
351 /* Decide what to do. */
352 result = __codecvt_error;
353 # endif
354 #endif
356 return result;
360 static enum __codecvt_result
361 do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep,
362 const char *from_start, const char *from_end, const char **from_stop,
363 wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
365 enum __codecvt_result result;
367 #ifdef _LIBC
368 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
369 int status;
370 size_t dummy;
371 const unsigned char *from_start_copy = (unsigned char *) from_start;
373 codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_start;
374 codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) to_end;
375 codecvt->__cd_in.__cd.__data[0].__statep = statep;
377 __gconv_fct fct = gs->__fct;
378 #ifdef PTR_DEMANGLE
379 if (gs->__shlib_handle != NULL)
380 PTR_DEMANGLE (fct);
381 #endif
383 status = DL_CALL_FCT (fct,
384 (gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
385 (const unsigned char *) from_end, NULL,
386 &dummy, 0, 0));
388 *from_stop = (const char *) from_start_copy;
389 *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
391 switch (status)
393 case __GCONV_OK:
394 case __GCONV_EMPTY_INPUT:
395 result = __codecvt_ok;
396 break;
398 case __GCONV_FULL_OUTPUT:
399 case __GCONV_INCOMPLETE_INPUT:
400 result = __codecvt_partial;
401 break;
403 default:
404 result = __codecvt_error;
405 break;
407 #else
408 # ifdef _GLIBCPP_USE_WCHAR_T
409 size_t res;
410 const char *from_start_copy = (const char *) from_start;
411 size_t from_len = from_end - from_start;
412 char *to_start_copy = (char *) from_start;
413 size_t to_len = to_end - to_start;
415 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
416 &to_start_copy, &to_len);
418 if (res == 0)
419 result = __codecvt_ok;
420 else if (to_len == 0)
421 result = __codecvt_partial;
422 else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
423 result = __codecvt_partial;
424 else
425 result = __codecvt_error;
426 # else
427 /* Decide what to do. */
428 result = __codecvt_error;
429 # endif
430 #endif
432 return result;
/* Describe the external encoding handled by CODECVT.

   Returns -1 if the encoding is stateful (or, outside of libc, always,
   as the worst case), 0 if the number of bytes per wide character is
   not constant, and otherwise that constant number of bytes.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  /* See whether the encoding is stateful.  */
  if (codecvt->__cd_in.__cd.__steps[0].__stateful)
    return -1;
  /* Fortunately not.  Now determine the input bytes for the conversion
     necessary for each wide character.  */
  if (codecvt->__cd_in.__cd.__steps[0].__min_needed_from
      != codecvt->__cd_in.__cd.__steps[0].__max_needed_from)
    /* Not a constant value.  */
    return 0;

  return codecvt->__cd_in.__cd.__steps[0].__min_needed_from;
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
/* Always returns 0, whatever CODECVT is; the argument is not
   examined.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  return 0;
}
465 static int
466 do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
467 const char *from_start, const char *from_end, _IO_size_t max)
469 int result;
470 #ifdef _LIBC
471 const unsigned char *cp = (const unsigned char *) from_start;
472 wchar_t to_buf[max];
473 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
474 int status;
475 size_t dummy;
477 codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_buf;
478 codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) &to_buf[max];
479 codecvt->__cd_in.__cd.__data[0].__statep = statep;
481 __gconv_fct fct = gs->__fct;
482 #ifdef PTR_DEMANGLE
483 if (gs->__shlib_handle != NULL)
484 PTR_DEMANGLE (fct);
485 #endif
487 status = DL_CALL_FCT (fct,
488 (gs, codecvt->__cd_in.__cd.__data, &cp,
489 (const unsigned char *) from_end, NULL,
490 &dummy, 0, 0));
492 result = cp - (const unsigned char *) from_start;
493 #else
494 # ifdef _GLIBCPP_USE_WCHAR_T
495 const char *from_start_copy = (const char *) from_start;
496 size_t from_len = from_end - from_start;
497 wchar_t to_buf[max];
498 size_t res;
499 char *to_start = (char *) to_buf;
501 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
502 &to_start, &max);
504 result = from_start_copy - (char *) from_start;
505 # else
506 /* Decide what to do. */
507 result = 0;
508 # endif
509 #endif
511 return result;
/* Return the largest number of external bytes needed for one wide
   character: the input step's `__max_needed_from' inside libc,
   MB_CUR_MAX otherwise.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  return codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  return MB_CUR_MAX;
#endif
}