Update.
[glibc.git] / libio / iofwide.c
blob225237ef6660ec30e39ca12049ff28e97db1c9d2
1 /* Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA.
19 As a special exception, if you link the code in this file with
20 files compiled with a GNU compiler to produce an executable,
21 that does not cause the resulting executable to be covered by
22 the GNU Lesser General Public License. This exception does not
23 however invalidate any other reasons why the executable file
24 might be covered by the GNU Lesser General Public License.
25 This exception applies to code released by its copyright holders
26 in files containing the exception. */
28 #include <libioP.h>
29 #ifdef _LIBC
30 # include <dlfcn.h>
31 # include <wchar.h>
32 #endif
33 #include <assert.h>
34 #include <stdlib.h>
35 #include <string.h>
37 #ifdef _LIBC
38 # include <langinfo.h>
39 # include <locale/localeinfo.h>
40 # include <wcsmbs/wcsmbsload.h>
41 # include <iconv/gconv_int.h>
42 # include <shlib-compat.h>
43 #endif
46 /* Prototypes of libio's codecvt functions. */
47 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
48 __mbstate_t *statep,
49 const wchar_t *from_start,
50 const wchar_t *from_end,
51 const wchar_t **from_stop, char *to_start,
52 char *to_end, char **to_stop);
53 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
54 __mbstate_t *statep, char *to_start,
55 char *to_end, char **to_stop);
56 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
57 __mbstate_t *statep,
58 const char *from_start,
59 const char *from_end,
60 const char **from_stop, wchar_t *to_start,
61 wchar_t *to_end, wchar_t **to_stop);
62 static int do_encoding (struct _IO_codecvt *codecvt);
63 static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
64 const char *from_start,
65 const char *from_end, _IO_size_t max);
66 static int do_max_length (struct _IO_codecvt *codecvt);
67 static int do_always_noconv (struct _IO_codecvt *codecvt);
70 /* The functions used in `codecvt' for libio are always the same. */
71 struct _IO_codecvt __libio_codecvt =
73 .__codecvt_destr = NULL, /* Destructor, never used. */
74 .__codecvt_do_out = do_out,
75 .__codecvt_do_unshift = do_unshift,
76 .__codecvt_do_in = do_in,
77 .__codecvt_do_encoding = do_encoding,
78 .__codecvt_do_always_noconv = do_always_noconv,
79 .__codecvt_do_length = do_length,
80 .__codecvt_do_max_length = do_max_length
#ifdef _LIBC
/* Transliteration descriptor that _IO_fwide attaches to the output
   (wide -> multibyte) conversion of a stream.  Its only initialized
   member is the transliteration function, __gconv_transliterate.  */
struct __gconv_trans_data __libio_translit attribute_hidden =
{
  .__trans_fct = __gconv_transliterate
};
#endif
92 /* Return orientation of stream. If mode is nonzero try to change
93 the orientation first. */
94 #undef _IO_fwide
95 int
96 _IO_fwide (fp, mode)
97 _IO_FILE *fp;
98 int mode;
100 /* Normalize the value. */
101 mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
103 if (mode == 0)
104 /* The caller simply wants to know about the current orientation. */
105 return fp->_mode;
107 #if defined SHARED && defined _LIBC \
108 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
109 if (__builtin_expect (&_IO_stdin_used == NULL, 0)
110 && (fp == _IO_stdin || fp == _IO_stdout || fp == _IO_stderr))
111 /* This is for a stream in the glibc 2.0 format. */
112 return -1;
113 #endif
115 if (fp->_mode != 0)
116 /* The orientation already has been determined. */
117 return fp->_mode;
119 /* Set the orientation appropriately. */
120 if (mode > 0)
122 struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt;
124 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
125 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
127 /* Get the character conversion functions based on the currently
128 selected locale for LC_CTYPE. */
129 #ifdef _LIBC
131 struct gconv_fcts fcts;
133 /* Clear the state. We start all over again. */
134 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
135 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
137 __wcsmbs_clone_conv (&fcts);
138 assert (fcts.towc_nsteps == 1);
139 assert (fcts.tomb_nsteps == 1);
141 /* The functions are always the same. */
142 *cc = __libio_codecvt;
144 cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
145 cc->__cd_in.__cd.__steps = fcts.towc;
147 cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
148 cc->__cd_in.__cd.__data[0].__internal_use = 1;
149 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
150 cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
152 /* XXX For now no transliteration. */
153 cc->__cd_in.__cd.__data[0].__trans = NULL;
155 cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
156 cc->__cd_out.__cd.__steps = fcts.tomb;
158 cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
159 cc->__cd_out.__cd.__data[0].__internal_use = 1;
160 cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
161 cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
163 /* And now the transliteration. */
164 cc->__cd_out.__cd.__data[0].__trans = &__libio_translit;
166 #else
167 # ifdef _GLIBCPP_USE_WCHAR_T
169 /* Determine internal and external character sets.
171 XXX For now we make our life easy: we assume a fixed internal
172 encoding (as most sane systems have; hi HP/UX!). If somebody
173 cares about systems which changing internal charsets they
174 should come up with a solution for the determination of the
175 currently used internal character set. */
176 const char *internal_ccs = _G_INTERNAL_CCS;
177 const char *external_ccs = NULL;
179 # ifdef HAVE_NL_LANGINFO
180 external_ccs = nl_langinfo (CODESET);
181 # endif
182 if (external_ccs == NULL)
183 external_ccs = "ISO-8859-1";
185 cc->__cd_in = iconv_open (internal_ccs, external_ccs);
186 if (cc->__cd_in != (iconv_t) -1)
187 cc->__cd_out = iconv_open (external_ccs, internal_ccs);
189 if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
191 if (cc->__cd_in != (iconv_t) -1)
192 iconv_close (cc->__cd_in);
193 /* XXX */
194 abort ();
197 # else
198 # error "somehow determine this from LC_CTYPE"
199 # endif
200 #endif
202 /* From now on use the wide character callback functions. */
203 ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
205 /* One last twist: we get the current stream position. The wide
206 char streams have much more problems with not knowing the
207 current position and so we should disable the optimization
208 which allows the functions without knowing the position. */
209 fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur);
212 /* Set the mode now. */
213 fp->_mode = mode;
215 return mode;
219 static enum __codecvt_result
220 do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep,
221 const wchar_t *from_start, const wchar_t *from_end,
222 const wchar_t **from_stop, char *to_start, char *to_end,
223 char **to_stop)
225 enum __codecvt_result result;
227 #ifdef _LIBC
228 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
229 int status;
230 size_t dummy;
231 const unsigned char *from_start_copy = (unsigned char *) from_start;
233 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
234 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
235 codecvt->__cd_out.__cd.__data[0].__statep = statep;
237 status = DL_CALL_FCT (gs->__fct,
238 (gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
239 (const unsigned char *) from_end, NULL,
240 &dummy, 0, 0));
242 *from_stop = (wchar_t *) from_start_copy;
243 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
245 switch (status)
247 case __GCONV_OK:
248 case __GCONV_EMPTY_INPUT:
249 result = __codecvt_ok;
250 break;
252 case __GCONV_FULL_OUTPUT:
253 case __GCONV_INCOMPLETE_INPUT:
254 result = __codecvt_partial;
255 break;
257 default:
258 result = __codecvt_error;
259 break;
261 #else
262 # ifdef _GLIBCPP_USE_WCHAR_T
263 size_t res;
264 const char *from_start_copy = (const char *) from_start;
265 size_t from_len = from_end - from_start;
266 char *to_start_copy = to_start;
267 size_t to_len = to_end - to_start;
268 res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
269 &to_start_copy, &to_len);
271 if (res == 0 || from_len == 0)
272 result = __codecvt_ok;
273 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
274 result = __codecvt_partial;
275 else
276 result = __codecvt_error;
278 # else
279 /* Decide what to do. */
280 result = __codecvt_error;
281 # endif
282 #endif
284 return result;
288 static enum __codecvt_result
289 do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep,
290 char *to_start, char *to_end, char **to_stop)
292 enum __codecvt_result result;
294 #ifdef _LIBC
295 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
296 int status;
297 size_t dummy;
299 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
300 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
301 codecvt->__cd_out.__cd.__data[0].__statep = statep;
303 status = DL_CALL_FCT (gs->__fct,
304 (gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
305 NULL, &dummy, 1, 0));
307 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
309 switch (status)
311 case __GCONV_OK:
312 case __GCONV_EMPTY_INPUT:
313 result = __codecvt_ok;
314 break;
316 case __GCONV_FULL_OUTPUT:
317 case __GCONV_INCOMPLETE_INPUT:
318 result = __codecvt_partial;
319 break;
321 default:
322 result = __codecvt_error;
323 break;
325 #else
326 # ifdef _GLIBCPP_USE_WCHAR_T
327 size_t res;
328 char *to_start_copy = (char *) to_start;
329 size_t to_len = to_end - to_start;
331 res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
333 if (res == 0)
334 result = __codecvt_ok;
335 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
336 result = __codecvt_partial;
337 else
338 result = __codecvt_error;
339 # else
340 /* Decide what to do. */
341 result = __codecvt_error;
342 # endif
343 #endif
345 return result;
349 static enum __codecvt_result
350 do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep,
351 const char *from_start, const char *from_end, const char **from_stop,
352 wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
354 enum __codecvt_result result;
356 #ifdef _LIBC
357 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
358 int status;
359 size_t dummy;
360 const unsigned char *from_start_copy = (unsigned char *) from_start;
362 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start;
363 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end;
364 codecvt->__cd_in.__cd.__data[0].__statep = statep;
366 status = DL_CALL_FCT (gs->__fct,
367 (gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
368 from_end, NULL, &dummy, 0, 0));
370 *from_stop = from_start_copy;
371 *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
373 switch (status)
375 case __GCONV_OK:
376 case __GCONV_EMPTY_INPUT:
377 result = __codecvt_ok;
378 break;
380 case __GCONV_FULL_OUTPUT:
381 case __GCONV_INCOMPLETE_INPUT:
382 result = __codecvt_partial;
383 break;
385 default:
386 result = __codecvt_error;
387 break;
389 #else
390 # ifdef _GLIBCPP_USE_WCHAR_T
391 size_t res;
392 const char *from_start_copy = (const char *) from_start;
393 size_t from_len = from_end - from_start;
394 char *to_start_copy = (char *) from_start;
395 size_t to_len = to_end - to_start;
397 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
398 &to_start_copy, &to_len);
400 if (res == 0)
401 result = __codecvt_ok;
402 else if (to_len == 0)
403 result = __codecvt_partial;
404 else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
405 result = __codecvt_partial;
406 else
407 result = __codecvt_error;
408 # else
409 /* Decide what to do. */
410 result = __codecvt_error;
411 # endif
412 #endif
414 return result;
/* Describe the external encoding handled by CODECVT.

   Returns -1 if the encoding is stateful (or, outside of libc, always,
   as the worst case), 0 if the number of bytes per wide character
   varies, and otherwise that constant number of bytes.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding cannot be described by a byte count.  */
  if (step->__stateful)
    return -1;

  /* Stateless.  Report the width only if every wide character needs
     the same number of input bytes.  */
  return (step->__min_needed_from == step->__max_needed_from
          ? step->__min_needed_from
          : 0);
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
/* Tell whether conversion can always be skipped.  CODECVT is not
   examined; the answer is unconditionally 0.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  return 0;
}
447 static int
448 do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
449 const char *from_start, const char *from_end, _IO_size_t max)
451 int result;
452 #ifdef _LIBC
453 const unsigned char *cp = (const unsigned char *) from_start;
454 wchar_t to_buf[max];
455 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
456 int status;
457 size_t dummy;
459 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf;
460 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max];
461 codecvt->__cd_in.__cd.__data[0].__statep = statep;
463 status = DL_CALL_FCT (gs->__fct,
464 (gs, codecvt->__cd_in.__cd.__data, &cp, from_end,
465 NULL, &dummy, 0, 0));
467 result = cp - (const unsigned char *) from_start;
468 #else
469 # ifdef _GLIBCPP_USE_WCHAR_T
470 const char *from_start_copy = (const char *) from_start;
471 size_t from_len = from_end - from_start;
472 wchar_t to_buf[max];
473 size_t res;
474 char *to_start = (char *) to_buf;
476 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
477 &to_start, &max);
479 result = from_start_copy - (char *) from_start;
480 # else
481 /* Decide what to do. */
482 result = 0;
483 # endif
484 #endif
486 return result;
/* Return the largest number of external bytes needed for a single
   wide character: inside libc the `__max_needed_from' value of the
   first input conversion step, elsewhere MB_CUR_MAX.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *first_step = codecvt->__cd_in.__cd.__steps;

  return first_step->__max_needed_from;
#else
  return MB_CUR_MAX;
#endif
}