Set LC_ALL=C before sed.
[glibc.git] / libio / iofwide.c
blobf85ea1e5df9af47d14263e1a299789275435dfdf
1 /* Copyright (C) 1999-2013 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>.
18 As a special exception, if you link the code in this file with
19 files compiled with a GNU compiler to produce an executable,
20 that does not cause the resulting executable to be covered by
21 the GNU Lesser General Public License. This exception does not
22 however invalidate any other reasons why the executable file
23 might be covered by the GNU Lesser General Public License.
24 This exception applies to code released by its copyright holders
25 in files containing the exception. */
27 #include <libioP.h>
28 #ifdef _LIBC
29 # include <dlfcn.h>
30 # include <wchar.h>
31 #endif
32 #include <assert.h>
33 #include <stdlib.h>
34 #include <string.h>
36 #ifdef _LIBC
37 # include <langinfo.h>
38 # include <locale/localeinfo.h>
39 # include <wcsmbs/wcsmbsload.h>
40 # include <iconv/gconv_int.h>
41 # include <shlib-compat.h>
42 # include <sysdep.h>
43 #endif
46 /* Prototypes of libio's codecvt functions. */
47 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
48 __mbstate_t *statep,
49 const wchar_t *from_start,
50 const wchar_t *from_end,
51 const wchar_t **from_stop, char *to_start,
52 char *to_end, char **to_stop);
53 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
54 __mbstate_t *statep, char *to_start,
55 char *to_end, char **to_stop);
56 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
57 __mbstate_t *statep,
58 const char *from_start,
59 const char *from_end,
60 const char **from_stop, wchar_t *to_start,
61 wchar_t *to_end, wchar_t **to_stop);
62 static int do_encoding (struct _IO_codecvt *codecvt);
63 static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
64 const char *from_start,
65 const char *from_end, _IO_size_t max);
66 static int do_max_length (struct _IO_codecvt *codecvt);
67 static int do_always_noconv (struct _IO_codecvt *codecvt);
70 /* The functions used in `codecvt' for libio are always the same. */
71 const struct _IO_codecvt __libio_codecvt =
73 .__codecvt_destr = NULL, /* Destructor, never used. */
74 .__codecvt_do_out = do_out,
75 .__codecvt_do_unshift = do_unshift,
76 .__codecvt_do_in = do_in,
77 .__codecvt_do_encoding = do_encoding,
78 .__codecvt_do_always_noconv = do_always_noconv,
79 .__codecvt_do_length = do_length,
80 .__codecvt_do_max_length = do_max_length
#ifdef _LIBC
/* Transliteration descriptor used by libio; only the handler function
   member is set explicitly.  */
const struct __gconv_trans_data __libio_translit attribute_hidden =
{
  .__trans_fct = __gconv_transliterate
};
#endif
92 /* Return orientation of stream. If mode is nonzero try to change
93 the orientation first. */
94 #undef _IO_fwide
95 int
96 _IO_fwide (fp, mode)
97 _IO_FILE *fp;
98 int mode;
100 /* Normalize the value. */
101 mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
103 #if defined SHARED && defined _LIBC \
104 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
105 if (__builtin_expect (&_IO_stdin_used == NULL, 0)
106 && (fp == _IO_stdin || fp == _IO_stdout || fp == _IO_stderr))
107 /* This is for a stream in the glibc 2.0 format. */
108 return -1;
109 #endif
111 /* The orientation already has been determined. */
112 if (fp->_mode != 0
113 /* Or the caller simply wants to know about the current orientation. */
114 || mode == 0)
115 return fp->_mode;
117 /* Set the orientation appropriately. */
118 if (mode > 0)
120 struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt;
122 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
123 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
125 /* Get the character conversion functions based on the currently
126 selected locale for LC_CTYPE. */
127 #ifdef _LIBC
129 /* Clear the state. We start all over again. */
130 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
131 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
133 struct gconv_fcts fcts;
134 __wcsmbs_clone_conv (&fcts);
135 assert (fcts.towc_nsteps == 1);
136 assert (fcts.tomb_nsteps == 1);
138 /* The functions are always the same. */
139 *cc = __libio_codecvt;
141 cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
142 cc->__cd_in.__cd.__steps = fcts.towc;
144 cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
145 cc->__cd_in.__cd.__data[0].__internal_use = 1;
146 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
147 cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
149 /* XXX For now no transliteration. */
150 cc->__cd_in.__cd.__data[0].__trans = NULL;
152 cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
153 cc->__cd_out.__cd.__steps = fcts.tomb;
155 cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
156 cc->__cd_out.__cd.__data[0].__internal_use = 1;
157 cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
158 cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
160 /* And now the transliteration. */
161 cc->__cd_out.__cd.__data[0].__trans
162 = (struct __gconv_trans_data *) &__libio_translit;
164 #else
165 # ifdef _GLIBCPP_USE_WCHAR_T
167 /* Determine internal and external character sets.
169 XXX For now we make our life easy: we assume a fixed internal
170 encoding (as most sane systems have; hi HP/UX!). If somebody
171 cares about systems which changing internal charsets they
172 should come up with a solution for the determination of the
173 currently used internal character set. */
174 const char *internal_ccs = _G_INTERNAL_CCS;
175 const char *external_ccs = NULL;
177 # ifdef HAVE_NL_LANGINFO
178 external_ccs = nl_langinfo (CODESET);
179 # endif
180 if (external_ccs == NULL)
181 external_ccs = "ISO-8859-1";
183 cc->__cd_in = iconv_open (internal_ccs, external_ccs);
184 if (cc->__cd_in != (iconv_t) -1)
185 cc->__cd_out = iconv_open (external_ccs, internal_ccs);
187 if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
189 if (cc->__cd_in != (iconv_t) -1)
190 iconv_close (cc->__cd_in);
191 /* XXX */
192 abort ();
195 # else
196 # error "somehow determine this from LC_CTYPE"
197 # endif
198 #endif
200 /* From now on use the wide character callback functions. */
201 ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
203 /* One last twist: we get the current stream position. The wide
204 char streams have much more problems with not knowing the
205 current position and so we should disable the optimization
206 which allows the functions without knowing the position. */
207 fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur);
210 /* Set the mode now. */
211 fp->_mode = mode;
213 return mode;
217 static enum __codecvt_result
218 do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep,
219 const wchar_t *from_start, const wchar_t *from_end,
220 const wchar_t **from_stop, char *to_start, char *to_end,
221 char **to_stop)
223 enum __codecvt_result result;
225 #ifdef _LIBC
226 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
227 int status;
228 size_t dummy;
229 const unsigned char *from_start_copy = (unsigned char *) from_start;
231 codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
232 codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
233 codecvt->__cd_out.__cd.__data[0].__statep = statep;
235 __gconv_fct fct = gs->__fct;
236 #ifdef PTR_DEMANGLE
237 if (gs->__shlib_handle != NULL)
238 PTR_DEMANGLE (fct);
239 #endif
241 status = DL_CALL_FCT (fct,
242 (gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
243 (const unsigned char *) from_end, NULL,
244 &dummy, 0, 0));
246 *from_stop = (wchar_t *) from_start_copy;
247 *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
249 switch (status)
251 case __GCONV_OK:
252 case __GCONV_EMPTY_INPUT:
253 result = __codecvt_ok;
254 break;
256 case __GCONV_FULL_OUTPUT:
257 case __GCONV_INCOMPLETE_INPUT:
258 result = __codecvt_partial;
259 break;
261 default:
262 result = __codecvt_error;
263 break;
265 #else
266 # ifdef _GLIBCPP_USE_WCHAR_T
267 size_t res;
268 const char *from_start_copy = (const char *) from_start;
269 size_t from_len = from_end - from_start;
270 char *to_start_copy = to_start;
271 size_t to_len = to_end - to_start;
272 res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
273 &to_start_copy, &to_len);
275 if (res == 0 || from_len == 0)
276 result = __codecvt_ok;
277 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
278 result = __codecvt_partial;
279 else
280 result = __codecvt_error;
282 # else
283 /* Decide what to do. */
284 result = __codecvt_error;
285 # endif
286 #endif
288 return result;
292 static enum __codecvt_result
293 do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep,
294 char *to_start, char *to_end, char **to_stop)
296 enum __codecvt_result result;
298 #ifdef _LIBC
299 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
300 int status;
301 size_t dummy;
303 codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
304 codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
305 codecvt->__cd_out.__cd.__data[0].__statep = statep;
307 __gconv_fct fct = gs->__fct;
308 #ifdef PTR_DEMANGLE
309 if (gs->__shlib_handle != NULL)
310 PTR_DEMANGLE (fct);
311 #endif
313 status = DL_CALL_FCT (fct,
314 (gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
315 NULL, &dummy, 1, 0));
317 *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
319 switch (status)
321 case __GCONV_OK:
322 case __GCONV_EMPTY_INPUT:
323 result = __codecvt_ok;
324 break;
326 case __GCONV_FULL_OUTPUT:
327 case __GCONV_INCOMPLETE_INPUT:
328 result = __codecvt_partial;
329 break;
331 default:
332 result = __codecvt_error;
333 break;
335 #else
336 # ifdef _GLIBCPP_USE_WCHAR_T
337 size_t res;
338 char *to_start_copy = (char *) to_start;
339 size_t to_len = to_end - to_start;
341 res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
343 if (res == 0)
344 result = __codecvt_ok;
345 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
346 result = __codecvt_partial;
347 else
348 result = __codecvt_error;
349 # else
350 /* Decide what to do. */
351 result = __codecvt_error;
352 # endif
353 #endif
355 return result;
359 static enum __codecvt_result
360 do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep,
361 const char *from_start, const char *from_end, const char **from_stop,
362 wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
364 enum __codecvt_result result;
366 #ifdef _LIBC
367 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
368 int status;
369 size_t dummy;
370 const unsigned char *from_start_copy = (unsigned char *) from_start;
372 codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_start;
373 codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) to_end;
374 codecvt->__cd_in.__cd.__data[0].__statep = statep;
376 __gconv_fct fct = gs->__fct;
377 #ifdef PTR_DEMANGLE
378 if (gs->__shlib_handle != NULL)
379 PTR_DEMANGLE (fct);
380 #endif
382 status = DL_CALL_FCT (fct,
383 (gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
384 (const unsigned char *) from_end, NULL,
385 &dummy, 0, 0));
387 *from_stop = (const char *) from_start_copy;
388 *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
390 switch (status)
392 case __GCONV_OK:
393 case __GCONV_EMPTY_INPUT:
394 result = __codecvt_ok;
395 break;
397 case __GCONV_FULL_OUTPUT:
398 case __GCONV_INCOMPLETE_INPUT:
399 result = __codecvt_partial;
400 break;
402 default:
403 result = __codecvt_error;
404 break;
406 #else
407 # ifdef _GLIBCPP_USE_WCHAR_T
408 size_t res;
409 const char *from_start_copy = (const char *) from_start;
410 size_t from_len = from_end - from_start;
411 char *to_start_copy = (char *) from_start;
412 size_t to_len = to_end - to_start;
414 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
415 &to_start_copy, &to_len);
417 if (res == 0)
418 result = __codecvt_ok;
419 else if (to_len == 0)
420 result = __codecvt_partial;
421 else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
422 result = __codecvt_partial;
423 else
424 result = __codecvt_error;
425 # else
426 /* Decide what to do. */
427 result = __codecvt_error;
428 # endif
429 #endif
431 return result;
/* Describe the external encoding from the first input conversion step:
   -1 if that step is stateful, 0 if its minimum and maximum
   bytes-per-character differ, otherwise that constant byte count.
   Outside of libc the answer is always -1.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding has no fixed width to report.  */
  if (step->__stateful)
    return -1;

  /* Report the width only when it is the same for every character.  */
  return (step->__min_needed_from == step->__max_needed_from
          ? step->__min_needed_from
          : 0);
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
/* Always answers 0; CODECVT is not examined.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  (void) codecvt;
  return 0;
}
464 static int
465 do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
466 const char *from_start, const char *from_end, _IO_size_t max)
468 int result;
469 #ifdef _LIBC
470 const unsigned char *cp = (const unsigned char *) from_start;
471 wchar_t to_buf[max];
472 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
473 size_t dummy;
475 codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_buf;
476 codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) &to_buf[max];
477 codecvt->__cd_in.__cd.__data[0].__statep = statep;
479 __gconv_fct fct = gs->__fct;
480 #ifdef PTR_DEMANGLE
481 if (gs->__shlib_handle != NULL)
482 PTR_DEMANGLE (fct);
483 #endif
485 DL_CALL_FCT (fct,
486 (gs, codecvt->__cd_in.__cd.__data, &cp,
487 (const unsigned char *) from_end, NULL,
488 &dummy, 0, 0));
490 result = cp - (const unsigned char *) from_start;
491 #else
492 # ifdef _GLIBCPP_USE_WCHAR_T
493 const char *from_start_copy = (const char *) from_start;
494 size_t from_len = from_end - from_start;
495 wchar_t to_buf[max];
496 size_t res;
497 char *to_start = (char *) to_buf;
499 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
500 &to_start, &max);
502 result = from_start_copy - (char *) from_start;
503 # else
504 /* Decide what to do. */
505 result = 0;
506 # endif
507 #endif
509 return result;
/* Return the largest number of external bytes one character may need:
   the first input step's `__max_needed_from' inside libc, MB_CUR_MAX
   otherwise.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
  int longest;

#ifdef _LIBC
  longest = codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  longest = MB_CUR_MAX;
#endif

  return longest;
}