1 /* Copyright (C) 1999-2003, 2005 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 As a special exception, if you link the code in this file with
20 files compiled with a GNU compiler to produce an executable,
21 that does not cause the resulting executable to be covered by
22 the GNU Lesser General Public License. This exception does not
23 however invalidate any other reasons why the executable file
24 might be covered by the GNU Lesser General Public License.
25 This exception applies to code released by its copyright holders
26 in files containing the exception. */
38 # include <langinfo.h>
39 # include <locale/localeinfo.h>
40 # include <wcsmbs/wcsmbsload.h>
41 # include <iconv/gconv_int.h>
42 # include <shlib-compat.h>
47 /* Prototypes of libio's codecvt functions. */
48 static enum __codecvt_result
do_out (struct _IO_codecvt
*codecvt
,
50 const wchar_t *from_start
,
51 const wchar_t *from_end
,
52 const wchar_t **from_stop
, char *to_start
,
53 char *to_end
, char **to_stop
);
54 static enum __codecvt_result
do_unshift (struct _IO_codecvt
*codecvt
,
55 __mbstate_t
*statep
, char *to_start
,
56 char *to_end
, char **to_stop
);
57 static enum __codecvt_result
do_in (struct _IO_codecvt
*codecvt
,
59 const char *from_start
,
61 const char **from_stop
, wchar_t *to_start
,
62 wchar_t *to_end
, wchar_t **to_stop
);
63 static int do_encoding (struct _IO_codecvt
*codecvt
);
64 static int do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
65 const char *from_start
,
66 const char *from_end
, _IO_size_t max
);
67 static int do_max_length (struct _IO_codecvt
*codecvt
);
68 static int do_always_noconv (struct _IO_codecvt
*codecvt
);
71 /* The functions used in `codecvt' for libio are always the same. */
/* Callback table for wide-oriented streams.  Each __codecvt_do_* slot
   is bound to the file-local function of the same name; the destructor
   slot is left NULL.  Elsewhere in this file the whole structure is
   copied into a stream's own codecvt object before the per-stream
   conversion descriptors are filled in.  */
72 const struct _IO_codecvt __libio_codecvt
=
74 .__codecvt_destr
= NULL
, /* Destructor, never used. */
75 .__codecvt_do_out
= do_out
,
76 .__codecvt_do_unshift
= do_unshift
,
77 .__codecvt_do_in
= do_in
,
78 .__codecvt_do_encoding
= do_encoding
,
79 .__codecvt_do_always_noconv
= do_always_noconv
,
80 .__codecvt_do_length
= do_length
,
81 .__codecvt_do_max_length
= do_max_length
/* Transliteration descriptor whose __trans_fct is
   __gconv_transliterate.  Its address is installed as the __trans
   member of the output conversion data elsewhere in this file; the
   input side gets NULL there.  */
86 const struct __gconv_trans_data __libio_translit attribute_hidden
=
88 .__trans_fct
= __gconv_transliterate
93 /* Return orientation of stream. If mode is nonzero try to change
94 the orientation first. */
101 /* Normalize the value. */
102 mode
= mode
< 0 ? -1 : (mode
== 0 ? 0 : 1);
104 #if defined SHARED && defined _LIBC \
105 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
106 if (__builtin_expect (&_IO_stdin_used
== NULL
, 0)
107 && (fp
== _IO_stdin
|| fp
== _IO_stdout
|| fp
== _IO_stderr
))
108 /* This is for a stream in the glibc 2.0 format. */
112 /* The orientation already has been determined. */
114 /* Or the caller simply wants to know about the current orientation. */
118 /* Set the orientation appropriately. */
121 struct _IO_codecvt
*cc
= fp
->_codecvt
= &fp
->_wide_data
->_codecvt
;
123 fp
->_wide_data
->_IO_read_ptr
= fp
->_wide_data
->_IO_read_end
;
124 fp
->_wide_data
->_IO_write_ptr
= fp
->_wide_data
->_IO_write_base
;
126 /* Get the character conversion functions based on the currently
127 selected locale for LC_CTYPE. */
130 /* Clear the state. We start all over again. */
131 memset (&fp
->_wide_data
->_IO_state
, '\0', sizeof (__mbstate_t
));
132 memset (&fp
->_wide_data
->_IO_last_state
, '\0', sizeof (__mbstate_t
));
134 struct gconv_fcts fcts
;
135 __wcsmbs_clone_conv (&fcts
);
136 assert (fcts
.towc_nsteps
== 1);
137 assert (fcts
.tomb_nsteps
== 1);
139 /* The functions are always the same. */
140 *cc
= __libio_codecvt
;
142 cc
->__cd_in
.__cd
.__nsteps
= fcts
.towc_nsteps
;
143 cc
->__cd_in
.__cd
.__steps
= fcts
.towc
;
145 cc
->__cd_in
.__cd
.__data
[0].__invocation_counter
= 0;
146 cc
->__cd_in
.__cd
.__data
[0].__internal_use
= 1;
147 cc
->__cd_in
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
148 cc
->__cd_in
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
150 /* XXX For now no transliteration. */
151 cc
->__cd_in
.__cd
.__data
[0].__trans
= NULL
;
153 cc
->__cd_out
.__cd
.__nsteps
= fcts
.tomb_nsteps
;
154 cc
->__cd_out
.__cd
.__steps
= fcts
.tomb
;
156 cc
->__cd_out
.__cd
.__data
[0].__invocation_counter
= 0;
157 cc
->__cd_out
.__cd
.__data
[0].__internal_use
= 1;
158 cc
->__cd_out
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
159 cc
->__cd_out
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
161 /* And now the transliteration. */
162 cc
->__cd_out
.__cd
.__data
[0].__trans
163 = (struct __gconv_trans_data
*) &__libio_translit
;
166 # ifdef _GLIBCPP_USE_WCHAR_T
168 /* Determine internal and external character sets.
170 XXX For now we make our life easy: we assume a fixed internal
171 encoding (as most sane systems have; hi HP/UX!). If somebody
172 cares about systems with changing internal charsets they
173 should come up with a solution for the determination of the
174 currently used internal character set. */
175 const char *internal_ccs
= _G_INTERNAL_CCS
;
176 const char *external_ccs
= NULL
;
178 # ifdef HAVE_NL_LANGINFO
179 external_ccs
= nl_langinfo (CODESET
);
181 if (external_ccs
== NULL
)
182 external_ccs
= "ISO-8859-1";
184 cc
->__cd_in
= iconv_open (internal_ccs
, external_ccs
);
185 if (cc
->__cd_in
!= (iconv_t
) -1)
186 cc
->__cd_out
= iconv_open (external_ccs
, internal_ccs
);
188 if (cc
->__cd_in
== (iconv_t
) -1 || cc
->__cd_out
== (iconv_t
) -1)
190 if (cc
->__cd_in
!= (iconv_t
) -1)
191 iconv_close (cc
->__cd_in
);
197 # error "somehow determine this from LC_CTYPE"
201 /* From now on use the wide character callback functions. */
202 ((struct _IO_FILE_plus
*) fp
)->vtable
= fp
->_wide_data
->_wide_vtable
;
204 /* One last twist: we get the current stream position. The wide
205 char streams have much more problems with not knowing the
206 current position and so we should disable the optimization
207 which allows the functions without knowing the position. */
208 fp
->_offset
= _IO_SYSSEEK (fp
, 0, _IO_seek_cur
);
211 /* Set the mode now. */
/* Convert the wide characters in [from_start, from_end) with the
   output conversion (codecvt->__cd_out), storing the result in
   [to_start, to_end) and using STATEP as the shift state.

   In the gconv path *from_stop and *to_stop are set to the positions
   at which input consumption and output stopped, and the conversion
   status is mapped as follows: __GCONV_EMPTY_INPUT gives __codecvt_ok;
   __GCONV_FULL_OUTPUT and __GCONV_INCOMPLETE_INPUT give
   __codecvt_partial; the remaining visible assignment gives
   __codecvt_error.

   NOTE(review): the iconv fallback below does not visibly store
   *from_stop or *to_stop -- confirm whether callers of that
   configuration rely on them.  */
218 static enum __codecvt_result
219 do_out (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
220 const wchar_t *from_start
, const wchar_t *from_end
,
221 const wchar_t **from_stop
, char *to_start
, char *to_end
,
224 enum __codecvt_result result
;
227 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
230 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
/* Point the single conversion step at the caller's output window and
   shift state before invoking it.  */
232 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
233 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
234 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
236 __gconv_fct fct
= gs
->__fct
;
238 if (gs
->__shlib_handle
!= NULL
)
242 status
= DL_CALL_FCT (fct
,
243 (gs
, codecvt
->__cd_out
.__cd
.__data
, &from_start_copy
,
244 (const unsigned char *) from_end
, NULL
,
247 *from_stop
= (wchar_t *) from_start_copy
;
248 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
253 case __GCONV_EMPTY_INPUT
:
254 result
= __codecvt_ok
;
257 case __GCONV_FULL_OUTPUT
:
258 case __GCONV_INCOMPLETE_INPUT
:
259 result
= __codecvt_partial
;
263 result
= __codecvt_error
;
267 # ifdef _GLIBCPP_USE_WCHAR_T
269 const char *from_start_copy
= (const char *) from_start
;
/* iconv counts the remaining input in bytes, whereas the difference of
   two wchar_t pointers is a count of wide characters; scale it.  */
270 size_t from_len
= (from_end
- from_start
) * sizeof (wchar_t);
271 char *to_start_copy
= to_start
;
272 size_t to_len
= to_end
- to_start
;
273 res
= iconv (codecvt
->__cd_out
, &from_start_copy
, &from_len
,
274 &to_start_copy
, &to_len
);
276 if (res
== 0 || from_len
== 0)
277 result
= __codecvt_ok
;
278 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
279 result
= __codecvt_partial
;
281 result
= __codecvt_error
;
284 /* Decide what to do. */
285 result
= __codecvt_error
;
/* Invoke the output conversion (codecvt->__cd_out) without any input,
   letting it write into [to_start, to_end) with STATEP as the shift
   state.  In the gconv path *to_stop is set to the end of the produced
   output and the status is mapped as follows: __GCONV_EMPTY_INPUT
   gives __codecvt_ok; __GCONV_FULL_OUTPUT and __GCONV_INCOMPLETE_INPUT
   give __codecvt_partial; the remaining visible assignment gives
   __codecvt_error.  */
293 static enum __codecvt_result
294 do_unshift (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
295 char *to_start
, char *to_end
, char **to_stop
)
297 enum __codecvt_result result
;
300 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
304 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
305 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
306 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
308 __gconv_fct fct
= gs
->__fct
;
310 if (gs
->__shlib_handle
!= NULL
)
/* The input pointers are all NULL.  NOTE(review): the 1 passed as the
   seventh argument presumably requests a flush -- confirm against the
   __gconv_fct signature.  */
314 status
= DL_CALL_FCT (fct
,
315 (gs
, codecvt
->__cd_out
.__cd
.__data
, NULL
, NULL
,
316 NULL
, &dummy
, 1, 0));
318 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
323 case __GCONV_EMPTY_INPUT
:
324 result
= __codecvt_ok
;
327 case __GCONV_FULL_OUTPUT
:
328 case __GCONV_INCOMPLETE_INPUT
:
329 result
= __codecvt_partial
;
333 result
= __codecvt_error
;
337 # ifdef _GLIBCPP_USE_WCHAR_T
339 char *to_start_copy
= (char *) to_start
;
340 size_t to_len
= to_end
- to_start
;
/* Fallback: iconv is called with NULL input arguments and only an
   output buffer.  */
342 res
= iconv (codecvt
->__cd_out
, NULL
, NULL
, &to_start_copy
, &to_len
);
345 result
= __codecvt_ok
;
346 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
347 result
= __codecvt_partial
;
349 result
= __codecvt_error
;
351 /* Decide what to do. */
352 result
= __codecvt_error
;
/* Convert the bytes in [from_start, from_end) with the input
   conversion (codecvt->__cd_in), storing wide characters in
   [to_start, to_end) and using STATEP as the shift state.

   In the gconv path *from_stop and *to_stop are set to the positions
   at which input consumption and output stopped, and the conversion
   status is mapped as follows: __GCONV_EMPTY_INPUT gives __codecvt_ok;
   __GCONV_FULL_OUTPUT and __GCONV_INCOMPLETE_INPUT give
   __codecvt_partial; the remaining visible assignment gives
   __codecvt_error.

   NOTE(review): the iconv fallback below does not visibly store
   *from_stop or *to_stop -- confirm whether callers of that
   configuration rely on them.  */
360 static enum __codecvt_result
361 do_in (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
362 const char *from_start
, const char *from_end
, const char **from_stop
,
363 wchar_t *to_start
, wchar_t *to_end
, wchar_t **to_stop
)
365 enum __codecvt_result result
;
368 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
371 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
/* Point the single conversion step at the caller's output window and
   shift state before invoking it.  */
373 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
374 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
375 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
377 __gconv_fct fct
= gs
->__fct
;
379 if (gs
->__shlib_handle
!= NULL
)
383 status
= DL_CALL_FCT (fct
,
384 (gs
, codecvt
->__cd_in
.__cd
.__data
, &from_start_copy
,
385 (const unsigned char *) from_end
, NULL
,
388 *from_stop
= (const char *) from_start_copy
;
389 *to_stop
= (wchar_t *) codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
;
394 case __GCONV_EMPTY_INPUT
:
395 result
= __codecvt_ok
;
398 case __GCONV_FULL_OUTPUT
:
399 case __GCONV_INCOMPLETE_INPUT
:
400 result
= __codecvt_partial
;
404 result
= __codecvt_error
;
408 # ifdef _GLIBCPP_USE_WCHAR_T
410 const char *from_start_copy
= (const char *) from_start
;
411 size_t from_len
= from_end
- from_start
;
/* The output cursor must start at the destination buffer; starting it
   at from_start would make iconv write over its own input.  */
412 char *to_start_copy
= (char *) to_start
;
/* iconv counts the remaining output space in bytes, whereas the
   difference of two wchar_t pointers is a count of wide characters;
   scale it.  */
413 size_t to_len
= (to_end
- to_start
) * sizeof (wchar_t);
415 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
416 &to_start_copy
, &to_len
);
419 result
= __codecvt_ok
;
420 else if (to_len
== 0)
421 result
= __codecvt_partial
;
422 else if (from_len
< codecvt
->__codecvt_do_max_length (codecvt
))
423 result
= __codecvt_partial
;
425 result
= __codecvt_error
;
427 /* Decide what to do. */
428 result
= __codecvt_error
;
/* Describe the width of the external encoding, based on the first
   step of the input conversion.  When that step is not stateful and
   its __min_needed_from equals its __max_needed_from, that common
   value is returned.
   NOTE(review): the return statements for the stateful case, the
   variable-width case and the "worst case" fallback are not visible in
   this copy -- confirm their values before relying on them.  */
437 do_encoding (struct _IO_codecvt
*codecvt
)
440 /* See whether the encoding is stateful. */
441 if (codecvt
->__cd_in
.__cd
.__steps
[0].__stateful
)
443 /* Fortunately not. Now determine the input bytes for the conversion
444 necessary for each wide character. */
445 if (codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
446 != codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
)
447 /* Not a constant value. */
450 return codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
;
452 /* Worst case scenario. */
459 do_always_noconv (struct _IO_codecvt
*codecvt
)
/* Determine how many bytes of [from_start, from_end) the input
   conversion (codecvt->__cd_in) consumes when its output window is
   limited to MAX elements of the local buffer to_buf.  In the visible
   code to_buf serves only as the conversion destination; the result is
   the distance the input cursor advanced from from_start.  */
466 do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
467 const char *from_start
, const char *from_end
, _IO_size_t max
)
471 const unsigned char *cp
= (const unsigned char *) from_start
;
473 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
/* Output goes to to_buf, bounded by &to_buf[max].  */
477 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_buf
;
478 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) &to_buf
[max
];
479 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
481 __gconv_fct fct
= gs
->__fct
;
483 if (gs
->__shlib_handle
!= NULL
)
487 status
= DL_CALL_FCT (fct
,
488 (gs
, codecvt
->__cd_in
.__cd
.__data
, &cp
,
489 (const unsigned char *) from_end
, NULL
,
/* cp was advanced by the conversion; the difference is the number of
   input bytes consumed.  */
492 result
= cp
- (const unsigned char *) from_start
;
494 # ifdef _GLIBCPP_USE_WCHAR_T
495 const char *from_start_copy
= (const char *) from_start
;
496 size_t from_len
= from_end
- from_start
;
499 char *to_start
= (char *) to_buf
;
501 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
504 result
= from_start_copy
- (char *) from_start
;
506 /* Decide what to do. */
516 do_max_length (struct _IO_codecvt
*codecvt
)
519 return codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
;