1 /* Copyright (C) 1999-2003, 2005, 2011 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>.
18 As a special exception, if you link the code in this file with
19 files compiled with a GNU compiler to produce an executable,
20 that does not cause the resulting executable to be covered by
21 the GNU Lesser General Public License. This exception does not
22 however invalidate any other reasons why the executable file
23 might be covered by the GNU Lesser General Public License.
24 This exception applies to code released by its copyright holders
25 in files containing the exception. */
37 # include <langinfo.h>
38 # include <locale/localeinfo.h>
39 # include <wcsmbs/wcsmbsload.h>
40 # include <iconv/gconv_int.h>
41 # include <shlib-compat.h>
46 /* Prototypes of libio's codecvt functions. */
47 static enum __codecvt_result
do_out (struct _IO_codecvt
*codecvt
,
49 const wchar_t *from_start
,
50 const wchar_t *from_end
,
51 const wchar_t **from_stop
, char *to_start
,
52 char *to_end
, char **to_stop
);
53 static enum __codecvt_result
do_unshift (struct _IO_codecvt
*codecvt
,
54 __mbstate_t
*statep
, char *to_start
,
55 char *to_end
, char **to_stop
);
56 static enum __codecvt_result
do_in (struct _IO_codecvt
*codecvt
,
58 const char *from_start
,
60 const char **from_stop
, wchar_t *to_start
,
61 wchar_t *to_end
, wchar_t **to_stop
);
62 static int do_encoding (struct _IO_codecvt
*codecvt
);
63 static int do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
64 const char *from_start
,
65 const char *from_end
, _IO_size_t max
);
66 static int do_max_length (struct _IO_codecvt
*codecvt
);
67 static int do_always_noconv (struct _IO_codecvt
*codecvt
);
70 /* The functions used in `codecvt' for libio are always the same. */
71 const struct _IO_codecvt __libio_codecvt
=
73 .__codecvt_destr
= NULL
, /* Destructor, never used. */
74 .__codecvt_do_out
= do_out
,
75 .__codecvt_do_unshift
= do_unshift
,
76 .__codecvt_do_in
= do_in
,
77 .__codecvt_do_encoding
= do_encoding
,
78 .__codecvt_do_always_noconv
= do_always_noconv
,
79 .__codecvt_do_length
= do_length
,
80 .__codecvt_do_max_length
= do_max_length
#ifdef _LIBC
/* Transliteration descriptor whose callback is __gconv_transliterate.
   NOTE(review): guarded by _LIBC on the assumption that the gconv
   internals it needs are only available in that configuration --
   confirm against the include block at the top of the file.  */
const struct __gconv_trans_data __libio_translit attribute_hidden =
{
  .__trans_fct = __gconv_transliterate
};
#endif
92 /* Return orientation of stream. If mode is nonzero try to change
93 the orientation first. */
100 /* Normalize the value. */
101 mode
= mode
< 0 ? -1 : (mode
== 0 ? 0 : 1);
103 #if defined SHARED && defined _LIBC \
104 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
105 if (__builtin_expect (&_IO_stdin_used
== NULL
, 0)
106 && (fp
== _IO_stdin
|| fp
== _IO_stdout
|| fp
== _IO_stderr
))
107 /* This is for a stream in the glibc 2.0 format. */
111 /* The orientation already has been determined. */
113 /* Or the caller simply wants to know about the current orientation. */
117 /* Set the orientation appropriately. */
120 struct _IO_codecvt
*cc
= fp
->_codecvt
= &fp
->_wide_data
->_codecvt
;
122 fp
->_wide_data
->_IO_read_ptr
= fp
->_wide_data
->_IO_read_end
;
123 fp
->_wide_data
->_IO_write_ptr
= fp
->_wide_data
->_IO_write_base
;
125 /* Get the character conversion functions based on the currently
126 selected locale for LC_CTYPE. */
129 /* Clear the state. We start all over again. */
130 memset (&fp
->_wide_data
->_IO_state
, '\0', sizeof (__mbstate_t
));
131 memset (&fp
->_wide_data
->_IO_last_state
, '\0', sizeof (__mbstate_t
));
133 struct gconv_fcts fcts
;
134 __wcsmbs_clone_conv (&fcts
);
135 assert (fcts
.towc_nsteps
== 1);
136 assert (fcts
.tomb_nsteps
== 1);
138 /* The functions are always the same. */
139 *cc
= __libio_codecvt
;
141 cc
->__cd_in
.__cd
.__nsteps
= fcts
.towc_nsteps
;
142 cc
->__cd_in
.__cd
.__steps
= fcts
.towc
;
144 cc
->__cd_in
.__cd
.__data
[0].__invocation_counter
= 0;
145 cc
->__cd_in
.__cd
.__data
[0].__internal_use
= 1;
146 cc
->__cd_in
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
147 cc
->__cd_in
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
149 /* XXX For now no transliteration. */
150 cc
->__cd_in
.__cd
.__data
[0].__trans
= NULL
;
152 cc
->__cd_out
.__cd
.__nsteps
= fcts
.tomb_nsteps
;
153 cc
->__cd_out
.__cd
.__steps
= fcts
.tomb
;
155 cc
->__cd_out
.__cd
.__data
[0].__invocation_counter
= 0;
156 cc
->__cd_out
.__cd
.__data
[0].__internal_use
= 1;
157 cc
->__cd_out
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
158 cc
->__cd_out
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
160 /* And now the transliteration. */
161 cc
->__cd_out
.__cd
.__data
[0].__trans
162 = (struct __gconv_trans_data
*) &__libio_translit
;
165 # ifdef _GLIBCPP_USE_WCHAR_T
167 /* Determine internal and external character sets.
169 XXX For now we make our life easy: we assume a fixed internal
170 encoding (as most sane systems have; hi HP/UX!). If somebody
171 cares about systems which changing internal charsets they
172 should come up with a solution for the determination of the
173 currently used internal character set. */
174 const char *internal_ccs
= _G_INTERNAL_CCS
;
175 const char *external_ccs
= NULL
;
177 # ifdef HAVE_NL_LANGINFO
178 external_ccs
= nl_langinfo (CODESET
);
180 if (external_ccs
== NULL
)
181 external_ccs
= "ISO-8859-1";
183 cc
->__cd_in
= iconv_open (internal_ccs
, external_ccs
);
184 if (cc
->__cd_in
!= (iconv_t
) -1)
185 cc
->__cd_out
= iconv_open (external_ccs
, internal_ccs
);
187 if (cc
->__cd_in
== (iconv_t
) -1 || cc
->__cd_out
== (iconv_t
) -1)
189 if (cc
->__cd_in
!= (iconv_t
) -1)
190 iconv_close (cc
->__cd_in
);
196 # error "somehow determine this from LC_CTYPE"
200 /* From now on use the wide character callback functions. */
201 ((struct _IO_FILE_plus
*) fp
)->vtable
= fp
->_wide_data
->_wide_vtable
;
203 /* One last twist: we get the current stream position. The wide
204 char streams have much more problems with not knowing the
205 current position and so we should disable the optimization
206 which allows the functions without knowing the position. */
207 fp
->_offset
= _IO_SYSSEEK (fp
, 0, _IO_seek_cur
);
210 /* Set the mode now. */
217 static enum __codecvt_result
218 do_out (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
219 const wchar_t *from_start
, const wchar_t *from_end
,
220 const wchar_t **from_stop
, char *to_start
, char *to_end
,
223 enum __codecvt_result result
;
226 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
229 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
231 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
232 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
233 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
235 __gconv_fct fct
= gs
->__fct
;
237 if (gs
->__shlib_handle
!= NULL
)
241 status
= DL_CALL_FCT (fct
,
242 (gs
, codecvt
->__cd_out
.__cd
.__data
, &from_start_copy
,
243 (const unsigned char *) from_end
, NULL
,
246 *from_stop
= (wchar_t *) from_start_copy
;
247 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
252 case __GCONV_EMPTY_INPUT
:
253 result
= __codecvt_ok
;
256 case __GCONV_FULL_OUTPUT
:
257 case __GCONV_INCOMPLETE_INPUT
:
258 result
= __codecvt_partial
;
262 result
= __codecvt_error
;
266 # ifdef _GLIBCPP_USE_WCHAR_T
268 const char *from_start_copy
= (const char *) from_start
;
269 size_t from_len
= from_end
- from_start
;
270 char *to_start_copy
= to_start
;
271 size_t to_len
= to_end
- to_start
;
272 res
= iconv (codecvt
->__cd_out
, &from_start_copy
, &from_len
,
273 &to_start_copy
, &to_len
);
275 if (res
== 0 || from_len
== 0)
276 result
= __codecvt_ok
;
277 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
278 result
= __codecvt_partial
;
280 result
= __codecvt_error
;
283 /* Decide what to do. */
284 result
= __codecvt_error
;
292 static enum __codecvt_result
293 do_unshift (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
294 char *to_start
, char *to_end
, char **to_stop
)
296 enum __codecvt_result result
;
299 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
303 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
304 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
305 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
307 __gconv_fct fct
= gs
->__fct
;
309 if (gs
->__shlib_handle
!= NULL
)
313 status
= DL_CALL_FCT (fct
,
314 (gs
, codecvt
->__cd_out
.__cd
.__data
, NULL
, NULL
,
315 NULL
, &dummy
, 1, 0));
317 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
322 case __GCONV_EMPTY_INPUT
:
323 result
= __codecvt_ok
;
326 case __GCONV_FULL_OUTPUT
:
327 case __GCONV_INCOMPLETE_INPUT
:
328 result
= __codecvt_partial
;
332 result
= __codecvt_error
;
336 # ifdef _GLIBCPP_USE_WCHAR_T
338 char *to_start_copy
= (char *) to_start
;
339 size_t to_len
= to_end
- to_start
;
341 res
= iconv (codecvt
->__cd_out
, NULL
, NULL
, &to_start_copy
, &to_len
);
344 result
= __codecvt_ok
;
345 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
346 result
= __codecvt_partial
;
348 result
= __codecvt_error
;
350 /* Decide what to do. */
351 result
= __codecvt_error
;
359 static enum __codecvt_result
360 do_in (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
361 const char *from_start
, const char *from_end
, const char **from_stop
,
362 wchar_t *to_start
, wchar_t *to_end
, wchar_t **to_stop
)
364 enum __codecvt_result result
;
367 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
370 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
372 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
373 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
374 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
376 __gconv_fct fct
= gs
->__fct
;
378 if (gs
->__shlib_handle
!= NULL
)
382 status
= DL_CALL_FCT (fct
,
383 (gs
, codecvt
->__cd_in
.__cd
.__data
, &from_start_copy
,
384 (const unsigned char *) from_end
, NULL
,
387 *from_stop
= (const char *) from_start_copy
;
388 *to_stop
= (wchar_t *) codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
;
393 case __GCONV_EMPTY_INPUT
:
394 result
= __codecvt_ok
;
397 case __GCONV_FULL_OUTPUT
:
398 case __GCONV_INCOMPLETE_INPUT
:
399 result
= __codecvt_partial
;
403 result
= __codecvt_error
;
407 # ifdef _GLIBCPP_USE_WCHAR_T
409 const char *from_start_copy
= (const char *) from_start
;
410 size_t from_len
= from_end
- from_start
;
411 char *to_start_copy
= (char *) from_start
;
412 size_t to_len
= to_end
- to_start
;
414 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
415 &to_start_copy
, &to_len
);
418 result
= __codecvt_ok
;
419 else if (to_len
== 0)
420 result
= __codecvt_partial
;
421 else if (from_len
< codecvt
->__codecvt_do_max_length (codecvt
))
422 result
= __codecvt_partial
;
424 result
= __codecvt_error
;
426 /* Decide what to do. */
427 result
= __codecvt_error
;
/* Describe the external encoding handled by CODECVT: -1 if it is
   stateful, 0 if the number of bytes per character varies, otherwise
   the constant number of bytes needed for each wide character.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* See whether the encoding is stateful.  */
  if (step->__stateful)
    return -1;

  /* Fortunately not.  Now determine the input bytes for the conversion
     necessary for each wide character.  */
  if (step->__min_needed_from == step->__max_needed_from)
    return step->__min_needed_from;

  /* Not a constant value.  */
  return 0;
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
/* Report whether the conversion is always the identity.
   NOTE(review): the body was absent from the text under review; it is
   restored as a constant 0 ("conversion is required") -- confirm.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  return 0;
}
465 do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
466 const char *from_start
, const char *from_end
, _IO_size_t max
)
470 const unsigned char *cp
= (const unsigned char *) from_start
;
472 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
475 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_buf
;
476 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) &to_buf
[max
];
477 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
479 __gconv_fct fct
= gs
->__fct
;
481 if (gs
->__shlib_handle
!= NULL
)
486 (gs
, codecvt
->__cd_in
.__cd
.__data
, &cp
,
487 (const unsigned char *) from_end
, NULL
,
490 result
= cp
- (const unsigned char *) from_start
;
492 # ifdef _GLIBCPP_USE_WCHAR_T
493 const char *from_start_copy
= (const char *) from_start
;
494 size_t from_len
= from_end
- from_start
;
497 char *to_start
= (char *) to_buf
;
499 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
502 result
= from_start_copy
- (char *) from_start
;
504 /* Decide what to do. */
/* Return the largest number of external-encoding bytes the input
   conversion step of CODECVT may need for a single character.  Without
   _LIBC the answer is MB_CUR_MAX.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  return step->__max_needed_from;
#else
  return MB_CUR_MAX;
#endif
}