/* Copyright (C) 1999-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.

   As a special exception, if you link the code in this file with
   files compiled with a GNU compiler to produce an executable,
   that does not cause the resulting executable to be covered by
   the GNU Lesser General Public License. This exception does not
   however invalidate any other reasons why the executable file
   might be covered by the GNU Lesser General Public License.
   This exception applies to code released by its copyright holders
   in files containing the exception. */
37 # include <langinfo.h>
38 # include <locale/localeinfo.h>
39 # include <wcsmbs/wcsmbsload.h>
40 # include <iconv/gconv_int.h>
41 # include <shlib-compat.h>
46 /* Prototypes of libio's codecvt functions. */
47 static enum __codecvt_result
do_out (struct _IO_codecvt
*codecvt
,
49 const wchar_t *from_start
,
50 const wchar_t *from_end
,
51 const wchar_t **from_stop
, char *to_start
,
52 char *to_end
, char **to_stop
);
53 static enum __codecvt_result
do_unshift (struct _IO_codecvt
*codecvt
,
54 __mbstate_t
*statep
, char *to_start
,
55 char *to_end
, char **to_stop
);
56 static enum __codecvt_result
do_in (struct _IO_codecvt
*codecvt
,
58 const char *from_start
,
60 const char **from_stop
, wchar_t *to_start
,
61 wchar_t *to_end
, wchar_t **to_stop
);
62 static int do_encoding (struct _IO_codecvt
*codecvt
);
63 static int do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
64 const char *from_start
,
65 const char *from_end
, _IO_size_t max
);
66 static int do_max_length (struct _IO_codecvt
*codecvt
);
67 static int do_always_noconv (struct _IO_codecvt
*codecvt
);
70 /* The functions used in `codecvt' for libio are always the same. */
71 const struct _IO_codecvt __libio_codecvt
=
73 .__codecvt_destr
= NULL
, /* Destructor, never used. */
74 .__codecvt_do_out
= do_out
,
75 .__codecvt_do_unshift
= do_unshift
,
76 .__codecvt_do_in
= do_in
,
77 .__codecvt_do_encoding
= do_encoding
,
78 .__codecvt_do_always_noconv
= do_always_noconv
,
79 .__codecvt_do_length
= do_length
,
80 .__codecvt_do_max_length
= do_max_length
84 /* Return orientation of stream. If mode is nonzero try to change
85 the orientation first. */
92 /* Normalize the value. */
93 mode
= mode
< 0 ? -1 : (mode
== 0 ? 0 : 1);
95 #if defined SHARED && defined _LIBC \
96 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
97 if (__builtin_expect (&_IO_stdin_used
== NULL
, 0)
98 && (fp
== _IO_stdin
|| fp
== _IO_stdout
|| fp
== _IO_stderr
))
99 /* This is for a stream in the glibc 2.0 format. */
103 /* The orientation already has been determined. */
105 /* Or the caller simply wants to know about the current orientation. */
109 /* Set the orientation appropriately. */
112 struct _IO_codecvt
*cc
= fp
->_codecvt
= &fp
->_wide_data
->_codecvt
;
114 fp
->_wide_data
->_IO_read_ptr
= fp
->_wide_data
->_IO_read_end
;
115 fp
->_wide_data
->_IO_write_ptr
= fp
->_wide_data
->_IO_write_base
;
117 /* Get the character conversion functions based on the currently
118 selected locale for LC_CTYPE. */
121 /* Clear the state. We start all over again. */
122 memset (&fp
->_wide_data
->_IO_state
, '\0', sizeof (__mbstate_t
));
123 memset (&fp
->_wide_data
->_IO_last_state
, '\0', sizeof (__mbstate_t
));
125 struct gconv_fcts fcts
;
126 __wcsmbs_clone_conv (&fcts
);
127 assert (fcts
.towc_nsteps
== 1);
128 assert (fcts
.tomb_nsteps
== 1);
130 /* The functions are always the same. */
131 *cc
= __libio_codecvt
;
133 cc
->__cd_in
.__cd
.__nsteps
= fcts
.towc_nsteps
;
134 cc
->__cd_in
.__cd
.__steps
= fcts
.towc
;
136 cc
->__cd_in
.__cd
.__data
[0].__invocation_counter
= 0;
137 cc
->__cd_in
.__cd
.__data
[0].__internal_use
= 1;
138 cc
->__cd_in
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
139 cc
->__cd_in
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
141 cc
->__cd_out
.__cd
.__nsteps
= fcts
.tomb_nsteps
;
142 cc
->__cd_out
.__cd
.__steps
= fcts
.tomb
;
144 cc
->__cd_out
.__cd
.__data
[0].__invocation_counter
= 0;
145 cc
->__cd_out
.__cd
.__data
[0].__internal_use
= 1;
146 cc
->__cd_out
.__cd
.__data
[0].__flags
147 = __GCONV_IS_LAST
| __GCONV_TRANSLIT
;
148 cc
->__cd_out
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
151 # ifdef _GLIBCPP_USE_WCHAR_T
153 /* Determine internal and external character sets.
155 XXX For now we make our life easy: we assume a fixed internal
156 encoding (as most sane systems have; hi HP/UX!). If somebody
157 cares about systems which changing internal charsets they
158 should come up with a solution for the determination of the
159 currently used internal character set. */
160 const char *internal_ccs
= _G_INTERNAL_CCS
;
161 const char *external_ccs
= NULL
;
163 # ifdef HAVE_NL_LANGINFO
164 external_ccs
= nl_langinfo (CODESET
);
166 if (external_ccs
== NULL
)
167 external_ccs
= "ISO-8859-1";
169 cc
->__cd_in
= iconv_open (internal_ccs
, external_ccs
);
170 if (cc
->__cd_in
!= (iconv_t
) -1)
171 cc
->__cd_out
= iconv_open (external_ccs
, internal_ccs
);
173 if (cc
->__cd_in
== (iconv_t
) -1 || cc
->__cd_out
== (iconv_t
) -1)
175 if (cc
->__cd_in
!= (iconv_t
) -1)
176 iconv_close (cc
->__cd_in
);
182 # error "somehow determine this from LC_CTYPE"
186 /* From now on use the wide character callback functions. */
187 _IO_JUMPS_FILE_plus (fp
) = fp
->_wide_data
->_wide_vtable
;
190 /* Set the mode now. */
/* Codecvt "out" callback (installed as `.__codecvt_do_out').

   Runs the outgoing conversion CODECVT->__cd_out over the wide
   characters in [FROM_START, FROM_END).  Output is directed at
   [TO_START, TO_END) and STATEP is installed as the conversion state
   before the step function is called.  Afterwards *FROM_STOP is set
   from the input cursor and *TO_STOP from the step's output pointer.

   Result mapping visible here: __GCONV_EMPTY_INPUT gives __codecvt_ok;
   __GCONV_FULL_OUTPUT and __GCONV_INCOMPLETE_INPUT give
   __codecvt_partial; a further assignment of __codecvt_error follows,
   presumably the fallback case.

   NOTE(review): this view of the function is incomplete.  The end of
   the parameter list, the braces, the declarations of `status',
   `dummy' and `res', the statement guarded by the `__shlib_handle'
   test, the tail of the DL_CALL_FCT argument list, the `switch'
   scaffolding and the preprocessor conditionals separating the three
   alternative bodies are not visible.  Restore them from the pristine
   file before changing any logic.  */
197 static enum __codecvt_result
198 do_out (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
199 const wchar_t *from_start
, const wchar_t *from_end
,
200 const wchar_t **from_stop
, char *to_start
, char *to_end
,
203 enum __codecvt_result result
;
206 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
/* Working copy of the input position; its address is handed to the
   step function and its final value is reported through *from_stop.  */
209 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
/* Point the first (only) step-data slot at the caller's output buffer
   and conversion state.  */
211 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
212 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
213 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
215 __gconv_fct fct
= gs
->__fct
;
/* NOTE(review): the statement controlled by this test is not visible.  */
217 if (gs
->__shlib_handle
!= NULL
)
221 status
= DL_CALL_FCT (fct
,
222 (gs
, codecvt
->__cd_out
.__cd
.__data
, &from_start_copy
,
223 (const unsigned char *) from_end
, NULL
,
/* Report how far the conversion got on both sides.  */
226 *from_stop
= (wchar_t *) from_start_copy
;
227 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
232 case __GCONV_EMPTY_INPUT
:
233 result
= __codecvt_ok
;
236 case __GCONV_FULL_OUTPUT
:
237 case __GCONV_INCOMPLETE_INPUT
:
238 result
= __codecvt_partial
;
242 result
= __codecvt_error
;
/* Alternative body built on iconv.  */
246 # ifdef _GLIBCPP_USE_WCHAR_T
248 const char *from_start_copy
= (const char *) from_start
;
/* NOTE(review): from_end and from_start are `const wchar_t *', so this
   difference counts wide characters, while iconv's inbytesleft
   argument is a byte count -- confirm the intended units.  */
249 size_t from_len
= from_end
- from_start
;
250 char *to_start_copy
= to_start
;
251 size_t to_len
= to_end
- to_start
;
252 res
= iconv (codecvt
->__cd_out
, &from_start_copy
, &from_len
,
253 &to_start_copy
, &to_len
);
/* NOTE(review): unlike the branch above, no assignment to *from_stop
   or *to_stop is visible in this branch -- confirm.  */
255 if (res
== 0 || from_len
== 0)
256 result
= __codecvt_ok
;
257 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
258 result
= __codecvt_partial
;
260 result
= __codecvt_error
;
263 /* Decide what to do. */
264 result
= __codecvt_error
;
/* Codecvt "unshift" callback (installed as `.__codecvt_do_unshift').

   Calls the outgoing conversion step of CODECVT->__cd_out with NULL
   input pointers, so only the output range [TO_START, TO_END) and the
   state STATEP are supplied.  *TO_STOP is then set from the step's
   output pointer.  The status-to-result mapping visible here is the
   same as in `do_out'.

   The meaning of the trailing `1, 0' arguments is defined by the gconv
   step-function interface, which is not part of this file.

   NOTE(review): this view of the function is incomplete.  The braces,
   the declarations of `status', `dummy' and `res', the statement
   guarded by the `__shlib_handle' test, the `switch' scaffolding, the
   first condition of the iconv branch's if/else chain and the
   preprocessor conditionals around the alternative bodies are not
   visible.  Restore them from the pristine file before changing any
   logic.  */
272 static enum __codecvt_result
273 do_unshift (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
274 char *to_start
, char *to_end
, char **to_stop
)
276 enum __codecvt_result result
;
279 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
/* Point the first step-data slot at the caller's output buffer and
   conversion state.  */
283 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
284 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
285 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
287 __gconv_fct fct
= gs
->__fct
;
/* NOTE(review): the statement controlled by this test is not visible.  */
289 if (gs
->__shlib_handle
!= NULL
)
293 status
= DL_CALL_FCT (fct
,
294 (gs
, codecvt
->__cd_out
.__cd
.__data
, NULL
, NULL
,
295 NULL
, &dummy
, 1, 0));
297 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
302 case __GCONV_EMPTY_INPUT
:
303 result
= __codecvt_ok
;
306 case __GCONV_FULL_OUTPUT
:
307 case __GCONV_INCOMPLETE_INPUT
:
308 result
= __codecvt_partial
;
312 result
= __codecvt_error
;
/* Alternative body built on iconv: a call with NULL input arguments
   and a real output buffer.  */
316 # ifdef _GLIBCPP_USE_WCHAR_T
318 char *to_start_copy
= (char *) to_start
;
319 size_t to_len
= to_end
- to_start
;
321 res
= iconv (codecvt
->__cd_out
, NULL
, NULL
, &to_start_copy
, &to_len
);
/* NOTE(review): the condition selecting this first outcome is not
   visible, and no assignment to *to_stop is visible in this branch.  */
324 result
= __codecvt_ok
;
325 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
326 result
= __codecvt_partial
;
328 result
= __codecvt_error
;
330 /* Decide what to do. */
331 result
= __codecvt_error
;
/* Codecvt "in" callback (installed as `.__codecvt_do_in').

   Runs the incoming conversion CODECVT->__cd_in over the bytes in
   [FROM_START, FROM_END).  Output is directed at the wide-character
   range [TO_START, TO_END) and STATEP is installed as the conversion
   state before the step function is called.  Afterwards *FROM_STOP is
   set from the input cursor and *TO_STOP from the step's output
   pointer.  The status-to-result mapping visible here is the same as
   in `do_out'.

   NOTE(review): this view of the function is incomplete.  The braces,
   the declarations of `status', `dummy' and `res', the statement
   guarded by the `__shlib_handle' test, the tail of the DL_CALL_FCT
   argument list, the `switch' scaffolding, the first condition of the
   iconv branch's if/else chain and the preprocessor conditionals
   around the alternative bodies are not visible.  Restore them from
   the pristine file before changing any logic.  */
339 static enum __codecvt_result
340 do_in (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
341 const char *from_start
, const char *from_end
, const char **from_stop
,
342 wchar_t *to_start
, wchar_t *to_end
, wchar_t **to_stop
)
344 enum __codecvt_result result
;
347 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
/* Working copy of the input position; its address is handed to the
   step function and its final value is reported through *from_stop.  */
350 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
/* Point the first step-data slot at the caller's output buffer and
   conversion state.  */
352 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
353 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
354 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
356 __gconv_fct fct
= gs
->__fct
;
/* NOTE(review): the statement controlled by this test is not visible.  */
358 if (gs
->__shlib_handle
!= NULL
)
362 status
= DL_CALL_FCT (fct
,
363 (gs
, codecvt
->__cd_in
.__cd
.__data
, &from_start_copy
,
364 (const unsigned char *) from_end
, NULL
,
/* Report how far the conversion got on both sides.  */
367 *from_stop
= (const char *) from_start_copy
;
368 *to_stop
= (wchar_t *) codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
;
373 case __GCONV_EMPTY_INPUT
:
374 result
= __codecvt_ok
;
377 case __GCONV_FULL_OUTPUT
:
378 case __GCONV_INCOMPLETE_INPUT
:
379 result
= __codecvt_partial
;
383 result
= __codecvt_error
;
/* Alternative body built on iconv.  */
387 # ifdef _GLIBCPP_USE_WCHAR_T
389 const char *from_start_copy
= (const char *) from_start
;
390 size_t from_len
= from_end
- from_start
;
/* NOTE(review): the output cursor is initialised from from_start, the
   INPUT buffer.  Everywhere else in this function to_start is the
   output buffer, so this looks like a typo for to_start; as written
   iconv would be told to write over its own input -- confirm and fix
   in the pristine file.  */
391 char *to_start_copy
= (char *) from_start
;
/* NOTE(review): to_end and to_start are `wchar_t *', so this
   difference counts wide characters, while iconv's outbytesleft
   argument is a byte count -- confirm the intended units.  */
392 size_t to_len
= to_end
- to_start
;
394 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
395 &to_start_copy
, &to_len
);
/* NOTE(review): the condition selecting this first outcome is not
   visible, and no assignment to *from_stop or *to_stop is visible in
   this branch.  */
398 result
= __codecvt_ok
;
/* Output space exhausted, or fewer input bytes left than the longest
   possible character: both are reported as partial.  */
399 else if (to_len
== 0)
400 result
= __codecvt_partial
;
401 else if (from_len
< codecvt
->__codecvt_do_max_length (codecvt
))
402 result
= __codecvt_partial
;
404 result
= __codecvt_error
;
406 /* Decide what to do. */
407 result
= __codecvt_error
;
/* Codecvt "encoding" callback (installed as `.__codecvt_do_encoding').

   Inspects the first step of the incoming conversion
   CODECVT->__cd_in: first its `__stateful' flag, then whether
   `__min_needed_from' differs from `__max_needed_from'.  When neither
   test diverts control, `__min_needed_from' is returned, i.e. the
   fixed number of input bytes per character.

   NOTE(review): this view of the function is incomplete.  The return
   type line, the braces, the statements controlled by the two `if'
   tests, the statement belonging to the "worst case" comment and any
   preprocessor conditionals are not visible, so the values returned
   in those cases cannot be stated from here.  */
416 do_encoding (struct _IO_codecvt
*codecvt
)
419 /* See whether the encoding is stateful. */
420 if (codecvt
->__cd_in
.__cd
.__steps
[0].__stateful
)
422 /* Fortunately not. Now determine the input bytes for the conversion
   necessary for each wide character. */
424 if (codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
425 != codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
)
426 /* Not a constant value. */
429 return codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
;
431 /* Worst case scenario. */
/* Codecvt "always_noconv" callback (installed as
   `.__codecvt_do_always_noconv'); declared earlier in this file as
   returning int.

   NOTE(review): only the declarator is visible in this view; the
   return type line and the whole body are missing, so the returned
   value cannot be stated from here.  */
438 do_always_noconv (struct _IO_codecvt
*codecvt
)
/* Codecvt "length" callback (installed as `.__codecvt_do_length').

   Runs the incoming conversion CODECVT->__cd_in over
   [FROM_START, FROM_END) with its output directed into a scratch
   buffer `to_buf' whose end is `&to_buf[max]', using STATEP as the
   conversion state.  The result is the distance the input cursor
   advanced from FROM_START, i.e. the number of input bytes consumed.

   NOTE(review): this view of the function is incomplete.  The return
   type line, the braces, the declarations of `result', `to_buf',
   `dummy' and `res', the statement guarded by the `__shlib_handle'
   test, the head and tail of the step-function call, the end of the
   iconv call and the preprocessor conditionals around the alternative
   bodies are not visible.  In particular, how `to_buf' is sized and
   allocated cannot be seen from here -- check that before relying on
   large values of MAX.  */
445 do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
446 const char *from_start
, const char *from_end
, _IO_size_t max
)
/* Input cursor; its address is passed to the step function.  */
450 const unsigned char *cp
= (const unsigned char *) from_start
;
452 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
/* Direct the step's output into the scratch buffer, bounded by MAX
   elements of to_buf.  */
455 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_buf
;
456 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) &to_buf
[max
];
457 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
459 __gconv_fct fct
= gs
->__fct
;
/* NOTE(review): the statement controlled by this test is not visible.  */
461 if (gs
->__shlib_handle
!= NULL
)
/* NOTE(review): the start of this call expression is not visible.  */
466 (gs
, codecvt
->__cd_in
.__cd
.__data
, &cp
,
467 (const unsigned char *) from_end
, NULL
,
/* Number of input bytes the conversion consumed.  */
470 result
= cp
- (const unsigned char *) from_start
;
/* Alternative body built on iconv.  */
472 # ifdef _GLIBCPP_USE_WCHAR_T
473 const char *from_start_copy
= (const char *) from_start
;
474 size_t from_len
= from_end
- from_start
;
477 char *to_start
= (char *) to_buf
;
/* NOTE(review): the remaining arguments of this call are not visible.  */
479 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
482 result
= from_start_copy
- (char *) from_start
;
484 /* Decide what to do. */
494 do_max_length (struct _IO_codecvt
*codecvt
)
497 return codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
;