1 /* Copyright (C) 1999-2017 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>.
18 As a special exception, if you link the code in this file with
19 files compiled with a GNU compiler to produce an executable,
20 that does not cause the resulting executable to be covered by
21 the GNU Lesser General Public License. This exception does not
22 however invalidate any other reasons why the executable file
23 might be covered by the GNU Lesser General Public License.
24 This exception applies to code released by its copyright holders
25 in files containing the exception. */
37 # include <langinfo.h>
38 # include <locale/localeinfo.h>
39 # include <wcsmbs/wcsmbsload.h>
40 # include <iconv/gconv_int.h>
41 # include <shlib-compat.h>
46 /* Prototypes of libio's codecvt functions. */
/* Forward declarations of the file-local callbacks that the
   __libio_codecvt initializer further down refers to by name.
   NOTE(review): the embedded numbering has gaps (48, 57, 59), so some
   parameter lines appear to be lost from this listing. */
47 static enum __codecvt_result
do_out (struct _IO_codecvt
*codecvt
,
/* NOTE(review): no statep parameter is visible here, while the do_out
   definition later in the file takes one right after codecvt - confirm
   against the complete file. */
49 const wchar_t *from_start
,
50 const wchar_t *from_end
,
51 const wchar_t **from_stop
, char *to_start
,
52 char *to_end
, char **to_stop
);
53 static enum __codecvt_result
do_unshift (struct _IO_codecvt
*codecvt
,
54 __mbstate_t
*statep
, char *to_start
,
55 char *to_end
, char **to_stop
);
56 static enum __codecvt_result
do_in (struct _IO_codecvt
*codecvt
,
/* NOTE(review): neither statep nor from_end is visible in this declaration,
   while the do_in definition later in the file has both - confirm against
   the complete file. */
58 const char *from_start
,
60 const char **from_stop
, wchar_t *to_start
,
61 wchar_t *to_end
, wchar_t **to_stop
);
62 static int do_encoding (struct _IO_codecvt
*codecvt
);
63 static int do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
64 const char *from_start
,
65 const char *from_end
, _IO_size_t max
);
66 static int do_max_length (struct _IO_codecvt
*codecvt
);
67 static int do_always_noconv (struct _IO_codecvt
*codecvt
);
70 /* The functions used in `codecvt' for libio are always the same. */
/* Constant callback table: every designated member below is bound to the
   static function of the matching name declared above, except the
   destructor slot, which is NULL.
   NOTE(review): the braces around the initializer and its terminating
   semicolon (numbered lines 72 and 81) are not part of this listing. */
71 const struct _IO_codecvt __libio_codecvt
=
73 .__codecvt_destr
= NULL
, /* Destructor, never used. */
74 .__codecvt_do_out
= do_out
,
75 .__codecvt_do_unshift
= do_unshift
,
76 .__codecvt_do_in
= do_in
,
77 .__codecvt_do_encoding
= do_encoding
,
78 .__codecvt_do_always_noconv
= do_always_noconv
,
79 .__codecvt_do_length
= do_length
,
80 .__codecvt_do_max_length
= do_max_length
84 /* Return orientation of stream. If mode is nonzero try to change
85 the orientation first. */
/* Parameters: fp is the stream, mode is the requested orientation.
   NOTE(review): the embedded numbering has gaps; the return type, braces,
   the statements that belong to the comments numbered 97-107, the
   preprocessor else/endif lines and the return statements are not part of
   this listing, so the visible text is not the complete function. */
88 _IO_fwide (_IO_FILE
*fp
, int mode
)
90 /* Normalize the value. */
/* After this assignment mode is exactly -1, 0 or 1. */
91 mode
= mode
< 0 ? -1 : (mode
== 0 ? 0 : 1);
93 #if defined SHARED && defined _LIBC \
94 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
/* Special case for the three standard streams when the address of
   _IO_stdin_used is NULL; the statement executed in that case is on a
   line missing from this listing. */
95 if (__builtin_expect (&_IO_stdin_used
== NULL
, 0)
96 && (fp
== _IO_stdin
|| fp
== _IO_stdout
|| fp
== _IO_stderr
))
97 /* This is for a stream in the glibc 2.0 format. */
101 /* The orientation already has been determined. */
103 /* Or the caller simply wants to know about the current orientation. */
107 /* Set the orientation appropriately. */
/* cc points at the codecvt object embedded in fp->_wide_data; the same
   address is also stored in fp->_codecvt. */
110 struct _IO_codecvt
*cc
= fp
->_codecvt
= &fp
->_wide_data
->_codecvt
;
/* Move the wide read pointer to the end of the read area and the wide
   write pointer back to the write base. */
112 fp
->_wide_data
->_IO_read_ptr
= fp
->_wide_data
->_IO_read_end
;
113 fp
->_wide_data
->_IO_write_ptr
= fp
->_wide_data
->_IO_write_base
;
115 /* Get the character conversion functions based on the currently
116 selected locale for LC_CTYPE. */
119 /* Clear the state. We start all over again. */
120 memset (&fp
->_wide_data
->_IO_state
, '\0', sizeof (__mbstate_t
));
121 memset (&fp
->_wide_data
->_IO_last_state
, '\0', sizeof (__mbstate_t
));
/* fcts is filled in by __wcsmbs_clone_conv; the asserts require exactly
   one conversion step in each direction. */
123 struct gconv_fcts fcts
;
124 __wcsmbs_clone_conv (&fcts
);
125 assert (fcts
.towc_nsteps
== 1);
126 assert (fcts
.tomb_nsteps
== 1);
128 /* The functions are always the same. */
129 *cc
= __libio_codecvt
;
/* Input direction: steps come from fcts.towc; the single data element is
   flagged __GCONV_IS_LAST and its state pointer refers to
   fp->_wide_data->_IO_state. */
131 cc
->__cd_in
.__cd
.__nsteps
= fcts
.towc_nsteps
;
132 cc
->__cd_in
.__cd
.__steps
= fcts
.towc
;
134 cc
->__cd_in
.__cd
.__data
[0].__invocation_counter
= 0;
135 cc
->__cd_in
.__cd
.__data
[0].__internal_use
= 1;
136 cc
->__cd_in
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
137 cc
->__cd_in
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
/* Output direction: steps come from fcts.tomb; the flags additionally
   include __GCONV_TRANSLIT, and the state pointer refers to the same
   _IO_state object as the input direction. */
139 cc
->__cd_out
.__cd
.__nsteps
= fcts
.tomb_nsteps
;
140 cc
->__cd_out
.__cd
.__steps
= fcts
.tomb
;
142 cc
->__cd_out
.__cd
.__data
[0].__invocation_counter
= 0;
143 cc
->__cd_out
.__cd
.__data
[0].__internal_use
= 1;
144 cc
->__cd_out
.__cd
.__data
[0].__flags
145 = __GCONV_IS_LAST
| __GCONV_TRANSLIT
;
146 cc
->__cd_out
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
/* Alternative setup based on iconv_open, compiled only when
   _GLIBCPP_USE_WCHAR_T is defined.  NOTE(review): the enclosing
   conditional lines are missing from this listing; presumably this is the
   branch used when the code above is not compiled - confirm. */
149 # ifdef _GLIBCPP_USE_WCHAR_T
151 /* Determine internal and external character sets.
153 XXX For now we make our life easy: we assume a fixed internal
154 encoding (as most sane systems have; hi HP/UX!). If somebody
155 cares about systems with changing internal charsets they
156 should come up with a solution for the determination of the
157 currently used internal character set. */
158 const char *internal_ccs
= _G_INTERNAL_CCS
;
159 const char *external_ccs
= NULL
;
161 # ifdef HAVE_NL_LANGINFO
162 external_ccs
= nl_langinfo (CODESET
);
/* Fall back to ISO-8859-1 when no external codeset name was obtained. */
164 if (external_ccs
== NULL
)
165 external_ccs
= "ISO-8859-1";
/* Open the input descriptor first; the output descriptor is opened only
   if the input one was opened successfully. */
167 cc
->__cd_in
= iconv_open (internal_ccs
, external_ccs
);
168 if (cc
->__cd_in
!= (iconv_t
) -1)
169 cc
->__cd_out
= iconv_open (external_ccs
, internal_ccs
);
/* If either descriptor is invalid, close the input descriptor when it is
   valid.  The rest of this failure path is on lines missing from this
   listing. */
171 if (cc
->__cd_in
== (iconv_t
) -1 || cc
->__cd_out
== (iconv_t
) -1)
173 if (cc
->__cd_in
!= (iconv_t
) -1)
174 iconv_close (cc
->__cd_in
);
180 # error "somehow determine this from LC_CTYPE"
184 /* From now on use the wide character callback functions. */
185 _IO_JUMPS_FILE_plus (fp
) = fp
->_wide_data
->_wide_vtable
;
188 /* Set the mode now. */
195 static enum __codecvt_result
/* Wide-to-byte conversion callback.  The visible code feeds the input
   range starting at from_start and ending at from_end to the conversion
   step stored in codecvt->__cd_out, with to_start and to_end as the output
   buffer bounds, stores the stop positions through from_stop and to_stop,
   and maps the step status to an enum __codecvt_result value in result.
   NOTE(review): the embedded numbering has gaps; the to_stop parameter
   line, the braces, the declarations of status and res, the end of the
   DL_CALL_FCT argument list, the switch header, some labels, else lines
   and the return statement are not part of this listing. */
196 do_out (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
197 const wchar_t *from_start
, const wchar_t *from_end
,
198 const wchar_t **from_stop
, char *to_start
, char *to_end
,
201 enum __codecvt_result result
;
204 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
/* Working copy of the input position; its address is passed to the step
   call below and its value is reported through from_stop afterwards. */
207 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
/* Point the step data at the caller output buffer and conversion state. */
209 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
210 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
211 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
213 __gconv_fct fct
= gs
->__fct
;
/* The statement guarded by this test is on a line missing from this
   listing. */
215 if (gs
->__shlib_handle
!= NULL
)
219 status
= DL_CALL_FCT (fct
,
220 (gs
, codecvt
->__cd_out
.__cd
.__data
, &from_start_copy
,
221 (const unsigned char *) from_end
, NULL
,
/* Report how far input and output got. */
224 *from_stop
= (wchar_t *) from_start_copy
;
225 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
/* Status mapping: __GCONV_EMPTY_INPUT gives __codecvt_ok;
   __GCONV_FULL_OUTPUT and __GCONV_INCOMPLETE_INPUT give __codecvt_partial.
   The label for the __codecvt_error assignment is not visible; presumably
   it is the default case - confirm. */
230 case __GCONV_EMPTY_INPUT
:
231 result
= __codecvt_ok
;
234 case __GCONV_FULL_OUTPUT
:
235 case __GCONV_INCOMPLETE_INPUT
:
236 result
= __codecvt_partial
;
240 result
= __codecvt_error
;
/* iconv based variant, compiled only when _GLIBCPP_USE_WCHAR_T is
   defined. */
244 # ifdef _GLIBCPP_USE_WCHAR_T
246 const char *from_start_copy
= (const char *) from_start
;
/* NOTE(review): from_start and from_end are wchar_t pointers, so this
   difference counts wide characters; POSIX iconv takes the input length in
   bytes - confirm whether a multiplication by sizeof (wchar_t) is
   intended. */
247 size_t from_len
= from_end
- from_start
;
248 char *to_start_copy
= to_start
;
249 size_t to_len
= to_end
- to_start
;
250 res
= iconv (codecvt
->__cd_out
, &from_start_copy
, &from_len
,
251 &to_start_copy
, &to_len
);
/* Ok when iconv returned 0 or no input is left; partial when the remaining
   output space is smaller than the value returned by the max length
   callback; the last assignment selects error (its else line is missing
   from this listing). */
253 if (res
== 0 || from_len
== 0)
254 result
= __codecvt_ok
;
255 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
256 result
= __codecvt_partial
;
258 result
= __codecvt_error
;
261 /* Decide what to do. */
262 result
= __codecvt_error
;
270 static enum __codecvt_result
/* Unshift callback.  The visible code calls the conversion step stored in
   codecvt->__cd_out with NULL input pointers and with to_start and to_end
   as output buffer bounds, stores the resulting output position through
   to_stop, and maps the step status to an enum __codecvt_result value.
   NOTE(review): the embedded numbering has gaps; braces, the declarations
   of status, dummy and res, the switch header, some labels, the first test
   of the iconv result, else lines and the return statement are not part of
   this listing. */
271 do_unshift (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
272 char *to_start
, char *to_end
, char **to_stop
)
274 enum __codecvt_result result
;
277 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
/* Point the step data at the caller output buffer and conversion state. */
281 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
282 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
283 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
285 __gconv_fct fct
= gs
->__fct
;
/* The statement guarded by this test is on a line missing from this
   listing. */
287 if (gs
->__shlib_handle
!= NULL
)
/* No input is supplied: both input pointer arguments are NULL and the
   trailing arguments are the address of dummy, 1 and 0. */
291 status
= DL_CALL_FCT (fct
,
292 (gs
, codecvt
->__cd_out
.__cd
.__data
, NULL
, NULL
,
293 NULL
, &dummy
, 1, 0));
295 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
/* Status mapping: __GCONV_EMPTY_INPUT gives __codecvt_ok;
   __GCONV_FULL_OUTPUT and __GCONV_INCOMPLETE_INPUT give __codecvt_partial.
   The label for the __codecvt_error assignment is not visible; presumably
   it is the default case - confirm. */
300 case __GCONV_EMPTY_INPUT
:
301 result
= __codecvt_ok
;
304 case __GCONV_FULL_OUTPUT
:
305 case __GCONV_INCOMPLETE_INPUT
:
306 result
= __codecvt_partial
;
310 result
= __codecvt_error
;
/* iconv based variant, compiled only when _GLIBCPP_USE_WCHAR_T is
   defined. */
314 # ifdef _GLIBCPP_USE_WCHAR_T
316 char *to_start_copy
= (char *) to_start
;
317 size_t to_len
= to_end
- to_start
;
/* iconv is called with NULL input arguments and the output buffer only. */
319 res
= iconv (codecvt
->__cd_out
, NULL
, NULL
, &to_start_copy
, &to_len
);
/* The condition guarding the __codecvt_ok assignment (numbered line 321)
   is missing from this listing.  Partial is chosen when the remaining
   output space is smaller than the value returned by the max length
   callback. */
322 result
= __codecvt_ok
;
323 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
324 result
= __codecvt_partial
;
326 result
= __codecvt_error
;
328 /* Decide what to do. */
329 result
= __codecvt_error
;
337 static enum __codecvt_result
/* Byte-to-wide conversion callback.  The visible code feeds the input
   range starting at from_start and ending at from_end to the conversion
   step stored in codecvt->__cd_in, with to_start and to_end as the output
   buffer bounds, stores the stop positions through from_stop and to_stop,
   and maps the step status to an enum __codecvt_result value in result.
   NOTE(review): the embedded numbering has gaps; braces, the declarations
   of status and res, the end of the DL_CALL_FCT argument list, the switch
   header, some labels, the first test of the iconv result, else lines and
   the return statement are not part of this listing. */
338 do_in (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
339 const char *from_start
, const char *from_end
, const char **from_stop
,
340 wchar_t *to_start
, wchar_t *to_end
, wchar_t **to_stop
)
342 enum __codecvt_result result
;
345 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
/* Working copy of the input position; its address is passed to the step
   call below and its value is reported through from_stop afterwards. */
348 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
/* Point the step data at the caller output buffer and conversion state. */
350 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
351 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
352 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
354 __gconv_fct fct
= gs
->__fct
;
/* The statement guarded by this test is on a line missing from this
   listing. */
356 if (gs
->__shlib_handle
!= NULL
)
360 status
= DL_CALL_FCT (fct
,
361 (gs
, codecvt
->__cd_in
.__cd
.__data
, &from_start_copy
,
362 (const unsigned char *) from_end
, NULL
,
/* Report how far input and output got. */
365 *from_stop
= (const char *) from_start_copy
;
366 *to_stop
= (wchar_t *) codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
;
/* Status mapping: __GCONV_EMPTY_INPUT gives __codecvt_ok;
   __GCONV_FULL_OUTPUT and __GCONV_INCOMPLETE_INPUT give __codecvt_partial.
   The label for the __codecvt_error assignment is not visible; presumably
   it is the default case - confirm. */
371 case __GCONV_EMPTY_INPUT
:
372 result
= __codecvt_ok
;
375 case __GCONV_FULL_OUTPUT
:
376 case __GCONV_INCOMPLETE_INPUT
:
377 result
= __codecvt_partial
;
381 result
= __codecvt_error
;
/* iconv based variant, compiled only when _GLIBCPP_USE_WCHAR_T is
   defined. */
385 # ifdef _GLIBCPP_USE_WCHAR_T
387 const char *from_start_copy
= (const char *) from_start
;
388 size_t from_len
= from_end
- from_start
;
/* NOTE(review): the output cursor is initialised from from_start, not from
   to_start, so iconv would write over the input buffer; this looks like a
   copy and paste slip - confirm and fix in the complete file. */
389 char *to_start_copy
= (char *) from_start
;
/* NOTE(review): to_start and to_end are wchar_t pointers, so this
   difference counts wide characters, while POSIX iconv takes the output
   space in bytes - confirm. */
390 size_t to_len
= to_end
- to_start
;
392 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
393 &to_start_copy
, &to_len
);
/* The condition guarding the __codecvt_ok assignment (numbered line 395)
   is missing from this listing.  Partial is chosen when no output space is
   left, or when the remaining input is shorter than the value returned by
   the max length callback. */
396 result
= __codecvt_ok
;
397 else if (to_len
== 0)
398 result
= __codecvt_partial
;
399 else if (from_len
< codecvt
->__codecvt_do_max_length (codecvt
))
400 result
= __codecvt_partial
;
402 result
= __codecvt_error
;
404 /* Decide what to do. */
405 result
= __codecvt_error
;
414 do_encoding (struct _IO_codecvt
*codecvt
)
/* Encoding-width callback.  The only return statement visible here yields
   __min_needed_from of the first input step; it follows the test that
   compares that value with __max_needed_from.
   NOTE(review): the return type, braces and the statements executed on
   the stateful path, the variable-width path and under the comment
   numbered 429 are on lines missing from this listing. */
417 /* See whether the encoding is stateful. */
418 if (codecvt
->__cd_in
.__cd
.__steps
[0].__stateful
)
420 /* Fortunately not. Now determine the input bytes for the conversion
421 necessary for each wide character. */
422 if (codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
423 != codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
)
424 /* Not a constant value. */
427 return codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
;
429 /* Worst case scenario. */
436 do_always_noconv (struct _IO_codecvt
*codecvt
)
/* NOTE(review): only the parameter list of this callback is present in the
   listing; its return type (declared as static int in the prototype above)
   and its body are on missing lines, so nothing can be said here about the
   value it returns. */
443 do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
444 const char *from_start
, const char *from_end
, _IO_size_t max
)
/* Length callback.  The visible code runs the input conversion step over
   the bytes from from_start up to from_end into the scratch buffer to_buf,
   limited to max elements, and sets result to the number of input bytes
   consumed.
   NOTE(review): the return type, braces, the declarations of result,
   to_buf and res, the start and end of the step call, part of the iconv
   call and the return statement are on lines missing from this listing. */
/* cp is the input cursor; its address is passed to the step call. */
448 const unsigned char *cp
= (const unsigned char *) from_start
;
450 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
/* Output goes to to_buf, bounded by the address of element max. */
453 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_buf
;
454 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) &to_buf
[max
];
455 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
457 __gconv_fct fct
= gs
->__fct
;
/* The statement guarded by this test is on a line missing from this
   listing. */
459 if (gs
->__shlib_handle
!= NULL
)
464 (gs
, codecvt
->__cd_in
.__cd
.__data
, &cp
,
465 (const unsigned char *) from_end
, NULL
,
/* Distance between the input cursor and the start of the input. */
468 result
= cp
- (const unsigned char *) from_start
;
/* iconv based variant, compiled only when _GLIBCPP_USE_WCHAR_T is
   defined. */
470 # ifdef _GLIBCPP_USE_WCHAR_T
471 const char *from_start_copy
= (const char *) from_start
;
472 size_t from_len
= from_end
- from_start
;
475 char *to_start
= (char *) to_buf
;
477 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
/* Distance between the iconv input cursor and the start of the input. */
480 result
= from_start_copy
- (char *) from_start
;
482 /* Decide what to do. */
492 do_max_length (struct _IO_codecvt
*codecvt
)
/* Max-length callback: the visible return statement yields
   __max_needed_from of the first input conversion step.
   NOTE(review): the return type, braces and any alternative branch are on
   lines missing from this listing. */
495 return codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
;