1 /* Copyright (C) 1999-2014 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>.
18 As a special exception, if you link the code in this file with
19 files compiled with a GNU compiler to produce an executable,
20 that does not cause the resulting executable to be covered by
21 the GNU Lesser General Public License. This exception does not
22 however invalidate any other reasons why the executable file
23 might be covered by the GNU Lesser General Public License.
24 This exception applies to code released by its copyright holders
25 in files containing the exception. */
37 # include <langinfo.h>
38 # include <locale/localeinfo.h>
39 # include <wcsmbs/wcsmbsload.h>
40 # include <iconv/gconv_int.h>
41 # include <shlib-compat.h>
46 /* Prototypes of libio's codecvt functions. */
/* All of these are file-local (static).  They are the callbacks stored in
   the __libio_codecvt table that follows the prototypes.
   NOTE(review): the do_out and do_in prototypes below list fewer
   parameters than the corresponding definitions later in the file
   (no statep; do_in also has no from_end).  This looks like lines lost
   from this copy -- confirm against the upstream source.  */
/* do_out: wide-character input range, char output range.  */
47 static enum __codecvt_result
do_out (struct _IO_codecvt
*codecvt
,
49 const wchar_t *from_start
,
50 const wchar_t *from_end
,
51 const wchar_t **from_stop
, char *to_start
,
52 char *to_end
, char **to_stop
);
/* do_unshift: takes a conversion state and a char output range only;
   there is no input range.  */
53 static enum __codecvt_result
do_unshift (struct _IO_codecvt
*codecvt
,
54 __mbstate_t
*statep
, char *to_start
,
55 char *to_end
, char **to_stop
);
/* do_in: char input, wide-character output range.  */
56 static enum __codecvt_result
do_in (struct _IO_codecvt
*codecvt
,
58 const char *from_start
,
60 const char **from_stop
, wchar_t *to_start
,
61 wchar_t *to_end
, wchar_t **to_stop
);
/* do_encoding: int-valued query on the conversion object.  */
62 static int do_encoding (struct _IO_codecvt
*codecvt
);
/* do_length: int-valued query over the byte range
   [from_start, from_end) with a caller-supplied limit MAX.  */
63 static int do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
64 const char *from_start
,
65 const char *from_end
, _IO_size_t max
);
/* do_max_length: int-valued query on the conversion object.  */
66 static int do_max_length (struct _IO_codecvt
*codecvt
);
/* do_always_noconv: int-valued query on the conversion object.  */
67 static int do_always_noconv (struct _IO_codecvt
*codecvt
);
70 /* The functions used in `codecvt' for libio are always the same. */
/* Constant template of conversion callbacks, filled in with designated
   initializers.  The orientation-setting code later in this file copies
   the whole object into a stream's codecvt ("*cc = __libio_codecvt").
   NOTE(review): the braces and terminator of this initializer are not
   visible in this copy.  */
71 const struct _IO_codecvt __libio_codecvt
=
73 .__codecvt_destr
= NULL
, /* Destructor, never used. */
74 .__codecvt_do_out
= do_out
,
75 .__codecvt_do_unshift
= do_unshift
,
76 .__codecvt_do_in
= do_in
,
77 .__codecvt_do_encoding
= do_encoding
,
78 .__codecvt_do_always_noconv
= do_always_noconv
,
79 .__codecvt_do_length
= do_length
,
80 .__codecvt_do_max_length
= do_max_length
/* Constant transliteration descriptor whose __trans_fct member is
   __gconv_transliterate.  The orientation-setting code later in this
   file stores its address in the __trans field of the output
   conversion's step data.  Declared attribute_hidden.
   NOTE(review): the braces and terminator of this initializer are not
   visible in this copy.  */
85 const struct __gconv_trans_data __libio_translit attribute_hidden
=
87 .__trans_fct
= __gconv_transliterate
92 /* Return orientation of stream. If mode is nonzero try to change
93 the orientation first. */
/* NOTE(review): the function header, the braces and several statements
   (including the returns) are not visible in this copy.  The visible
   code operates on a stream `fp' and an integer `mode'.  */
100 /* Normalize the value. */
/* Any negative mode becomes -1, zero stays 0, any positive mode
   becomes 1.  */
101 mode
= mode
< 0 ? -1 : (mode
== 0 ? 0 : 1);
/* Compatibility check, compiled only for shared libc builds that still
   support the GLIBC_2_0 symbol version: true when the address of
   _IO_stdin_used is NULL and fp is one of the three standard streams.
   The action taken in that case is not visible in this copy.  */
103 #if defined SHARED && defined _LIBC \
104 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
105 if (__builtin_expect (&_IO_stdin_used
== NULL
, 0)
106 && (fp
== _IO_stdin
|| fp
== _IO_stdout
|| fp
== _IO_stderr
))
107 /* This is for a stream in the glibc 2.0 format. */
111 /* The orientation already has been determined. */
113 /* Or the caller simply wants to know about the current orientation. */
117 /* Set the orientation appropriately. */
/* Point fp->_codecvt at the codecvt object embedded in fp->_wide_data
   and keep the same pointer in the local `cc'.  */
120 struct _IO_codecvt
*cc
= fp
->_codecvt
= &fp
->_wide_data
->_codecvt
;
/* Reset the wide buffer pointers: the read pointer is moved to the read
   end and the write pointer back to the write base.  */
122 fp
->_wide_data
->_IO_read_ptr
= fp
->_wide_data
->_IO_read_end
;
123 fp
->_wide_data
->_IO_write_ptr
= fp
->_wide_data
->_IO_write_base
;
125 /* Get the character conversion functions based on the currently
126 selected locale for LC_CTYPE. */
129 /* Clear the state. We start all over again. */
/* Both the current and the saved conversion state are zero-filled.  */
130 memset (&fp
->_wide_data
->_IO_state
, '\0', sizeof (__mbstate_t
));
131 memset (&fp
->_wide_data
->_IO_last_state
, '\0', sizeof (__mbstate_t
));
/* Obtain the conversion step descriptions in `fcts'.  The code below
   only ever touches __data[0], hence the assertions that each
   direction consists of exactly one step.  */
133 struct gconv_fcts fcts
;
134 __wcsmbs_clone_conv (&fcts
);
135 assert (fcts
.towc_nsteps
== 1);
136 assert (fcts
.tomb_nsteps
== 1);
138 /* The functions are always the same. */
/* Structure copy of the constant callback table into this stream's
   codecvt object.  */
139 *cc
= __libio_codecvt
;
/* Input direction (multibyte to wide): use the `towc' steps.  */
141 cc
->__cd_in
.__cd
.__nsteps
= fcts
.towc_nsteps
;
142 cc
->__cd_in
.__cd
.__steps
= fcts
.towc
;
/* Initialise the step data of the single input step.  Its state pointer
   refers to the stream's _IO_state object.  */
144 cc
->__cd_in
.__cd
.__data
[0].__invocation_counter
= 0;
145 cc
->__cd_in
.__cd
.__data
[0].__internal_use
= 1;
146 cc
->__cd_in
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
147 cc
->__cd_in
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
149 /* XXX For now no transliteration. */
150 cc
->__cd_in
.__cd
.__data
[0].__trans
= NULL
;
/* Output direction (wide to multibyte): use the `tomb' steps.  */
152 cc
->__cd_out
.__cd
.__nsteps
= fcts
.tomb_nsteps
;
153 cc
->__cd_out
.__cd
.__steps
= fcts
.tomb
;
/* Same initialisation as for the input step.  Note that the output step
   is given a pointer to the very same _IO_state object.  */
155 cc
->__cd_out
.__cd
.__data
[0].__invocation_counter
= 0;
156 cc
->__cd_out
.__cd
.__data
[0].__internal_use
= 1;
157 cc
->__cd_out
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
158 cc
->__cd_out
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
160 /* And now the transliteration. */
/* The cast removes the const qualifier of __libio_translit so that its
   address fits the non-const __trans member.  */
161 cc
->__cd_out
.__cd
.__data
[0].__trans
162 = (struct __gconv_trans_data
*) &__libio_translit
;
/* Alternative set-up based on iconv, compiled only when
   _GLIBCPP_USE_WCHAR_T is defined.  NOTE(review): the preprocessor line
   that selects between the code above and this branch is not visible in
   this copy.  */
165 # ifdef _GLIBCPP_USE_WCHAR_T
167 /* Determine internal and external character sets.
169 XXX For now we make our life easy: we assume a fixed internal
170 encoding (as most sane systems have; hi HP/UX!). If somebody
171 cares about systems with changing internal charsets, they
172 should come up with a solution for the determination of the
173 currently used internal character set. */
174 const char *internal_ccs
= _G_INTERNAL_CCS
;
175 const char *external_ccs
= NULL
;
/* Ask the locale for its codeset name when nl_langinfo is available.  */
177 # ifdef HAVE_NL_LANGINFO
178 external_ccs
= nl_langinfo (CODESET
);
/* Fall back to ISO-8859-1 when no external charset name was found.  */
180 if (external_ccs
== NULL
)
181 external_ccs
= "ISO-8859-1";
/* Open the input descriptor (external to internal).  The output
   descriptor is opened only when the input one could be opened.  */
183 cc
->__cd_in
= iconv_open (internal_ccs
, external_ccs
);
184 if (cc
->__cd_in
!= (iconv_t
) -1)
185 cc
->__cd_out
= iconv_open (external_ccs
, internal_ccs
);
/* If either descriptor is invalid, release the input descriptor when it
   had been opened.  The rest of the failure handling is not visible in
   this copy.  */
187 if (cc
->__cd_in
== (iconv_t
) -1 || cc
->__cd_out
== (iconv_t
) -1)
189 if (cc
->__cd_in
!= (iconv_t
) -1)
190 iconv_close (cc
->__cd_in
);
196 # error "somehow determine this from LC_CTYPE"
200 /* From now on use the wide character callback functions. */
/* fp is reinterpreted as struct _IO_FILE_plus so that its vtable member
   can be replaced by the wide-character vtable kept in _wide_data.  */
201 ((struct _IO_FILE_plus
*) fp
)->vtable
= fp
->_wide_data
->_wide_vtable
;
204 /* Set the mode now. */
/* Convert the wide characters in [from_start, from_end) to bytes stored
   in [to_start, to_end).

   codecvt     conversion object; its __cd_out member is used.
   statep      conversion state handed to the conversion step.
   from_stop   receives the position up to which input was consumed.
   to_stop     receives the position one past the last byte written
               (the closing lines of the parameter list are not visible
               in this copy).

   The result is __codecvt_ok, __codecvt_partial or __codecvt_error.
   NOTE(review): braces, the switch header, some case labels and the
   preprocessor lines separating the variants are not visible in this
   copy; the visible tokens are kept as they are.  */
211 static enum __codecvt_result
212 do_out (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
213 const wchar_t *from_start
, const wchar_t *from_end
,
214 const wchar_t **from_stop
, char *to_start
, char *to_end
,
217 enum __codecvt_result result
;
/* gconv variant: take the step description of the output direction.  */
220 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
/* Work on a copy of the input pointer; the step function advances it.  */
223 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
/* Describe the output buffer and the state in the step data.  */
225 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
226 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
227 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
229 __gconv_fct fct
= gs
->__fct
;
/* Steps loaded from a shared object get extra treatment; the statement
   guarded by this test is not visible in this copy.  */
231 if (gs
->__shlib_handle
!= NULL
)
/* Run the conversion step (trailing arguments not visible here).  */
235 status
= DL_CALL_FCT (fct
,
236 (gs
, codecvt
->__cd_out
.__cd
.__data
, &from_start_copy
,
237 (const unsigned char *) from_end
, NULL
,
/* Report how far input and output advanced.  */
240 *from_stop
= (wchar_t *) from_start_copy
;
241 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
/* Map the gconv status to a codecvt result.  */
246 case __GCONV_EMPTY_INPUT
:
247 result
= __codecvt_ok
;
250 case __GCONV_FULL_OUTPUT
:
251 case __GCONV_INCOMPLETE_INPUT
:
252 result
= __codecvt_partial
;
/* Presumably the default case of the switch -- label not visible.  */
256 result
= __codecvt_error
;
/* iconv variant, compiled only when _GLIBCPP_USE_WCHAR_T is defined.  */
260 # ifdef _GLIBCPP_USE_WCHAR_T
262 const char *from_start_copy
= (const char *) from_start
;
/* iconv counts the remaining input in bytes, while from_start and
   from_end point to wchar_t objects, so the element count has to be
   scaled by the element size.  */
263 size_t from_len
= (from_end
- from_start
) * sizeof (wchar_t);
264 char *to_start_copy
= to_start
;
265 size_t to_len
= to_end
- to_start
;
266 res
= iconv (codecvt
->__cd_out
, &from_start_copy
, &from_len
,
267 &to_start_copy
, &to_len
);
/* Success, or all input consumed: ok.  Less output room left than one
   character may need: partial.  Anything else: error.  */
269 if (res
== 0 || from_len
== 0)
270 result
= __codecvt_ok
;
271 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
272 result
= __codecvt_partial
;
274 result
= __codecvt_error
;
277 /* Decide what to do. */
278 result
= __codecvt_error
;
/* Run the output conversion without any input, writing whatever it
   produces to [to_start, to_end).

   codecvt   conversion object; its __cd_out member is used.
   statep    conversion state handed to the conversion step.
   to_stop   receives the position one past the last byte written.

   The result is __codecvt_ok, __codecvt_partial or __codecvt_error.
   NOTE(review): braces, the switch header, some case labels and the
   preprocessor lines separating the variants are not visible in this
   copy.  */
286 static enum __codecvt_result
287 do_unshift (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
288 char *to_start
, char *to_end
, char **to_stop
)
290 enum __codecvt_result result
;
/* gconv variant: take the step description of the output direction.  */
293 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
/* Describe the output buffer and the state in the step data.  */
297 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
298 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
299 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
301 __gconv_fct fct
= gs
->__fct
;
/* Steps loaded from a shared object get extra treatment; the statement
   guarded by this test is not visible in this copy.  */
303 if (gs
->__shlib_handle
!= NULL
)
/* Call the step with NULL input pointers.  Presumably the trailing
   `1' requests a flush of the conversion state -- confirm against the
   gconv step calling convention.  */
307 status
= DL_CALL_FCT (fct
,
308 (gs
, codecvt
->__cd_out
.__cd
.__data
, NULL
, NULL
,
309 NULL
, &dummy
, 1, 0));
/* Report how far the output advanced.  */
311 *to_stop
= (char *) codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
/* Map the gconv status to a codecvt result.  */
316 case __GCONV_EMPTY_INPUT
:
317 result
= __codecvt_ok
;
320 case __GCONV_FULL_OUTPUT
:
321 case __GCONV_INCOMPLETE_INPUT
:
322 result
= __codecvt_partial
;
/* Presumably the default case of the switch -- label not visible.  */
326 result
= __codecvt_error
;
/* iconv variant, compiled only when _GLIBCPP_USE_WCHAR_T is defined.  */
330 # ifdef _GLIBCPP_USE_WCHAR_T
332 char *to_start_copy
= (char *) to_start
;
333 size_t to_len
= to_end
- to_start
;
/* iconv is called with a NULL input buffer, so only the output side is
   involved.  */
335 res
= iconv (codecvt
->__cd_out
, NULL
, NULL
, &to_start_copy
, &to_len
);
/* The condition guarding the `ok' result is not visible in this copy.
   Less output room left than one character may need gives partial.  */
338 result
= __codecvt_ok
;
339 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
340 result
= __codecvt_partial
;
342 result
= __codecvt_error
;
344 /* Decide what to do. */
345 result
= __codecvt_error
;
/* Convert the bytes in [from_start, from_end) to wide characters stored
   in [to_start, to_end).

   codecvt     conversion object; its __cd_in member is used.
   statep      conversion state handed to the conversion step.
   from_stop   receives the position up to which input was consumed.
   to_stop     receives the position one past the last wide character
               written.

   The result is __codecvt_ok, __codecvt_partial or __codecvt_error.
   NOTE(review): braces, the switch header, some case labels and the
   preprocessor lines separating the variants are not visible in this
   copy; the visible tokens are kept as they are.  */
353 static enum __codecvt_result
354 do_in (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
355 const char *from_start
, const char *from_end
, const char **from_stop
,
356 wchar_t *to_start
, wchar_t *to_end
, wchar_t **to_stop
)
358 enum __codecvt_result result
;
/* gconv variant: take the step description of the input direction.  */
361 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
/* Work on a copy of the input pointer; the step function advances it.  */
364 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
/* Describe the output buffer and the state in the step data.  */
366 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_start
;
367 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) to_end
;
368 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
370 __gconv_fct fct
= gs
->__fct
;
/* Steps loaded from a shared object get extra treatment; the statement
   guarded by this test is not visible in this copy.  */
372 if (gs
->__shlib_handle
!= NULL
)
/* Run the conversion step (trailing arguments not visible here).  */
376 status
= DL_CALL_FCT (fct
,
377 (gs
, codecvt
->__cd_in
.__cd
.__data
, &from_start_copy
,
378 (const unsigned char *) from_end
, NULL
,
/* Report how far input and output advanced.  */
381 *from_stop
= (const char *) from_start_copy
;
382 *to_stop
= (wchar_t *) codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
;
/* Map the gconv status to a codecvt result.  */
387 case __GCONV_EMPTY_INPUT
:
388 result
= __codecvt_ok
;
391 case __GCONV_FULL_OUTPUT
:
392 case __GCONV_INCOMPLETE_INPUT
:
393 result
= __codecvt_partial
;
/* Presumably the default case of the switch -- label not visible.  */
397 result
= __codecvt_error
;
/* iconv variant, compiled only when _GLIBCPP_USE_WCHAR_T is defined.  */
401 # ifdef _GLIBCPP_USE_WCHAR_T
403 const char *from_start_copy
= (const char *) from_start
;
404 size_t from_len
= from_end
- from_start
;
/* The output cursor must start at the output buffer.  Starting it at
   from_start would make iconv overwrite the input.  */
405 char *to_start_copy
= (char *) to_start
;
/* iconv counts the remaining output room in bytes, while to_start and
   to_end point to wchar_t objects, so the element count has to be
   scaled by the element size.  */
406 size_t to_len
= (to_end
- to_start
) * sizeof (wchar_t);
408 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
409 &to_start_copy
, &to_len
);
/* The condition guarding the `ok' result is not visible in this copy.
   No output room left, or fewer input bytes left than one character
   may need, gives partial; anything else is an error.  */
412 result
= __codecvt_ok
;
413 else if (to_len
== 0)
414 result
= __codecvt_partial
;
415 else if (from_len
< codecvt
->__codecvt_do_max_length (codecvt
))
416 result
= __codecvt_partial
;
418 result
= __codecvt_error
;
420 /* Decide what to do. */
421 result
= __codecvt_error
;
/* Describe the external encoding in terms of bytes per character, based
   on the first step of the input conversion.

   When the step's minimum and maximum number of input bytes per
   character agree, that common value is returned.
   NOTE(review): the return type, the braces and the values returned for
   the stateful, the variable-width and the "worst case" paths are not
   visible in this copy.  */
430 do_encoding (struct _IO_codecvt
*codecvt
)
433 /* See whether the encoding is stateful. */
434 if (codecvt
->__cd_in
.__cd
.__steps
[0].__stateful
)
436 /* Fortunately not. Now determine the input bytes for the conversion
437 necessary for each wide character. */
438 if (codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
439 != codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
)
440 /* Not a constant value. */
/* Fixed width: minimum and maximum are equal, return that width.  */
443 return codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
;
445 /* Worst case scenario. */
/* Callback stored in the __codecvt_do_always_noconv slot of
   __libio_codecvt.  It takes the conversion object as its only
   parameter.
   NOTE(review): the return type and the body are not visible in this
   copy, so the returned value cannot be documented from here.  */
452 do_always_noconv (struct _IO_codecvt
*codecvt
)
/* Determine how many bytes of [from_start, from_end) are consumed when
   the input conversion is run into a scratch buffer `to_buf' limited to
   MAX elements.

   codecvt   conversion object; its __cd_in member is used.
   statep    conversion state handed to the conversion step.
   max       number of elements of to_buf made available as output.

   NOTE(review): the return type, the braces, the declarations of
   `result' and `to_buf', the return statement and the preprocessor lines
   separating the variants are not visible in this copy.  */
459 do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
460 const char *from_start
, const char *from_end
, _IO_size_t max
)
/* gconv variant: cp is the input cursor advanced by the step function.  */
464 const unsigned char *cp
= (const unsigned char *) from_start
;
466 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
/* The output goes to to_buf; its end is placed MAX elements in.  */
469 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (unsigned char *) to_buf
;
470 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (unsigned char *) &to_buf
[max
];
471 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
473 __gconv_fct fct
= gs
->__fct
;
/* Steps loaded from a shared object get extra treatment; the statement
   guarded by this test is not visible in this copy.  */
475 if (gs
->__shlib_handle
!= NULL
)
/* Argument list of the conversion step call; the call itself and the
   trailing arguments are not visible in this copy.  */
480 (gs
, codecvt
->__cd_in
.__cd
.__data
, &cp
,
481 (const unsigned char *) from_end
, NULL
,
/* Number of input bytes the conversion consumed.  */
484 result
= cp
- (const unsigned char *) from_start
;
/* iconv variant, compiled only when _GLIBCPP_USE_WCHAR_T is defined.  */
486 # ifdef _GLIBCPP_USE_WCHAR_T
487 const char *from_start_copy
= (const char *) from_start
;
488 size_t from_len
= from_end
- from_start
;
491 char *to_start
= (char *) to_buf
;
/* The output arguments of this iconv call are not visible in this
   copy.  */
493 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
/* Number of input bytes iconv consumed.  */
496 result
= from_start_copy
- (char *) from_start
;
498 /* Decide what to do. */
/* Return the __max_needed_from value of the first step of the input
   conversion, i.e. the largest number of input bytes that step may need
   for one character.
   NOTE(review): the return type, the braces and any alternative variant
   of the body are not visible in this copy.  */
508 do_max_length (struct _IO_codecvt
*codecvt
)
511 return codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
;