1 /* Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
2 This file is part of the GNU IO Library.
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License as
6 published by the Free Software Foundation; either version 2, or (at
7 your option) any later version.
9 This library is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this library; see the file COPYING. If not, write to
16 the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 As a special exception, if you link this library with files
20 compiled with a GNU compiler to produce an executable, this does
21 not cause the resulting executable to be covered by the GNU General
22 Public License. This exception does not however invalidate any
23 other reasons why the executable file might be covered by the GNU
24 General Public License. */
35 # include <langinfo.h>
36 # include <locale/localeinfo.h>
37 # include <wcsmbs/wcsmbsload.h>
38 # include <iconv/gconv_int.h>
39 # include <shlib-compat.h>
43 /* Prototypes of libio's codecvt functions. */
44 static enum __codecvt_result
do_out (struct _IO_codecvt
*codecvt
,
46 const wchar_t *from_start
,
47 const wchar_t *from_end
,
48 const wchar_t **from_stop
, char *to_start
,
49 char *to_end
, char **to_stop
);
50 static enum __codecvt_result
do_unshift (struct _IO_codecvt
*codecvt
,
51 __mbstate_t
*statep
, char *to_start
,
52 char *to_end
, char **to_stop
);
53 static enum __codecvt_result
do_in (struct _IO_codecvt
*codecvt
,
55 const char *from_start
,
57 const char **from_stop
, wchar_t *to_start
,
58 wchar_t *to_end
, wchar_t **to_stop
);
59 static int do_encoding (struct _IO_codecvt
*codecvt
);
60 static int do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
61 const char *from_start
,
62 const char *from_end
, _IO_size_t max
);
63 static int do_max_length (struct _IO_codecvt
*codecvt
);
64 static int do_always_noconv (struct _IO_codecvt
*codecvt
);
67 /* The functions used in `codecvt' for libio are always the same. */
68 struct _IO_codecvt __libio_codecvt
=
70 .__codecvt_destr
= NULL
, /* Destructor, never used. */
71 .__codecvt_do_out
= do_out
,
72 .__codecvt_do_unshift
= do_unshift
,
73 .__codecvt_do_in
= do_in
,
74 .__codecvt_do_encoding
= do_encoding
,
75 .__codecvt_do_always_noconv
= do_always_noconv
,
76 .__codecvt_do_length
= do_length
,
77 .__codecvt_do_max_length
= do_max_length
82 static struct __gconv_trans_data libio_translit
=
84 .__trans_fct
= __gconv_transliterate
/* NOTE(review): the function header that owns the statements below is
   not present in this copy of the file.  The statements read `fp' and
   `mode', so those are presumably its parameters -- confirm against
   the upstream source before editing.  */
89 /* Return orientation of stream. If mode is nonzero try to change
90 the orientation first. */
97 /* Normalize the value. */
/* MODE is collapsed to -1 when negative, 0 when zero and 1 when
   positive.  */
98 mode
= mode
< 0 ? -1 : (mode
== 0 ? 0 : 1);
101 /* The caller simply wants to know about the current orientation. */
/* The test below holds only when the address of _IO_stdin_used is
   null and FP is one of _IO_stdin, _IO_stdout or _IO_stderr.  */
104 #if defined SHARED && defined _LIBC \
105 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
106 if (__builtin_expect (&_IO_stdin_used
== NULL
, 0)
107 && (fp
== _IO_stdin
|| fp
== _IO_stdout
|| fp
== _IO_stderr
))
108 /* This is for a stream in the glibc 2.0 format. */
113 /* The orientation already has been determined. */
116 /* Set the orientation appropriately. */
/* CC aliases the codecvt object embedded in the stream's wide data;
   the same address is also stored in fp->_codecvt.  */
119 struct _IO_codecvt
*cc
= fp
->_codecvt
= &fp
->_wide_data
->_codecvt
;
/* Reset the wide buffer cursors: _IO_read_ptr is set to _IO_read_end
   and _IO_write_ptr is set to _IO_write_base.  */
121 fp
->_wide_data
->_IO_read_ptr
= fp
->_wide_data
->_IO_read_end
;
122 fp
->_wide_data
->_IO_write_ptr
= fp
->_wide_data
->_IO_write_base
;
124 /* Get the character conversion functions based on the currently
125 selected locale for LC_CTYPE. */
128 struct gconv_fcts fcts
;
130 /* Clear the state. We start all over again. */
131 memset (&fp
->_wide_data
->_IO_state
, '\0', sizeof (__mbstate_t
));
132 memset (&fp
->_wide_data
->_IO_last_state
, '\0', sizeof (__mbstate_t
));
/* FCTS is handed to __wcsmbs_clone_conv by address; its `towc' and
   `tomb' members are installed below as the single conversion step of
   the input and output direction respectively.  */
134 __wcsmbs_clone_conv (&fcts
);
136 /* The functions are always the same. */
137 *cc
= __libio_codecvt
;
/* Input direction (__cd_in): one step, taken from fcts.towc.  */
139 cc
->__cd_in
.__cd
.__nsteps
= 1; /* Only one step allowed. */
140 cc
->__cd_in
.__cd
.__steps
= fcts
.towc
;
142 cc
->__cd_in
.__cd
.__data
[0].__invocation_counter
= 0;
143 cc
->__cd_in
.__cd
.__data
[0].__internal_use
= 1;
144 cc
->__cd_in
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
/* The input step keeps its shift state in the stream's _IO_state.  */
145 cc
->__cd_in
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
147 /* XXX For now no transliteration. */
148 cc
->__cd_in
.__cd
.__data
[0].__trans
= NULL
;
/* Output direction (__cd_out): one step, taken from fcts.tomb.  */
150 cc
->__cd_out
.__cd
.__nsteps
= 1; /* Only one step allowed. */
151 cc
->__cd_out
.__cd
.__steps
= fcts
.tomb
;
153 cc
->__cd_out
.__cd
.__data
[0].__invocation_counter
= 0;
154 cc
->__cd_out
.__cd
.__data
[0].__internal_use
= 1;
155 cc
->__cd_out
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
/* The output step is pointed at the same _IO_state object as the
   input step above.  */
156 cc
->__cd_out
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
158 /* And now the transliteration. */
160 cc
->__cd_out
.__cd
.__data
[0].__trans
= &libio_translit
;
/* NOTE(review): this assignment directly follows one that stores
   &libio_translit in the same field.  Presumably the two are
   alternatives under a preprocessor conditional that is not visible in
   this copy -- confirm.  */
162 cc
->__cd_out
.__cd
.__data
[0].__trans
= NULL
;
166 # ifdef _GLIBCPP_USE_WCHAR_T
168 /* Determine internal and external character sets.
170 XXX For now we make our life easy: we assume a fixed internal
171 encoding (as most sane systems have; hi HP/UX!). If somebody
172 cares about systems which changing internal charsets they
173 should come up with a solution for the determination of the
174 currently used internal character set. */
175 const char *internal_ccs
= _G_INTERNAL_CCS
;
176 const char *external_ccs
= NULL
;
178 # ifdef HAVE_NL_LANGINFO
179 external_ccs
= nl_langinfo (CODESET
);
/* Fall back to a fixed external character set name when none was
   obtained above.  */
181 if (external_ccs
== NULL
)
182 external_ccs
= "ISO-8859-1";
/* iconv_open takes the target name first: __cd_in converts from
   external_ccs to internal_ccs, __cd_out the other way round.  The
   output descriptor is only opened when the input one was obtained.  */
184 cc
->__cd_in
= iconv_open (internal_ccs
, external_ccs
);
185 if (cc
->__cd_in
!= (iconv_t
) -1)
186 cc
->__cd_out
= iconv_open (external_ccs
, internal_ccs
);
/* Either descriptor comparing equal to (iconv_t) -1 means the
   corresponding iconv_open call failed.  */
188 if (cc
->__cd_in
== (iconv_t
) -1 || cc
->__cd_out
== (iconv_t
) -1)
193 # error "somehow determine this from LC_CTYPE"
197 /* From now on use the wide character callback functions. */
/* The stream's vtable is replaced by the _wide_vtable stored in its
   wide data.  */
198 ((struct _IO_FILE_plus
*) fp
)->vtable
= fp
->_wide_data
->_wide_vtable
;
201 /* Set the mode now. */
208 static enum __codecvt_result
209 do_out (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
210 const wchar_t *from_start
, const wchar_t *from_end
,
211 const wchar_t **from_stop
, char *to_start
, char *to_end
,
214 enum __codecvt_result result
;
217 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
220 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
222 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= to_start
;
223 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= to_end
;
224 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
226 status
= DL_CALL_FCT (gs
->__fct
,
227 (gs
, codecvt
->__cd_out
.__cd
.__data
, &from_start_copy
,
228 (const unsigned char *) from_end
, NULL
,
231 *from_stop
= (wchar_t *) from_start_copy
;
232 *to_stop
= codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
237 case __GCONV_EMPTY_INPUT
:
238 result
= __codecvt_ok
;
241 case __GCONV_FULL_OUTPUT
:
242 case __GCONV_INCOMPLETE_INPUT
:
243 result
= __codecvt_partial
;
247 result
= __codecvt_error
;
251 # ifdef _GLIBCPP_USE_WCHAR_T
253 const char *from_start_copy
= (const char *) from_start
;
254 size_t from_len
= from_end
- from_start
;
255 char *to_start_copy
= to_start
;
256 size_t to_len
= to_end
- to_start
;
257 res
= iconv (codecvt
->__cd_out
, &from_start_copy
, &from_len
,
258 &to_start_copy
, &to_len
);
260 if (res
== 0 || from_len
== 0)
261 result
= __codecvt_ok
;
262 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
263 result
= __codecvt_partial
;
265 result
= __codecvt_error
;
268 /* Decide what to do. */
269 result
= __codecvt_error
;
277 static enum __codecvt_result
278 do_unshift (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
279 char *to_start
, char *to_end
, char **to_stop
)
281 enum __codecvt_result result
;
284 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
288 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= to_start
;
289 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= to_end
;
290 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
292 status
= DL_CALL_FCT (gs
->__fct
,
293 (gs
, codecvt
->__cd_out
.__cd
.__data
, NULL
, NULL
,
294 NULL
, &dummy
, 1, 0));
296 *to_stop
= codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
301 case __GCONV_EMPTY_INPUT
:
302 result
= __codecvt_ok
;
305 case __GCONV_FULL_OUTPUT
:
306 case __GCONV_INCOMPLETE_INPUT
:
307 result
= __codecvt_partial
;
311 result
= __codecvt_error
;
315 # ifdef _GLIBCPP_USE_WCHAR_T
317 char *to_start_copy
= (char *) to_start
;
318 size_t to_len
= to_end
- to_start
;
320 res
= iconv (codecvt
->__cd_out
, NULL
, NULL
, &to_start_copy
, &to_len
);
323 result
= __codecvt_ok
;
324 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
325 result
= __codecvt_partial
;
327 result
= __codecvt_error
;
329 /* Decide what to do. */
330 result
= __codecvt_error
;
338 static enum __codecvt_result
339 do_in (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
340 const char *from_start
, const char *from_end
, const char **from_stop
,
341 wchar_t *to_start
, wchar_t *to_end
, wchar_t **to_stop
)
343 enum __codecvt_result result
;
346 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
349 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
351 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (char *) to_start
;
352 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (char *) to_end
;
353 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
355 status
= DL_CALL_FCT (gs
->__fct
,
356 (gs
, codecvt
->__cd_in
.__cd
.__data
, &from_start_copy
,
357 from_end
, NULL
, &dummy
, 0, 0));
359 *from_stop
= from_start_copy
;
360 *to_stop
= (wchar_t *) codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
;
365 case __GCONV_EMPTY_INPUT
:
366 result
= __codecvt_ok
;
369 case __GCONV_FULL_OUTPUT
:
370 case __GCONV_INCOMPLETE_INPUT
:
371 result
= __codecvt_partial
;
375 result
= __codecvt_error
;
379 # ifdef _GLIBCPP_USE_WCHAR_T
381 const char *from_start_copy
= (const char *) from_start
;
382 size_t from_len
= from_end
- from_start
;
383 char *to_start_copy
= (char *) from_start
;
384 size_t to_len
= to_end
- to_start
;
386 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
387 &to_start_copy
, &to_len
);
390 result
= __codecvt_ok
;
391 else if (to_len
== 0)
392 result
= __codecvt_partial
;
393 else if (from_len
< codecvt
->__codecvt_do_max_length (codecvt
))
394 result
= __codecvt_partial
;
396 result
= __codecvt_error
;
398 /* Decide what to do. */
399 result
= __codecvt_error
;
/* Inspect the first step of the input direction (__cd_in) of CODECVT:
   first its __stateful flag, then whether __min_needed_from and
   __max_needed_from differ.  When the two bounds agree, that common
   value is returned.
   NOTE(review): the values returned for the stateful case, for the
   non-constant case and for the "worst case" branch are not visible in
   this copy of the file -- confirm against upstream.  */
408 do_encoding (struct _IO_codecvt
*codecvt
)
411 /* See whether the encoding is stateful. */
412 if (codecvt
->__cd_in
.__cd
.__steps
[0].__stateful
)
414 /* Fortunately not. Now determine the input bytes for the conversion
415 necessary for each wide character. */
/* Differing minimum and maximum mean the number of input bytes per
   character is not fixed.  */
416 if (codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
417 != codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
)
418 /* Not a constant value. */
/* Fixed width: the minimum equals the maximum here, so either bound is
   the answer.  */
421 return codecvt
->__cd_in
.__cd
.__steps
[0].__min_needed_from
;
423 /* Worst case scenario. */
/* Callback installed as __codecvt_do_always_noconv in __libio_codecvt;
   it takes the codecvt object as its only argument.
   NOTE(review): the body is not visible in this copy of the file, so
   the value it reports cannot be documented from here.  */
430 do_always_noconv (struct _IO_codecvt
*codecvt
)
/* Run the input-direction conversion over [from_start, from_end) into
   a scratch buffer (`to_buf') whose end is taken as &to_buf[max], and
   compute how many input bytes the conversion consumed.
   NOTE(review): the declarations of `to_buf', `status', `dummy', `res'
   and `result', as well as the final return, are not visible in this
   copy of the file -- confirm against upstream.  */
437 do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
438 const char *from_start
, const char *from_end
, _IO_size_t max
)
/* CP is the input cursor that the conversion step advances.  */
442 const unsigned char *cp
= (const unsigned char *) from_start
;
444 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
/* The converted characters are only written to the scratch buffer;
   nothing below reads them back.  */
448 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (char *) to_buf
;
449 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (char *) &to_buf
[max
];
450 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
452 status
= DL_CALL_FCT (gs
->__fct
,
453 (gs
, codecvt
->__cd_in
.__cd
.__data
, &cp
, from_end
,
454 NULL
, &dummy
, 0, 0));
/* The result is the distance the input cursor moved, in bytes.  */
456 result
= cp
- (const unsigned char *) from_start
;
458 # ifdef _GLIBCPP_USE_WCHAR_T
459 const char *from_start_copy
= (const char *) from_start
;
460 size_t from_len
= from_end
- from_start
;
463 char *to_start
= (char *) to_buf
;
/* NOTE(review): the remaining arguments of this iconv call are not
   visible in this copy of the file.  */
465 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
/* As above, the result is the number of input bytes iconv consumed.  */
468 result
= from_start_copy
- (char *) from_start
;
470 /* Decide what to do. */
/* Return the __max_needed_from value of the first step of the input
   direction (__cd_in) of CODECVT.  The other converters in this file
   compare remaining buffer space against this value.  */
480 do_max_length (struct _IO_codecvt
*codecvt
)
483 return codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
;