/* Copyright (C) 1999, 2000 Free Software Foundation, Inc.
   This file is part of the GNU IO Library.

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2, or (at
   your option) any later version.

   This library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this library; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA.

   As a special exception, if you link this library with files
   compiled with a GNU compiler to produce an executable, this does
   not cause the resulting executable to be covered by the GNU General
   Public License.  This exception does not however invalidate any
   other reasons why the executable file might be covered by the GNU
   General Public License.  */
#include <stdlib.h>
#include <string.h>

#ifdef _LIBC
# include <langinfo.h>
# include <locale/localeinfo.h>
# include <wcsmbs/wcsmbsload.h>
# include <iconv/gconv_int.h>
# include <shlib-compat.h>
#endif
43 /* Prototypes of libio's codecvt functions. */
44 static enum __codecvt_result
do_out (struct _IO_codecvt
*codecvt
,
46 const wchar_t *from_start
,
47 const wchar_t *from_end
,
48 const wchar_t **from_stop
, char *to_start
,
49 char *to_end
, char **to_stop
);
50 static enum __codecvt_result
do_unshift (struct _IO_codecvt
*codecvt
,
51 __mbstate_t
*statep
, char *to_start
,
52 char *to_end
, char **to_stop
);
53 static enum __codecvt_result
do_in (struct _IO_codecvt
*codecvt
,
55 const char *from_start
,
57 const char **from_stop
, wchar_t *to_start
,
58 wchar_t *to_end
, wchar_t **to_stop
);
59 static int do_encoding (struct _IO_codecvt
*codecvt
);
60 static int do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
61 const char *from_start
,
62 const char *from_end
, _IO_size_t max
);
63 static int do_max_length (struct _IO_codecvt
*codecvt
);
64 static int do_always_noconv (struct _IO_codecvt
*codecvt
);
67 /* The functions used in `codecvt' for libio are always the same. */
68 struct _IO_codecvt __libio_codecvt
=
70 .__codecvt_destr
= NULL
, /* Destructor, never used. */
71 .__codecvt_do_out
= do_out
,
72 .__codecvt_do_unshift
= do_unshift
,
73 .__codecvt_do_in
= do_in
,
74 .__codecvt_do_encoding
= do_encoding
,
75 .__codecvt_do_always_noconv
= do_always_noconv
,
76 .__codecvt_do_length
= do_length
,
77 .__codecvt_do_max_length
= do_max_length
#ifdef _LIBC
/* Transliteration descriptor attached to the output conversion step of
   wide-oriented streams.  Guarded because `__gconv_transliterate' is a
   gconv internal.  NOTE(review): assumed to exist only in the _LIBC
   build -- confirm.  */
static struct __gconv_trans_data libio_translit =
{
  .__trans_fct = __gconv_transliterate
};
#endif
89 /* Return orientation of stream. If mode is nonzero try to change
90 the orientation first. */
97 /* Normalize the value. */
98 mode
= mode
< 0 ? -1 : (mode
== 0 ? 0 : 1);
101 /* The caller simply wants to know about the current orientation. */
104 #if defined SHARED && defined _LIBC \
105 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
106 if (__builtin_expect (&_IO_stdin_used
== NULL
, 0)
107 && (fp
== _IO_stdin
|| fp
== _IO_stdout
|| fp
== _IO_stderr
))
108 /* This is for a stream in the glibc 2.0 format. */
113 /* The orientation already has been determined. */
116 /* Set the orientation appropriately. */
119 struct _IO_codecvt
*cc
= fp
->_codecvt
= &fp
->_wide_data
->_codecvt
;
121 fp
->_wide_data
->_IO_read_ptr
= fp
->_wide_data
->_IO_read_end
;
122 fp
->_wide_data
->_IO_write_ptr
= fp
->_wide_data
->_IO_write_base
;
124 /* Get the character conversion functions based on the currently
125 selected locale for LC_CTYPE. */
128 struct gconv_fcts fcts
;
130 /* Clear the state. We start all over again. */
131 memset (&fp
->_wide_data
->_IO_state
, '\0', sizeof (__mbstate_t
));
132 memset (&fp
->_wide_data
->_IO_last_state
, '\0', sizeof (__mbstate_t
));
134 __wcsmbs_clone_conv (&fcts
);
136 /* The functions are always the same. */
137 *cc
= __libio_codecvt
;
139 cc
->__cd_in
.__cd
.__nsteps
= 1; /* Only one step allowed. */
140 cc
->__cd_in
.__cd
.__steps
= fcts
.towc
;
142 cc
->__cd_in
.__cd
.__data
[0].__invocation_counter
= 0;
143 cc
->__cd_in
.__cd
.__data
[0].__internal_use
= 1;
144 cc
->__cd_in
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
145 cc
->__cd_in
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
147 /* XXX For now no transliteration. */
148 cc
->__cd_in
.__cd
.__data
[0].__trans
= NULL
;
150 cc
->__cd_out
.__cd
.__nsteps
= 1; /* Only one step allowed. */
151 cc
->__cd_out
.__cd
.__steps
= fcts
.tomb
;
153 cc
->__cd_out
.__cd
.__data
[0].__invocation_counter
= 0;
154 cc
->__cd_out
.__cd
.__data
[0].__internal_use
= 1;
155 cc
->__cd_out
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
156 cc
->__cd_out
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
158 /* And now the transliteration. */
160 cc
->__cd_out
.__cd
.__data
[0].__trans
= &libio_translit
;
162 cc
->__cd_out
.__cd
.__data
[0].__trans
= NULL
;
166 # ifdef _GLIBCPP_USE_WCHAR_T
168 /* Determine internal and external character sets.
170 XXX For now we make our life easy: we assume a fixed internal
171 encoding (as most sane systems have; hi HP/UX!). If somebody
172 cares about systems which changing internal charsets they
173 should come up with a solution for the determination of the
174 currently used internal character set. */
175 const char *internal_ccs
= _G_INTERNAL_CCS
;
176 const char *external_ccs
= NULL
;
178 # ifdef HAVE_NL_LANGINFO
179 external_ccs
= nl_langinfo (CODESET
);
181 if (external_ccs
== NULL
)
182 external_ccs
= "ISO-8859-1";
184 cc
->__cd_in
= iconv_open (internal_ccs
, external_ccs
);
185 if (cc
->__cd_in
!= (iconv_t
) -1)
186 cc
->__cd_out
= iconv_open (external_ccs
, internal_ccs
);
188 if (cc
->__cd_in
== (iconv_t
) -1 || cc
->__cd_out
== (iconv_t
) -1)
193 # error "somehow determine this from LC_CTYPE"
197 /* From now on use the wide character callback functions. */
198 ((struct _IO_FILE_plus
*) fp
)->vtable
= fp
->_wide_data
->_wide_vtable
;
201 /* Set the mode now. */
/* Export `fwide' as a weak alias of `_IO_fwide'.  The guard keeps the
   file compilable where no `weak_alias' macro is provided.
   NOTE(review): assumes non-glibc builds leave it undefined -- confirm.  */
#ifdef weak_alias
weak_alias (_IO_fwide, fwide)
#endif
212 static enum __codecvt_result
213 do_out (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
214 const wchar_t *from_start
, const wchar_t *from_end
,
215 const wchar_t **from_stop
, char *to_start
, char *to_end
,
218 enum __codecvt_result result
;
221 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
224 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
226 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= to_start
;
227 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= to_end
;
228 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
230 status
= DL_CALL_FCT (gs
->__fct
,
231 (gs
, codecvt
->__cd_out
.__cd
.__data
, &from_start_copy
,
232 (const unsigned char *) from_end
, NULL
,
235 *from_stop
= (wchar_t *) from_start_copy
;
236 *to_stop
= codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
241 case __GCONV_EMPTY_INPUT
:
242 result
= __codecvt_ok
;
245 case __GCONV_FULL_OUTPUT
:
246 case __GCONV_INCOMPLETE_INPUT
:
247 result
= __codecvt_partial
;
251 result
= __codecvt_error
;
255 # ifdef _GLIBCPP_USE_WCHAR_T
257 const char *from_start_copy
= (const char *) from_start
;
258 size_t from_len
= from_end
- from_start
;
259 char *to_start_copy
= to_start
;
260 size_t to_len
= to_end
- to_start
;
261 res
= iconv (codecvt
->__cd_out
, &from_start_copy
, &from_len
,
262 &to_start_copy
, &to_len
);
264 if (res
== 0 || from_len
== 0)
265 result
= __codecvt_ok
;
266 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
267 result
= __codecvt_partial
;
269 result
= __codecvt_error
;
272 /* Decide what to do. */
273 result
= __codecvt_error
;
281 static enum __codecvt_result
282 do_unshift (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
283 char *to_start
, char *to_end
, char **to_stop
)
285 enum __codecvt_result result
;
288 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
292 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= to_start
;
293 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= to_end
;
294 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
296 status
= DL_CALL_FCT (gs
->__fct
,
297 (gs
, codecvt
->__cd_out
.__cd
.__data
, NULL
, NULL
,
298 NULL
, &dummy
, 1, 0));
300 *to_stop
= codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
305 case __GCONV_EMPTY_INPUT
:
306 result
= __codecvt_ok
;
309 case __GCONV_FULL_OUTPUT
:
310 case __GCONV_INCOMPLETE_INPUT
:
311 result
= __codecvt_partial
;
315 result
= __codecvt_error
;
319 # ifdef _GLIBCPP_USE_WCHAR_T
321 char *to_start_copy
= (char *) to_start
;
322 size_t to_len
= to_end
- to_start
;
324 res
= iconv (codecvt
->__cd_out
, NULL
, NULL
, &to_start_copy
, &to_len
);
327 result
= __codecvt_ok
;
328 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
329 result
= __codecvt_partial
;
331 result
= __codecvt_error
;
333 /* Decide what to do. */
334 result
= __codecvt_error
;
342 static enum __codecvt_result
343 do_in (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
344 const char *from_start
, const char *from_end
, const char **from_stop
,
345 wchar_t *to_start
, wchar_t *to_end
, wchar_t **to_stop
)
347 enum __codecvt_result result
;
350 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
353 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
355 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (char *) to_start
;
356 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (char *) to_end
;
357 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
359 status
= DL_CALL_FCT (gs
->__fct
,
360 (gs
, codecvt
->__cd_in
.__cd
.__data
, &from_start_copy
,
361 from_end
, NULL
, &dummy
, 0, 0));
363 *from_stop
= from_start_copy
;
364 *to_stop
= (wchar_t *) codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
;
369 case __GCONV_EMPTY_INPUT
:
370 result
= __codecvt_ok
;
373 case __GCONV_FULL_OUTPUT
:
374 case __GCONV_INCOMPLETE_INPUT
:
375 result
= __codecvt_partial
;
379 result
= __codecvt_error
;
383 # ifdef _GLIBCPP_USE_WCHAR_T
385 const char *from_start_copy
= (const char *) from_start
;
386 size_t from_len
= from_end
- from_start
;
387 char *to_start_copy
= (char *) from_start
;
388 size_t to_len
= to_end
- to_start
;
390 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
391 &to_start_copy
, &to_len
);
394 result
= __codecvt_ok
;
395 else if (to_len
== 0)
396 result
= __codecvt_partial
;
397 else if (from_len
< codecvt
->__codecvt_do_max_length (codecvt
))
398 result
= __codecvt_partial
;
400 result
= __codecvt_error
;
402 /* Decide what to do. */
403 result
= __codecvt_error
;
/* Describe the external encoding handled by CODECVT.

   Returns -1 if the encoding is stateful, 0 if the number of bytes per
   wide character is not constant, and otherwise that constant number of
   bytes.  Without the gconv step information the worst case (-1) is
   reported.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  /* See whether the encoding is stateful.  */
  if (codecvt->__cd_in.__cd.__steps[0].__stateful)
    return -1;

  /* Fortunately not.  Now determine the input bytes for the conversion
     necessary for each wide character.  */
  if (codecvt->__cd_in.__cd.__steps[0].__min_needed_from
      != codecvt->__cd_in.__cd.__steps[0].__max_needed_from)
    /* Not a constant value.  */
    return 0;

  return codecvt->__cd_in.__cd.__steps[0].__min_needed_from;
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
/* Tell whether converting with CODECVT is always a no-op.  Returns 0:
   the streams set up in this file always convert between the external
   and the wide encoding.
   NOTE(review): the original body was not available for review; this
   is the minimal implementation consistent with the rest of the
   file -- confirm.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  return 0;
}
441 do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
442 const char *from_start
, const char *from_end
, _IO_size_t max
)
446 const unsigned char *cp
= (const unsigned char *) from_start
;
448 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
452 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (char *) to_buf
;
453 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (char *) &to_buf
[max
];
454 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
456 status
= DL_CALL_FCT (gs
->__fct
,
457 (gs
, codecvt
->__cd_in
.__cd
.__data
, &cp
, from_end
,
458 NULL
, &dummy
, 0, 0));
460 result
= cp
- (const unsigned char *) from_start
;
462 # ifdef _GLIBCPP_USE_WCHAR_T
463 const char *from_start_copy
= (const char *) from_start
;
464 size_t from_len
= from_end
- from_start
;
467 char *to_start
= (char *) to_buf
;
469 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
472 result
= from_start_copy
- (char *) from_start
;
474 /* Decide what to do. */
/* Return the maximum number of external bytes one wide character can
   need.  With gconv this is the `__max_needed_from' value of the input
   conversion step of CODECVT.
   NOTE(review): the MB_CUR_MAX fallback for builds without gconv is an
   assumption; it is needed because the iconv branches of this file call
   through `__codecvt_do_max_length' -- confirm.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  return codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  return MB_CUR_MAX;
#endif
}