1 // std::codecvt implementation details, GNU version -*- C++ -*-
3 // Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010
4 // Free Software Foundation, Inc.
6 // This file is part of the GNU ISO C++ Library. This library is free
7 // software; you can redistribute it and/or modify it under the
8 // terms of the GNU General Public License as published by the
9 // Free Software Foundation; either version 3, or (at your option)
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
17 // Under Section 7 of GPL version 3, you are granted additional
18 // permissions described in the GCC Runtime Library Exception, version
19 // 3.1, as published by the Free Software Foundation.
21 // You should have received a copy of the GNU General Public License and
22 // a copy of the GCC Runtime Library Exception along with this program;
23 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 // <http://www.gnu.org/licenses/>.
27 // ISO C++ 14882: 22.2.1.5 - Template class codecvt
30 // Written by Benjamin Kosnik <bkoz@redhat.com>
33 #include <cstdlib> // For MB_CUR_MAX
34 #include <climits> // For MB_LEN_MAX
35 #include <bits/c++locale_internal.h>
37 _GLIBCXX_BEGIN_NAMESPACE(std
)
40 #ifdef _GLIBCXX_USE_WCHAR_T
// codecvt<wchar_t, char, mbstate_t>::do_out
//
// Converts the wide characters in [__from, __from_end) to their external
// multibyte form, writing into [__to, __to_end).
//
//   __state      conversion state; passed to wcsnrtombs and overwritten
//                from __tmp_state on the paths shown below.
//   __from_next  set to __from on entry to the loop, then advanced as
//                input is consumed.
//   __to_next    set to __to on entry to the loop, then advanced as
//                output is produced.
//
// Strategy visible here: the input is processed in chunks delimited by
// L'\0' (found with wmemchr).  Each chunk goes through wcsnrtombs; the
// single character at __from_next that follows is converted with wcrtomb
// into a local MB_LEN_MAX buffer and compared against the remaining output
// space before being copied.
//
// NOTE(review): this listing appears to have lost lines (return type,
// braces, the declaration of and assignments to __ret, #endif lines, the
// final return).  The comments describe only the statements that are
// visible.
42 codecvt
<wchar_t, char, mbstate_t>::
43 do_out(state_type
& __state
, const intern_type
* __from
,
44 const intern_type
* __from_end
, const intern_type
*& __from_next
,
45 extern_type
* __to
, extern_type
* __to_end
,
46 extern_type
*& __to_next
) const
// Copy of the incoming state; the error path below replays the conversion
// character by character starting from this copy.
49 state_type
__tmp_state(__state
);
// NOTE(review): presumably installs the facet's C locale for the calls
// below and keeps the previous one in __old -- confirm against the
// definition of __uselocale.
51 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
52 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
55 // wcsnrtombs is *very* fast but stops if it encounters NUL characters:
56 // in that case we fall back to wcrtomb and then continue, in a loop.
57 // NB: wcsnrtombs is a GNU extension
58 for (__from_next
= __from
, __to_next
= __to
;
59 __from_next
< __from_end
&& __to_next
< __to_end
// The current chunk ends at the first L'\0' in the remaining input, or at
// __from_end when wmemchr finds none.
62 const intern_type
* __from_chunk_end
= wmemchr(__from_next
, L
'\0',
63 __from_end
- __from_next
);
64 if (!__from_chunk_end
)
65 __from_chunk_end
= __from_end
;
// Bulk conversion of the chunk.  wcsnrtombs is handed &__from_next and
// &__state, so it can update both.
68 const size_t __conv
= wcsnrtombs(__to_next
, &__from_next
,
69 __from_chunk_end
- __from_next
,
70 __to_end
- __to_next
, &__state
);
71 if (__conv
== static_cast<size_t>(-1))
73 // In case of error, in order to stop at the exact place we
74 // have to start again from the beginning with a series of
// wcrtomb calls, one wide character at a time, using the saved state.
76 for (; __from
< __from_next
; ++__from
)
77 __to_next
+= wcrtomb(__to_next
, *__from
, &__tmp_state
);
78 __state
= __tmp_state
;
// __from_next is non-null and short of the chunk end: wcsnrtombs stopped
// before consuming the whole chunk.
// NOTE(review): the statements of this branch are not visible here.
81 else if (__from_next
&& __from_next
< __from_chunk_end
)
88 __from_next
= __from_chunk_end
;
// Input remains and __ret is still ok: convert the single character at
// __from_next through a temporary buffer, working on a copy of the state.
92 if (__from_next
< __from_end
&& __ret
== ok
)
94 extern_type __buf
[MB_LEN_MAX
];
95 __tmp_state
= __state
;
96 const size_t __conv2
= wcrtomb(__buf
, *__from_next
, &__tmp_state
);
// The encoded length is checked against the space left in the output
// before anything is copied.
97 if (__conv2
> static_cast<size_t>(__to_end
- __to_next
))
101 memcpy(__to_next
, __buf
, __conv2
);
// Commit the state used for the single-character conversion and advance
// the output position past the copied bytes.
102 __state
= __tmp_state
;
103 __to_next
+= __conv2
;
109 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
// codecvt<wchar_t, char, mbstate_t>::do_in
//
// Converts the external multibyte characters in [__from, __from_end) to
// wide characters, writing into [__to, __to_end).
//
//   __state      conversion state; passed to mbsnrtowcs and overwritten
//                from __tmp_state on the error path.
//   __from_next  set to __from on entry to the loop, then advanced as
//                input is consumed.
//   __to_next    set to __to on entry to the loop, then advanced as
//                output is produced.
//
// Strategy visible here: the input is processed in chunks delimited by
// '\0' bytes (found with memchr).  Each chunk goes through mbsnrtowcs, and
// the code after the chunk stores an L'\0' directly into the output.
//
// NOTE(review): this listing appears to have lost lines (return type,
// braces, the declaration of and assignments to __ret, some call
// arguments, #endif lines, the final return).  The comments describe only
// the statements that are visible.
117 codecvt
<wchar_t, char, mbstate_t>::
118 do_in(state_type
& __state
, const extern_type
* __from
,
119 const extern_type
* __from_end
, const extern_type
*& __from_next
,
120 intern_type
* __to
, intern_type
* __to_end
,
121 intern_type
*& __to_next
) const
// Copy of the incoming state, assigned back to __state on the error path.
124 state_type
__tmp_state(__state
);
// NOTE(review): presumably installs the facet's C locale for the calls
// below and keeps the previous one in __old -- confirm against the
// definition of __uselocale.
126 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
127 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
130 // mbsnrtowcs is *very* fast but stops if it encounters NUL characters:
131 // in that case we store a L'\0' and then continue, in a loop.
132 // NB: mbsnrtowcs is a GNU extension
133 for (__from_next
= __from
, __to_next
= __to
;
134 __from_next
< __from_end
&& __to_next
< __to_end
// The current chunk ends at the first '\0' byte located by memchr, or at
// __from_end when there is none.
137 const extern_type
* __from_chunk_end
;
138 __from_chunk_end
= static_cast<const extern_type
*>(memchr(__from_next
, '\0',
141 if (!__from_chunk_end
)
142 __from_chunk_end
= __from_end
;
// Remember where this chunk started: the error path restarts from __from.
144 __from
= __from_next
;
// Bulk conversion of the chunk.  mbsnrtowcs is handed &__from_next and
// &__state, so it can update both.
145 size_t __conv
= mbsnrtowcs(__to_next
, &__from_next
,
146 __from_chunk_end
- __from_next
,
147 __to_end
- __to_next
, &__state
);
148 if (__conv
== static_cast<size_t>(-1))
150 // In case of error, in order to stop at the exact place we
151 // have to start again from the beginning with a series of
// mbrtowc calls, one output character per iteration, each advancing
// __from by the number of bytes consumed.
153 for (;; ++__to_next
, __from
+= __conv
)
155 __conv
= mbrtowc(__to_next
, __from
, __from_end
- __from
,
// (size_t)-1 and (size_t)-2 are the failure results tested for here; in
// the C library's mbrtowc contract they mean an invalid and an incomplete
// multibyte sequence respectively.
157 if (__conv
== static_cast<size_t>(-1)
158 || __conv
== static_cast<size_t>(-2))
// Report the exact stopping position and the matching state.
161 __from_next
= __from
;
162 __state
= __tmp_state
;
// __from_next is non-null and short of the chunk end: mbsnrtowcs stopped
// before consuming the whole chunk.
165 else if (__from_next
&& __from_next
< __from_chunk_end
)
167 // It is unclear what to return in this case (see DR 382).
173 __from_next
= __from_chunk_end
;
// Input remains and __ret is still ok: when there is room, store an L'\0'
// into the output and advance __to_next.
177 if (__from_next
< __from_end
&& __ret
== ok
)
179 if (__to_next
< __to_end
)
181 // XXX Probably wrong for stateful encodings
182 __tmp_state
= __state
;
184 *__to_next
++ = L
'\0';
191 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
// codecvt<wchar_t, char, mbstate_t>::do_encoding
//
// Declared const throw(), i.e. it takes no arguments and promises not to
// throw.
//
// NOTE(review): only the locale switch is visible in this listing.  The
// statements that compute and return the result, the braces and the
// #endif lines are missing, so the returned value cannot be documented
// from here.
199 codecvt
<wchar_t, char, mbstate_t>::
200 do_encoding() const throw()
202 // XXX This implementation assumes that the encoding is
203 // stateless and is either single-byte or variable-width.
// NOTE(review): presumably installs the facet's C locale while the
// encoding is queried and keeps the previous one in __old -- confirm
// against the definition of __uselocale.
205 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
206 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
210 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
// codecvt<wchar_t, char, mbstate_t>::do_max_length
//
// Declared const throw(), i.e. it takes no arguments and promises not to
// throw.  The visible code initialises the result from MB_CUR_MAX after
// the __uselocale call made under the glibc version guard.
//
// NOTE(review): the braces, the #endif lines and the return statement are
// not visible in this listing.
217 codecvt
<wchar_t, char, mbstate_t>::
218 do_max_length() const throw()
// NOTE(review): presumably installs the facet's C locale so that
// MB_CUR_MAX below reflects it, keeping the previous locale in __old --
// confirm against the definition of __uselocale.
220 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
221 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
223 // XXX Probably wrong for stateful encodings.
224 int __ret
= MB_CUR_MAX
;
225 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
// codecvt<wchar_t, char, mbstate_t>::do_length
//
// Walks the external multibyte sequence [__from, __end) while __max is
// non-zero and accumulates into __ret the number of bytes advanced
// (__from - __tmp_from for each chunk).
//
//   __state  conversion state; passed to the conversion calls and
//            overwritten from __tmp_state on the error path.
//   __from   start of the external sequence; advanced locally.
//   __end    end of the external sequence.
//   __max    loop bound tested in the while condition.
//            NOTE(review): the statements that decrement it are not
//            visible in this listing.
//
// Strategy visible here: the input is processed in chunks delimited by
// '\0' bytes (found with memchr).  Each chunk goes through mbsnrtowcs into
// a scratch buffer obtained from __builtin_alloca; on error the chunk is
// re-scanned with mbrtowc to find the exact stopping point.
//
// NOTE(review): this listing appears to have lost lines (return type,
// braces, the declaration of __ret, some call arguments, #endif lines, the
// final return).  The comments describe only the statements that are
// visible.
232 codecvt
<wchar_t, char, mbstate_t>::
233 do_length(state_type
& __state
, const extern_type
* __from
,
234 const extern_type
* __end
, size_t __max
) const
// Copy of the incoming state, assigned back to __state on the error path.
237 state_type
__tmp_state(__state
);
// NOTE(review): presumably installs the facet's C locale for the calls
// below and keeps the previous one in __old -- confirm against the
// definition of __uselocale.
239 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
240 __c_locale __old
= __uselocale(_M_c_locale_codecvt
);
243 // mbsnrtowcs is *very* fast but stops if it encounters NUL characters:
244 // in that case we advance past it and then continue, in a loop.
245 // NB: mbsnrtowcs is a GNU extension
247 // A dummy internal buffer is needed in order for mbsnrtowcs to consider
248 // its fourth parameter (it wouldn't with NULL as first parameter).
249 wchar_t* __to
= static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
251 while (__from
< __end
&& __max
)
// The current chunk ends at the first '\0' byte located by memchr, or at
// __end when there is none.
253 const extern_type
* __from_chunk_end
;
254 __from_chunk_end
= static_cast<const extern_type
*>(memchr(__from
, '\0',
257 if (!__from_chunk_end
)
258 __from_chunk_end
= __end
;
// Remember the chunk start: it is the restart point on error and the
// base for the byte count added to __ret.
260 const extern_type
* __tmp_from
= __from
;
// Bulk conversion of the chunk into the scratch buffer.  mbsnrtowcs is
// handed &__from, so it can advance __from.
261 size_t __conv
= mbsnrtowcs(__to
, &__from
,
262 __from_chunk_end
- __from
,
264 if (__conv
== static_cast<size_t>(-1))
266 // In case of error, in order to stop at the exact place we
267 // have to start again from the beginning with a series of
// mbrtowc calls (null destination), each advancing __from by the number
// of bytes consumed.
269 for (__from
= __tmp_from
;; __from
+= __conv
)
271 __conv
= mbrtowc(0, __from
, __end
- __from
,
// (size_t)-1 and (size_t)-2 are the failure results tested for here; in
// the C library's mbrtowc contract they mean an invalid and an incomplete
// multibyte sequence respectively.
273 if (__conv
== static_cast<size_t>(-1)
274 || __conv
== static_cast<size_t>(-2))
// Publish the matching state and count the bytes covered up to the stop.
277 __state
= __tmp_state
;
278 __ret
+= __from
- __tmp_from
;
282 __from
= __from_chunk_end
;
// Count the bytes of the chunk just processed.
284 __ret
+= __from
- __tmp_from
;
// Input remains and __max is still non-zero.
// NOTE(review): apart from the state copy below, the statements of this
// branch are not visible in this listing.
287 if (__from
< __end
&& __max
)
289 // XXX Probably wrong for stateful encodings
290 __tmp_state
= __state
;
297 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
305 _GLIBCXX_END_NAMESPACE