// wstring_convert implementation -*- C++ -*-

// Copyright (C) 2015-2024 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/locale_conv.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{locale}
 */

#ifndef _LOCALE_CONV_H
#define _LOCALE_CONV_H 1

#if __cplusplus < 201103L
# include <bits/c++0x_warning.h>
#else

#include <streambuf>
#include <bits/stringfwd.h>
#include <bits/allocator.h>
#include <bits/codecvt.h>

42 namespace std
_GLIBCXX_VISIBILITY(default)
44 _GLIBCXX_BEGIN_NAMESPACE_VERSION
51 template<typename _OutStr
, typename _InChar
, typename _Codecvt
,
52 typename _State
, typename _Fn
>
54 __do_str_codecvt(const _InChar
* __first
, const _InChar
* __last
,
55 _OutStr
& __outstr
, const _Codecvt
& __cvt
, _State
& __state
,
56 size_t& __count
, _Fn __fn
)
58 if (__first
== __last
)
65 size_t __outchars
= 0;
66 auto __next
= __first
;
67 const auto __maxlen
= __cvt
.max_length() + 1;
69 codecvt_base::result __result
;
72 __outstr
.resize(__outstr
.size() + (__last
- __next
) * __maxlen
);
73 auto __outnext
= &__outstr
.front() + __outchars
;
74 auto const __outlast
= &__outstr
.back() + 1;
75 __result
= (__cvt
.*__fn
)(__state
, __next
, __last
, __next
,
76 __outnext
, __outlast
, __outnext
);
77 __outchars
= __outnext
- &__outstr
.front();
79 while (__result
== codecvt_base::partial
&& __next
!= __last
80 && ptrdiff_t(__outstr
.size() - __outchars
) < __maxlen
);
82 if (__result
== codecvt_base::error
)
84 __count
= __next
- __first
;
88 // The codecvt facet will only return noconv when the types are
89 // the same, so avoid instantiating basic_string::assign otherwise
90 if _GLIBCXX17_CONSTEXPR (is_same
<typename
_Codecvt::intern_type
,
91 typename
_Codecvt::extern_type
>())
92 if (__result
== codecvt_base::noconv
)
94 __outstr
.assign(__first
, __last
);
95 __count
= __last
- __first
;
99 __outstr
.resize(__outchars
);
100 __count
= __next
- __first
;
104 // Convert narrow character string to wide.
105 template<typename _CharT
, typename _Traits
, typename _Alloc
, typename _State
>
107 __str_codecvt_in(const char* __first
, const char* __last
,
108 basic_string
<_CharT
, _Traits
, _Alloc
>& __outstr
,
109 const codecvt
<_CharT
, char, _State
>& __cvt
,
110 _State
& __state
, size_t& __count
)
112 using _Codecvt
= codecvt
<_CharT
, char, _State
>;
114 = codecvt_base::result
115 (_Codecvt::*)(_State
&, const char*, const char*, const char*&,
116 _CharT
*, _CharT
*, _CharT
*&) const;
117 _ConvFn __fn
= &codecvt
<_CharT
, char, _State
>::in
;
118 return __do_str_codecvt(__first
, __last
, __outstr
, __cvt
, __state
,
122 // As above, but with no __count parameter
123 template<typename _CharT
, typename _Traits
, typename _Alloc
, typename _State
>
125 __str_codecvt_in(const char* __first
, const char* __last
,
126 basic_string
<_CharT
, _Traits
, _Alloc
>& __outstr
,
127 const codecvt
<_CharT
, char, _State
>& __cvt
)
131 return __str_codecvt_in(__first
, __last
, __outstr
, __cvt
, __state
, __n
);
134 // As above, but returns false for partial conversion
135 template<typename _CharT
, typename _Traits
, typename _Alloc
, typename _State
>
137 __str_codecvt_in_all(const char* __first
, const char* __last
,
138 basic_string
<_CharT
, _Traits
, _Alloc
>& __outstr
,
139 const codecvt
<_CharT
, char, _State
>& __cvt
)
143 return __str_codecvt_in(__first
, __last
, __outstr
, __cvt
, __state
, __n
)
144 && (__n
== size_t(__last
- __first
));
147 // Convert wide character string to narrow.
148 template<typename _CharT
, typename _Traits
, typename _Alloc
, typename _State
>
150 __str_codecvt_out(const _CharT
* __first
, const _CharT
* __last
,
151 basic_string
<char, _Traits
, _Alloc
>& __outstr
,
152 const codecvt
<_CharT
, char, _State
>& __cvt
,
153 _State
& __state
, size_t& __count
)
155 using _Codecvt
= codecvt
<_CharT
, char, _State
>;
157 = codecvt_base::result
158 (_Codecvt::*)(_State
&, const _CharT
*, const _CharT
*, const _CharT
*&,
159 char*, char*, char*&) const;
160 _ConvFn __fn
= &codecvt
<_CharT
, char, _State
>::out
;
161 return __do_str_codecvt(__first
, __last
, __outstr
, __cvt
, __state
,
165 // As above, but with no __count parameter
166 template<typename _CharT
, typename _Traits
, typename _Alloc
, typename _State
>
168 __str_codecvt_out(const _CharT
* __first
, const _CharT
* __last
,
169 basic_string
<char, _Traits
, _Alloc
>& __outstr
,
170 const codecvt
<_CharT
, char, _State
>& __cvt
)
174 return __str_codecvt_out(__first
, __last
, __outstr
, __cvt
, __state
, __n
);
177 // As above, but returns false for partial conversions
178 template<typename _CharT
, typename _Traits
, typename _Alloc
, typename _State
>
180 __str_codecvt_out_all(const _CharT
* __first
, const _CharT
* __last
,
181 basic_string
<char, _Traits
, _Alloc
>& __outstr
,
182 const codecvt
<_CharT
, char, _State
>& __cvt
)
186 return __str_codecvt_out(__first
, __last
, __outstr
, __cvt
, __state
, __n
)
187 && (__n
== size_t(__last
- __first
));
#ifdef _GLIBCXX_USE_CHAR8_T

// Convert wide character string to narrow.
//
// Overload for facets whose external character type is char8_t.
// Converts [__first, __last) using __cvt.out() and stores the result in
// __outstr.  __state is the conversion state to use and update; __count
// receives the number of input elements consumed.  Returns false if the
// facet reported a conversion error.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char8_t, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char8_t, _State>& __cvt,
                    _State& __state, size_t& __count)
  {
    using _Codecvt = codecvt<_CharT, char8_t, _State>;
    // Exact pointer-to-member type of codecvt::out, spelled out so that it
    // can be handed to the generic conversion loop.
    using _ConvFn
      = codecvt_base::result
        (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
                      char8_t*, char8_t*, char8_t*&) const;
    _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __fn);
  }

// char8_t overload without a __count parameter: starts from a
// value-initialized conversion state and discards the consumed count.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char8_t, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char8_t, _State>& __cvt)
  {
    _State __state = {};
    size_t __n;
    return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
  }

#endif // _GLIBCXX_USE_CHAR8_T

225 template<typename _Tp
>
228 __attribute__((__nonnull__(2)))
230 _Scoped_ptr(_Tp
* __ptr
) noexcept
234 _Scoped_ptr(_Tp
* __ptr
, const char* __msg
)
238 __throw_logic_error(__msg
);
241 ~_Scoped_ptr() { delete _M_ptr
; }
243 _Scoped_ptr(const _Scoped_ptr
&) = delete;
244 _Scoped_ptr
& operator=(const _Scoped_ptr
&) = delete;
246 __attribute__((__returns_nonnull__
))
247 _Tp
* operator->() const noexcept
{ return _M_ptr
; }
249 _Tp
& operator*() const noexcept
{ return *_M_ptr
; }
256 _GLIBCXX_BEGIN_NAMESPACE_CXX11
258 /// String conversions
259 template<typename _Codecvt
, typename _Elem
= wchar_t,
260 typename _Wide_alloc
= allocator
<_Elem
>,
261 typename _Byte_alloc
= allocator
<char>>
262 class _GLIBCXX17_DEPRECATED wstring_convert
265 typedef basic_string
<char, char_traits
<char>, _Byte_alloc
> byte_string
;
266 typedef basic_string
<_Elem
, char_traits
<_Elem
>, _Wide_alloc
> wide_string
;
267 typedef typename
_Codecvt::state_type state_type
;
268 typedef typename
wide_string::traits_type::int_type int_type
;
270 /// Default constructor.
271 wstring_convert() : _M_cvt(new _Codecvt()) { }
275 * @param __pcvt The facet to use for conversions.
277 * Takes ownership of @p __pcvt and will delete it in the destructor.
280 wstring_convert(_Codecvt
* __pcvt
) : _M_cvt(__pcvt
, "wstring_convert")
283 /** Construct with an initial converstion state.
285 * @param __pcvt The facet to use for conversions.
286 * @param __state Initial conversion state.
288 * Takes ownership of @p __pcvt and will delete it in the destructor.
289 * The object's conversion state will persist between conversions.
291 wstring_convert(_Codecvt
* __pcvt
, state_type __state
)
292 : _M_cvt(__pcvt
, "wstring_convert"),
293 _M_state(__state
), _M_with_cvtstate(true)
296 /** Construct with error strings.
298 * @param __byte_err A string to return on failed conversions.
299 * @param __wide_err A wide string to return on failed conversions.
302 wstring_convert(const byte_string
& __byte_err
,
303 const wide_string
& __wide_err
= wide_string())
304 : _M_cvt(new _Codecvt
),
305 _M_byte_err_string(__byte_err
), _M_wide_err_string(__wide_err
),
306 _M_with_strings(true)
309 ~wstring_convert() = default;
311 // _GLIBCXX_RESOLVE_LIB_DEFECTS
312 // 2176. Special members for wstring_convert and wbuffer_convert
313 wstring_convert(const wstring_convert
&) = delete;
314 wstring_convert
& operator=(const wstring_convert
&) = delete;
316 /// @{ Convert from bytes.
318 from_bytes(char __byte
)
320 char __bytes
[2] = { __byte
};
321 return from_bytes(__bytes
, __bytes
+1);
325 from_bytes(const char* __ptr
)
326 { return from_bytes(__ptr
, __ptr
+char_traits
<char>::length(__ptr
)); }
329 from_bytes(const byte_string
& __str
)
331 auto __ptr
= __str
.data();
332 return from_bytes(__ptr
, __ptr
+ __str
.size());
336 from_bytes(const char* __first
, const char* __last
)
338 if (!_M_with_cvtstate
)
339 _M_state
= state_type();
340 wide_string __out
{ _M_wide_err_string
.get_allocator() };
341 if (__str_codecvt_in(__first
, __last
, __out
, *_M_cvt
, _M_state
,
345 return _M_wide_err_string
;
346 __throw_range_error("wstring_convert::from_bytes");
350 /// @{ Convert to bytes.
352 to_bytes(_Elem __wchar
)
354 _Elem __wchars
[2] = { __wchar
};
355 return to_bytes(__wchars
, __wchars
+1);
359 to_bytes(const _Elem
* __ptr
)
361 return to_bytes(__ptr
, __ptr
+wide_string::traits_type::length(__ptr
));
365 to_bytes(const wide_string
& __wstr
)
367 auto __ptr
= __wstr
.data();
368 return to_bytes(__ptr
, __ptr
+ __wstr
.size());
372 to_bytes(const _Elem
* __first
, const _Elem
* __last
)
374 if (!_M_with_cvtstate
)
375 _M_state
= state_type();
376 byte_string __out
{ _M_byte_err_string
.get_allocator() };
377 if (__str_codecvt_out(__first
, __last
, __out
, *_M_cvt
, _M_state
,
381 return _M_byte_err_string
;
382 __throw_range_error("wstring_convert::to_bytes");
386 // _GLIBCXX_RESOLVE_LIB_DEFECTS
387 // 2174. wstring_convert::converted() should be noexcept
388 /// The number of elements successfully converted in the last conversion.
389 size_t converted() const noexcept
{ return _M_count
; }
391 /// The final conversion state of the last conversion.
392 state_type
state() const { return _M_state
; }
395 __detail::_Scoped_ptr
<_Codecvt
> _M_cvt
;
396 byte_string _M_byte_err_string
;
397 wide_string _M_wide_err_string
;
398 state_type _M_state
= state_type();
400 bool _M_with_cvtstate
= false;
401 bool _M_with_strings
= false;
404 _GLIBCXX_END_NAMESPACE_CXX11
406 /// Buffer conversions
407 template<typename _Codecvt
, typename _Elem
= wchar_t,
408 typename _Tr
= char_traits
<_Elem
>>
409 class _GLIBCXX17_DEPRECATED wbuffer_convert
410 : public basic_streambuf
<_Elem
, _Tr
>
412 typedef basic_streambuf
<_Elem
, _Tr
> _Wide_streambuf
;
415 typedef typename
_Codecvt::state_type state_type
;
417 /// Default constructor.
418 wbuffer_convert() : wbuffer_convert(nullptr) { }
422 * @param __bytebuf The underlying byte stream buffer.
423 * @param __pcvt The facet to use for conversions.
424 * @param __state Initial conversion state.
426 * Takes ownership of @p __pcvt and will delete it in the destructor.
429 wbuffer_convert(streambuf
* __bytebuf
, _Codecvt
* __pcvt
= new _Codecvt
,
430 state_type __state
= state_type())
431 : _M_buf(__bytebuf
), _M_cvt(__pcvt
, "wbuffer_convert"),
432 _M_state(__state
), _M_always_noconv(_M_cvt
->always_noconv())
436 this->setp(_M_put_area
, _M_put_area
+ _S_buffer_length
);
437 this->setg(_M_get_area
+ _S_putback_length
,
438 _M_get_area
+ _S_putback_length
,
439 _M_get_area
+ _S_putback_length
);
443 ~wbuffer_convert() = default;
445 // _GLIBCXX_RESOLVE_LIB_DEFECTS
446 // 2176. Special members for wstring_convert and wbuffer_convert
447 wbuffer_convert(const wbuffer_convert
&) = delete;
448 wbuffer_convert
& operator=(const wbuffer_convert
&) = delete;
450 streambuf
* rdbuf() const noexcept
{ return _M_buf
; }
453 rdbuf(streambuf
*__bytebuf
) noexcept
455 auto __prev
= _M_buf
;
460 /// The conversion state following the last conversion.
461 state_type
state() const noexcept
{ return _M_state
; }
466 { return _M_buf
&& _M_conv_put() && !_M_buf
->pubsync() ? 0 : -1; }
468 typename
_Wide_streambuf::int_type
469 overflow(typename
_Wide_streambuf::int_type __out
)
471 if (!_M_buf
|| !_M_conv_put())
473 else if (!_Tr::eq_int_type(__out
, _Tr::eof()))
474 return this->sputc(__out
);
475 return _Tr::not_eof(__out
);
478 typename
_Wide_streambuf::int_type
484 if (this->gptr() < this->egptr() || (_M_buf
&& _M_conv_get()))
485 return _Tr::to_int_type(*this->gptr());
491 xsputn(const typename
_Wide_streambuf::char_type
* __s
, streamsize __n
)
493 if (!_M_buf
|| __n
== 0)
495 streamsize __done
= 0;
498 auto __nn
= std::min
<streamsize
>(this->epptr() - this->pptr(),
500 _Tr::copy(this->pptr(), __s
+ __done
, __nn
);
503 } while (__done
< __n
&& _M_conv_put());
508 // fill the get area from converted contents of the byte stream buffer
512 const streamsize __pb1
= this->gptr() - this->eback();
513 const streamsize __pb2
= _S_putback_length
;
514 const streamsize __npb
= std::min(__pb1
, __pb2
);
516 _Tr::move(_M_get_area
+ _S_putback_length
- __npb
,
517 this->gptr() - __npb
, __npb
);
519 streamsize __nbytes
= sizeof(_M_get_buf
) - _M_unconv
;
520 __nbytes
= std::min(__nbytes
, _M_buf
->in_avail());
523 __nbytes
= _M_buf
->sgetn(_M_get_buf
+ _M_unconv
, __nbytes
);
526 __nbytes
+= _M_unconv
;
528 // convert _M_get_buf into _M_get_area
530 _Elem
* __outbuf
= _M_get_area
+ _S_putback_length
;
531 _Elem
* __outnext
= __outbuf
;
532 const char* __bnext
= _M_get_buf
;
534 codecvt_base::result __result
;
535 if (_M_always_noconv
)
536 __result
= codecvt_base::noconv
;
539 _Elem
* __outend
= _M_get_area
+ _S_buffer_length
;
541 __result
= _M_cvt
->in(_M_state
,
542 __bnext
, __bnext
+ __nbytes
, __bnext
,
543 __outbuf
, __outend
, __outnext
);
546 if (__result
== codecvt_base::noconv
)
548 // cast is safe because noconv means _Elem is same type as char
549 auto __get_buf
= reinterpret_cast<const _Elem
*>(_M_get_buf
);
550 _Tr::copy(__outbuf
, __get_buf
, __nbytes
);
555 if ((_M_unconv
= _M_get_buf
+ __nbytes
- __bnext
))
556 char_traits
<char>::move(_M_get_buf
, __bnext
, _M_unconv
);
558 this->setg(__outbuf
, __outbuf
, __outnext
);
560 return __result
!= codecvt_base::error
;
569 _M_put(const char* __p
, streamsize __n
)
571 if (_M_buf
->sputn(__p
, __n
) < __n
)
576 // convert the put area and write to the byte stream buffer
580 _Elem
* const __first
= this->pbase();
581 const _Elem
* const __last
= this->pptr();
582 const streamsize __pending
= __last
- __first
;
584 if (_M_always_noconv
)
585 return _M_put(__first
, __pending
);
587 char __outbuf
[2 * _S_buffer_length
];
589 const _Elem
* __next
= __first
;
590 const _Elem
* __start
;
594 char* __outnext
= __outbuf
;
595 char* const __outlast
= __outbuf
+ sizeof(__outbuf
);
596 auto __result
= _M_cvt
->out(_M_state
, __next
, __last
, __next
,
597 __outnext
, __outlast
, __outnext
);
598 if (__result
== codecvt_base::error
)
600 else if (__result
== codecvt_base::noconv
)
601 return _M_put(__next
, __pending
);
603 if (!_M_put(__outbuf
, __outnext
- __outbuf
))
606 while (__next
!= __last
&& __next
!= __start
);
608 if (__next
!= __last
)
609 _Tr::move(__first
, __next
, __last
- __next
);
611 this->pbump(__first
- __next
);
612 return __next
!= __first
;
616 __detail::_Scoped_ptr
<_Codecvt
> _M_cvt
;
619 static const streamsize _S_buffer_length
= 32;
620 static const streamsize _S_putback_length
= 3;
621 _Elem _M_put_area
[_S_buffer_length
];
622 _Elem _M_get_area
[_S_buffer_length
];
623 streamsize _M_unconv
= 0;
624 char _M_get_buf
[_S_buffer_length
-_S_putback_length
];
625 bool _M_always_noconv
;
630 _GLIBCXX_END_NAMESPACE_VERSION
633 #endif // __cplusplus
635 #endif /* _LOCALE_CONV_H */