1 // Locale support (codecvt) -*- C++ -*-
3 // Copyright (C) 2000-2024 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file bits/codecvt.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{locale}
31 // ISO C++ 14882: 22.2.1.5 Template class codecvt
34 // Written by Benjamin Kosnik <bkoz@redhat.com>
39 #pragma GCC system_header
41 #include <bits/c++config.h>
42 #include <bits/locale_classes.h> // locale::facet
44 namespace std
_GLIBCXX_VISIBILITY(default)
46 _GLIBCXX_BEGIN_NAMESPACE_VERSION
48 /// Empty base class for codecvt facet [22.2.1.5].
62 * @brief Common base for codecvt functions.
64 * This template class provides implementations of the public functions
65 * that forward to the protected virtual functions.
67 * This template also provides abstract stubs for the protected virtual functions.
// Class template __codecvt_abstract_base, parameterised on the internal
// character type, the external character type and the conversion-state type.
// NOTE(review): brace-only and access-specifier lines are absent from this
// listing, so the class body delimiters are not shown here.
70 template<typename _InternT
, typename _ExternT
, typename _StateT
>
71 class __codecvt_abstract_base
// Public bases: locale::facet and codecvt_base.
72 : public locale::facet
, public codecvt_base
// Member typedefs: `result` aliases codecvt_base::result; the remaining three
// re-export the template parameters as intern_type, extern_type and state_type.
76 typedef codecvt_base::result result
;
77 typedef _InternT intern_type
;
78 typedef _ExternT extern_type
;
79 typedef _StateT state_type
;
81 // 22.2.1.5.1 codecvt members
83 * @brief Convert from internal to external character set.
85 * Converts input string of intern_type to output string of
86 * extern_type. This is analogous to wcsrtombs. It does this by
87 * calling codecvt::do_out.
89 * The source and destination character sets are determined by the
90 * facet's locale, internal and external types.
92 * The characters in [from,from_end) are converted and written to
93 * [to,to_end). from_next and to_next are set to point to the
94 * character following the last successfully converted character,
95 * respectively. If the result needed no conversion, from_next and
96 * to_next are not affected.
98 * The @a state argument should be initialized if the input is at the
99 * beginning and carried from a previous call if continuing
100 * conversion. There are no guarantees about how @a state is used.
102 * The result returned is a member of codecvt_base::result. If
103 * all the input is converted, returns codecvt_base::ok. If no
104 * conversion is necessary, returns codecvt_base::noconv. If
105 * the input ends early or there is insufficient space in the
106 * output, returns codecvt_base::partial. Otherwise the
107 * conversion failed and codecvt_base::error is returned.
109 * @param __state Persistent conversion state data.
110 * @param __from Start of input.
111 * @param __from_end End of input.
112 * @param __from_next Returns start of unconverted data.
113 * @param __to Start of output buffer.
114 * @param __to_end End of output buffer.
115 * @param __to_next Returns start of unused output area.
116 * @return codecvt_base::result.
// out(): const member that performs no work of its own; it passes all seven
// arguments, unchanged and in the same order, to this->do_out and returns
// whatever that call returns.
// NOTE(review): the return-type line is not present in this listing.
119 out(state_type
& __state
, const intern_type
* __from
,
120 const intern_type
* __from_end
, const intern_type
*& __from_next
,
121 extern_type
* __to
, extern_type
* __to_end
,
122 extern_type
*& __to_next
) const
// Forward to the do_out hook.
124 return this->do_out(__state
, __from
, __from_end
, __from_next
,
125 __to
, __to_end
, __to_next
);
129 * @brief Reset conversion state.
131 * Writes characters to output that would restore @a state to initial
132 * conditions. The idea is that if a partial conversion occurs, then
133 * converting the characters written by this function would leave
134 * the state in initial conditions, rather than partial conversion
135 * state. It does this by calling codecvt::do_unshift().
137 * For example, if 4 external characters always converted to 1 internal
138 * character, and input to in() had 6 external characters with state
139 * saved, this function would write two characters to the output and
140 * set the state to initialized conditions.
142 * The source and destination character sets are determined by the
143 * facet's locale, internal and external types.
145 * The result returned is a member of codecvt_base::result. If the
146 * state could be reset and data written, returns codecvt_base::ok. If
147 * no conversion is necessary, returns codecvt_base::noconv. If the
148 * output has insufficient space, returns codecvt_base::partial.
149 * Otherwise the reset failed and codecvt_base::error is returned.
151 * @param __state Persistent conversion state data.
152 * @param __to Start of output buffer.
153 * @param __to_end End of output buffer.
154 * @param __to_next Returns start of unused output area.
155 * @return codecvt_base::result.
// unshift(): const member that forwards its four arguments, unchanged, to
// this->do_unshift and returns that call's result.
158 unshift(state_type
& __state
, extern_type
* __to
, extern_type
* __to_end
,
159 extern_type
*& __to_next
) const
160 { return this->do_unshift(__state
, __to
,__to_end
,__to_next
); }
163 * @brief Convert from external to internal character set.
165 * Converts input string of extern_type to output string of
166 * intern_type. This is analogous to mbsrtowcs. It does this by
167 * calling codecvt::do_in.
169 * The source and destination character sets are determined by the
170 * facet's locale, internal and external types.
172 * The characters in [from,from_end) are converted and written to
173 * [to,to_end). from_next and to_next are set to point to the
174 * character following the last successfully converted character,
175 * respectively. If the result needed no conversion, from_next and
176 * to_next are not affected.
178 * The @a state argument should be initialized if the input is at the
179 * beginning and carried from a previous call if continuing
180 * conversion. There are no guarantees about how @a state is used.
182 * The result returned is a member of codecvt_base::result. If
183 * all the input is converted, returns codecvt_base::ok. If no
184 * conversion is necessary, returns codecvt_base::noconv. If
185 * the input ends early or there is insufficient space in the
186 * output, returns codecvt_base::partial. Otherwise the
187 * conversion failed and codecvt_base::error is returned.
189 * @param __state Persistent conversion state data.
190 * @param __from Start of input.
191 * @param __from_end End of input.
192 * @param __from_next Returns start of unconverted data.
193 * @param __to Start of output buffer.
194 * @param __to_end End of output buffer.
195 * @param __to_next Returns start of unused output area.
196 * @return codecvt_base::result.
// in(): const member mirroring out() in the opposite direction: the source
// range is of extern_type and the destination range of intern_type. All seven
// arguments are passed unchanged to this->do_in, whose result is returned.
// NOTE(review): the return-type line is not present in this listing.
199 in(state_type
& __state
, const extern_type
* __from
,
200 const extern_type
* __from_end
, const extern_type
*& __from_next
,
201 intern_type
* __to
, intern_type
* __to_end
,
202 intern_type
*& __to_next
) const
// Forward to the do_in hook.
204 return this->do_in(__state
, __from
, __from_end
, __from_next
,
205 __to
, __to_end
, __to_next
);
// encoding(): returns the value of this->do_encoding(). Declared const with an
// empty exception specification (throw()).
209 encoding() const throw()
210 { return this->do_encoding(); }
// always_noconv(): returns the value of this->do_always_noconv(). Declared
// const with an empty exception specification (throw()).
213 always_noconv() const throw()
214 { return this->do_always_noconv(); }
// length(): forwards the state, the external-character range [__from, __end)
// and the __max limit unchanged to this->do_length and returns its result.
217 length(state_type
& __state
, const extern_type
* __from
,
218 const extern_type
* __end
, size_t __max
) const
219 { return this->do_length(__state
, __from
, __end
, __max
); }
// max_length(): returns the value of this->do_max_length(). Declared const
// with an empty exception specification (throw()).
222 max_length() const throw()
223 { return this->do_max_length(); }
// Constructor: __refs defaults to 0 and is handed straight to the
// locale::facet base; the body is empty.
227 __codecvt_abstract_base(size_t __refs
= 0) : locale::facet(__refs
) { }
// Destructor with an empty body.
230 ~__codecvt_abstract_base() { }
233 * @brief Convert from internal to external character set.
235 * Converts input string of intern_type to output string of
236 * extern_type. This function is a hook for derived classes to change
237 * the value returned. @see out for more information.
// do_out(): pure (= 0) hook invoked by out(); it has the same seven-parameter
// list as out() and no implementation in this class.
240 do_out(state_type
& __state
, const intern_type
* __from
,
241 const intern_type
* __from_end
, const intern_type
*& __from_next
,
242 extern_type
* __to
, extern_type
* __to_end
,
243 extern_type
*& __to_next
) const = 0;
// do_unshift(): pure (= 0) hook invoked by unshift(); takes the state and the
// output range [__to, __to_end) plus the __to_next out-reference.
246 do_unshift(state_type
& __state
, extern_type
* __to
,
247 extern_type
* __to_end
, extern_type
*& __to_next
) const = 0;
// do_in(): pure (= 0) hook invoked by in(); it has the same seven-parameter
// list as in() and no implementation in this class.
250 do_in(state_type
& __state
, const extern_type
* __from
,
251 const extern_type
* __from_end
, const extern_type
*& __from_next
,
252 intern_type
* __to
, intern_type
* __to_end
,
253 intern_type
*& __to_next
) const = 0;
// do_encoding(): pure (= 0) hook invoked by encoding(); empty exception
// specification.
256 do_encoding() const throw() = 0;
// do_always_noconv(): pure (= 0) hook invoked by always_noconv(); empty
// exception specification.
259 do_always_noconv() const throw() = 0;
// do_length(): pure (= 0) hook invoked by length(). The state parameter is
// left unnamed in this declaration.
262 do_length(state_type
&, const extern_type
* __from
,
263 const extern_type
* __end
, size_t __max
) const = 0;
// do_max_length(): pure (= 0) hook invoked by max_length(); empty exception
// specification.
266 do_max_length() const throw() = 0;
270 * @brief Primary class template codecvt.
273 * NB: Generic, mostly useless implementation.
// Primary class template codecvt, generic over the internal type, external
// type and state type.
// NOTE(review): the line carrying the class name is absent from this listing;
// the constructors below establish that the class is named codecvt.
276 template<typename _InternT
, typename _ExternT
, typename _StateT
>
// Public base: __codecvt_abstract_base instantiated with the same arguments.
278 : public __codecvt_abstract_base
<_InternT
, _ExternT
, _StateT
>
// Member typedefs, identical in form to those of the base template.
282 typedef codecvt_base::result result
;
283 typedef _InternT intern_type
;
284 typedef _ExternT extern_type
;
285 typedef _StateT state_type
;
// Data member holding a __c_locale handle.
288 __c_locale _M_c_locale_codecvt
;
// Static locale::id member; declared here, defined after the class.
291 static locale::id id
;
// Constructor defined in-class: forwards __refs (default 0) to the base and
// initialises _M_c_locale_codecvt with 0.
294 codecvt(size_t __refs
= 0)
295 : __codecvt_abstract_base
<_InternT
, _ExternT
, _StateT
> (__refs
),
296 _M_c_locale_codecvt(0)
// Constructor taking a __c_locale; declaration only.
300 codecvt(__c_locale __cloc
, size_t __refs
= 0);
// Declarations of the do_* hooks with the same parameter lists as the pure
// versions in __codecvt_abstract_base; no bodies appear in this listing.
307 do_out(state_type
& __state
, const intern_type
* __from
,
308 const intern_type
* __from_end
, const intern_type
*& __from_next
,
309 extern_type
* __to
, extern_type
* __to_end
,
310 extern_type
*& __to_next
) const;
313 do_unshift(state_type
& __state
, extern_type
* __to
,
314 extern_type
* __to_end
, extern_type
*& __to_next
) const;
317 do_in(state_type
& __state
, const extern_type
* __from
,
318 const extern_type
* __from_end
, const extern_type
*& __from_next
,
319 intern_type
* __to
, intern_type
* __to_end
,
320 intern_type
*& __to_next
) const;
323 do_encoding() const throw();
326 do_always_noconv() const throw();
329 do_length(state_type
&, const extern_type
* __from
,
330 const extern_type
* __end
, size_t __max
) const;
333 do_max_length() const throw();
// Out-of-class definition of the static member `id` declared in the primary
// codecvt template.
336 template<typename _InternT
, typename _ExternT
, typename _StateT
>
337 locale::id codecvt
<_InternT
, _ExternT
, _StateT
>::id
;
339 /// class codecvt<char, char, mbstate_t> specialization.
// Specialization of codecvt for intern_type = char, extern_type = char,
// state_type = mbstate_t. Every member function here is a declaration only.
341 class codecvt
<char, char, mbstate_t>
342 : public __codecvt_abstract_base
<char, char, mbstate_t>
// messages<char> is granted friend access to this class.
344 friend class messages
<char>;
348 typedef char intern_type
;
349 typedef char extern_type
;
350 typedef mbstate_t state_type
;
// Data member holding a __c_locale handle.
353 __c_locale _M_c_locale_codecvt
;
// Static locale::id member (declaration).
356 static locale::id id
;
// Two constructors, both declared only: one taking just __refs (default 0)
// and one that additionally takes a __c_locale.
359 codecvt(size_t __refs
= 0);
362 codecvt(__c_locale __cloc
, size_t __refs
= 0);
// do_* hook declarations matching the parameter lists of the pure versions in
// __codecvt_abstract_base.
369 do_out(state_type
& __state
, const intern_type
* __from
,
370 const intern_type
* __from_end
, const intern_type
*& __from_next
,
371 extern_type
* __to
, extern_type
* __to_end
,
372 extern_type
*& __to_next
) const;
375 do_unshift(state_type
& __state
, extern_type
* __to
,
376 extern_type
* __to_end
, extern_type
*& __to_next
) const;
379 do_in(state_type
& __state
, const extern_type
* __from
,
380 const extern_type
* __from_end
, const extern_type
*& __from_next
,
381 intern_type
* __to
, intern_type
* __to_end
,
382 intern_type
*& __to_next
) const;
385 do_encoding() const throw();
388 do_always_noconv() const throw();
391 do_length(state_type
&, const extern_type
* __from
,
392 const extern_type
* __end
, size_t __max
) const;
395 do_max_length() const throw();
398 #ifdef _GLIBCXX_USE_WCHAR_T
399 /** @brief Class codecvt<wchar_t, char, mbstate_t> specialization.
401 * Converts between narrow and wide characters in the native character set
// Specialization of codecvt for intern_type = wchar_t, extern_type = char,
// state_type = mbstate_t. Every member function here is a declaration only.
404 class codecvt
<wchar_t, char, mbstate_t>
405 : public __codecvt_abstract_base
<wchar_t, char, mbstate_t>
// messages<wchar_t> is granted friend access to this class.
407 friend class messages
<wchar_t>;
411 typedef wchar_t intern_type
;
412 typedef char extern_type
;
413 typedef mbstate_t state_type
;
// Data member holding a __c_locale handle.
416 __c_locale _M_c_locale_codecvt
;
// Static locale::id member (declaration).
419 static locale::id id
;
// Two constructors, both declared only: one taking just __refs (default 0)
// and one that additionally takes a __c_locale.
422 codecvt(size_t __refs
= 0);
425 codecvt(__c_locale __cloc
, size_t __refs
= 0);
// do_* hook declarations matching the parameter lists of the pure versions in
// __codecvt_abstract_base.
432 do_out(state_type
& __state
, const intern_type
* __from
,
433 const intern_type
* __from_end
, const intern_type
*& __from_next
,
434 extern_type
* __to
, extern_type
* __to_end
,
435 extern_type
*& __to_next
) const;
438 do_unshift(state_type
& __state
,
439 extern_type
* __to
, extern_type
* __to_end
,
440 extern_type
*& __to_next
) const;
443 do_in(state_type
& __state
,
444 const extern_type
* __from
, const extern_type
* __from_end
,
445 const extern_type
*& __from_next
,
446 intern_type
* __to
, intern_type
* __to_end
,
447 intern_type
*& __to_next
) const;
// do_encoding and do_length are declared returning int, do_always_noconv
// returning bool.
450 int do_encoding() const throw();
453 bool do_always_noconv() const throw();
456 int do_length(state_type
&, const extern_type
* __from
,
457 const extern_type
* __end
, size_t __max
) const;
460 do_max_length() const throw();
462 #endif //_GLIBCXX_USE_WCHAR_T
464 #if __cplusplus >= 201103L
465 /** @brief Class codecvt<char16_t, char, mbstate_t> specialization.
467 * Converts between UTF-16 and UTF-8.
// Specialization of codecvt for intern_type = char16_t, extern_type = char,
// state_type = mbstate_t. Unlike the char and wchar_t specializations, no
// __c_locale data member and no __c_locale constructor are declared here.
470 class codecvt
<char16_t
, char, mbstate_t>
471 : public __codecvt_abstract_base
<char16_t
, char, mbstate_t>
475 typedef char16_t intern_type
;
476 typedef char extern_type
;
477 typedef mbstate_t state_type
;
// Static locale::id member (declaration).
480 static locale::id id
;
// Constructor defined in-class: forwards __refs (default 0) to the base;
// empty body.
483 codecvt(size_t __refs
= 0)
484 : __codecvt_abstract_base
<char16_t
, char, mbstate_t>(__refs
) { }
// do_* hook declarations; no bodies appear in this listing.
491 do_out(state_type
& __state
, const intern_type
* __from
,
492 const intern_type
* __from_end
, const intern_type
*& __from_next
,
493 extern_type
* __to
, extern_type
* __to_end
,
494 extern_type
*& __to_next
) const;
497 do_unshift(state_type
& __state
,
498 extern_type
* __to
, extern_type
* __to_end
,
499 extern_type
*& __to_next
) const;
502 do_in(state_type
& __state
,
503 const extern_type
* __from
, const extern_type
* __from_end
,
504 const extern_type
*& __from_next
,
505 intern_type
* __to
, intern_type
* __to_end
,
506 intern_type
*& __to_next
) const;
// do_encoding and do_length are declared returning int, do_always_noconv
// returning bool.
509 int do_encoding() const throw();
512 bool do_always_noconv() const throw();
515 int do_length(state_type
&, const extern_type
* __from
,
516 const extern_type
* __end
, size_t __max
) const;
519 do_max_length() const throw();
522 /** @brief Class codecvt<char32_t, char, mbstate_t> specialization.
524 * Converts between UTF-32 and UTF-8.
// Specialization of codecvt for intern_type = char32_t, extern_type = char,
// state_type = mbstate_t. No __c_locale data member or __c_locale constructor
// is declared here.
527 class codecvt
<char32_t
, char, mbstate_t>
528 : public __codecvt_abstract_base
<char32_t
, char, mbstate_t>
532 typedef char32_t intern_type
;
533 typedef char extern_type
;
534 typedef mbstate_t state_type
;
// Static locale::id member (declaration).
537 static locale::id id
;
// Constructor defined in-class: forwards __refs (default 0) to the base;
// empty body.
540 codecvt(size_t __refs
= 0)
541 : __codecvt_abstract_base
<char32_t
, char, mbstate_t>(__refs
) { }
// do_* hook declarations; no bodies appear in this listing.
548 do_out(state_type
& __state
, const intern_type
* __from
,
549 const intern_type
* __from_end
, const intern_type
*& __from_next
,
550 extern_type
* __to
, extern_type
* __to_end
,
551 extern_type
*& __to_next
) const;
554 do_unshift(state_type
& __state
,
555 extern_type
* __to
, extern_type
* __to_end
,
556 extern_type
*& __to_next
) const;
559 do_in(state_type
& __state
,
560 const extern_type
* __from
, const extern_type
* __from_end
,
561 const extern_type
*& __from_next
,
562 intern_type
* __to
, intern_type
* __to_end
,
563 intern_type
*& __to_next
) const;
// do_encoding and do_length are declared returning int, do_always_noconv
// returning bool.
566 int do_encoding() const throw();
569 bool do_always_noconv() const throw();
572 int do_length(state_type
&, const extern_type
* __from
,
573 const extern_type
* __end
, size_t __max
) const;
576 do_max_length() const throw();
579 #ifdef _GLIBCXX_USE_CHAR8_T
580 /** @brief Class codecvt<char16_t, char8_t, mbstate_t> specialization.
582 * Converts between UTF-16 and UTF-8.
// Specialization of codecvt for intern_type = char16_t, extern_type = char8_t,
// state_type = mbstate_t. No __c_locale data member or __c_locale constructor
// is declared here.
585 class codecvt
<char16_t
, char8_t
, mbstate_t>
586 : public __codecvt_abstract_base
<char16_t
, char8_t
, mbstate_t>
590 typedef char16_t intern_type
;
591 typedef char8_t extern_type
;
592 typedef mbstate_t state_type
;
// Static locale::id member (declaration).
595 static locale::id id
;
// Constructor defined in-class: forwards __refs (default 0) to the base;
// empty body.
598 codecvt(size_t __refs
= 0)
599 : __codecvt_abstract_base
<char16_t
, char8_t
, mbstate_t>(__refs
) { }
// do_* hook declarations; no bodies appear in this listing.
606 do_out(state_type
& __state
, const intern_type
* __from
,
607 const intern_type
* __from_end
, const intern_type
*& __from_next
,
608 extern_type
* __to
, extern_type
* __to_end
,
609 extern_type
*& __to_next
) const;
612 do_unshift(state_type
& __state
,
613 extern_type
* __to
, extern_type
* __to_end
,
614 extern_type
*& __to_next
) const;
617 do_in(state_type
& __state
,
618 const extern_type
* __from
, const extern_type
* __from_end
,
619 const extern_type
*& __from_next
,
620 intern_type
* __to
, intern_type
* __to_end
,
621 intern_type
*& __to_next
) const;
// do_encoding and do_length are declared returning int, do_always_noconv
// returning bool.
624 int do_encoding() const throw();
627 bool do_always_noconv() const throw();
630 int do_length(state_type
&, const extern_type
* __from
,
631 const extern_type
* __end
, size_t __max
) const;
634 do_max_length() const throw();
637 /** @brief Class codecvt<char32_t, char8_t, mbstate_t> specialization.
639 * Converts between UTF-32 and UTF-8.
// Specialization of codecvt for intern_type = char32_t, extern_type = char8_t,
// state_type = mbstate_t. No __c_locale data member or __c_locale constructor
// is declared here.
642 class codecvt
<char32_t
, char8_t
, mbstate_t>
643 : public __codecvt_abstract_base
<char32_t
, char8_t
, mbstate_t>
647 typedef char32_t intern_type
;
648 typedef char8_t extern_type
;
649 typedef mbstate_t state_type
;
// Static locale::id member (declaration).
652 static locale::id id
;
// Constructor defined in-class: forwards __refs (default 0) to the base;
// empty body.
655 codecvt(size_t __refs
= 0)
656 : __codecvt_abstract_base
<char32_t
, char8_t
, mbstate_t>(__refs
) { }
// do_* hook declarations; no bodies appear in this listing.
663 do_out(state_type
& __state
, const intern_type
* __from
,
664 const intern_type
* __from_end
, const intern_type
*& __from_next
,
665 extern_type
* __to
, extern_type
* __to_end
,
666 extern_type
*& __to_next
) const;
669 do_unshift(state_type
& __state
,
670 extern_type
* __to
, extern_type
* __to_end
,
671 extern_type
*& __to_next
) const;
674 do_in(state_type
& __state
,
675 const extern_type
* __from
, const extern_type
* __from_end
,
676 const extern_type
*& __from_next
,
677 intern_type
* __to
, intern_type
* __to_end
,
678 intern_type
*& __to_next
) const;
// do_encoding and do_length are declared returning int, do_always_noconv
// returning bool.
681 int do_encoding() const throw();
684 bool do_always_noconv() const throw();
687 int do_length(state_type
&, const extern_type
* __from
,
688 const extern_type
* __end
, size_t __max
) const;
691 do_max_length() const throw();
693 #endif // _GLIBCXX_USE_CHAR8_T
697 /// class codecvt_byname [22.2.1.6].
// Primary class template codecvt_byname: publicly derives from codecvt
// instantiated with the same three template arguments.
698 template<typename _InternT
, typename _ExternT
, typename _StateT
>
699 class codecvt_byname
: public codecvt
<_InternT
, _ExternT
, _StateT
>
// Constructor from a locale name given as a C string. __refs (default 0) is
// forwarded to the codecvt base.
703 codecvt_byname(const char* __s
, size_t __refs
= 0)
704 : codecvt
<_InternT
, _ExternT
, _StateT
>(__refs
)
// Only when the name is neither "C" nor "POSIX" is the inherited
// _M_c_locale_codecvt replaced: it is first passed to _S_destroy_c_locale and
// then to _S_create_c_locale together with the requested name.
706 if (__builtin_strcmp(__s
, "C") != 0
707 && __builtin_strcmp(__s
, "POSIX") != 0)
709 this->_S_destroy_c_locale(this->_M_c_locale_codecvt
);
710 this->_S_create_c_locale(this->_M_c_locale_codecvt
, __s
);
// C++11 and later: overload taking a string, delegating to the const char*
// constructor via __s.c_str().
714 #if __cplusplus >= 201103L
716 codecvt_byname(const string
& __s
, size_t __refs
= 0)
717 : codecvt_byname(__s
.c_str(), __refs
) { }
// Destructor with an empty body.
722 ~codecvt_byname() { }
725 #if __cplusplus >= 201103L
// Specialization of codecvt_byname for <char16_t, char, mbstate_t>, derived
// from the matching codecvt specialization.
727 class codecvt_byname
<char16_t
, char, mbstate_t>
728 : public codecvt
<char16_t
, char, mbstate_t>
// The const char* argument is unnamed and unused; only __refs (default 0) is
// forwarded to the base. Empty body.
732 codecvt_byname(const char*, size_t __refs
= 0)
733 : codecvt
<char16_t
, char, mbstate_t>(__refs
) { }
// String overload delegating to the const char* constructor via c_str().
736 codecvt_byname(const string
& __s
, size_t __refs
= 0)
737 : codecvt_byname(__s
.c_str(), __refs
) { }
// Destructor with an empty body.
741 ~codecvt_byname() { }
// Specialization of codecvt_byname for <char32_t, char, mbstate_t>, derived
// from the matching codecvt specialization.
745 class codecvt_byname
<char32_t
, char, mbstate_t>
746 : public codecvt
<char32_t
, char, mbstate_t>
// The const char* argument is unnamed and unused; only __refs (default 0) is
// forwarded to the base. Empty body.
750 codecvt_byname(const char*, size_t __refs
= 0)
751 : codecvt
<char32_t
, char, mbstate_t>(__refs
) { }
// String overload delegating to the const char* constructor via c_str().
754 codecvt_byname(const string
& __s
, size_t __refs
= 0)
755 : codecvt_byname(__s
.c_str(), __refs
) { }
// Destructor with an empty body.
759 ~codecvt_byname() { }
762 #if defined(_GLIBCXX_USE_CHAR8_T)
// Specialization of codecvt_byname for <char16_t, char8_t, mbstate_t>, derived
// from the matching codecvt specialization.
764 class codecvt_byname
<char16_t
, char8_t
, mbstate_t>
765 : public codecvt
<char16_t
, char8_t
, mbstate_t>
// The const char* argument is unnamed and unused; only __refs (default 0) is
// forwarded to the base. Empty body.
769 codecvt_byname(const char*, size_t __refs
= 0)
770 : codecvt
<char16_t
, char8_t
, mbstate_t>(__refs
) { }
// String overload delegating to the const char* constructor via c_str().
773 codecvt_byname(const string
& __s
, size_t __refs
= 0)
774 : codecvt_byname(__s
.c_str(), __refs
) { }
// Destructor with an empty body.
778 ~codecvt_byname() { }
// Specialization of codecvt_byname for <char32_t, char8_t, mbstate_t>, derived
// from the matching codecvt specialization.
782 class codecvt_byname
<char32_t
, char8_t
, mbstate_t>
783 : public codecvt
<char32_t
, char8_t
, mbstate_t>
// The const char* argument is unnamed and unused; only __refs (default 0) is
// forwarded to the base. Empty body.
787 codecvt_byname(const char*, size_t __refs
= 0)
788 : codecvt
<char32_t
, char8_t
, mbstate_t>(__refs
) { }
// String overload delegating to the const char* constructor via c_str().
791 codecvt_byname(const string
& __s
, size_t __refs
= 0)
792 : codecvt_byname(__s
.c_str(), __refs
) { }
// Destructor with an empty body.
796 ~codecvt_byname() { }
802 // Inhibit implicit instantiations for required instantiations,
803 // which are defined via explicit instantiations elsewhere.
804 #if _GLIBCXX_EXTERN_TEMPLATE
// Explicit instantiation declarations (`extern template`) for the narrow-char
// codecvt_byname, followed by use_facet / has_facet declarations for
// codecvt<char, char, mbstate_t>.
// NOTE(review): the leading lines of the use_facet / has_facet declarations
// are not present in this listing.
805 extern template class codecvt_byname
<char, char, mbstate_t>;
808 const codecvt
<char, char, mbstate_t>&
809 use_facet
<codecvt
<char, char, mbstate_t> >(const locale
&);
813 has_facet
<codecvt
<char, char, mbstate_t> >(const locale
&);
// Same set of declarations for wchar_t, guarded by _GLIBCXX_USE_WCHAR_T.
815 #ifdef _GLIBCXX_USE_WCHAR_T
816 extern template class codecvt_byname
<wchar_t, char, mbstate_t>;
819 const codecvt
<wchar_t, char, mbstate_t>&
820 use_facet
<codecvt
<wchar_t, char, mbstate_t> >(const locale
&);
824 has_facet
<codecvt
<wchar_t, char, mbstate_t> >(const locale
&);
// C++11 and later: codecvt_byname for char16_t / char32_t with extern_type char.
827 #if __cplusplus >= 201103L
828 extern template class codecvt_byname
<char16_t
, char, mbstate_t>;
829 extern template class codecvt_byname
<char32_t
, char, mbstate_t>;
// With _GLIBCXX_USE_CHAR8_T defined: the char8_t variants as well.
831 #if defined(_GLIBCXX_USE_CHAR8_T)
832 extern template class codecvt_byname
<char16_t
, char8_t
, mbstate_t>;
833 extern template class codecvt_byname
<char32_t
, char8_t
, mbstate_t>;
840 _GLIBCXX_END_NAMESPACE_VERSION