1 // Locale support (codecvt) -*- C++ -*-
3 // Copyright (C) 2015-2018 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 #include <cstring> // std::memcpy, std::memcmp
27 #include <bits/stl_algobase.h> // std::min
29 namespace std
_GLIBCXX_VISIBILITY(default)
31 _GLIBCXX_BEGIN_NAMESPACE_VERSION
// The standard supplies no bitwise operators for codecvt_mode, so the
// helpers needed by this file are written out by hand.

// Convert a codecvt_mode value to its underlying integer type.
static std::underlying_type<std::codecvt_mode>::type
to_integer(std::codecvt_mode m)
{
  using int_type = std::underlying_type<std::codecvt_mode>::type;
  return static_cast<int_type>(m);
}
// In-place bitwise AND of two codecvt_mode values.
// Stores the result in m and returns a reference to m.
static std::codecvt_mode&
operator&=(std::codecvt_mode& m, std::codecvt_mode n)
{
  using int_type = std::underlying_type<std::codecvt_mode>::type;
  const int_type bits = static_cast<int_type>(m) & static_cast<int_type>(n);
  m = std::codecvt_mode(bits);
  return m;
}
// In-place bitwise OR of two codecvt_mode values.
// Stores the result in m and returns a reference to m.
static std::codecvt_mode&
operator|=(std::codecvt_mode& m, std::codecvt_mode n)
{
  using int_type = std::underlying_type<std::codecvt_mode>::type;
  const int_type bits = static_cast<int_type>(m) | static_cast<int_type>(n);
  m = std::codecvt_mode(bits);
  return m;
}
// Bitwise complement of a codecvt_mode value.
// Every bit of the underlying integer is inverted, not only the three
// flag bits, so the result is meant to be used as a mask with operator&=.
static std::codecvt_mode
operator~(std::codecvt_mode m)
{
  using int_type = std::underlying_type<std::codecvt_mode>::type;
  const int_type flipped = ~static_cast<int_type>(m);
  return std::codecvt_mode(flipped);
}
// Largest code point that fits in a single UTF-16 code unit.
constexpr char32_t max_single_utf16_unit = 0xFFFF;

// Default upper limit (U+10FFFF) used for the maxcode parameters below.
constexpr char32_t max_code_point = 0x10FFFF;

// Sentinel results returned by the code point readers instead of a code
// point.  The functions below rely on maxcode < incomplete_mb_character
// (which is enforced by the codecvt_utf* classes on construction).
constexpr char32_t incomplete_mb_character = static_cast<char32_t>(-2);
constexpr char32_t invalid_mb_sequence = static_cast<char32_t>(-1);
59 // Utility type for reading and writing code units of type Elem from
60 // a range defined by a pair of pointers.
61 template<typename Elem
, bool Aligned
= true>
68 range
& operator=(Elem e
)
74 // Read the next code unit.
75 Elem
operator*() const { return *next
; }
77 // Read the Nth code unit.
78 Elem
operator[](size_t n
) const { return next
[n
]; }
80 // Move to the next code unit.
87 // Move to the Nth code unit.
88 range
& operator+=(size_t n
)
94 // The number of code units remaining.
95 size_t size() const { return end
- next
; }
97 // The number of bytes remaining.
98 size_t nbytes() const { return (const char*)end
- (const char*)next
; }
101 // This specialization is used when accessing char16_t values through
102 // pointers to char, which might not be correctly aligned for char16_t.
103 template<typename Elem
>
104 struct range
<Elem
, false>
106 using value_type
= typename remove_const
<Elem
>::type
;
108 using char_pointer
= typename
109 conditional
<is_const
<Elem
>::value
, const char*, char*>::type
;
114 // Write a code unit.
115 range
& operator=(Elem e
)
117 memcpy(next
, &e
, sizeof(Elem
));
122 // Read the next code unit.
123 Elem
operator*() const
126 memcpy(&e
, next
, sizeof(Elem
));
130 // Read the Nth code unit.
131 Elem
operator[](size_t n
) const
134 memcpy(&e
, next
+ n
* sizeof(Elem
), sizeof(Elem
));
138 // Move to the next code unit.
141 next
+= sizeof(Elem
);
145 // Move to the Nth code unit.
146 range
& operator+=(size_t n
)
148 next
+= n
* sizeof(Elem
);
152 // The number of code units remaining.
153 size_t size() const { return nbytes() / sizeof(Elem
); }
155 // The number of bytes remaining.
156 size_t nbytes() const { return end
- next
; }
// A multibyte sequence can start with a "header" consisting of a
// Byte Order Mark.  These are the byte patterns recognised here.
constexpr unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF };
constexpr unsigned char utf16_bom[] = { 0xFE, 0xFF };    // big-endian order
constexpr unsigned char utf16le_bom[] = { 0xFF, 0xFE };  // little-endian order
164 // Write a BOM (space permitting).
165 template<typename C
, bool A
, size_t N
>
167 write_bom(range
<C
, A
>& to
, const unsigned char (&bom
)[N
])
169 static_assert( (N
/ sizeof(C
)) != 0, "" );
170 static_assert( (N
% sizeof(C
)) == 0, "" );
174 memcpy(to
.next
, bom
, N
);
175 to
+= (N
/ sizeof(C
));
179 // Try to read a BOM.
180 template<typename C
, bool A
, size_t N
>
182 read_bom(range
<C
, A
>& from
, const unsigned char (&bom
)[N
])
184 static_assert( (N
/ sizeof(C
)) != 0, "" );
185 static_assert( (N
% sizeof(C
)) == 0, "" );
187 if (from
.nbytes() >= N
&& !memcmp(from
.next
, bom
, N
))
189 from
+= (N
/ sizeof(C
));
195 // If generate_header is set in mode write out UTF-8 BOM.
197 write_utf8_bom(range
<char>& to
, codecvt_mode mode
)
199 if (mode
& generate_header
)
200 return write_bom(to
, utf8_bom
);
204 // If generate_header is set in mode write out the UTF-16 BOM indicated
205 // by whether little_endian is set in mode.
206 template<bool Aligned
>
208 write_utf16_bom(range
<char16_t
, Aligned
>& to
, codecvt_mode mode
)
210 if (mode
& generate_header
)
212 if (mode
& little_endian
)
213 return write_bom(to
, utf16le_bom
);
215 return write_bom(to
, utf16_bom
);
220 // If consume_header is set in mode update from.next to after any BOM.
222 read_utf8_bom(range
<const char>& from
, codecvt_mode mode
)
224 if (mode
& consume_header
)
225 read_bom(from
, utf8_bom
);
228 // If consume_header is not set in mode, no effects.
229 // Otherwise, if *from.next is a UTF-16 BOM increment from.next and then:
230 // - if the UTF-16BE BOM was found unset little_endian in mode, or
231 // - if the UTF-16LE BOM was found set little_endian in mode.
232 template<bool Aligned
>
234 read_utf16_bom(range
<const char16_t
, Aligned
>& from
, codecvt_mode
& mode
)
236 if (mode
& consume_header
)
238 if (read_bom(from
, utf16_bom
))
239 mode
&= ~little_endian
;
240 else if (read_bom(from
, utf16le_bom
))
241 mode
|= little_endian
;
245 // Read a codepoint from a UTF-8 multibyte sequence.
246 // Updates from.next if the codepoint is not greater than maxcode.
247 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
249 read_utf8_code_point(range
<const char>& from
, unsigned long maxcode
)
251 const size_t avail
= from
.size();
253 return incomplete_mb_character
;
254 unsigned char c1
= from
[0];
255 // https://en.wikipedia.org/wiki/UTF-8#Sample_code
261 else if (c1
< 0xC2) // continuation or overlong 2-byte sequence
262 return invalid_mb_sequence
;
263 else if (c1
< 0xE0) // 2-byte sequence
266 return incomplete_mb_character
;
267 unsigned char c2
= from
[1];
268 if ((c2
& 0xC0) != 0x80)
269 return invalid_mb_sequence
;
270 char32_t c
= (c1
<< 6) + c2
- 0x3080;
275 else if (c1
< 0xF0) // 3-byte sequence
278 return incomplete_mb_character
;
279 unsigned char c2
= from
[1];
280 if ((c2
& 0xC0) != 0x80)
281 return invalid_mb_sequence
;
282 if (c1
== 0xE0 && c2
< 0xA0) // overlong
283 return invalid_mb_sequence
;
284 unsigned char c3
= from
[2];
285 if ((c3
& 0xC0) != 0x80)
286 return invalid_mb_sequence
;
287 char32_t c
= (c1
<< 12) + (c2
<< 6) + c3
- 0xE2080;
292 else if (c1
< 0xF5) // 4-byte sequence
295 return incomplete_mb_character
;
296 unsigned char c2
= from
[1];
297 if ((c2
& 0xC0) != 0x80)
298 return invalid_mb_sequence
;
299 if (c1
== 0xF0 && c2
< 0x90) // overlong
300 return invalid_mb_sequence
;
301 if (c1
== 0xF4 && c2
>= 0x90) // > U+10FFFF
302 return invalid_mb_sequence
;
303 unsigned char c3
= from
[2];
304 if ((c3
& 0xC0) != 0x80)
305 return invalid_mb_sequence
;
306 unsigned char c4
= from
[3];
307 if ((c4
& 0xC0) != 0x80)
308 return invalid_mb_sequence
;
309 char32_t c
= (c1
<< 18) + (c2
<< 12) + (c3
<< 6) + c4
- 0x3C82080;
315 return invalid_mb_sequence
;
319 write_utf8_code_point(range
<char>& to
, char32_t code_point
)
321 if (code_point
< 0x80)
327 else if (code_point
<= 0x7FF)
331 to
= (code_point
>> 6) + 0xC0;
332 to
= (code_point
& 0x3F) + 0x80;
334 else if (code_point
<= 0xFFFF)
338 to
= (code_point
>> 12) + 0xE0;
339 to
= ((code_point
>> 6) & 0x3F) + 0x80;
340 to
= (code_point
& 0x3F) + 0x80;
342 else if (code_point
<= 0x10FFFF)
346 to
= (code_point
>> 18) + 0xF0;
347 to
= ((code_point
>> 12) & 0x3F) + 0x80;
348 to
= ((code_point
>> 6) & 0x3F) + 0x80;
349 to
= (code_point
& 0x3F) + 0x80;
// Convert a UTF-16 code unit between the host byte order and the byte
// order selected by mode: little-endian if little_endian is set in mode,
// big-endian otherwise.  The unit is byte-swapped only when the two orders
// differ, so the same function serves for both reading and writing.
inline char16_t
adjust_byte_order(char16_t c, std::codecvt_mode mode)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  const bool host_is_little = false;
#else
  const bool host_is_little = true;
#endif
  const bool want_little = (mode & std::little_endian) != 0;
  if (want_little == host_is_little)
    return c;
  return __builtin_bswap16(c);
}
// Return true if c is a high-surrogate (aka leading) code point,
// i.e. lies in the range [0xD800, 0xDBFF].
inline bool
is_high_surrogate(char32_t c)
{
  const char32_t first = 0xD800;
  const char32_t last = 0xDBFF;
  return first <= c && c <= last;
}
// Return true if c is a low-surrogate (aka trailing) code point,
// i.e. lies in the range [0xDC00, 0xDFFF].
inline bool
is_low_surrogate(char32_t c)
{
  const char32_t first = 0xDC00;
  const char32_t last = 0xDFFF;
  return first <= c && c <= last;
}
// Combine a high (leading) and a low (trailing) surrogate into the code
// point they encode.  No validation is done here; callers check the
// arguments with is_high_surrogate / is_low_surrogate first.
//
// The computation is 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00),
// which is the same value, modulo 2^32, as the folded form
// (high << 10) + low - 0x35FDC00.
inline char32_t
surrogate_pair_to_code_point(char32_t high, char32_t low)
{
  const char32_t lead_bits = (high - 0xD800) << 10;
  const char32_t trail_bits = low - 0xDC00;
  return 0x10000 + lead_bits + trail_bits;
}
386 // Read a codepoint from a UTF-16 multibyte sequence.
387 // The sequence's endianness is indicated by (mode & little_endian).
388 // Updates from.next if the codepoint is not greater than maxcode.
389 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
390 template<bool Aligned
>
392 read_utf16_code_point(range
<const char16_t
, Aligned
>& from
,
393 unsigned long maxcode
, codecvt_mode mode
)
395 const size_t avail
= from
.size();
397 return incomplete_mb_character
;
399 char32_t c
= adjust_byte_order(from
[0], mode
);
400 if (is_high_surrogate(c
))
403 return incomplete_mb_character
;
404 const char16_t c2
= adjust_byte_order(from
[1], mode
);
405 if (is_low_surrogate(c2
))
407 c
= surrogate_pair_to_code_point(c
, c2
);
411 return invalid_mb_sequence
;
413 else if (is_low_surrogate(c
))
414 return invalid_mb_sequence
;
420 template<typename C
, bool A
>
422 write_utf16_code_point(range
<C
, A
>& to
, char32_t codepoint
, codecvt_mode mode
)
424 static_assert(sizeof(C
) >= 2, "a code unit must be at least 16-bit");
426 if (codepoint
<= max_single_utf16_unit
)
430 to
= adjust_byte_order(codepoint
, mode
);
434 else if (to
.size() > 1)
436 // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
437 const char32_t LEAD_OFFSET
= 0xD800 - (0x10000 >> 10);
438 char16_t lead
= LEAD_OFFSET
+ (codepoint
>> 10);
439 char16_t trail
= 0xDC00 + (codepoint
& 0x3FF);
440 to
= adjust_byte_order(lead
, mode
);
441 to
= adjust_byte_order(trail
, mode
);
449 ucs4_in(range
<const char>& from
, range
<char32_t
>& to
,
450 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {})
452 read_utf8_bom(from
, mode
);
453 while (from
.size() && to
.size())
455 const char32_t codepoint
= read_utf8_code_point(from
, maxcode
);
456 if (codepoint
== incomplete_mb_character
)
457 return codecvt_base::partial
;
458 if (codepoint
> maxcode
)
459 return codecvt_base::error
;
462 return from
.size() ? codecvt_base::partial
: codecvt_base::ok
;
467 ucs4_out(range
<const char32_t
>& from
, range
<char>& to
,
468 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {})
470 if (!write_utf8_bom(to
, mode
))
471 return codecvt_base::partial
;
474 const char32_t c
= from
[0];
476 return codecvt_base::error
;
477 if (!write_utf8_code_point(to
, c
))
478 return codecvt_base::partial
;
481 return codecvt_base::ok
;
486 ucs4_in(range
<const char16_t
, false>& from
, range
<char32_t
>& to
,
487 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {})
489 read_utf16_bom(from
, mode
);
490 while (from
.size() && to
.size())
492 const char32_t codepoint
= read_utf16_code_point(from
, maxcode
, mode
);
493 if (codepoint
== incomplete_mb_character
)
494 return codecvt_base::partial
;
495 if (codepoint
> maxcode
)
496 return codecvt_base::error
;
499 return from
.size() ? codecvt_base::partial
: codecvt_base::ok
;
504 ucs4_out(range
<const char32_t
>& from
, range
<char16_t
, false>& to
,
505 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {})
507 if (!write_utf16_bom(to
, mode
))
508 return codecvt_base::partial
;
511 const char32_t c
= from
[0];
513 return codecvt_base::error
;
514 if (!write_utf16_code_point(to
, c
, mode
))
515 return codecvt_base::partial
;
518 return codecvt_base::ok
;
521 // Flag indicating whether to process UTF-16 or UCS2
522 enum class surrogates
{ allowed
, disallowed
};
524 // utf8 -> utf16 (or utf8 -> ucs2 if s == surrogates::disallowed)
527 utf16_in(range
<const char>& from
, range
<C
>& to
,
528 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {},
529 surrogates s
= surrogates::allowed
)
531 read_utf8_bom(from
, mode
);
532 while (from
.size() && to
.size())
535 const char32_t codepoint
= read_utf8_code_point(from
, maxcode
);
536 if (codepoint
== incomplete_mb_character
)
538 if (s
== surrogates::allowed
)
539 return codecvt_base::partial
;
541 return codecvt_base::error
; // No surrogates in UCS2
543 if (codepoint
> maxcode
)
544 return codecvt_base::error
;
545 if (!write_utf16_code_point(to
, codepoint
, mode
))
547 from
= orig
; // rewind to previous position
548 return codecvt_base::partial
;
551 return codecvt_base::ok
;
554 // utf16 -> utf8 (or ucs2 -> utf8 if s == surrogates::disallowed)
557 utf16_out(range
<const C
>& from
, range
<char>& to
,
558 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {},
559 surrogates s
= surrogates::allowed
)
561 if (!write_utf8_bom(to
, mode
))
562 return codecvt_base::partial
;
565 char32_t c
= from
[0];
567 if (is_high_surrogate(c
))
569 if (s
== surrogates::disallowed
)
570 return codecvt_base::error
; // No surrogates in UCS-2
573 return codecvt_base::ok
; // stop converting at this point
575 const char32_t c2
= from
[1];
576 if (is_low_surrogate(c2
))
578 c
= surrogate_pair_to_code_point(c
, c2
);
582 return codecvt_base::error
;
584 else if (is_low_surrogate(c
))
585 return codecvt_base::error
;
587 return codecvt_base::error
;
588 if (!write_utf8_code_point(to
, c
))
589 return codecvt_base::partial
;
592 return codecvt_base::ok
;
595 // return pos such that [begin,pos) is valid UTF-16 string no longer than max
597 utf16_span(const char* begin
, const char* end
, size_t max
,
598 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
600 range
<const char> from
{ begin
, end
};
601 read_utf8_bom(from
, mode
);
603 while (count
+1 < max
)
605 char32_t c
= read_utf8_code_point(from
, maxcode
);
608 else if (c
> max_single_utf16_unit
)
612 if (count
+1 == max
) // take one more character if it fits in a single unit
613 read_utf8_code_point(from
, std::min(max_single_utf16_unit
, maxcode
));
619 ucs2_in(range
<const char>& from
, range
<char16_t
>& to
,
620 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
622 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
623 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
624 return utf16_in(from
, to
, maxcode
, mode
, surrogates::disallowed
);
629 ucs2_out(range
<const char16_t
>& from
, range
<char>& to
,
630 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
632 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
633 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
634 return utf16_out(from
, to
, maxcode
, mode
, surrogates::disallowed
);
639 ucs2_out(range
<const char16_t
>& from
, range
<char16_t
, false>& to
,
640 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
642 if (!write_utf16_bom(to
, mode
))
643 return codecvt_base::partial
;
644 while (from
.size() && to
.size())
646 char16_t c
= from
[0];
647 if (is_high_surrogate(c
))
648 return codecvt_base::error
;
650 return codecvt_base::error
;
651 to
= adjust_byte_order(c
, mode
);
654 return from
.size() == 0 ? codecvt_base::ok
: codecvt_base::partial
;
659 ucs2_in(range
<const char16_t
, false>& from
, range
<char16_t
>& to
,
660 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
662 read_utf16_bom(from
, mode
);
663 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
664 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
665 while (from
.size() && to
.size())
667 const char32_t c
= read_utf16_code_point(from
, maxcode
, mode
);
668 if (c
== incomplete_mb_character
)
669 return codecvt_base::error
; // UCS-2 only supports single units.
671 return codecvt_base::error
;
674 return from
.size() == 0 ? codecvt_base::ok
: codecvt_base::partial
;
678 ucs2_span(range
<const char16_t
, false>& from
, size_t max
,
679 char32_t maxcode
, codecvt_mode mode
)
681 read_utf16_bom(from
, mode
);
682 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
683 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
685 while (max
-- && c
<= maxcode
)
686 c
= read_utf16_code_point(from
, maxcode
, mode
);
687 return reinterpret_cast<const char16_t
*>(from
.next
);
691 ucs2_span(const char* begin
, const char* end
, size_t max
,
692 char32_t maxcode
, codecvt_mode mode
)
694 range
<const char> from
{ begin
, end
};
695 read_utf8_bom(from
, mode
);
696 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
697 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
699 while (max
-- && c
<= maxcode
)
700 c
= read_utf8_code_point(from
, maxcode
);
704 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
706 ucs4_span(const char* begin
, const char* end
, size_t max
,
707 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
709 range
<const char> from
{ begin
, end
};
710 read_utf8_bom(from
, mode
);
712 while (max
-- && c
<= maxcode
)
713 c
= read_utf8_code_point(from
, maxcode
);
717 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
719 ucs4_span(range
<const char16_t
, false>& from
, size_t max
,
720 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
722 read_utf16_bom(from
, mode
);
724 while (max
-- && c
<= maxcode
)
725 c
= read_utf16_code_point(from
, maxcode
, mode
);
726 return reinterpret_cast<const char16_t
*>(from
.next
);
730 // Define members of codecvt<char16_t, char, mbstate_t> specialization.
731 // Converts from UTF-8 to UTF-16.
733 locale::id codecvt
<char16_t
, char, mbstate_t>::id
;
735 codecvt
<char16_t
, char, mbstate_t>::~codecvt() { }
738 codecvt
<char16_t
, char, mbstate_t>::
740 const intern_type
* __from
,
741 const intern_type
* __from_end
, const intern_type
*& __from_next
,
742 extern_type
* __to
, extern_type
* __to_end
,
743 extern_type
*& __to_next
) const
745 range
<const char16_t
> from
{ __from
, __from_end
};
746 range
<char> to
{ __to
, __to_end
};
747 auto res
= utf16_out(from
, to
);
748 __from_next
= from
.next
;
754 codecvt
<char16_t
, char, mbstate_t>::
755 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
756 extern_type
*& __to_next
) const
759 return noconv
; // we don't use mbstate_t for the unicode facets
763 codecvt
<char16_t
, char, mbstate_t>::
764 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
765 const extern_type
*& __from_next
,
766 intern_type
* __to
, intern_type
* __to_end
,
767 intern_type
*& __to_next
) const
769 range
<const char> from
{ __from
, __from_end
};
770 range
<char16_t
> to
{ __to
, __to_end
};
771 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
772 codecvt_mode mode
= {};
774 codecvt_mode mode
= little_endian
;
776 auto res
= utf16_in(from
, to
, max_code_point
, mode
);
777 __from_next
= from
.next
;
783 codecvt
<char16_t
, char, mbstate_t>::do_encoding() const throw()
784 { return 0; } // UTF-8 is not a fixed-width encoding
787 codecvt
<char16_t
, char, mbstate_t>::do_always_noconv() const throw()
791 codecvt
<char16_t
, char, mbstate_t>::
792 do_length(state_type
&, const extern_type
* __from
,
793 const extern_type
* __end
, size_t __max
) const
795 __end
= utf16_span(__from
, __end
, __max
);
796 return __end
- __from
;
800 codecvt
<char16_t
, char, mbstate_t>::do_max_length() const throw()
802 // A single character (one or two UTF-16 code units) requires
803 // up to four UTF-8 code units.
807 // Define members of codecvt<char32_t, char, mbstate_t> specialization.
808 // Converts from UTF-8 to UTF-32 (aka UCS-4).
810 locale::id codecvt
<char32_t
, char, mbstate_t>::id
;
812 codecvt
<char32_t
, char, mbstate_t>::~codecvt() { }
815 codecvt
<char32_t
, char, mbstate_t>::
816 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
817 const intern_type
*& __from_next
,
818 extern_type
* __to
, extern_type
* __to_end
,
819 extern_type
*& __to_next
) const
821 range
<const char32_t
> from
{ __from
, __from_end
};
822 range
<char> to
{ __to
, __to_end
};
823 auto res
= ucs4_out(from
, to
);
824 __from_next
= from
.next
;
830 codecvt
<char32_t
, char, mbstate_t>::
831 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
832 extern_type
*& __to_next
) const
839 codecvt
<char32_t
, char, mbstate_t>::
840 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
841 const extern_type
*& __from_next
,
842 intern_type
* __to
, intern_type
* __to_end
,
843 intern_type
*& __to_next
) const
845 range
<const char> from
{ __from
, __from_end
};
846 range
<char32_t
> to
{ __to
, __to_end
};
847 auto res
= ucs4_in(from
, to
);
848 __from_next
= from
.next
;
854 codecvt
<char32_t
, char, mbstate_t>::do_encoding() const throw()
855 { return 0; } // UTF-8 is not a fixed-width encoding
858 codecvt
<char32_t
, char, mbstate_t>::do_always_noconv() const throw()
862 codecvt
<char32_t
, char, mbstate_t>::
863 do_length(state_type
&, const extern_type
* __from
,
864 const extern_type
* __end
, size_t __max
) const
866 __end
= ucs4_span(__from
, __end
, __max
);
867 return __end
- __from
;
871 codecvt
<char32_t
, char, mbstate_t>::do_max_length() const throw()
873 // A single character (one UTF-32 code unit) requires
874 // up to 4 UTF-8 code units.
878 // Define members of codecvt_utf8<char16_t> base class implementation.
879 // Converts from UTF-8 to UCS-2.
881 __codecvt_utf8_base
<char16_t
>::~__codecvt_utf8_base() { }
884 __codecvt_utf8_base
<char16_t
>::
885 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
886 const intern_type
*& __from_next
,
887 extern_type
* __to
, extern_type
* __to_end
,
888 extern_type
*& __to_next
) const
890 range
<const char16_t
> from
{ __from
, __from_end
};
891 range
<char> to
{ __to
, __to_end
};
892 auto res
= ucs2_out(from
, to
, _M_maxcode
, _M_mode
);
893 __from_next
= from
.next
;
899 __codecvt_utf8_base
<char16_t
>::
900 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
901 extern_type
*& __to_next
) const
908 __codecvt_utf8_base
<char16_t
>::
909 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
910 const extern_type
*& __from_next
,
911 intern_type
* __to
, intern_type
* __to_end
,
912 intern_type
*& __to_next
) const
914 range
<const char> from
{ __from
, __from_end
};
915 range
<char16_t
> to
{ __to
, __to_end
};
916 codecvt_mode mode
= codecvt_mode(_M_mode
& (consume_header
|generate_header
));
917 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
918 mode
= codecvt_mode(mode
| little_endian
);
920 auto res
= ucs2_in(from
, to
, _M_maxcode
, mode
);
921 __from_next
= from
.next
;
927 __codecvt_utf8_base
<char16_t
>::do_encoding() const throw()
928 { return 0; } // UTF-8 is not a fixed-width encoding
931 __codecvt_utf8_base
<char16_t
>::do_always_noconv() const throw()
935 __codecvt_utf8_base
<char16_t
>::
936 do_length(state_type
&, const extern_type
* __from
,
937 const extern_type
* __end
, size_t __max
) const
939 __end
= ucs2_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
940 return __end
- __from
;
944 __codecvt_utf8_base
<char16_t
>::do_max_length() const throw()
946 // A single UCS-2 character requires up to three UTF-8 code units.
947 // (UCS-2 cannot represent characters that use four UTF-8 code units).
949 if (_M_mode
& consume_header
)
950 max
+= sizeof(utf8_bom
);
954 // Define members of codecvt_utf8<char32_t> base class implementation.
955 // Converts from UTF-8 to UTF-32 (aka UCS-4).
957 __codecvt_utf8_base
<char32_t
>::~__codecvt_utf8_base() { }
960 __codecvt_utf8_base
<char32_t
>::
961 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
962 const intern_type
*& __from_next
,
963 extern_type
* __to
, extern_type
* __to_end
,
964 extern_type
*& __to_next
) const
966 range
<const char32_t
> from
{ __from
, __from_end
};
967 range
<char> to
{ __to
, __to_end
};
968 auto res
= ucs4_out(from
, to
, _M_maxcode
, _M_mode
);
969 __from_next
= from
.next
;
975 __codecvt_utf8_base
<char32_t
>::
976 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
977 extern_type
*& __to_next
) const
984 __codecvt_utf8_base
<char32_t
>::
985 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
986 const extern_type
*& __from_next
,
987 intern_type
* __to
, intern_type
* __to_end
,
988 intern_type
*& __to_next
) const
990 range
<const char> from
{ __from
, __from_end
};
991 range
<char32_t
> to
{ __to
, __to_end
};
992 auto res
= ucs4_in(from
, to
, _M_maxcode
, _M_mode
);
993 __from_next
= from
.next
;
999 __codecvt_utf8_base
<char32_t
>::do_encoding() const throw()
1000 { return 0; } // UTF-8 is not a fixed-width encoding
1003 __codecvt_utf8_base
<char32_t
>::do_always_noconv() const throw()
1007 __codecvt_utf8_base
<char32_t
>::
1008 do_length(state_type
&, const extern_type
* __from
,
1009 const extern_type
* __end
, size_t __max
) const
1011 __end
= ucs4_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1012 return __end
- __from
;
1016 __codecvt_utf8_base
<char32_t
>::do_max_length() const throw()
1018 // A single UCS-4 character requires up to four UTF-8 code units.
1020 if (_M_mode
& consume_header
)
1021 max
+= sizeof(utf8_bom
);
1025 #ifdef _GLIBCXX_USE_WCHAR_T
1027 #if __SIZEOF_WCHAR_T__ == 2
1028 static_assert(sizeof(wchar_t) == sizeof(char16_t
), "");
1029 #elif __SIZEOF_WCHAR_T__ == 4
1030 static_assert(sizeof(wchar_t) == sizeof(char32_t
), "");
1033 // Define members of codecvt_utf8<wchar_t> base class implementation.
1034 // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1036 __codecvt_utf8_base
<wchar_t>::~__codecvt_utf8_base() { }
1038 codecvt_base::result
1039 __codecvt_utf8_base
<wchar_t>::
1040 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1041 const intern_type
*& __from_next
,
1042 extern_type
* __to
, extern_type
* __to_end
,
1043 extern_type
*& __to_next
) const
1045 range
<char> to
{ __to
, __to_end
};
1046 #if __SIZEOF_WCHAR_T__ == 2
1047 range
<const char16_t
> from
{
1048 reinterpret_cast<const char16_t
*>(__from
),
1049 reinterpret_cast<const char16_t
*>(__from_end
)
1051 auto res
= ucs2_out(from
, to
, _M_maxcode
, _M_mode
);
1052 #elif __SIZEOF_WCHAR_T__ == 4
1053 range
<const char32_t
> from
{
1054 reinterpret_cast<const char32_t
*>(__from
),
1055 reinterpret_cast<const char32_t
*>(__from_end
)
1057 auto res
= ucs4_out(from
, to
, _M_maxcode
, _M_mode
);
1059 return codecvt_base::error
;
1061 __from_next
= reinterpret_cast<const wchar_t*>(from
.next
);
1062 __to_next
= to
.next
;
1066 codecvt_base::result
1067 __codecvt_utf8_base
<wchar_t>::
1068 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1069 extern_type
*& __to_next
) const
1075 codecvt_base::result
1076 __codecvt_utf8_base
<wchar_t>::
1077 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1078 const extern_type
*& __from_next
,
1079 intern_type
* __to
, intern_type
* __to_end
,
1080 intern_type
*& __to_next
) const
1082 range
<const char> from
{ __from
, __from_end
};
1083 #if __SIZEOF_WCHAR_T__ == 2
1085 reinterpret_cast<char16_t
*>(__to
),
1086 reinterpret_cast<char16_t
*>(__to_end
)
1088 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1089 codecvt_mode mode
= {};
1091 codecvt_mode mode
= little_endian
;
1093 auto res
= ucs2_in(from
, to
, _M_maxcode
, mode
);
1094 #elif __SIZEOF_WCHAR_T__ == 4
1096 reinterpret_cast<char32_t
*>(__to
),
1097 reinterpret_cast<char32_t
*>(__to_end
)
1099 auto res
= ucs4_in(from
, to
, _M_maxcode
, _M_mode
);
1101 return codecvt_base::error
;
1103 __from_next
= from
.next
;
1104 __to_next
= reinterpret_cast<wchar_t*>(to
.next
);
1109 __codecvt_utf8_base
<wchar_t>::do_encoding() const throw()
1110 { return 0; } // UTF-8 is not a fixed-width encoding
1113 __codecvt_utf8_base
<wchar_t>::do_always_noconv() const throw()
1117 __codecvt_utf8_base
<wchar_t>::
1118 do_length(state_type
&, const extern_type
* __from
,
1119 const extern_type
* __end
, size_t __max
) const
1121 #if __SIZEOF_WCHAR_T__ == 2
1122 __end
= ucs2_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1123 #elif __SIZEOF_WCHAR_T__ == 4
1124 __end
= ucs4_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1128 return __end
- __from
;
1132 __codecvt_utf8_base
<wchar_t>::do_max_length() const throw()
1134 #if __SIZEOF_WCHAR_T__ == 2
1135 int max
= 3; // See __codecvt_utf8_base<char16_t>::do_max_length()
1137 int max
= 4; // See __codecvt_utf8_base<char32_t>::do_max_length()
1139 if (_M_mode
& consume_header
)
1140 max
+= sizeof(utf8_bom
);
1145 // Define members of codecvt_utf16<char16_t> base class implementation.
1146 // Converts from UTF-16 to UCS-2.
1148 __codecvt_utf16_base
<char16_t
>::~__codecvt_utf16_base() { }
1150 codecvt_base::result
1151 __codecvt_utf16_base
<char16_t
>::
1152 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1153 const intern_type
*& __from_next
,
1154 extern_type
* __to
, extern_type
* __to_end
,
1155 extern_type
*& __to_next
) const
1157 range
<const char16_t
> from
{ __from
, __from_end
};
1158 range
<char16_t
, false> to
{ __to
, __to_end
};
1159 auto res
= ucs2_out(from
, to
, _M_maxcode
, _M_mode
);
1160 __from_next
= from
.next
;
1161 __to_next
= reinterpret_cast<char*>(to
.next
);
1165 codecvt_base::result
1166 __codecvt_utf16_base
<char16_t
>::
1167 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1168 extern_type
*& __to_next
) const
1174 codecvt_base::result
1175 __codecvt_utf16_base
<char16_t
>::
1176 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1177 const extern_type
*& __from_next
,
1178 intern_type
* __to
, intern_type
* __to_end
,
1179 intern_type
*& __to_next
) const
1181 range
<const char16_t
, false> from
{ __from
, __from_end
};
1182 range
<char16_t
> to
{ __to
, __to_end
};
1183 auto res
= ucs2_in(from
, to
, _M_maxcode
, _M_mode
);
1184 __from_next
= reinterpret_cast<const char*>(from
.next
);
1185 __to_next
= to
.next
;
1186 if (res
== codecvt_base::ok
&& __from_next
!= __from_end
)
1187 res
= codecvt_base::error
;
1192 __codecvt_utf16_base
<char16_t
>::do_encoding() const throw()
1193 { return 0; } // UTF-16 is not a fixed-width encoding
1196 __codecvt_utf16_base
<char16_t
>::do_always_noconv() const throw()
1200 __codecvt_utf16_base
<char16_t
>::
1201 do_length(state_type
&, const extern_type
* __from
,
1202 const extern_type
* __end
, size_t __max
) const
1204 range
<const char16_t
, false> from
{ __from
, __end
};
1205 const char16_t
* next
= ucs2_span(from
, __max
, _M_maxcode
, _M_mode
);
1206 return reinterpret_cast<const char*>(next
) - __from
;
1210 __codecvt_utf16_base
<char16_t
>::do_max_length() const throw()
1212 // A single UCS-2 character requires one UTF-16 code unit (so two chars).
1213 // (UCS-2 cannot represent characters that use multiple UTF-16 code units).
1215 if (_M_mode
& consume_header
)
1216 max
+= sizeof(utf16_bom
);
1220 // Define members of codecvt_utf16<char32_t> base class implementation.
1221 // Converts from UTF-16 to UTF-32 (aka UCS-4).
1223 __codecvt_utf16_base
<char32_t
>::~__codecvt_utf16_base() { }
// do_out override for __codecvt_utf16_base<char32_t>.
// Wraps the internal input [__from, __from_end) as a range of const char32_t
// and the external output [__to, __to_end) as a range of char16_t, then calls
// ucs4_out() with _M_maxcode and _M_mode.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces and the final return are not present in this extract.
1225 codecvt_base::result
1226 __codecvt_utf16_base
<char32_t
>::
1227 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1228 const intern_type
*& __from_next
,
1229 extern_type
* __to
, extern_type
* __to_end
,
1230 extern_type
*& __to_next
) const
1232 range
<const char32_t
> from
{ __from
, __from_end
};
1233 range
<char16_t
, false> to
{ __to
, __to_end
};
1234 auto res
= ucs4_out(from
, to
, _M_maxcode
, _M_mode
);
// Report the stopping positions; the output position is handed back as char*.
1235 __from_next
= from
.next
;
1236 __to_next
= reinterpret_cast<char*>(to
.next
);
// do_unshift override for __codecvt_utf16_base<char32_t>.
// The state argument and the second extern_type* argument are unnamed, so
// only __to and __to_next are available to the body.
// NOTE(review): the function body is not present in this extract; confirm
// in the full file what is stored in __to_next and which result is returned.
1240 codecvt_base::result
1241 __codecvt_utf16_base
<char32_t
>::
1242 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1243 extern_type
*& __to_next
) const
// do_in override for __codecvt_utf16_base<char32_t>.
// Views the external bytes [__from, __from_end) as a range of const char16_t
// and the destination [__to, __to_end) as a range of char32_t, then hands
// both to ucs4_in() together with _M_maxcode and _M_mode.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces and the final return are not present in this extract.
1249 codecvt_base::result
1250 __codecvt_utf16_base
<char32_t
>::
1251 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1252 const extern_type
*& __from_next
,
1253 intern_type
* __to
, intern_type
* __to_end
,
1254 intern_type
*& __to_next
) const
1256 range
<const char16_t
, false> from
{ __from
, __from_end
};
1257 range
<char32_t
> to
{ __to
, __to_end
};
1258 auto res
= ucs4_in(from
, to
, _M_maxcode
, _M_mode
);
// Report the stopping positions back through the reference parameters.
1259 __from_next
= reinterpret_cast<const char*>(from
.next
);
1260 __to_next
= to
.next
;
// An "ok" result that left input unconsumed is downgraded to "error".
// NOTE(review): presumably this catches a trailing byte that cannot form a
// whole UTF-16 code unit; confirm.
1261 if (res
== codecvt_base::ok
&& __from_next
!= __from_end
)
1262 res
= codecvt_base::error
;
// do_encoding override: unconditionally returns 0, the std::codecvt value
// for an external encoding without a constant number of chars per character.
// NOTE(review): the return-type line is not present in this extract.
1267 __codecvt_utf16_base
<char32_t
>::do_encoding() const throw()
1268 { return 0; } // UTF-16 is not a fixed-width encoding
// do_always_noconv override for __codecvt_utf16_base<char32_t>.
// NOTE(review): the return type and body are not present in this extract;
// confirm the returned value in the full file.
1271 __codecvt_utf16_base
<char32_t
>::do_always_noconv() const throw()
// do_length override for __codecvt_utf16_base<char32_t>.
// Views [__from, __end) as a range of const char16_t, asks ucs4_span() how
// far it extends for __max (with _M_maxcode and _M_mode), and returns the
// distance in chars between that position and __from.
// NOTE(review): the return-type line and braces are not present in this extract.
1275 __codecvt_utf16_base
<char32_t
>::
1276 do_length(state_type
&, const extern_type
* __from
,
1277 const extern_type
* __end
, size_t __max
) const
1279 range
<const char16_t
, false> from
{ __from
, __end
};
1280 const char16_t
* next
= ucs4_span(from
, __max
, _M_maxcode
, _M_mode
);
// The result is measured in chars (bytes), not in char16_t units.
1281 return reinterpret_cast<const char*>(next
) - __from
;
// do_max_length override for __codecvt_utf16_base<char32_t>.
// NOTE(review): the declaration of `max` and the return statement are not
// present in this extract; confirm the base value in the full file.
1285 __codecvt_utf16_base
<char32_t
>::do_max_length() const throw()
1287 // A single UCS-4 character requires one or two UTF-16 code units
1288 // (so up to four chars).
// When consume_header is set in _M_mode, the size of utf16_bom is added.
1290 if (_M_mode
& consume_header
)
1291 max
+= sizeof(utf16_bom
);
1295 #ifdef _GLIBCXX_USE_WCHAR_T
1296 // Define members of codecvt_utf16<wchar_t> base class implementation.
1297 // Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
// Out-of-line destructor definition for the wchar_t specialization; the
// body is empty.
1299 __codecvt_utf16_base
<wchar_t>::~__codecvt_utf16_base() { }
// do_out override for __codecvt_utf16_base<wchar_t>.
// The external output [__to, __to_end) is wrapped as a range of char16_t.
// The wchar_t input is reinterpreted according to __SIZEOF_WCHAR_T__:
// as const char16_t and passed to ucs2_out() when it is 2, or as
// const char32_t and passed to ucs4_out() when it is 4.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces, the closing of the `from` initializers, the
// #else/#endif lines and the final return are not present in this extract.
1301 codecvt_base::result
1302 __codecvt_utf16_base
<wchar_t>::
1303 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1304 const intern_type
*& __from_next
,
1305 extern_type
* __to
, extern_type
* __to_end
,
1306 extern_type
*& __to_next
) const
1308 range
<char16_t
, false> to
{ __to
, __to_end
};
// 2-byte wchar_t: treat the input as char16_t and use the UCS-2 writer.
1309 #if __SIZEOF_WCHAR_T__ == 2
1310 range
<const char16_t
> from
{
1311 reinterpret_cast<const char16_t
*>(__from
),
1312 reinterpret_cast<const char16_t
*>(__from_end
),
1314 auto res
= ucs2_out(from
, to
, _M_maxcode
, _M_mode
);
// 4-byte wchar_t: treat the input as char32_t and use the UCS-4 writer.
1315 #elif __SIZEOF_WCHAR_T__ == 4
1316 range
<const char32_t
> from
{
1317 reinterpret_cast<const char32_t
*>(__from
),
1318 reinterpret_cast<const char32_t
*>(__from_end
),
1320 auto res
= ucs4_out(from
, to
, _M_maxcode
, _M_mode
);
// NOTE(review): presumably the fallback for any other wchar_t size; the
// enclosing #else is not visible here.
1322 return codecvt_base::error
;
// Report the stopping positions, cast back to the caller's pointer types.
1324 __from_next
= reinterpret_cast<const wchar_t*>(from
.next
);
1325 __to_next
= reinterpret_cast<char*>(to
.next
);
// do_unshift override for __codecvt_utf16_base<wchar_t>.
// The state argument and the second extern_type* argument are unnamed, so
// only __to and __to_next are available to the body.
// NOTE(review): the function body is not present in this extract; confirm
// in the full file what is stored in __to_next and which result is returned.
1329 codecvt_base::result
1330 __codecvt_utf16_base
<wchar_t>::
1331 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1332 extern_type
*& __to_next
) const
// do_in override for __codecvt_utf16_base<wchar_t>.
// The external bytes [__from, __from_end) are viewed as a range of
// const char16_t. The wchar_t destination is reinterpreted according to
// __SIZEOF_WCHAR_T__: as char16_t with ucs2_in() when it is 2, or as
// char32_t with ucs4_in() when it is 4.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces, the declarations that open the `to` ranges, the
// #else/#endif lines and the final return are not present in this extract.
1338 codecvt_base::result
1339 __codecvt_utf16_base
<wchar_t>::
1340 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1341 const extern_type
*& __from_next
,
1342 intern_type
* __to
, intern_type
* __to_end
,
1343 intern_type
*& __to_next
) const
1345 range
<const char16_t
, false> from
{ __from
, __from_end
};
// 2-byte wchar_t: treat the destination as char16_t and use the UCS-2 reader.
1346 #if __SIZEOF_WCHAR_T__ == 2
1348 reinterpret_cast<char16_t
*>(__to
),
1349 reinterpret_cast<char16_t
*>(__to_end
),
1351 auto res
= ucs2_in(from
, to
, _M_maxcode
, _M_mode
);
// 4-byte wchar_t: treat the destination as char32_t and use the UCS-4 reader.
1352 #elif __SIZEOF_WCHAR_T__ == 4
1354 reinterpret_cast<char32_t
*>(__to
),
1355 reinterpret_cast<char32_t
*>(__to_end
),
1357 auto res
= ucs4_in(from
, to
, _M_maxcode
, _M_mode
);
// NOTE(review): presumably the fallback for any other wchar_t size; the
// enclosing #else is not visible here.
1359 return codecvt_base::error
;
// Report the stopping positions, cast back to the caller's pointer types.
1361 __from_next
= reinterpret_cast<const char*>(from
.next
);
1362 __to_next
= reinterpret_cast<wchar_t*>(to
.next
);
// An "ok" result that left input unconsumed is downgraded to "error".
1363 if (res
== codecvt_base::ok
&& __from_next
!= __from_end
)
1364 res
= codecvt_base::error
;
// do_encoding override: unconditionally returns 0, the std::codecvt value
// for an external encoding without a constant number of chars per character.
// NOTE(review): the return-type line is not present in this extract.
1369 __codecvt_utf16_base
<wchar_t>::do_encoding() const throw()
1370 { return 0; } // UTF-16 is not a fixed-width encoding
// do_always_noconv override for __codecvt_utf16_base<wchar_t>.
// NOTE(review): the return type and body are not present in this extract;
// confirm the returned value in the full file.
1373 __codecvt_utf16_base
<wchar_t>::do_always_noconv() const throw()
// do_length override for __codecvt_utf16_base<wchar_t>.
// Views [__from, __end) as a range of const char16_t and measures it with
// ucs2_span() when __SIZEOF_WCHAR_T__ is 2 or ucs4_span() when it is 4,
// passing __max, _M_maxcode and _M_mode. Returns the distance in chars
// between the reported position and __from.
// NOTE(review): the return-type line, braces and the #else/#endif lines are
// not present in this extract.
1377 __codecvt_utf16_base
<wchar_t>::
1378 do_length(state_type
&, const extern_type
* __from
,
1379 const extern_type
* __end
, size_t __max
) const
1381 range
<const char16_t
, false> from
{ __from
, __end
};
1382 #if __SIZEOF_WCHAR_T__ == 2
1383 const char16_t
* next
= ucs2_span(from
, __max
, _M_maxcode
, _M_mode
);
1384 #elif __SIZEOF_WCHAR_T__ == 4
1385 const char16_t
* next
= ucs4_span(from
, __max
, _M_maxcode
, _M_mode
);
// The result is measured in chars (bytes), not in char16_t units.
1387 return reinterpret_cast<const char*>(next
) - __from
;
// do_max_length override for __codecvt_utf16_base<wchar_t>.
// The base value is 2 when __SIZEOF_WCHAR_T__ is 2; the other visible
// initializer is 4. When consume_header is set in _M_mode, the size of
// utf16_bom is added.
// NOTE(review): the #else/#endif lines, braces and the return statement are
// not present in this extract.
1391 __codecvt_utf16_base
<wchar_t>::do_max_length() const throw()
1393 #if __SIZEOF_WCHAR_T__ == 2
1394 int max
= 2; // See __codecvt_utf16_base<char16_t>::do_max_length()
1396 int max
= 4; // See __codecvt_utf16_base<char32_t>::do_max_length()
1398 if (_M_mode
& consume_header
)
1399 max
+= sizeof(utf16_bom
);
1404 // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1405 // Converts from UTF-8 to UTF-16.
// Out-of-line destructor definition for the char16_t specialization; the
// body is empty.
1407 __codecvt_utf8_utf16_base
<char16_t
>::~__codecvt_utf8_utf16_base() { }
// do_out override for __codecvt_utf8_utf16_base<char16_t>.
// Wraps the internal input [__from, __from_end) as a range of const char16_t
// and the external output [__to, __to_end) as a range of char, then calls
// utf16_out() with _M_maxcode and _M_mode.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces and the final return are not present in this extract.
1409 codecvt_base::result
1410 __codecvt_utf8_utf16_base
<char16_t
>::
1411 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1412 const intern_type
*& __from_next
,
1413 extern_type
* __to
, extern_type
* __to_end
,
1414 extern_type
*& __to_next
) const
1416 range
<const char16_t
> from
{ __from
, __from_end
};
1417 range
<char> to
{ __to
, __to_end
};
1418 auto res
= utf16_out(from
, to
, _M_maxcode
, _M_mode
);
// Report the stopping positions back through the reference parameters.
1419 __from_next
= from
.next
;
1420 __to_next
= to
.next
;
// do_unshift override for __codecvt_utf8_utf16_base<char16_t>.
// The state argument and the second extern_type* argument are unnamed, so
// only __to and __to_next are available to the body.
// NOTE(review): the function body is not present in this extract; confirm
// in the full file what is stored in __to_next and which result is returned.
1424 codecvt_base::result
1425 __codecvt_utf8_utf16_base
<char16_t
>::
1426 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1427 extern_type
*& __to_next
) const
// do_in override for __codecvt_utf8_utf16_base<char16_t>.
// Wraps the external input [__from, __from_end) as a range of const char and
// the destination [__to, __to_end) as a range of char16_t, then calls
// utf16_in() with _M_maxcode and a locally computed mode.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces, the #endif line and the final return are not present
// in this extract.
1433 codecvt_base::result
1434 __codecvt_utf8_utf16_base
<char16_t
>::
1435 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1436 const extern_type
*& __from_next
,
1437 intern_type
* __to
, intern_type
* __to_end
,
1438 intern_type
*& __to_next
) const
1440 range
<const char> from
{ __from
, __from_end
};
1441 range
<char16_t
> to
{ __to
, __to_end
};
// Keep only the header flags from _M_mode; any little_endian bit the caller
// set is dropped here and replaced below according to the host byte order.
1442 codecvt_mode mode
= codecvt_mode(_M_mode
& (consume_header
|generate_header
));
1443 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1444 mode
= codecvt_mode(mode
| little_endian
);
1446 auto res
= utf16_in(from
, to
, _M_maxcode
, mode
);
// Report the stopping positions back through the reference parameters.
1447 __from_next
= from
.next
;
1448 __to_next
= to
.next
;
// do_encoding override: unconditionally returns 0, the std::codecvt value
// for an external encoding without a constant number of chars per character.
// NOTE(review): the return-type line is not present in this extract.
1453 __codecvt_utf8_utf16_base
<char16_t
>::do_encoding() const throw()
1454 { return 0; } // UTF-8 is not a fixed-width encoding
// do_always_noconv override for __codecvt_utf8_utf16_base<char16_t>.
// NOTE(review): the return type and body are not present in this extract;
// confirm the returned value in the full file.
1457 __codecvt_utf8_utf16_base
<char16_t
>::do_always_noconv() const throw()
// do_length override for __codecvt_utf8_utf16_base<char16_t>.
// Calls utf16_span() on [__from, __end) with __max, _M_maxcode and _M_mode,
// and returns the distance in chars from __from to the reported position.
// NOTE(review): the return-type line and braces are not present in this extract.
1461 __codecvt_utf8_utf16_base
<char16_t
>::
1462 do_length(state_type
&, const extern_type
* __from
,
1463 const extern_type
* __end
, size_t __max
) const
// The by-value parameter __end is reused to hold the position returned.
1465 __end
= utf16_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1466 return __end
- __from
;
// do_max_length override for __codecvt_utf8_utf16_base<char16_t>.
// NOTE(review): the declaration of `max` and the return statement are not
// present in this extract; confirm the base value in the full file.
1470 __codecvt_utf8_utf16_base
<char16_t
>::do_max_length() const throw()
1472 // A single character can be 1 or 2 UTF-16 code units,
1473 // requiring up to 4 UTF-8 code units.
// When consume_header is set in _M_mode, the size of utf8_bom is added.
1475 if (_M_mode
& consume_header
)
1476 max
+= sizeof(utf8_bom
);
1480 // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
1481 // Converts from UTF-8 to UTF-16 (UTF-16 code units stored in char32_t).
// Out-of-line destructor definition for the char32_t specialization; the
// body is empty.
1483 __codecvt_utf8_utf16_base
<char32_t
>::~__codecvt_utf8_utf16_base() { }
// do_out override for __codecvt_utf8_utf16_base<char32_t>.
// Wraps the internal input [__from, __from_end) as a range of const char32_t
// and the external output [__to, __to_end) as a range of char, then calls
// utf16_out() with _M_maxcode and _M_mode.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces and the final return are not present in this extract.
1485 codecvt_base::result
1486 __codecvt_utf8_utf16_base
<char32_t
>::
1487 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1488 const intern_type
*& __from_next
,
1489 extern_type
* __to
, extern_type
* __to_end
,
1490 extern_type
*& __to_next
) const
1492 range
<const char32_t
> from
{ __from
, __from_end
};
1493 range
<char> to
{ __to
, __to_end
};
1494 auto res
= utf16_out(from
, to
, _M_maxcode
, _M_mode
);
// Report the stopping positions back through the reference parameters.
1495 __from_next
= from
.next
;
1496 __to_next
= to
.next
;
// do_unshift override for __codecvt_utf8_utf16_base<char32_t>.
// The state argument and the second extern_type* argument are unnamed, so
// only __to and __to_next are available to the body.
// NOTE(review): the function body is not present in this extract; confirm
// in the full file what is stored in __to_next and which result is returned.
1500 codecvt_base::result
1501 __codecvt_utf8_utf16_base
<char32_t
>::
1502 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1503 extern_type
*& __to_next
) const
// do_in override for __codecvt_utf8_utf16_base<char32_t>.
// Wraps the external input [__from, __from_end) as a range of const char and
// the destination [__to, __to_end) as a range of char32_t, then calls
// utf16_in() with _M_maxcode and a locally computed mode.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces, the #endif line and the final return are not present
// in this extract.
1509 codecvt_base::result
1510 __codecvt_utf8_utf16_base
<char32_t
>::
1511 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1512 const extern_type
*& __from_next
,
1513 intern_type
* __to
, intern_type
* __to_end
,
1514 intern_type
*& __to_next
) const
1516 range
<const char> from
{ __from
, __from_end
};
1517 range
<char32_t
> to
{ __to
, __to_end
};
// Keep only the header flags from _M_mode; any little_endian bit the caller
// set is dropped here and replaced below according to the host byte order.
1518 codecvt_mode mode
= codecvt_mode(_M_mode
& (consume_header
|generate_header
));
1519 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1520 mode
= codecvt_mode(mode
| little_endian
);
1522 auto res
= utf16_in(from
, to
, _M_maxcode
, mode
);
// Report the stopping positions back through the reference parameters.
1523 __from_next
= from
.next
;
1524 __to_next
= to
.next
;
// do_encoding override: unconditionally returns 0, the std::codecvt value
// for an external encoding without a constant number of chars per character.
// NOTE(review): the return-type line is not present in this extract.
1529 __codecvt_utf8_utf16_base
<char32_t
>::do_encoding() const throw()
1530 { return 0; } // UTF-8 is not a fixed-width encoding
// do_always_noconv override for __codecvt_utf8_utf16_base<char32_t>.
// NOTE(review): the return type and body are not present in this extract;
// confirm the returned value in the full file.
1533 __codecvt_utf8_utf16_base
<char32_t
>::do_always_noconv() const throw()
// do_length override for __codecvt_utf8_utf16_base<char32_t>.
// Calls utf16_span() on [__from, __end) with __max, _M_maxcode and _M_mode,
// and returns the distance in chars from __from to the reported position.
// NOTE(review): the return-type line and braces are not present in this extract.
1537 __codecvt_utf8_utf16_base
<char32_t
>::
1538 do_length(state_type
&, const extern_type
* __from
,
1539 const extern_type
* __end
, size_t __max
) const
// The by-value parameter __end is reused to hold the position returned.
1541 __end
= utf16_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1542 return __end
- __from
;
// do_max_length override for __codecvt_utf8_utf16_base<char32_t>.
// NOTE(review): the declaration of `max` and the return statement are not
// present in this extract; confirm the base value in the full file.
1546 __codecvt_utf8_utf16_base
<char32_t
>::do_max_length() const throw()
1548 // A single character can be 1 or 2 UTF-16 code units,
1549 // requiring up to 4 UTF-8 code units.
// When consume_header is set in _M_mode, the size of utf8_bom is added.
1551 if (_M_mode
& consume_header
)
1552 max
+= sizeof(utf8_bom
);
1556 #ifdef _GLIBCXX_USE_WCHAR_T
1557 // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
1558 // Converts from UTF-8 to UTF-16 (UTF-16 code units stored in wchar_t).
// Out-of-line destructor definition for the wchar_t specialization; the
// body is empty.
1560 __codecvt_utf8_utf16_base
<wchar_t>::~__codecvt_utf8_utf16_base() { }
// do_out override for __codecvt_utf8_utf16_base<wchar_t>.
// Wraps the internal input [__from, __from_end) as a range of const wchar_t
// and the external output [__to, __to_end) as a range of char, then calls
// utf16_out() with _M_maxcode and _M_mode.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces and the final return are not present in this extract.
1562 codecvt_base::result
1563 __codecvt_utf8_utf16_base
<wchar_t>::
1564 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1565 const intern_type
*& __from_next
,
1566 extern_type
* __to
, extern_type
* __to_end
,
1567 extern_type
*& __to_next
) const
1569 range
<const wchar_t> from
{ __from
, __from_end
};
1570 range
<char> to
{ __to
, __to_end
};
1571 auto res
= utf16_out(from
, to
, _M_maxcode
, _M_mode
);
// Report the stopping positions back through the reference parameters.
1572 __from_next
= from
.next
;
1573 __to_next
= to
.next
;
// do_unshift override for __codecvt_utf8_utf16_base<wchar_t>.
// The state argument and the second extern_type* argument are unnamed, so
// only __to and __to_next are available to the body.
// NOTE(review): the function body is not present in this extract; confirm
// in the full file what is stored in __to_next and which result is returned.
1577 codecvt_base::result
1578 __codecvt_utf8_utf16_base
<wchar_t>::
1579 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1580 extern_type
*& __to_next
) const
// do_in override for __codecvt_utf8_utf16_base<wchar_t>.
// Wraps the external input [__from, __from_end) as a range of const char and
// the destination [__to, __to_end) as a range of wchar_t, then calls
// utf16_in() with _M_maxcode and a locally computed mode.
// __from_next and __to_next receive the positions the two ranges reached.
// NOTE(review): braces, the #endif line and the final return are not present
// in this extract.
1586 codecvt_base::result
1587 __codecvt_utf8_utf16_base
<wchar_t>::
1588 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1589 const extern_type
*& __from_next
,
1590 intern_type
* __to
, intern_type
* __to_end
,
1591 intern_type
*& __to_next
) const
1593 range
<const char> from
{ __from
, __from_end
};
1594 range
<wchar_t> to
{ __to
, __to_end
};
// Keep only the header flags from _M_mode; any little_endian bit the caller
// set is dropped here and replaced below according to the host byte order.
1595 codecvt_mode mode
= codecvt_mode(_M_mode
& (consume_header
|generate_header
));
1596 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1597 mode
= codecvt_mode(mode
| little_endian
);
1599 auto res
= utf16_in(from
, to
, _M_maxcode
, mode
);
// Report the stopping positions back through the reference parameters.
1600 __from_next
= from
.next
;
1601 __to_next
= to
.next
;
// do_encoding override: unconditionally returns 0, the std::codecvt value
// for an external encoding without a constant number of chars per character.
// NOTE(review): the return-type line is not present in this extract.
1606 __codecvt_utf8_utf16_base
<wchar_t>::do_encoding() const throw()
1607 { return 0; } // UTF-8 is not a fixed-width encoding
// do_always_noconv override for __codecvt_utf8_utf16_base<wchar_t>.
// NOTE(review): the return type and body are not present in this extract;
// confirm the returned value in the full file.
1610 __codecvt_utf8_utf16_base
<wchar_t>::do_always_noconv() const throw()
// do_length override for __codecvt_utf8_utf16_base<wchar_t>.
// Calls utf16_span() on [__from, __end) with __max, _M_maxcode and _M_mode,
// and returns the distance in chars from __from to the reported position.
// NOTE(review): the return-type line and braces are not present in this extract.
1614 __codecvt_utf8_utf16_base
<wchar_t>::
1615 do_length(state_type
&, const extern_type
* __from
,
1616 const extern_type
* __end
, size_t __max
) const
// The by-value parameter __end is reused to hold the position returned.
1618 __end
= utf16_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1619 return __end
- __from
;
// do_max_length override for __codecvt_utf8_utf16_base<wchar_t>.
// NOTE(review): the declaration of `max` and the return statement are not
// present in this extract; confirm the base value in the full file.
1623 __codecvt_utf8_utf16_base
<wchar_t>::do_max_length() const throw()
1625 // A single character can be 1 or 2 UTF-16 code units,
1626 // requiring up to 4 UTF-8 code units.
// When consume_header is set in _M_mode, the size of utf8_bom is added.
1628 if (_M_mode
& consume_header
)
1629 max
+= sizeof(utf8_bom
);
// Explicit instantiations of the codecvt support classes for char16_t and
// char32_t with external type char and state type mbstate_t.
// The first two use the `inline template class` form, which is not ISO C++.
// NOTE(review): this looks like the GNU explicit-instantiation extension that
// emits only the compiler support data of the class; confirm in the GCC manual.
1634 inline template class __codecvt_abstract_base
<char16_t
, char, mbstate_t>;
1635 inline template class __codecvt_abstract_base
<char32_t
, char, mbstate_t>;
// codecvt_byname is instantiated in full for the same two character types.
1636 template class codecvt_byname
<char16_t
, char, mbstate_t>;
1637 template class codecvt_byname
<char32_t
, char, mbstate_t>;
1639 _GLIBCXX_END_NAMESPACE_VERSION