1 // Locale support (codecvt) -*- C++ -*-
3 // Copyright (C) 2015-2017 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 #include <cstring> // std::memcpy, std::memcmp
27 #include <bits/stl_algobase.h> // std::min
29 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
30 namespace std
_GLIBCXX_VISIBILITY(default)
32 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 // The standard doesn't define these operators, which is annoying.
// Convert a codecvt_mode value to the integer type underlying the
// enumeration, so that bitwise arithmetic can be performed on it.
static underlying_type<codecvt_mode>::type
to_integer(codecvt_mode m)
{
  using integer_type = underlying_type<codecvt_mode>::type;
  return static_cast<integer_type>(m);
}
39 static codecvt_mode
& operator&=(codecvt_mode
& m
, codecvt_mode n
)
40 { return m
= codecvt_mode(to_integer(m
) & to_integer(n
)); }
42 static codecvt_mode
& operator|=(codecvt_mode
& m
, codecvt_mode n
)
43 { return m
= codecvt_mode(to_integer(m
) | to_integer(n
)); }
// Bitwise complement of a codecvt_mode value, restricted to the flag bits
// that codecvt_mode defines.
//
// The standard specifies codecvt_mode as an unscoped enumeration with the
// enumerators consume_header, generate_header and little_endian and no
// fixed underlying type.  Complementing the whole underlying integer gives
// a value outside the range the enumeration can represent, and converting
// such a value back to the enumeration is undefined behaviour.  The result
// is therefore masked to the defined flags.  Callers that use the result
// to clear a flag (mode &= ~flag) observe the same outcome as before.
//
// Returns a value in which each defined flag is set iff it is clear in M.
static codecvt_mode
operator~(codecvt_mode m)
{
  using integer_type = underlying_type<codecvt_mode>::type;

  // Every bit that a valid codecvt_mode value may have set.
  const integer_type all_flags
    = static_cast<integer_type>(consume_header | generate_header
				| little_endian);

  const integer_type flipped = ~static_cast<integer_type>(m);
  return codecvt_mode(flipped & all_flags);
}
// Highest code point that can be stored in one UTF-16 code unit.
const char32_t max_single_utf16_unit = 0xFFFF;

// Default upper limit for code points accepted by the conversions.
const char32_t max_code_point = 0x10FFFF;

// Sentinel values returned by the code point readers in place of a real
// code point.  The readers depend on maxcode being smaller than
// incomplete_mb_character (the codecvt_utf* classes enforce this when
// they are constructed), so a sentinel always compares greater than
// maxcode.
const char32_t incomplete_mb_character = static_cast<char32_t>(-2);
const char32_t invalid_mb_sequence = static_cast<char32_t>(-1);
60 // Utility type for reading and writing code units of type Elem from
61 // a range defined by a pair of pointers.
62 template<typename Elem
, bool Aligned
= true>
69 range
& operator=(Elem e
)
75 // Read the next code unit.
76 Elem
operator*() const { return *next
; }
78 // Read the Nth code unit.
79 Elem
operator[](size_t n
) const { return next
[n
]; }
81 // Move to the next code unit.
88 // Move to the Nth code unit.
89 range
& operator+=(size_t n
)
95 // The number of code units remaining.
96 size_t size() const { return end
- next
; }
98 // The number of bytes remaining.
99 size_t nbytes() const { return (const char*)end
- (const char*)next
; }
102 // This specialization is used when accessing char16_t values through
103 // pointers to char, which might not be correctly aligned for char16_t.
104 template<typename Elem
>
105 struct range
<Elem
, false>
107 using value_type
= typename remove_const
<Elem
>::type
;
109 using char_pointer
= typename
110 conditional
<is_const
<Elem
>::value
, const char*, char*>::type
;
115 // Write a code unit.
116 range
& operator=(Elem e
)
118 memcpy(next
, &e
, sizeof(Elem
));
123 // Read the next code unit.
124 Elem
operator*() const
127 memcpy(&e
, next
, sizeof(Elem
));
131 // Read the Nth code unit.
132 Elem
operator[](size_t n
) const
135 memcpy(&e
, next
+ n
* sizeof(Elem
), sizeof(Elem
));
139 // Move to the next code unit.
142 next
+= sizeof(Elem
);
146 // Move to the Nth code unit.
147 range
& operator+=(size_t n
)
149 next
+= n
* sizeof(Elem
);
153 // The number of code units remaining.
154 size_t size() const { return nbytes() / sizeof(Elem
); }
156 // The number of bytes remaining.
157 size_t nbytes() const { return end
- next
; }
// A multibyte sequence may begin with a "header" consisting of a Byte
// Order Mark (BOM).  The byte patterns of the supported BOMs follow; the
// array bounds are deduced from the initializers.

// UTF-8 BOM.
const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF };

// UTF-16 BOM, most significant byte first.
const unsigned char utf16_bom[] = { 0xFE, 0xFF };

// UTF-16 BOM, least significant byte first.
const unsigned char utf16le_bom[] = { 0xFF, 0xFE };
165 // Write a BOM (space permitting).
166 template<typename C
, bool A
, size_t N
>
168 write_bom(range
<C
, A
>& to
, const unsigned char (&bom
)[N
])
170 static_assert( (N
/ sizeof(C
)) != 0, "" );
171 static_assert( (N
% sizeof(C
)) == 0, "" );
175 memcpy(to
.next
, bom
, N
);
176 to
+= (N
/ sizeof(C
));
180 // Try to read a BOM.
181 template<typename C
, bool A
, size_t N
>
183 read_bom(range
<C
, A
>& from
, const unsigned char (&bom
)[N
])
185 static_assert( (N
/ sizeof(C
)) != 0, "" );
186 static_assert( (N
% sizeof(C
)) == 0, "" );
188 if (from
.nbytes() >= N
&& !memcmp(from
.next
, bom
, N
))
190 from
+= (N
/ sizeof(C
));
196 // If generate_header is set in mode write out UTF-8 BOM.
198 write_utf8_bom(range
<char>& to
, codecvt_mode mode
)
200 if (mode
& generate_header
)
201 return write_bom(to
, utf8_bom
);
205 // If generate_header is set in mode write out the UTF-16 BOM indicated
206 // by whether little_endian is set in mode.
207 template<bool Aligned
>
209 write_utf16_bom(range
<char16_t
, Aligned
>& to
, codecvt_mode mode
)
211 if (mode
& generate_header
)
213 if (mode
& little_endian
)
214 return write_bom(to
, utf16le_bom
);
216 return write_bom(to
, utf16_bom
);
221 // If consume_header is set in mode update from.next to after any BOM.
223 read_utf8_bom(range
<const char>& from
, codecvt_mode mode
)
225 if (mode
& consume_header
)
226 read_bom(from
, utf8_bom
);
229 // If consume_header is not set in mode, no effects.
230 // Otherwise, if *from.next is a UTF-16 BOM increment from.next and then:
231 // - if the UTF-16BE BOM was found unset little_endian in mode, or
232 // - if the UTF-16LE BOM was found set little_endian in mode.
233 template<bool Aligned
>
235 read_utf16_bom(range
<const char16_t
, Aligned
>& from
, codecvt_mode
& mode
)
237 if (mode
& consume_header
)
239 if (read_bom(from
, utf16_bom
))
240 mode
&= ~little_endian
;
241 else if (read_bom(from
, utf16le_bom
))
242 mode
|= little_endian
;
246 // Read a codepoint from a UTF-8 multibyte sequence.
247 // Updates from.next if the codepoint is not greater than maxcode.
248 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
250 read_utf8_code_point(range
<const char>& from
, unsigned long maxcode
)
252 const size_t avail
= from
.size();
254 return incomplete_mb_character
;
255 unsigned char c1
= from
[0];
256 // https://en.wikipedia.org/wiki/UTF-8#Sample_code
262 else if (c1
< 0xC2) // continuation or overlong 2-byte sequence
263 return invalid_mb_sequence
;
264 else if (c1
< 0xE0) // 2-byte sequence
267 return incomplete_mb_character
;
268 unsigned char c2
= from
[1];
269 if ((c2
& 0xC0) != 0x80)
270 return invalid_mb_sequence
;
271 char32_t c
= (c1
<< 6) + c2
- 0x3080;
276 else if (c1
< 0xF0) // 3-byte sequence
279 return incomplete_mb_character
;
280 unsigned char c2
= from
[1];
281 if ((c2
& 0xC0) != 0x80)
282 return invalid_mb_sequence
;
283 if (c1
== 0xE0 && c2
< 0xA0) // overlong
284 return invalid_mb_sequence
;
285 unsigned char c3
= from
[2];
286 if ((c3
& 0xC0) != 0x80)
287 return invalid_mb_sequence
;
288 char32_t c
= (c1
<< 12) + (c2
<< 6) + c3
- 0xE2080;
293 else if (c1
< 0xF5) // 4-byte sequence
296 return incomplete_mb_character
;
297 unsigned char c2
= from
[1];
298 if ((c2
& 0xC0) != 0x80)
299 return invalid_mb_sequence
;
300 if (c1
== 0xF0 && c2
< 0x90) // overlong
301 return invalid_mb_sequence
;
302 if (c1
== 0xF4 && c2
>= 0x90) // > U+10FFFF
303 return invalid_mb_sequence
;
304 unsigned char c3
= from
[2];
305 if ((c3
& 0xC0) != 0x80)
306 return invalid_mb_sequence
;
307 unsigned char c4
= from
[3];
308 if ((c4
& 0xC0) != 0x80)
309 return invalid_mb_sequence
;
310 char32_t c
= (c1
<< 18) + (c2
<< 12) + (c3
<< 6) + c4
- 0x3C82080;
316 return invalid_mb_sequence
;
320 write_utf8_code_point(range
<char>& to
, char32_t code_point
)
322 if (code_point
< 0x80)
328 else if (code_point
<= 0x7FF)
332 to
= (code_point
>> 6) + 0xC0;
333 to
= (code_point
& 0x3F) + 0x80;
335 else if (code_point
<= 0xFFFF)
339 to
= (code_point
>> 12) + 0xE0;
340 to
= ((code_point
>> 6) & 0x3F) + 0x80;
341 to
= (code_point
& 0x3F) + 0x80;
343 else if (code_point
<= 0x10FFFF)
347 to
= (code_point
>> 18) + 0xF0;
348 to
= ((code_point
>> 12) & 0x3F) + 0x80;
349 to
= ((code_point
>> 6) & 0x3F) + 0x80;
350 to
= (code_point
& 0x3F) + 0x80;
358 adjust_byte_order(char16_t c
, codecvt_mode mode
)
360 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
361 return (mode
& little_endian
) ? __builtin_bswap16(c
) : c
;
363 return (mode
& little_endian
) ? c
: __builtin_bswap16(c
);
367 // Return true if c is a high-surrogate (aka leading) code point.
369 is_high_surrogate(char32_t c
)
371 return c
>= 0xD800 && c
<= 0xDBFF;
374 // Return true if c is a low-surrogate (aka trailing) code point.
376 is_low_surrogate(char32_t c
)
378 return c
>= 0xDC00 && c
<= 0xDFFF;
382 surrogate_pair_to_code_point(char32_t high
, char32_t low
)
384 return (high
<< 10) + low
- 0x35FDC00;
387 // Read a codepoint from a UTF-16 multibyte sequence.
388 // The sequence's endianness is indicated by (mode & little_endian).
389 // Updates from.next if the codepoint is not greater than maxcode.
390 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
391 template<bool Aligned
>
393 read_utf16_code_point(range
<const char16_t
, Aligned
>& from
,
394 unsigned long maxcode
, codecvt_mode mode
)
396 const size_t avail
= from
.size();
398 return incomplete_mb_character
;
400 char32_t c
= adjust_byte_order(from
[0], mode
);
401 if (is_high_surrogate(c
))
404 return incomplete_mb_character
;
405 const char16_t c2
= adjust_byte_order(from
[1], mode
);
406 if (is_low_surrogate(c2
))
408 c
= surrogate_pair_to_code_point(c
, c2
);
412 return invalid_mb_sequence
;
414 else if (is_low_surrogate(c
))
415 return invalid_mb_sequence
;
421 template<typename C
, bool A
>
423 write_utf16_code_point(range
<C
, A
>& to
, char32_t codepoint
, codecvt_mode mode
)
425 static_assert(sizeof(C
) >= 2, "a code unit must be at least 16-bit");
427 if (codepoint
<= max_single_utf16_unit
)
431 to
= adjust_byte_order(codepoint
, mode
);
435 else if (to
.size() > 1)
437 // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
438 const char32_t LEAD_OFFSET
= 0xD800 - (0x10000 >> 10);
439 char16_t lead
= LEAD_OFFSET
+ (codepoint
>> 10);
440 char16_t trail
= 0xDC00 + (codepoint
& 0x3FF);
441 to
= adjust_byte_order(lead
, mode
);
442 to
= adjust_byte_order(trail
, mode
);
450 ucs4_in(range
<const char>& from
, range
<char32_t
>& to
,
451 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {})
453 read_utf8_bom(from
, mode
);
454 while (from
.size() && to
.size())
456 const char32_t codepoint
= read_utf8_code_point(from
, maxcode
);
457 if (codepoint
== incomplete_mb_character
)
458 return codecvt_base::partial
;
459 if (codepoint
> maxcode
)
460 return codecvt_base::error
;
463 return from
.size() ? codecvt_base::partial
: codecvt_base::ok
;
468 ucs4_out(range
<const char32_t
>& from
, range
<char>& to
,
469 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {})
471 if (!write_utf8_bom(to
, mode
))
472 return codecvt_base::partial
;
475 const char32_t c
= from
[0];
477 return codecvt_base::error
;
478 if (!write_utf8_code_point(to
, c
))
479 return codecvt_base::partial
;
482 return codecvt_base::ok
;
487 ucs4_in(range
<const char16_t
, false>& from
, range
<char32_t
>& to
,
488 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {})
490 read_utf16_bom(from
, mode
);
491 while (from
.size() && to
.size())
493 const char32_t codepoint
= read_utf16_code_point(from
, maxcode
, mode
);
494 if (codepoint
== incomplete_mb_character
)
495 return codecvt_base::partial
;
496 if (codepoint
> maxcode
)
497 return codecvt_base::error
;
500 return from
.size() ? codecvt_base::partial
: codecvt_base::ok
;
505 ucs4_out(range
<const char32_t
>& from
, range
<char16_t
, false>& to
,
506 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {})
508 if (!write_utf16_bom(to
, mode
))
509 return codecvt_base::partial
;
512 const char32_t c
= from
[0];
514 return codecvt_base::error
;
515 if (!write_utf16_code_point(to
, c
, mode
))
516 return codecvt_base::partial
;
519 return codecvt_base::ok
;
// Selects whether a conversion treats its 16-bit data as UTF-16 or as
// UCS-2.
enum class surrogates
{
  allowed,	// UTF-16: surrogate pairs are processed
  disallowed	// UCS-2: surrogates are an error
};
525 // utf8 -> utf16 (or utf8 -> ucs2 if s == surrogates::disallowed)
528 utf16_in(range
<const char>& from
, range
<C
>& to
,
529 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {},
530 surrogates s
= surrogates::allowed
)
532 read_utf8_bom(from
, mode
);
533 while (from
.size() && to
.size())
536 const char32_t codepoint
= read_utf8_code_point(from
, maxcode
);
537 if (codepoint
== incomplete_mb_character
)
539 if (s
== surrogates::allowed
)
540 return codecvt_base::partial
;
542 return codecvt_base::error
; // No surrogates in UCS2
544 if (codepoint
> maxcode
)
545 return codecvt_base::error
;
546 if (!write_utf16_code_point(to
, codepoint
, mode
))
548 from
= orig
; // rewind to previous position
549 return codecvt_base::partial
;
552 return codecvt_base::ok
;
555 // utf16 -> utf8 (or ucs2 -> utf8 if s == surrogates::disallowed)
558 utf16_out(range
<const C
>& from
, range
<char>& to
,
559 unsigned long maxcode
= max_code_point
, codecvt_mode mode
= {},
560 surrogates s
= surrogates::allowed
)
562 if (!write_utf8_bom(to
, mode
))
563 return codecvt_base::partial
;
566 char32_t c
= from
[0];
568 if (is_high_surrogate(c
))
570 if (s
== surrogates::disallowed
)
571 return codecvt_base::error
; // No surrogates in UCS-2
574 return codecvt_base::ok
; // stop converting at this point
576 const char32_t c2
= from
[1];
577 if (is_low_surrogate(c2
))
579 c
= surrogate_pair_to_code_point(c
, c2
);
583 return codecvt_base::error
;
585 else if (is_low_surrogate(c
))
586 return codecvt_base::error
;
588 return codecvt_base::error
;
589 if (!write_utf8_code_point(to
, c
))
590 return codecvt_base::partial
;
593 return codecvt_base::ok
;
596 // return pos such that [begin,pos) is valid UTF-16 string no longer than max
598 utf16_span(const char* begin
, const char* end
, size_t max
,
599 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
601 range
<const char> from
{ begin
, end
};
602 read_utf8_bom(from
, mode
);
604 while (count
+1 < max
)
606 char32_t c
= read_utf8_code_point(from
, maxcode
);
609 else if (c
> max_single_utf16_unit
)
613 if (count
+1 == max
) // take one more character if it fits in a single unit
614 read_utf8_code_point(from
, std::min(max_single_utf16_unit
, maxcode
));
620 ucs2_in(range
<const char>& from
, range
<char16_t
>& to
,
621 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
623 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
624 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
625 return utf16_in(from
, to
, maxcode
, mode
, surrogates::disallowed
);
630 ucs2_out(range
<const char16_t
>& from
, range
<char>& to
,
631 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
633 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
634 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
635 return utf16_out(from
, to
, maxcode
, mode
, surrogates::disallowed
);
640 ucs2_out(range
<const char16_t
>& from
, range
<char16_t
, false>& to
,
641 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
643 if (!write_utf16_bom(to
, mode
))
644 return codecvt_base::partial
;
645 while (from
.size() && to
.size())
647 char16_t c
= from
[0];
648 if (is_high_surrogate(c
))
649 return codecvt_base::error
;
651 return codecvt_base::error
;
652 to
= adjust_byte_order(c
, mode
);
655 return from
.size() == 0 ? codecvt_base::ok
: codecvt_base::partial
;
660 ucs2_in(range
<const char16_t
, false>& from
, range
<char16_t
>& to
,
661 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
663 read_utf16_bom(from
, mode
);
664 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
665 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
666 while (from
.size() && to
.size())
668 const char32_t c
= read_utf16_code_point(from
, maxcode
, mode
);
669 if (c
== incomplete_mb_character
)
670 return codecvt_base::error
; // UCS-2 only supports single units.
672 return codecvt_base::error
;
675 return from
.size() == 0 ? codecvt_base::ok
: codecvt_base::partial
;
679 ucs2_span(range
<const char16_t
, false>& from
, size_t max
,
680 char32_t maxcode
, codecvt_mode mode
)
682 read_utf16_bom(from
, mode
);
683 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
684 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
686 while (max
-- && c
<= maxcode
)
687 c
= read_utf16_code_point(from
, maxcode
, mode
);
688 return reinterpret_cast<const char16_t
*>(from
.next
);
692 ucs2_span(const char* begin
, const char* end
, size_t max
,
693 char32_t maxcode
, codecvt_mode mode
)
695 range
<const char> from
{ begin
, end
};
696 read_utf8_bom(from
, mode
);
697 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
698 maxcode
= std::min(max_single_utf16_unit
, maxcode
);
700 while (max
-- && c
<= maxcode
)
701 c
= read_utf8_code_point(from
, maxcode
);
705 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
707 ucs4_span(const char* begin
, const char* end
, size_t max
,
708 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
710 range
<const char> from
{ begin
, end
};
711 read_utf8_bom(from
, mode
);
713 while (max
-- && c
<= maxcode
)
714 c
= read_utf8_code_point(from
, maxcode
);
718 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
720 ucs4_span(range
<const char16_t
, false>& from
, size_t max
,
721 char32_t maxcode
= max_code_point
, codecvt_mode mode
= {})
723 read_utf16_bom(from
, mode
);
725 while (max
-- && c
<= maxcode
)
726 c
= read_utf16_code_point(from
, maxcode
, mode
);
727 return reinterpret_cast<const char16_t
*>(from
.next
);
731 // Define members of codecvt<char16_t, char, mbstate_t> specialization.
732 // Converts from UTF-8 to UTF-16.
// Definition of the static locale::id data member `id` of the
// codecvt<char16_t, char, mbstate_t> specialization.
734 locale::id codecvt
<char16_t
, char, mbstate_t>::id
;
// Out-of-line destructor of codecvt<char16_t, char, mbstate_t>; the body
// is empty.
736 codecvt
<char16_t
, char, mbstate_t>::~codecvt() { }
739 codecvt
<char16_t
, char, mbstate_t>::
741 const intern_type
* __from
,
742 const intern_type
* __from_end
, const intern_type
*& __from_next
,
743 extern_type
* __to
, extern_type
* __to_end
,
744 extern_type
*& __to_next
) const
746 range
<const char16_t
> from
{ __from
, __from_end
};
747 range
<char> to
{ __to
, __to_end
};
748 auto res
= utf16_out(from
, to
);
749 __from_next
= from
.next
;
755 codecvt
<char16_t
, char, mbstate_t>::
756 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
757 extern_type
*& __to_next
) const
760 return noconv
; // we don't use mbstate_t for the unicode facets
764 codecvt
<char16_t
, char, mbstate_t>::
765 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
766 const extern_type
*& __from_next
,
767 intern_type
* __to
, intern_type
* __to_end
,
768 intern_type
*& __to_next
) const
770 range
<const char> from
{ __from
, __from_end
};
771 range
<char16_t
> to
{ __to
, __to_end
};
772 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
773 codecvt_mode mode
= {};
775 codecvt_mode mode
= little_endian
;
777 auto res
= utf16_in(from
, to
, max_code_point
, mode
);
778 __from_next
= from
.next
;
784 codecvt
<char16_t
, char, mbstate_t>::do_encoding() const throw()
785 { return 0; } // UTF-8 is not a fixed-width encoding
788 codecvt
<char16_t
, char, mbstate_t>::do_always_noconv() const throw()
792 codecvt
<char16_t
, char, mbstate_t>::
793 do_length(state_type
&, const extern_type
* __from
,
794 const extern_type
* __end
, size_t __max
) const
796 __end
= utf16_span(__from
, __end
, __max
);
797 return __end
- __from
;
801 codecvt
<char16_t
, char, mbstate_t>::do_max_length() const throw()
803 // A single character (one or two UTF-16 code units) requires
804 // up to four UTF-8 code units.
808 // Define members of codecvt<char32_t, char, mbstate_t> specialization.
809 // Converts from UTF-8 to UTF-32 (aka UCS-4).
// Definition of the static locale::id data member `id` of the
// codecvt<char32_t, char, mbstate_t> specialization.
811 locale::id codecvt
<char32_t
, char, mbstate_t>::id
;
// Out-of-line destructor of codecvt<char32_t, char, mbstate_t>; the body
// is empty.
813 codecvt
<char32_t
, char, mbstate_t>::~codecvt() { }
816 codecvt
<char32_t
, char, mbstate_t>::
817 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
818 const intern_type
*& __from_next
,
819 extern_type
* __to
, extern_type
* __to_end
,
820 extern_type
*& __to_next
) const
822 range
<const char32_t
> from
{ __from
, __from_end
};
823 range
<char> to
{ __to
, __to_end
};
824 auto res
= ucs4_out(from
, to
);
825 __from_next
= from
.next
;
831 codecvt
<char32_t
, char, mbstate_t>::
832 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
833 extern_type
*& __to_next
) const
840 codecvt
<char32_t
, char, mbstate_t>::
841 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
842 const extern_type
*& __from_next
,
843 intern_type
* __to
, intern_type
* __to_end
,
844 intern_type
*& __to_next
) const
846 range
<const char> from
{ __from
, __from_end
};
847 range
<char32_t
> to
{ __to
, __to_end
};
848 auto res
= ucs4_in(from
, to
);
849 __from_next
= from
.next
;
855 codecvt
<char32_t
, char, mbstate_t>::do_encoding() const throw()
856 { return 0; } // UTF-8 is not a fixed-width encoding
859 codecvt
<char32_t
, char, mbstate_t>::do_always_noconv() const throw()
863 codecvt
<char32_t
, char, mbstate_t>::
864 do_length(state_type
&, const extern_type
* __from
,
865 const extern_type
* __end
, size_t __max
) const
867 __end
= ucs4_span(__from
, __end
, __max
);
868 return __end
- __from
;
872 codecvt
<char32_t
, char, mbstate_t>::do_max_length() const throw()
874 // A single character (one UTF-32 code unit) requires
875 // up to 4 UTF-8 code units.
879 // Define members of codecvt_utf8<char16_t> base class implementation.
880 // Converts from UTF-8 to UCS-2.
// Out-of-line destructor of __codecvt_utf8_base<char16_t>; the body is
// empty.
882 __codecvt_utf8_base
<char16_t
>::~__codecvt_utf8_base() { }
885 __codecvt_utf8_base
<char16_t
>::
886 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
887 const intern_type
*& __from_next
,
888 extern_type
* __to
, extern_type
* __to_end
,
889 extern_type
*& __to_next
) const
891 range
<const char16_t
> from
{ __from
, __from_end
};
892 range
<char> to
{ __to
, __to_end
};
893 auto res
= ucs2_out(from
, to
, _M_maxcode
, _M_mode
);
894 __from_next
= from
.next
;
900 __codecvt_utf8_base
<char16_t
>::
901 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
902 extern_type
*& __to_next
) const
909 __codecvt_utf8_base
<char16_t
>::
910 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
911 const extern_type
*& __from_next
,
912 intern_type
* __to
, intern_type
* __to_end
,
913 intern_type
*& __to_next
) const
915 range
<const char> from
{ __from
, __from_end
};
916 range
<char16_t
> to
{ __to
, __to_end
};
917 codecvt_mode mode
= codecvt_mode(_M_mode
& (consume_header
|generate_header
));
918 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
919 mode
= codecvt_mode(mode
| little_endian
);
921 auto res
= ucs2_in(from
, to
, _M_maxcode
, mode
);
922 __from_next
= from
.next
;
928 __codecvt_utf8_base
<char16_t
>::do_encoding() const throw()
929 { return 0; } // UTF-8 is not a fixed-width encoding
932 __codecvt_utf8_base
<char16_t
>::do_always_noconv() const throw()
936 __codecvt_utf8_base
<char16_t
>::
937 do_length(state_type
&, const extern_type
* __from
,
938 const extern_type
* __end
, size_t __max
) const
940 __end
= ucs2_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
941 return __end
- __from
;
945 __codecvt_utf8_base
<char16_t
>::do_max_length() const throw()
947 // A single UCS-2 character requires up to three UTF-8 code units.
948 // (UCS-2 cannot represent characters that use four UTF-8 code units).
950 if (_M_mode
& consume_header
)
951 max
+= sizeof(utf8_bom
);
955 // Define members of codecvt_utf8<char32_t> base class implementation.
956 // Converts from UTF-8 to UTF-32 (aka UCS-4).
// Out-of-line destructor of __codecvt_utf8_base<char32_t>; the body is
// empty.
958 __codecvt_utf8_base
<char32_t
>::~__codecvt_utf8_base() { }
961 __codecvt_utf8_base
<char32_t
>::
962 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
963 const intern_type
*& __from_next
,
964 extern_type
* __to
, extern_type
* __to_end
,
965 extern_type
*& __to_next
) const
967 range
<const char32_t
> from
{ __from
, __from_end
};
968 range
<char> to
{ __to
, __to_end
};
969 auto res
= ucs4_out(from
, to
, _M_maxcode
, _M_mode
);
970 __from_next
= from
.next
;
976 __codecvt_utf8_base
<char32_t
>::
977 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
978 extern_type
*& __to_next
) const
985 __codecvt_utf8_base
<char32_t
>::
986 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
987 const extern_type
*& __from_next
,
988 intern_type
* __to
, intern_type
* __to_end
,
989 intern_type
*& __to_next
) const
991 range
<const char> from
{ __from
, __from_end
};
992 range
<char32_t
> to
{ __to
, __to_end
};
993 auto res
= ucs4_in(from
, to
, _M_maxcode
, _M_mode
);
994 __from_next
= from
.next
;
1000 __codecvt_utf8_base
<char32_t
>::do_encoding() const throw()
1001 { return 0; } // UTF-8 is not a fixed-width encoding
1004 __codecvt_utf8_base
<char32_t
>::do_always_noconv() const throw()
1008 __codecvt_utf8_base
<char32_t
>::
1009 do_length(state_type
&, const extern_type
* __from
,
1010 const extern_type
* __end
, size_t __max
) const
1012 __end
= ucs4_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1013 return __end
- __from
;
1017 __codecvt_utf8_base
<char32_t
>::do_max_length() const throw()
1019 // A single UCS-4 character requires up to four UTF-8 code units.
1021 if (_M_mode
& consume_header
)
1022 max
+= sizeof(utf8_bom
);
1026 #ifdef _GLIBCXX_USE_WCHAR_T
1028 #if __SIZEOF_WCHAR_T__ == 2
1029 static_assert(sizeof(wchar_t) == sizeof(char16_t
), "");
1030 #elif __SIZEOF_WCHAR_T__ == 4
1031 static_assert(sizeof(wchar_t) == sizeof(char32_t
), "");
1034 // Define members of codecvt_utf8<wchar_t> base class implementation.
1035 // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
// Out-of-line destructor of __codecvt_utf8_base<wchar_t>; the body is
// empty.
1037 __codecvt_utf8_base
<wchar_t>::~__codecvt_utf8_base() { }
1039 codecvt_base::result
1040 __codecvt_utf8_base
<wchar_t>::
1041 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1042 const intern_type
*& __from_next
,
1043 extern_type
* __to
, extern_type
* __to_end
,
1044 extern_type
*& __to_next
) const
1046 range
<char> to
{ __to
, __to_end
};
1047 #if __SIZEOF_WCHAR_T__ == 2
1048 range
<const char16_t
> from
{
1049 reinterpret_cast<const char16_t
*>(__from
),
1050 reinterpret_cast<const char16_t
*>(__from_end
)
1052 auto res
= ucs2_out(from
, to
, _M_maxcode
, _M_mode
);
1053 #elif __SIZEOF_WCHAR_T__ == 4
1054 range
<const char32_t
> from
{
1055 reinterpret_cast<const char32_t
*>(__from
),
1056 reinterpret_cast<const char32_t
*>(__from_end
)
1058 auto res
= ucs4_out(from
, to
, _M_maxcode
, _M_mode
);
1060 return codecvt_base::error
;
1062 __from_next
= reinterpret_cast<const wchar_t*>(from
.next
);
1063 __to_next
= to
.next
;
1067 codecvt_base::result
1068 __codecvt_utf8_base
<wchar_t>::
1069 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1070 extern_type
*& __to_next
) const
1076 codecvt_base::result
1077 __codecvt_utf8_base
<wchar_t>::
1078 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1079 const extern_type
*& __from_next
,
1080 intern_type
* __to
, intern_type
* __to_end
,
1081 intern_type
*& __to_next
) const
1083 range
<const char> from
{ __from
, __from_end
};
1084 #if __SIZEOF_WCHAR_T__ == 2
1086 reinterpret_cast<char16_t
*>(__to
),
1087 reinterpret_cast<char16_t
*>(__to_end
)
1089 auto res
= ucs2_in(from
, to
, _M_maxcode
, _M_mode
);
1090 #elif __SIZEOF_WCHAR_T__ == 4
1092 reinterpret_cast<char32_t
*>(__to
),
1093 reinterpret_cast<char32_t
*>(__to_end
)
1095 auto res
= ucs4_in(from
, to
, _M_maxcode
, _M_mode
);
1097 return codecvt_base::error
;
1099 __from_next
= from
.next
;
1100 __to_next
= reinterpret_cast<wchar_t*>(to
.next
);
1105 __codecvt_utf8_base
<wchar_t>::do_encoding() const throw()
1106 { return 0; } // UTF-8 is not a fixed-width encoding
1109 __codecvt_utf8_base
<wchar_t>::do_always_noconv() const throw()
1113 __codecvt_utf8_base
<wchar_t>::
1114 do_length(state_type
&, const extern_type
* __from
,
1115 const extern_type
* __end
, size_t __max
) const
1117 #if __SIZEOF_WCHAR_T__ == 2
1118 __end
= ucs2_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1119 #elif __SIZEOF_WCHAR_T__ == 4
1120 __end
= ucs4_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1124 return __end
- __from
;
1128 __codecvt_utf8_base
<wchar_t>::do_max_length() const throw()
1130 #if __SIZEOF_WCHAR_T__ == 2
1131 int max
= 3; // See __codecvt_utf8_base<char16_t>::do_max_length()
1133 int max
= 4; // See __codecvt_utf8_base<char32_t>::do_max_length()
1135 if (_M_mode
& consume_header
)
1136 max
+= sizeof(utf8_bom
);
1141 // Define members of codecvt_utf16<char16_t> base class implementation.
1142 // Converts from UTF-16 to UCS-2.
// Out-of-line destructor of __codecvt_utf16_base<char16_t>; the body is
// empty.
1144 __codecvt_utf16_base
<char16_t
>::~__codecvt_utf16_base() { }
1146 codecvt_base::result
1147 __codecvt_utf16_base
<char16_t
>::
1148 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1149 const intern_type
*& __from_next
,
1150 extern_type
* __to
, extern_type
* __to_end
,
1151 extern_type
*& __to_next
) const
1153 range
<const char16_t
> from
{ __from
, __from_end
};
1154 range
<char16_t
, false> to
{ __to
, __to_end
};
1155 auto res
= ucs2_out(from
, to
, _M_maxcode
, _M_mode
);
1156 __from_next
= from
.next
;
1157 __to_next
= reinterpret_cast<char*>(to
.next
);
1161 codecvt_base::result
1162 __codecvt_utf16_base
<char16_t
>::
1163 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1164 extern_type
*& __to_next
) const
1170 codecvt_base::result
1171 __codecvt_utf16_base
<char16_t
>::
1172 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1173 const extern_type
*& __from_next
,
1174 intern_type
* __to
, intern_type
* __to_end
,
1175 intern_type
*& __to_next
) const
1177 range
<const char16_t
, false> from
{ __from
, __from_end
};
1178 range
<char16_t
> to
{ __to
, __to_end
};
1179 auto res
= ucs2_in(from
, to
, _M_maxcode
, _M_mode
);
1180 __from_next
= reinterpret_cast<const char*>(from
.next
);
1181 __to_next
= to
.next
;
1182 if (res
== codecvt_base::ok
&& __from_next
!= __from_end
)
1183 res
= codecvt_base::error
;
1188 __codecvt_utf16_base
<char16_t
>::do_encoding() const throw()
1189 { return 0; } // UTF-16 is not a fixed-width encoding
1192 __codecvt_utf16_base
<char16_t
>::do_always_noconv() const throw()
1196 __codecvt_utf16_base
<char16_t
>::
1197 do_length(state_type
&, const extern_type
* __from
,
1198 const extern_type
* __end
, size_t __max
) const
1200 range
<const char16_t
, false> from
{ __from
, __end
};
1201 const char16_t
* next
= ucs2_span(from
, __max
, _M_maxcode
, _M_mode
);
1202 return reinterpret_cast<const char*>(next
) - __from
;
1206 __codecvt_utf16_base
<char16_t
>::do_max_length() const throw()
1208 // A single UCS-2 character requires one UTF-16 code unit (so two chars).
1209 // (UCS-2 cannot represent characters that use multiple UTF-16 code units).
1211 if (_M_mode
& consume_header
)
1212 max
+= sizeof(utf16_bom
);
1216 // Define members of codecvt_utf16<char32_t> base class implementation.
1217 // Converts from UTF-16 to UTF-32 (aka UCS-4).
// Out-of-line destructor of __codecvt_utf16_base<char32_t>; the body is
// empty.
1219 __codecvt_utf16_base
<char32_t
>::~__codecvt_utf16_base() { }
// __codecvt_utf16_base<char32_t>::do_out
// Converts the char32_t values in [__from, __from_end) into external chars
// in [__to, __to_end) by delegating to ucs4_out(), passing this facet's
// _M_maxcode and _M_mode. The output buffer is viewed as a range of
// char16_t. The state_type argument is unnamed and unused. __from_next and
// __to_next receive the positions that the two ranges reached.
1221 codecvt_base::result
1222 __codecvt_utf16_base
<char32_t
>::
1223 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1224 const intern_type
*& __from_next
,
1225 extern_type
* __to
, extern_type
* __to_end
,
1226 extern_type
*& __to_next
) const
1228 range
<const char32_t
> from
{ __from
, __from_end
};
1229 range
<char16_t
, false> to
{ __to
, __to_end
};
1230 auto res
= ucs4_out(from
, to
, _M_maxcode
, _M_mode
);
1231 __from_next
= from
.next
;
// The char16_t output position is handed back as a char pointer.
1232 __to_next
= reinterpret_cast<char*>(to
.next
);
1236 codecvt_base::result
1237 __codecvt_utf16_base
<char32_t
>::
1238 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1239 extern_type
*& __to_next
) const
// __codecvt_utf16_base<char32_t>::do_in
// Converts the external chars in [__from, __from_end) into char32_t values
// written to [__to, __to_end) by delegating to ucs4_in(), passing this
// facet's _M_maxcode and _M_mode. The state_type argument is unnamed and
// unused. __from_next and __to_next receive the positions that the two
// ranges reached.
1245 codecvt_base::result
1246 __codecvt_utf16_base
<char32_t
>::
1247 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1248 const extern_type
*& __from_next
,
1249 intern_type
* __to
, intern_type
* __to_end
,
1250 intern_type
*& __to_next
) const
// The external chars are viewed as a range of const char16_t.
1252 range
<const char16_t
, false> from
{ __from
, __from_end
};
1253 range
<char32_t
> to
{ __to
, __to_end
};
1254 auto res
= ucs4_in(from
, to
, _M_maxcode
, _M_mode
);
1255 __from_next
= reinterpret_cast<const char*>(from
.next
);
1256 __to_next
= to
.next
;
// An "ok" result that still leaves input unconsumed is reported as error.
// NOTE(review): presumably the leftover is a trailing odd byte that cannot
// form a 16-bit unit -- confirm against ucs4_in.
1257 if (res
== codecvt_base::ok
&& __from_next
!= __from_end
)
1258 res
= codecvt_base::error
;
// __codecvt_utf16_base<char32_t>::do_encoding
// Unconditionally reports 0; the trailing comment on the body gives the
// rationale.
1263 __codecvt_utf16_base
<char32_t
>::do_encoding() const throw()
1264 { return 0; } // UTF-16 is not a fixed-width encoding
1267 __codecvt_utf16_base
<char32_t
>::do_always_noconv() const throw()
// __codecvt_utf16_base<char32_t>::do_length
// Views [__from, __end) as a range of const char16_t, asks ucs4_span() for
// a position in it (passing __max, _M_maxcode and _M_mode), and returns the
// distance in chars from __from to that position. The state_type argument
// is unnamed and unused.
1271 __codecvt_utf16_base
<char32_t
>::
1272 do_length(state_type
&, const extern_type
* __from
,
1273 const extern_type
* __end
, size_t __max
) const
1275 range
<const char16_t
, false> from
{ __from
, __end
};
1276 const char16_t
* next
= ucs4_span(from
, __max
, _M_maxcode
, _M_mode
);
// Convert the char16_t position back to a char pointer before subtracting.
1277 return reinterpret_cast<const char*>(next
) - __from
;
// __codecvt_utf16_base<char32_t>::do_max_length
// When consume_header is set in _M_mode, sizeof(utf16_bom) is added on top
// of the base value described by the comments below.
// NOTE(review): the declaration of `max` is not visible in this excerpt.
1281 __codecvt_utf16_base
<char32_t
>::do_max_length() const throw()
1283 // A single UCS-4 character requires one or two UTF-16 code units
1284 // (so up to four chars).
1286 if (_M_mode
& consume_header
)
1287 max
+= sizeof(utf16_bom
);
1291 #ifdef _GLIBCXX_USE_WCHAR_T
1292 // Define members of codecvt_utf16<wchar_t> base class implementation.
1293 // Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
// Out-of-line destructor definition for __codecvt_utf16_base<wchar_t>;
// the body is empty.
1295 __codecvt_utf16_base
<wchar_t>::~__codecvt_utf16_base() { }
// __codecvt_utf16_base<wchar_t>::do_out
// Converts the wchar_t values in [__from, __from_end) into external chars
// in [__to, __to_end), with the output buffer viewed as a range of
// char16_t. The conversion routine is chosen at preprocessing time from
// __SIZEOF_WCHAR_T__: a 2-byte wchar_t is reinterpreted as char16_t and
// passed to ucs2_out(); a 4-byte wchar_t is reinterpreted as char32_t and
// passed to ucs4_out(). The state_type argument is unnamed and unused.
1297 codecvt_base::result
1298 __codecvt_utf16_base
<wchar_t>::
1299 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1300 const intern_type
*& __from_next
,
1301 extern_type
* __to
, extern_type
* __to_end
,
1302 extern_type
*& __to_next
) const
1304 range
<char16_t
, false> to
{ __to
, __to_end
};
1305 #if __SIZEOF_WCHAR_T__ == 2
1306 range
<const char16_t
> from
{
1307 reinterpret_cast<const char16_t
*>(__from
),
1308 reinterpret_cast<const char16_t
*>(__from_end
),
1310 auto res
= ucs2_out(from
, to
, _M_maxcode
, _M_mode
);
1311 #elif __SIZEOF_WCHAR_T__ == 4
1312 range
<const char32_t
> from
{
1313 reinterpret_cast<const char32_t
*>(__from
),
1314 reinterpret_cast<const char32_t
*>(__from_end
),
1316 auto res
= ucs4_out(from
, to
, _M_maxcode
, _M_mode
);
// NOTE(review): presumably the fallback for any other wchar_t size; the
// guarding #else / #endif directives are not visible in this excerpt.
1318 return codecvt_base::error
;
// Hand the reached positions back in the caller's pointer types.
1320 __from_next
= reinterpret_cast<const wchar_t*>(from
.next
);
1321 __to_next
= reinterpret_cast<char*>(to
.next
);
1325 codecvt_base::result
1326 __codecvt_utf16_base
<wchar_t>::
1327 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1328 extern_type
*& __to_next
) const
// __codecvt_utf16_base<wchar_t>::do_in
// Converts the external chars in [__from, __from_end), viewed as a range of
// const char16_t, into wchar_t values in [__to, __to_end). The conversion
// routine is chosen at preprocessing time from __SIZEOF_WCHAR_T__: for a
// 2-byte wchar_t the output pointers are reinterpreted as char16_t* and
// ucs2_in() is used; for a 4-byte wchar_t they are reinterpreted as
// char32_t* and ucs4_in() is used. The state_type argument is unnamed and
// unused.
// NOTE(review): the declarations of the `to` range are not visible in this
// excerpt; only their initializer arguments are.
1334 codecvt_base::result
1335 __codecvt_utf16_base
<wchar_t>::
1336 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1337 const extern_type
*& __from_next
,
1338 intern_type
* __to
, intern_type
* __to_end
,
1339 intern_type
*& __to_next
) const
1341 range
<const char16_t
, false> from
{ __from
, __from_end
};
1342 #if __SIZEOF_WCHAR_T__ == 2
1344 reinterpret_cast<char16_t
*>(__to
),
1345 reinterpret_cast<char16_t
*>(__to_end
),
1347 auto res
= ucs2_in(from
, to
, _M_maxcode
, _M_mode
);
1348 #elif __SIZEOF_WCHAR_T__ == 4
1350 reinterpret_cast<char32_t
*>(__to
),
1351 reinterpret_cast<char32_t
*>(__to_end
),
1353 auto res
= ucs4_in(from
, to
, _M_maxcode
, _M_mode
);
// NOTE(review): presumably the fallback for any other wchar_t size; the
// guarding #else / #endif directives are not visible in this excerpt.
1355 return codecvt_base::error
;
// Hand the reached positions back in the caller's pointer types.
1357 __from_next
= reinterpret_cast<const char*>(from
.next
);
1358 __to_next
= reinterpret_cast<wchar_t*>(to
.next
);
// An "ok" result that still leaves input unconsumed is reported as error.
1359 if (res
== codecvt_base::ok
&& __from_next
!= __from_end
)
1360 res
= codecvt_base::error
;
// __codecvt_utf16_base<wchar_t>::do_encoding
// Unconditionally reports 0; the trailing comment on the body gives the
// rationale.
1365 __codecvt_utf16_base
<wchar_t>::do_encoding() const throw()
1366 { return 0; } // UTF-16 is not a fixed-width encoding
1369 __codecvt_utf16_base
<wchar_t>::do_always_noconv() const throw()
// __codecvt_utf16_base<wchar_t>::do_length
// Views [__from, __end) as a range of const char16_t and obtains a position
// in it from ucs2_span() when wchar_t is 2 bytes or ucs4_span() when it is
// 4 bytes (both receive __max, _M_maxcode and _M_mode). Returns the
// distance in chars from __from to that position. The state_type argument
// is unnamed and unused.
1373 __codecvt_utf16_base
<wchar_t>::
1374 do_length(state_type
&, const extern_type
* __from
,
1375 const extern_type
* __end
, size_t __max
) const
1377 range
<const char16_t
, false> from
{ __from
, __end
};
1378 #if __SIZEOF_WCHAR_T__ == 2
1379 const char16_t
* next
= ucs2_span(from
, __max
, _M_maxcode
, _M_mode
);
1380 #elif __SIZEOF_WCHAR_T__ == 4
1381 const char16_t
* next
= ucs4_span(from
, __max
, _M_maxcode
, _M_mode
);
// Convert the char16_t position back to a char pointer before subtracting.
1383 return reinterpret_cast<const char*>(next
) - __from
;
// __codecvt_utf16_base<wchar_t>::do_max_length
// Starts from 2 when wchar_t is 2 bytes and from 4 in the alternative
// branch, then adds sizeof(utf16_bom) when consume_header is set in
// _M_mode.
// NOTE(review): the #else / #endif separating the two initialisations are
// not visible in this excerpt.
1387 __codecvt_utf16_base
<wchar_t>::do_max_length() const throw()
1389 #if __SIZEOF_WCHAR_T__ == 2
1390 int max
= 2; // See __codecvt_utf16_base<char16_t>::do_max_length()
1392 int max
= 4; // See __codecvt_utf16_base<char32_t>::do_max_length()
1394 if (_M_mode
& consume_header
)
1395 max
+= sizeof(utf16_bom
);
1400 // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1401 // Converts from UTF-8 to UTF-16.
// Out-of-line destructor definition for __codecvt_utf8_utf16_base<char16_t>;
// the body is empty.
1403 __codecvt_utf8_utf16_base
<char16_t
>::~__codecvt_utf8_utf16_base() { }
// __codecvt_utf8_utf16_base<char16_t>::do_out
// Converts the char16_t values in [__from, __from_end) into external chars
// in [__to, __to_end) by delegating to utf16_out(), passing this facet's
// _M_maxcode and _M_mode. The state_type argument is unnamed and unused.
// __from_next and __to_next receive the positions that the two ranges
// reached.
1405 codecvt_base::result
1406 __codecvt_utf8_utf16_base
<char16_t
>::
1407 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1408 const intern_type
*& __from_next
,
1409 extern_type
* __to
, extern_type
* __to_end
,
1410 extern_type
*& __to_next
) const
1412 range
<const char16_t
> from
{ __from
, __from_end
};
1413 range
<char> to
{ __to
, __to_end
};
1414 auto res
= utf16_out(from
, to
, _M_maxcode
, _M_mode
);
1415 __from_next
= from
.next
;
1416 __to_next
= to
.next
;
1420 codecvt_base::result
1421 __codecvt_utf8_utf16_base
<char16_t
>::
1422 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1423 extern_type
*& __to_next
) const
// __codecvt_utf8_utf16_base<char16_t>::do_in
// Converts the external chars in [__from, __from_end) into char16_t values
// in [__to, __to_end) by delegating to utf16_in() with _M_maxcode and a
// locally derived mode (see below). The state_type argument is unnamed and
// unused. __from_next and __to_next receive the positions that the two
// ranges reached.
1429 codecvt_base::result
1430 __codecvt_utf8_utf16_base
<char16_t
>::
1431 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1432 const extern_type
*& __from_next
,
1433 intern_type
* __to
, intern_type
* __to_end
,
1434 intern_type
*& __to_next
) const
1436 range
<const char> from
{ __from
, __from_end
};
1437 range
<char16_t
> to
{ __to
, __to_end
};
// Only the consume_header and generate_header bits of _M_mode are kept;
// little_endian is then OR-ed in when the target is not big-endian.
1438 codecvt_mode mode
= codecvt_mode(_M_mode
& (consume_header
|generate_header
));
1439 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1440 mode
= codecvt_mode(mode
| little_endian
);
1442 auto res
= utf16_in(from
, to
, _M_maxcode
, mode
);
1443 __from_next
= from
.next
;
1444 __to_next
= to
.next
;
// __codecvt_utf8_utf16_base<char16_t>::do_encoding
// Unconditionally reports 0; the trailing comment on the body gives the
// rationale.
1449 __codecvt_utf8_utf16_base
<char16_t
>::do_encoding() const throw()
1450 { return 0; } // UTF-8 is not a fixed-width encoding
1453 __codecvt_utf8_utf16_base
<char16_t
>::do_always_noconv() const throw()
// __codecvt_utf8_utf16_base<char16_t>::do_length
// Reassigns the local __end to the pointer returned by utf16_span() (given
// the input bounds, __max, _M_maxcode and the unmodified _M_mode) and
// returns its distance from __from. The state_type argument is unnamed and
// unused.
1457 __codecvt_utf8_utf16_base
<char16_t
>::
1458 do_length(state_type
&, const extern_type
* __from
,
1459 const extern_type
* __end
, size_t __max
) const
1461 __end
= utf16_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1462 return __end
- __from
;
// __codecvt_utf8_utf16_base<char16_t>::do_max_length
// When consume_header is set in _M_mode, sizeof(utf8_bom) is added on top
// of the base value described by the comments below.
// NOTE(review): the declaration of `max` is not visible in this excerpt.
1466 __codecvt_utf8_utf16_base
<char16_t
>::do_max_length() const throw()
1468 // A single character can be 1 or 2 UTF-16 code units,
1469 // requiring up to 4 UTF-8 code units.
1471 if (_M_mode
& consume_header
)
1472 max
+= sizeof(utf8_bom
);
1476 // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
1477 // Converts from UTF-8 to UTF-16.
// Out-of-line destructor definition for __codecvt_utf8_utf16_base<char32_t>;
// the body is empty.
1479 __codecvt_utf8_utf16_base
<char32_t
>::~__codecvt_utf8_utf16_base() { }
// __codecvt_utf8_utf16_base<char32_t>::do_out
// Converts the char32_t values in [__from, __from_end) into external chars
// in [__to, __to_end) by delegating to utf16_out(), passing this facet's
// _M_maxcode and _M_mode. The state_type argument is unnamed and unused.
// __from_next and __to_next receive the positions that the two ranges
// reached.
1481 codecvt_base::result
1482 __codecvt_utf8_utf16_base
<char32_t
>::
1483 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1484 const intern_type
*& __from_next
,
1485 extern_type
* __to
, extern_type
* __to_end
,
1486 extern_type
*& __to_next
) const
1488 range
<const char32_t
> from
{ __from
, __from_end
};
1489 range
<char> to
{ __to
, __to_end
};
1490 auto res
= utf16_out(from
, to
, _M_maxcode
, _M_mode
);
1491 __from_next
= from
.next
;
1492 __to_next
= to
.next
;
1496 codecvt_base::result
1497 __codecvt_utf8_utf16_base
<char32_t
>::
1498 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1499 extern_type
*& __to_next
) const
// __codecvt_utf8_utf16_base<char32_t>::do_in
// Converts the external chars in [__from, __from_end) into char32_t values
// in [__to, __to_end) by delegating to utf16_in() with _M_maxcode and a
// locally derived mode (see below). The state_type argument is unnamed and
// unused. __from_next and __to_next receive the positions that the two
// ranges reached.
1505 codecvt_base::result
1506 __codecvt_utf8_utf16_base
<char32_t
>::
1507 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1508 const extern_type
*& __from_next
,
1509 intern_type
* __to
, intern_type
* __to_end
,
1510 intern_type
*& __to_next
) const
1512 range
<const char> from
{ __from
, __from_end
};
1513 range
<char32_t
> to
{ __to
, __to_end
};
// Only the consume_header and generate_header bits of _M_mode are kept;
// little_endian is then OR-ed in when the target is not big-endian.
1514 codecvt_mode mode
= codecvt_mode(_M_mode
& (consume_header
|generate_header
));
1515 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1516 mode
= codecvt_mode(mode
| little_endian
);
1518 auto res
= utf16_in(from
, to
, _M_maxcode
, mode
);
1519 __from_next
= from
.next
;
1520 __to_next
= to
.next
;
// __codecvt_utf8_utf16_base<char32_t>::do_encoding
// Unconditionally reports 0; the trailing comment on the body gives the
// rationale.
1525 __codecvt_utf8_utf16_base
<char32_t
>::do_encoding() const throw()
1526 { return 0; } // UTF-8 is not a fixed-width encoding
1529 __codecvt_utf8_utf16_base
<char32_t
>::do_always_noconv() const throw()
// __codecvt_utf8_utf16_base<char32_t>::do_length
// Reassigns the local __end to the pointer returned by utf16_span() (given
// the input bounds, __max, _M_maxcode and the unmodified _M_mode) and
// returns its distance from __from. The state_type argument is unnamed and
// unused.
1533 __codecvt_utf8_utf16_base
<char32_t
>::
1534 do_length(state_type
&, const extern_type
* __from
,
1535 const extern_type
* __end
, size_t __max
) const
1537 __end
= utf16_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1538 return __end
- __from
;
// __codecvt_utf8_utf16_base<char32_t>::do_max_length
// When consume_header is set in _M_mode, sizeof(utf8_bom) is added on top
// of the base value described by the comments below.
// NOTE(review): the declaration of `max` is not visible in this excerpt.
1542 __codecvt_utf8_utf16_base
<char32_t
>::do_max_length() const throw()
1544 // A single character can be 1 or 2 UTF-16 code units,
1545 // requiring up to 4 UTF-8 code units.
1547 if (_M_mode
& consume_header
)
1548 max
+= sizeof(utf8_bom
);
1552 #ifdef _GLIBCXX_USE_WCHAR_T
1553 // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
1554 // Converts from UTF-8 to UTF-16.
// Out-of-line destructor definition for __codecvt_utf8_utf16_base<wchar_t>;
// the body is empty.
1556 __codecvt_utf8_utf16_base
<wchar_t>::~__codecvt_utf8_utf16_base() { }
// __codecvt_utf8_utf16_base<wchar_t>::do_out
// Converts the wchar_t values in [__from, __from_end) into external chars
// in [__to, __to_end) by delegating to utf16_out(), passing this facet's
// _M_maxcode and _M_mode. Unlike the __codecvt_utf16_base<wchar_t> members,
// no reinterpret_cast or sizeof(wchar_t) switch is used here: the range is
// built directly over const wchar_t. The state_type argument is unnamed and
// unused.
1558 codecvt_base::result
1559 __codecvt_utf8_utf16_base
<wchar_t>::
1560 do_out(state_type
&, const intern_type
* __from
, const intern_type
* __from_end
,
1561 const intern_type
*& __from_next
,
1562 extern_type
* __to
, extern_type
* __to_end
,
1563 extern_type
*& __to_next
) const
1565 range
<const wchar_t> from
{ __from
, __from_end
};
1566 range
<char> to
{ __to
, __to_end
};
1567 auto res
= utf16_out(from
, to
, _M_maxcode
, _M_mode
);
1568 __from_next
= from
.next
;
1569 __to_next
= to
.next
;
1573 codecvt_base::result
1574 __codecvt_utf8_utf16_base
<wchar_t>::
1575 do_unshift(state_type
&, extern_type
* __to
, extern_type
*,
1576 extern_type
*& __to_next
) const
// __codecvt_utf8_utf16_base<wchar_t>::do_in
// Converts the external chars in [__from, __from_end) into wchar_t values
// in [__to, __to_end) by delegating to utf16_in() with _M_maxcode and a
// locally derived mode (see below). The state_type argument is unnamed and
// unused. __from_next and __to_next receive the positions that the two
// ranges reached.
1582 codecvt_base::result
1583 __codecvt_utf8_utf16_base
<wchar_t>::
1584 do_in(state_type
&, const extern_type
* __from
, const extern_type
* __from_end
,
1585 const extern_type
*& __from_next
,
1586 intern_type
* __to
, intern_type
* __to_end
,
1587 intern_type
*& __to_next
) const
1589 range
<const char> from
{ __from
, __from_end
};
1590 range
<wchar_t> to
{ __to
, __to_end
};
// Only the consume_header and generate_header bits of _M_mode are kept;
// little_endian is then OR-ed in when the target is not big-endian.
1591 codecvt_mode mode
= codecvt_mode(_M_mode
& (consume_header
|generate_header
));
1592 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1593 mode
= codecvt_mode(mode
| little_endian
);
1595 auto res
= utf16_in(from
, to
, _M_maxcode
, mode
);
1596 __from_next
= from
.next
;
1597 __to_next
= to
.next
;
// __codecvt_utf8_utf16_base<wchar_t>::do_encoding
// Unconditionally reports 0; the trailing comment on the body gives the
// rationale.
1602 __codecvt_utf8_utf16_base
<wchar_t>::do_encoding() const throw()
1603 { return 0; } // UTF-8 is not a fixed-width encoding
1606 __codecvt_utf8_utf16_base
<wchar_t>::do_always_noconv() const throw()
// __codecvt_utf8_utf16_base<wchar_t>::do_length
// Reassigns the local __end to the pointer returned by utf16_span() (given
// the input bounds, __max, _M_maxcode and the unmodified _M_mode) and
// returns its distance from __from. The state_type argument is unnamed and
// unused.
1610 __codecvt_utf8_utf16_base
<wchar_t>::
1611 do_length(state_type
&, const extern_type
* __from
,
1612 const extern_type
* __end
, size_t __max
) const
1614 __end
= utf16_span(__from
, __end
, __max
, _M_maxcode
, _M_mode
);
1615 return __end
- __from
;
// __codecvt_utf8_utf16_base<wchar_t>::do_max_length
// When consume_header is set in _M_mode, sizeof(utf8_bom) is added on top
// of the base value described by the comments below.
// NOTE(review): the declaration of `max` is not visible in this excerpt.
1619 __codecvt_utf8_utf16_base
<wchar_t>::do_max_length() const throw()
1621 // A single character can be 1 or 2 UTF-16 code units,
1622 // requiring up to 4 UTF-8 code units.
1624 if (_M_mode
& consume_header
)
1625 max
+= sizeof(utf8_bom
);
// Explicit instantiations for the char16_t and char32_t internal types
// (external type char, state mbstate_t): two of __codecvt_abstract_base and
// two of codecvt_byname.
// NOTE(review): the `inline template class ...` spelling on the first two
// is not ISO C++; it looks like GCC's explicit-instantiation extension --
// confirm against the GCC manual before porting to another compiler.
1630 inline template class __codecvt_abstract_base
<char16_t
, char, mbstate_t>;
1631 inline template class __codecvt_abstract_base
<char32_t
, char, mbstate_t>;
1632 template class codecvt_byname
<char16_t
, char, mbstate_t>;
1633 template class codecvt_byname
<char32_t
, char, mbstate_t>;
1635 _GLIBCXX_END_NAMESPACE_VERSION
1637 #endif // _GLIBCXX_USE_C99_STDINT_TR1