go/types: implement SizesFor for gccgo
[official-gcc.git] / libstdc++-v3 / src / c++11 / codecvt.cc
blob259de80775844abfd1e17990eb95029b969cb6eb
1 // Locale support (codecvt) -*- C++ -*-
3 // Copyright (C) 2015-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 #include <codecvt>
26 #include <cstring> // std::memcpy, std::memcmp
27 #include <bits/stl_algobase.h> // std::min
29 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
30 namespace std _GLIBCXX_VISIBILITY(default)
32 _GLIBCXX_BEGIN_NAMESPACE_VERSION
// The standard doesn't define these operators for codecvt_mode, which is
// annoying, so the ones this file needs are provided here.

// Convert a codecvt_mode to a value of its underlying integer type.
static underlying_type<codecvt_mode>::type
to_integer(codecvt_mode m)
{ return static_cast<underlying_type<codecvt_mode>::type>(m); }

// Keep only the bits of m that are also set in n.
static codecvt_mode& operator&=(codecvt_mode& m, codecvt_mode n)
{ return m = codecvt_mode(to_integer(m) & to_integer(n)); }

// Add the bits of n to m.
static codecvt_mode& operator|=(codecvt_mode& m, codecvt_mode n)
{ return m = codecvt_mode(to_integer(m) | to_integer(n)); }

// Bitwise complement of m, used to build masks such as ~little_endian.
static codecvt_mode operator~(codecvt_mode m)
{ return codecvt_mode(~to_integer(m)); }
48 namespace
// Largest code point that fits in a single UTF-16 code unit.
constexpr char32_t max_single_utf16_unit = 0xFFFF;

// Default upper bound for accepted code points (U+10FFFF).
constexpr char32_t max_code_point = 0x10FFFF;

// Sentinel results of the read_*_code_point functions.
// The functions below rely on maxcode < incomplete_mb_character
// (which is enforced by the codecvt_utf* classes on construction).
constexpr char32_t incomplete_mb_character = char32_t(-2);
constexpr char32_t invalid_mb_sequence = char32_t(-1);
// Utility type for reading and writing code units of type Elem from
// a range defined by a pair of pointers.
template<typename Elem, bool Aligned = true>
  struct range
  {
    Elem* next; // Current position.
    Elem* end;  // One past the last code unit.

    // Write a code unit at the current position and step past it.
    range& operator=(Elem e)
    {
      *next++ = e;
      return *this;
    }

    // Read the next code unit.
    Elem operator*() const { return *next; }

    // Read the Nth code unit.
    Elem operator[](size_t n) const { return next[n]; }

    // Move to the next code unit.
    range& operator++()
    {
      ++next;
      return *this;
    }

    // Move to the Nth code unit.
    range& operator+=(size_t n)
    {
      next += n;
      return *this;
    }

    // The number of code units remaining.
    size_t size() const { return end - next; }

    // The number of bytes remaining.
    size_t nbytes() const
    {
      return reinterpret_cast<const char*>(end)
        - reinterpret_cast<const char*>(next);
    }
  };
102 // This specialization is used when accessing char16_t values through
103 // pointers to char, which might not be correctly aligned for char16_t.
104 template<typename Elem>
105 struct range<Elem, false>
107 using value_type = typename remove_const<Elem>::type;
109 using char_pointer = typename
110 conditional<is_const<Elem>::value, const char*, char*>::type;
112 char_pointer next;
113 char_pointer end;
115 // Write a code unit.
116 range& operator=(Elem e)
118 memcpy(next, &e, sizeof(Elem));
119 ++*this;
120 return *this;
123 // Read the next code unit.
124 Elem operator*() const
126 value_type e;
127 memcpy(&e, next, sizeof(Elem));
128 return e;
131 // Read the Nth code unit.
132 Elem operator[](size_t n) const
134 value_type e;
135 memcpy(&e, next + n * sizeof(Elem), sizeof(Elem));
136 return e;
139 // Move to the next code unit.
140 range& operator++()
142 next += sizeof(Elem);
143 return *this;
146 // Move to the Nth code unit.
147 range& operator+=(size_t n)
149 next += n * sizeof(Elem);
150 return *this;
153 // The number of code units remaining.
154 size_t size() const { return nbytes() / sizeof(Elem); }
156 // The number of bytes remaining.
157 size_t nbytes() const { return end - next; }
// Multibyte sequences can have a "header" consisting of a Byte Order Mark.
const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
const unsigned char utf16_bom[2] = { 0xFE, 0xFF };   // big-endian order
const unsigned char utf16le_bom[2] = { 0xFF, 0xFE }; // little-endian order
165 // Write a BOM (space permitting).
166 template<typename C, bool A, size_t N>
167 bool
168 write_bom(range<C, A>& to, const unsigned char (&bom)[N])
170 static_assert( (N / sizeof(C)) != 0, "" );
171 static_assert( (N % sizeof(C)) == 0, "" );
173 if (to.nbytes() < N)
174 return false;
175 memcpy(to.next, bom, N);
176 to += (N / sizeof(C));
177 return true;
180 // Try to read a BOM.
181 template<typename C, bool A, size_t N>
182 bool
183 read_bom(range<C, A>& from, const unsigned char (&bom)[N])
185 static_assert( (N / sizeof(C)) != 0, "" );
186 static_assert( (N % sizeof(C)) == 0, "" );
188 if (from.nbytes() >= N && !memcmp(from.next, bom, N))
190 from += (N / sizeof(C));
191 return true;
193 return false;
196 // If generate_header is set in mode write out UTF-8 BOM.
197 bool
198 write_utf8_bom(range<char>& to, codecvt_mode mode)
200 if (mode & generate_header)
201 return write_bom(to, utf8_bom);
202 return true;
205 // If generate_header is set in mode write out the UTF-16 BOM indicated
206 // by whether little_endian is set in mode.
207 template<bool Aligned>
208 bool
209 write_utf16_bom(range<char16_t, Aligned>& to, codecvt_mode mode)
211 if (mode & generate_header)
213 if (mode & little_endian)
214 return write_bom(to, utf16le_bom);
215 else
216 return write_bom(to, utf16_bom);
218 return true;
221 // If consume_header is set in mode update from.next to after any BOM.
222 void
223 read_utf8_bom(range<const char>& from, codecvt_mode mode)
225 if (mode & consume_header)
226 read_bom(from, utf8_bom);
229 // If consume_header is not set in mode, no effects.
230 // Otherwise, if *from.next is a UTF-16 BOM increment from.next and then:
231 // - if the UTF-16BE BOM was found unset little_endian in mode, or
232 // - if the UTF-16LE BOM was found set little_endian in mode.
233 template<bool Aligned>
234 void
235 read_utf16_bom(range<const char16_t, Aligned>& from, codecvt_mode& mode)
237 if (mode & consume_header)
239 if (read_bom(from, utf16_bom))
240 mode &= ~little_endian;
241 else if (read_bom(from, utf16le_bom))
242 mode |= little_endian;
246 // Read a codepoint from a UTF-8 multibyte sequence.
247 // Updates from.next if the codepoint is not greater than maxcode.
248 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
249 char32_t
250 read_utf8_code_point(range<const char>& from, unsigned long maxcode)
252 const size_t avail = from.size();
253 if (avail == 0)
254 return incomplete_mb_character;
255 unsigned char c1 = from[0];
256 // https://en.wikipedia.org/wiki/UTF-8#Sample_code
257 if (c1 < 0x80)
259 ++from;
260 return c1;
262 else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
263 return invalid_mb_sequence;
264 else if (c1 < 0xE0) // 2-byte sequence
266 if (avail < 2)
267 return incomplete_mb_character;
268 unsigned char c2 = from[1];
269 if ((c2 & 0xC0) != 0x80)
270 return invalid_mb_sequence;
271 char32_t c = (c1 << 6) + c2 - 0x3080;
272 if (c <= maxcode)
273 from += 2;
274 return c;
276 else if (c1 < 0xF0) // 3-byte sequence
278 if (avail < 3)
279 return incomplete_mb_character;
280 unsigned char c2 = from[1];
281 if ((c2 & 0xC0) != 0x80)
282 return invalid_mb_sequence;
283 if (c1 == 0xE0 && c2 < 0xA0) // overlong
284 return invalid_mb_sequence;
285 unsigned char c3 = from[2];
286 if ((c3 & 0xC0) != 0x80)
287 return invalid_mb_sequence;
288 char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
289 if (c <= maxcode)
290 from += 3;
291 return c;
293 else if (c1 < 0xF5) // 4-byte sequence
295 if (avail < 4)
296 return incomplete_mb_character;
297 unsigned char c2 = from[1];
298 if ((c2 & 0xC0) != 0x80)
299 return invalid_mb_sequence;
300 if (c1 == 0xF0 && c2 < 0x90) // overlong
301 return invalid_mb_sequence;
302 if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
303 return invalid_mb_sequence;
304 unsigned char c3 = from[2];
305 if ((c3 & 0xC0) != 0x80)
306 return invalid_mb_sequence;
307 unsigned char c4 = from[3];
308 if ((c4 & 0xC0) != 0x80)
309 return invalid_mb_sequence;
310 char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
311 if (c <= maxcode)
312 from += 4;
313 return c;
315 else // > U+10FFFF
316 return invalid_mb_sequence;
319 bool
320 write_utf8_code_point(range<char>& to, char32_t code_point)
322 if (code_point < 0x80)
324 if (to.size() < 1)
325 return false;
326 to = code_point;
328 else if (code_point <= 0x7FF)
330 if (to.size() < 2)
331 return false;
332 to = (code_point >> 6) + 0xC0;
333 to = (code_point & 0x3F) + 0x80;
335 else if (code_point <= 0xFFFF)
337 if (to.size() < 3)
338 return false;
339 to = (code_point >> 12) + 0xE0;
340 to = ((code_point >> 6) & 0x3F) + 0x80;
341 to = (code_point & 0x3F) + 0x80;
343 else if (code_point <= 0x10FFFF)
345 if (to.size() < 4)
346 return false;
347 to = (code_point >> 18) + 0xF0;
348 to = ((code_point >> 12) & 0x3F) + 0x80;
349 to = ((code_point >> 6) & 0x3F) + 0x80;
350 to = (code_point & 0x3F) + 0x80;
352 else
353 return false;
354 return true;
// Convert a UTF-16 code unit between native byte order and the byte
// order selected by (mode & little_endian). The unit is byte-swapped
// only when the two orders differ, so the same call converts in either
// direction.
inline char16_t
adjust_byte_order(char16_t c, codecvt_mode mode)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return (mode & little_endian) ? __builtin_bswap16(c) : c;
#else
  return (mode & little_endian) ? c : __builtin_bswap16(c);
#endif
}
// Return true if c is a high-surrogate (aka leading) code point,
// i.e. in the range [0xD800, 0xDBFF].
inline bool
is_high_surrogate(char32_t c)
{
  return c >= 0xD800 && c <= 0xDBFF;
}
// Return true if c is a low-surrogate (aka trailing) code point,
// i.e. in the range [0xDC00, 0xDFFF].
inline bool
is_low_surrogate(char32_t c)
{
  return c >= 0xDC00 && c <= 0xDFFF;
}
// Combine a high and a low surrogate into the code point they encode.
// The constant is (0xD800 << 10) + 0xDC00 - 0x10000, folding both
// surrogate bases and the 0x10000 offset into a single subtraction.
inline char32_t
surrogate_pair_to_code_point(char32_t high, char32_t low)
{
  return (high << 10) + low - 0x35FDC00;
}
387 // Read a codepoint from a UTF-16 multibyte sequence.
388 // The sequence's endianness is indicated by (mode & little_endian).
389 // Updates from.next if the codepoint is not greater than maxcode.
390 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
391 template<bool Aligned>
392 char32_t
393 read_utf16_code_point(range<const char16_t, Aligned>& from,
394 unsigned long maxcode, codecvt_mode mode)
396 const size_t avail = from.size();
397 if (avail == 0)
398 return incomplete_mb_character;
399 int inc = 1;
400 char32_t c = adjust_byte_order(from[0], mode);
401 if (is_high_surrogate(c))
403 if (avail < 2)
404 return incomplete_mb_character;
405 const char16_t c2 = adjust_byte_order(from[1], mode);
406 if (is_low_surrogate(c2))
408 c = surrogate_pair_to_code_point(c, c2);
409 inc = 2;
411 else
412 return invalid_mb_sequence;
414 else if (is_low_surrogate(c))
415 return invalid_mb_sequence;
416 if (c <= maxcode)
417 from += inc;
418 return c;
421 template<typename C, bool A>
422 bool
423 write_utf16_code_point(range<C, A>& to, char32_t codepoint, codecvt_mode mode)
425 static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
427 if (codepoint <= max_single_utf16_unit)
429 if (to.size() > 0)
431 to = adjust_byte_order(codepoint, mode);
432 return true;
435 else if (to.size() > 1)
437 // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
438 const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
439 char16_t lead = LEAD_OFFSET + (codepoint >> 10);
440 char16_t trail = 0xDC00 + (codepoint & 0x3FF);
441 to = adjust_byte_order(lead, mode);
442 to = adjust_byte_order(trail, mode);
443 return true;
445 return false;
448 // utf8 -> ucs4
449 codecvt_base::result
450 ucs4_in(range<const char>& from, range<char32_t>& to,
451 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
453 read_utf8_bom(from, mode);
454 while (from.size() && to.size())
456 const char32_t codepoint = read_utf8_code_point(from, maxcode);
457 if (codepoint == incomplete_mb_character)
458 return codecvt_base::partial;
459 if (codepoint > maxcode)
460 return codecvt_base::error;
461 to = codepoint;
463 return from.size() ? codecvt_base::partial : codecvt_base::ok;
466 // ucs4 -> utf8
467 codecvt_base::result
468 ucs4_out(range<const char32_t>& from, range<char>& to,
469 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
471 if (!write_utf8_bom(to, mode))
472 return codecvt_base::partial;
473 while (from.size())
475 const char32_t c = from[0];
476 if (c > maxcode)
477 return codecvt_base::error;
478 if (!write_utf8_code_point(to, c))
479 return codecvt_base::partial;
480 ++from;
482 return codecvt_base::ok;
485 // utf16 -> ucs4
486 codecvt_base::result
487 ucs4_in(range<const char16_t, false>& from, range<char32_t>& to,
488 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
490 read_utf16_bom(from, mode);
491 while (from.size() && to.size())
493 const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
494 if (codepoint == incomplete_mb_character)
495 return codecvt_base::partial;
496 if (codepoint > maxcode)
497 return codecvt_base::error;
498 to = codepoint;
500 return from.size() ? codecvt_base::partial : codecvt_base::ok;
503 // ucs4 -> utf16
504 codecvt_base::result
505 ucs4_out(range<const char32_t>& from, range<char16_t, false>& to,
506 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
508 if (!write_utf16_bom(to, mode))
509 return codecvt_base::partial;
510 while (from.size())
512 const char32_t c = from[0];
513 if (c > maxcode)
514 return codecvt_base::error;
515 if (!write_utf16_code_point(to, c, mode))
516 return codecvt_base::partial;
517 ++from;
519 return codecvt_base::ok;
// Flag indicating whether to process UTF-16 (surrogate pairs allowed)
// or UCS2 (surrogate pairs disallowed).
enum class surrogates { allowed, disallowed };
525 // utf8 -> utf16 (or utf8 -> ucs2 if s == surrogates::disallowed)
526 template<typename C>
527 codecvt_base::result
528 utf16_in(range<const char>& from, range<C>& to,
529 unsigned long maxcode = max_code_point, codecvt_mode mode = {},
530 surrogates s = surrogates::allowed)
532 read_utf8_bom(from, mode);
533 while (from.size() && to.size())
535 auto orig = from;
536 const char32_t codepoint = read_utf8_code_point(from, maxcode);
537 if (codepoint == incomplete_mb_character)
539 if (s == surrogates::allowed)
540 return codecvt_base::partial;
541 else
542 return codecvt_base::error; // No surrogates in UCS2
544 if (codepoint > maxcode)
545 return codecvt_base::error;
546 if (!write_utf16_code_point(to, codepoint, mode))
548 from = orig; // rewind to previous position
549 return codecvt_base::partial;
552 return codecvt_base::ok;
555 // utf16 -> utf8 (or ucs2 -> utf8 if s == surrogates::disallowed)
556 template<typename C>
557 codecvt_base::result
558 utf16_out(range<const C>& from, range<char>& to,
559 unsigned long maxcode = max_code_point, codecvt_mode mode = {},
560 surrogates s = surrogates::allowed)
562 if (!write_utf8_bom(to, mode))
563 return codecvt_base::partial;
564 while (from.size())
566 char32_t c = from[0];
567 int inc = 1;
568 if (is_high_surrogate(c))
570 if (s == surrogates::disallowed)
571 return codecvt_base::error; // No surrogates in UCS-2
573 if (from.size() < 2)
574 return codecvt_base::ok; // stop converting at this point
576 const char32_t c2 = from[1];
577 if (is_low_surrogate(c2))
579 c = surrogate_pair_to_code_point(c, c2);
580 inc = 2;
582 else
583 return codecvt_base::error;
585 else if (is_low_surrogate(c))
586 return codecvt_base::error;
587 if (c > maxcode)
588 return codecvt_base::error;
589 if (!write_utf8_code_point(to, c))
590 return codecvt_base::partial;
591 from += inc;
593 return codecvt_base::ok;
596 // return pos such that [begin,pos) is valid UTF-16 string no longer than max
597 const char*
598 utf16_span(const char* begin, const char* end, size_t max,
599 char32_t maxcode = max_code_point, codecvt_mode mode = {})
601 range<const char> from{ begin, end };
602 read_utf8_bom(from, mode);
603 size_t count = 0;
604 while (count+1 < max)
606 char32_t c = read_utf8_code_point(from, maxcode);
607 if (c > maxcode)
608 return from.next;
609 else if (c > max_single_utf16_unit)
610 ++count;
611 ++count;
613 if (count+1 == max) // take one more character if it fits in a single unit
614 read_utf8_code_point(from, std::min(max_single_utf16_unit, maxcode));
615 return from.next;
618 // utf8 -> ucs2
619 codecvt_base::result
620 ucs2_in(range<const char>& from, range<char16_t>& to,
621 char32_t maxcode = max_code_point, codecvt_mode mode = {})
623 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
624 maxcode = std::min(max_single_utf16_unit, maxcode);
625 return utf16_in(from, to, maxcode, mode, surrogates::disallowed);
628 // ucs2 -> utf8
629 codecvt_base::result
630 ucs2_out(range<const char16_t>& from, range<char>& to,
631 char32_t maxcode = max_code_point, codecvt_mode mode = {})
633 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
634 maxcode = std::min(max_single_utf16_unit, maxcode);
635 return utf16_out(from, to, maxcode, mode, surrogates::disallowed);
638 // ucs2 -> utf16
639 codecvt_base::result
640 ucs2_out(range<const char16_t>& from, range<char16_t, false>& to,
641 char32_t maxcode = max_code_point, codecvt_mode mode = {})
643 if (!write_utf16_bom(to, mode))
644 return codecvt_base::partial;
645 while (from.size() && to.size())
647 char16_t c = from[0];
648 if (is_high_surrogate(c))
649 return codecvt_base::error;
650 if (c > maxcode)
651 return codecvt_base::error;
652 to = adjust_byte_order(c, mode);
653 ++from;
655 return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
658 // utf16 -> ucs2
659 codecvt_base::result
660 ucs2_in(range<const char16_t, false>& from, range<char16_t>& to,
661 char32_t maxcode = max_code_point, codecvt_mode mode = {})
663 read_utf16_bom(from, mode);
664 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
665 maxcode = std::min(max_single_utf16_unit, maxcode);
666 while (from.size() && to.size())
668 const char32_t c = read_utf16_code_point(from, maxcode, mode);
669 if (c == incomplete_mb_character)
670 return codecvt_base::error; // UCS-2 only supports single units.
671 if (c > maxcode)
672 return codecvt_base::error;
673 to = c;
675 return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
678 const char16_t*
679 ucs2_span(range<const char16_t, false>& from, size_t max,
680 char32_t maxcode, codecvt_mode mode)
682 read_utf16_bom(from, mode);
683 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
684 maxcode = std::min(max_single_utf16_unit, maxcode);
685 char32_t c = 0;
686 while (max-- && c <= maxcode)
687 c = read_utf16_code_point(from, maxcode, mode);
688 return reinterpret_cast<const char16_t*>(from.next);
691 const char*
692 ucs2_span(const char* begin, const char* end, size_t max,
693 char32_t maxcode, codecvt_mode mode)
695 range<const char> from{ begin, end };
696 read_utf8_bom(from, mode);
697 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
698 maxcode = std::min(max_single_utf16_unit, maxcode);
699 char32_t c = 0;
700 while (max-- && c <= maxcode)
701 c = read_utf8_code_point(from, maxcode);
702 return from.next;
705 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
706 const char*
707 ucs4_span(const char* begin, const char* end, size_t max,
708 char32_t maxcode = max_code_point, codecvt_mode mode = {})
710 range<const char> from{ begin, end };
711 read_utf8_bom(from, mode);
712 char32_t c = 0;
713 while (max-- && c <= maxcode)
714 c = read_utf8_code_point(from, maxcode);
715 return from.next;
718 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
719 const char16_t*
720 ucs4_span(range<const char16_t, false>& from, size_t max,
721 char32_t maxcode = max_code_point, codecvt_mode mode = {})
723 read_utf16_bom(from, mode);
724 char32_t c = 0;
725 while (max-- && c <= maxcode)
726 c = read_utf16_code_point(from, maxcode, mode);
727 return reinterpret_cast<const char16_t*>(from.next);
731 // Define members of codecvt<char16_t, char, mbstate_t> specialization.
732 // Converts from UTF-8 to UTF-16.
734 locale::id codecvt<char16_t, char, mbstate_t>::id;
736 codecvt<char16_t, char, mbstate_t>::~codecvt() { }
738 codecvt_base::result
739 codecvt<char16_t, char, mbstate_t>::
740 do_out(state_type&,
741 const intern_type* __from,
742 const intern_type* __from_end, const intern_type*& __from_next,
743 extern_type* __to, extern_type* __to_end,
744 extern_type*& __to_next) const
746 range<const char16_t> from{ __from, __from_end };
747 range<char> to{ __to, __to_end };
748 auto res = utf16_out(from, to);
749 __from_next = from.next;
750 __to_next = to.next;
751 return res;
754 codecvt_base::result
755 codecvt<char16_t, char, mbstate_t>::
756 do_unshift(state_type&, extern_type* __to, extern_type*,
757 extern_type*& __to_next) const
759 __to_next = __to;
760 return noconv; // we don't use mbstate_t for the unicode facets
763 codecvt_base::result
764 codecvt<char16_t, char, mbstate_t>::
765 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
766 const extern_type*& __from_next,
767 intern_type* __to, intern_type* __to_end,
768 intern_type*& __to_next) const
770 range<const char> from{ __from, __from_end };
771 range<char16_t> to{ __to, __to_end };
772 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
773 codecvt_mode mode = {};
774 #else
775 codecvt_mode mode = little_endian;
776 #endif
777 auto res = utf16_in(from, to, max_code_point, mode);
778 __from_next = from.next;
779 __to_next = to.next;
780 return res;
784 codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
785 { return 0; } // UTF-8 is not a fixed-width encoding
787 bool
788 codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
789 { return false; }
792 codecvt<char16_t, char, mbstate_t>::
793 do_length(state_type&, const extern_type* __from,
794 const extern_type* __end, size_t __max) const
796 __end = utf16_span(__from, __end, __max);
797 return __end - __from;
801 codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
803 // A single character (one or two UTF-16 code units) requires
804 // up to four UTF-8 code units.
805 return 4;
808 // Define members of codecvt<char32_t, char, mbstate_t> specialization.
809 // Converts from UTF-8 to UTF-32 (aka UCS-4).
811 locale::id codecvt<char32_t, char, mbstate_t>::id;
813 codecvt<char32_t, char, mbstate_t>::~codecvt() { }
815 codecvt_base::result
816 codecvt<char32_t, char, mbstate_t>::
817 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
818 const intern_type*& __from_next,
819 extern_type* __to, extern_type* __to_end,
820 extern_type*& __to_next) const
822 range<const char32_t> from{ __from, __from_end };
823 range<char> to{ __to, __to_end };
824 auto res = ucs4_out(from, to);
825 __from_next = from.next;
826 __to_next = to.next;
827 return res;
830 codecvt_base::result
831 codecvt<char32_t, char, mbstate_t>::
832 do_unshift(state_type&, extern_type* __to, extern_type*,
833 extern_type*& __to_next) const
835 __to_next = __to;
836 return noconv;
839 codecvt_base::result
840 codecvt<char32_t, char, mbstate_t>::
841 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
842 const extern_type*& __from_next,
843 intern_type* __to, intern_type* __to_end,
844 intern_type*& __to_next) const
846 range<const char> from{ __from, __from_end };
847 range<char32_t> to{ __to, __to_end };
848 auto res = ucs4_in(from, to);
849 __from_next = from.next;
850 __to_next = to.next;
851 return res;
855 codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
856 { return 0; } // UTF-8 is not a fixed-width encoding
858 bool
859 codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
860 { return false; }
863 codecvt<char32_t, char, mbstate_t>::
864 do_length(state_type&, const extern_type* __from,
865 const extern_type* __end, size_t __max) const
867 __end = ucs4_span(__from, __end, __max);
868 return __end - __from;
872 codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
874 // A single character (one UTF-32 code unit) requires
875 // up to 4 UTF-8 code units.
876 return 4;
879 // Define members of codecvt_utf8<char16_t> base class implementation.
880 // Converts from UTF-8 to UCS-2.
882 __codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
884 codecvt_base::result
885 __codecvt_utf8_base<char16_t>::
886 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
887 const intern_type*& __from_next,
888 extern_type* __to, extern_type* __to_end,
889 extern_type*& __to_next) const
891 range<const char16_t> from{ __from, __from_end };
892 range<char> to{ __to, __to_end };
893 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
894 __from_next = from.next;
895 __to_next = to.next;
896 return res;
899 codecvt_base::result
900 __codecvt_utf8_base<char16_t>::
901 do_unshift(state_type&, extern_type* __to, extern_type*,
902 extern_type*& __to_next) const
904 __to_next = __to;
905 return noconv;
908 codecvt_base::result
909 __codecvt_utf8_base<char16_t>::
910 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
911 const extern_type*& __from_next,
912 intern_type* __to, intern_type* __to_end,
913 intern_type*& __to_next) const
915 range<const char> from{ __from, __from_end };
916 range<char16_t> to{ __to, __to_end };
917 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
918 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
919 mode = codecvt_mode(mode | little_endian);
920 #endif
921 auto res = ucs2_in(from, to, _M_maxcode, mode);
922 __from_next = from.next;
923 __to_next = to.next;
924 return res;
928 __codecvt_utf8_base<char16_t>::do_encoding() const throw()
929 { return 0; } // UTF-8 is not a fixed-width encoding
931 bool
932 __codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
933 { return false; }
936 __codecvt_utf8_base<char16_t>::
937 do_length(state_type&, const extern_type* __from,
938 const extern_type* __end, size_t __max) const
940 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
941 return __end - __from;
945 __codecvt_utf8_base<char16_t>::do_max_length() const throw()
947 // A single UCS-2 character requires up to three UTF-8 code units.
948 // (UCS-2 cannot represent characters that use four UTF-8 code units).
949 int max = 3;
950 if (_M_mode & consume_header)
951 max += sizeof(utf8_bom);
952 return max;
955 // Define members of codecvt_utf8<char32_t> base class implementation.
956 // Converts from UTF-8 to UTF-32 (aka UCS-4).
958 __codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
960 codecvt_base::result
961 __codecvt_utf8_base<char32_t>::
962 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
963 const intern_type*& __from_next,
964 extern_type* __to, extern_type* __to_end,
965 extern_type*& __to_next) const
967 range<const char32_t> from{ __from, __from_end };
968 range<char> to{ __to, __to_end };
969 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
970 __from_next = from.next;
971 __to_next = to.next;
972 return res;
975 codecvt_base::result
976 __codecvt_utf8_base<char32_t>::
977 do_unshift(state_type&, extern_type* __to, extern_type*,
978 extern_type*& __to_next) const
980 __to_next = __to;
981 return noconv;
984 codecvt_base::result
985 __codecvt_utf8_base<char32_t>::
986 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
987 const extern_type*& __from_next,
988 intern_type* __to, intern_type* __to_end,
989 intern_type*& __to_next) const
991 range<const char> from{ __from, __from_end };
992 range<char32_t> to{ __to, __to_end };
993 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
994 __from_next = from.next;
995 __to_next = to.next;
996 return res;
1000 __codecvt_utf8_base<char32_t>::do_encoding() const throw()
1001 { return 0; } // UTF-8 is not a fixed-width encoding
1003 bool
1004 __codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
1005 { return false; }
1008 __codecvt_utf8_base<char32_t>::
1009 do_length(state_type&, const extern_type* __from,
1010 const extern_type* __end, size_t __max) const
1012 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
1013 return __end - __from;
1017 __codecvt_utf8_base<char32_t>::do_max_length() const throw()
1019 // A single UCS-4 character requires up to four UTF-8 code units.
1020 int max = 4;
1021 if (_M_mode & consume_header)
1022 max += sizeof(utf8_bom);
1023 return max;
1026 #ifdef _GLIBCXX_USE_WCHAR_T
1028 #if __SIZEOF_WCHAR_T__ == 2
1029 static_assert(sizeof(wchar_t) == sizeof(char16_t), "");
1030 #elif __SIZEOF_WCHAR_T__ == 4
1031 static_assert(sizeof(wchar_t) == sizeof(char32_t), "");
1032 #endif
1034 // Define members of codecvt_utf8<wchar_t> base class implementation.
1035 // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1037 __codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
1039 codecvt_base::result
1040 __codecvt_utf8_base<wchar_t>::
1041 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1042 const intern_type*& __from_next,
1043 extern_type* __to, extern_type* __to_end,
1044 extern_type*& __to_next) const
1046 range<char> to{ __to, __to_end };
1047 #if __SIZEOF_WCHAR_T__ == 2
1048 range<const char16_t> from{
1049 reinterpret_cast<const char16_t*>(__from),
1050 reinterpret_cast<const char16_t*>(__from_end)
1052 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1053 #elif __SIZEOF_WCHAR_T__ == 4
1054 range<const char32_t> from{
1055 reinterpret_cast<const char32_t*>(__from),
1056 reinterpret_cast<const char32_t*>(__from_end)
1058 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1059 #else
1060 return codecvt_base::error;
1061 #endif
1062 __from_next = reinterpret_cast<const wchar_t*>(from.next);
1063 __to_next = to.next;
1064 return res;
1067 codecvt_base::result
1068 __codecvt_utf8_base<wchar_t>::
1069 do_unshift(state_type&, extern_type* __to, extern_type*,
1070 extern_type*& __to_next) const
1072 __to_next = __to;
1073 return noconv;
1076 codecvt_base::result
1077 __codecvt_utf8_base<wchar_t>::
1078 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1079 const extern_type*& __from_next,
1080 intern_type* __to, intern_type* __to_end,
1081 intern_type*& __to_next) const
1083 range<const char> from{ __from, __from_end };
1084 #if __SIZEOF_WCHAR_T__ == 2
1085 range<char16_t> to{
1086 reinterpret_cast<char16_t*>(__to),
1087 reinterpret_cast<char16_t*>(__to_end)
1089 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1090 #elif __SIZEOF_WCHAR_T__ == 4
1091 range<char32_t> to{
1092 reinterpret_cast<char32_t*>(__to),
1093 reinterpret_cast<char32_t*>(__to_end)
1095 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1096 #else
1097 return codecvt_base::error;
1098 #endif
1099 __from_next = from.next;
1100 __to_next = reinterpret_cast<wchar_t*>(to.next);
1101 return res;
1105 __codecvt_utf8_base<wchar_t>::do_encoding() const throw()
1106 { return 0; } // UTF-8 is not a fixed-width encoding
1108 bool
1109 __codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
1110 { return false; }
1113 __codecvt_utf8_base<wchar_t>::
1114 do_length(state_type&, const extern_type* __from,
1115 const extern_type* __end, size_t __max) const
1117 #if __SIZEOF_WCHAR_T__ == 2
1118 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
1119 #elif __SIZEOF_WCHAR_T__ == 4
1120 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
1121 #else
1122 __end = __from;
1123 #endif
1124 return __end - __from;
1128 __codecvt_utf8_base<wchar_t>::do_max_length() const throw()
1130 #if __SIZEOF_WCHAR_T__ == 2
1131 int max = 3; // See __codecvt_utf8_base<char16_t>::do_max_length()
1132 #else
1133 int max = 4; // See __codecvt_utf8_base<char32_t>::do_max_length()
1134 #endif
1135 if (_M_mode & consume_header)
1136 max += sizeof(utf8_bom);
1137 return max;
1139 #endif
1141 // Define members of codecvt_utf16<char16_t> base class implementation.
1142 // Converts from UTF-16 to UCS-2.
1144 __codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
1146 codecvt_base::result
1147 __codecvt_utf16_base<char16_t>::
1148 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1149 const intern_type*& __from_next,
1150 extern_type* __to, extern_type* __to_end,
1151 extern_type*& __to_next) const
1153 range<const char16_t> from{ __from, __from_end };
1154 range<char16_t, false> to{ __to, __to_end };
1155 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1156 __from_next = from.next;
1157 __to_next = reinterpret_cast<char*>(to.next);
1158 return res;
1161 codecvt_base::result
1162 __codecvt_utf16_base<char16_t>::
1163 do_unshift(state_type&, extern_type* __to, extern_type*,
1164 extern_type*& __to_next) const
1166 __to_next = __to;
1167 return noconv;
1170 codecvt_base::result
1171 __codecvt_utf16_base<char16_t>::
1172 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1173 const extern_type*& __from_next,
1174 intern_type* __to, intern_type* __to_end,
1175 intern_type*& __to_next) const
1177 range<const char16_t, false> from{ __from, __from_end };
1178 range<char16_t> to{ __to, __to_end };
1179 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1180 __from_next = reinterpret_cast<const char*>(from.next);
1181 __to_next = to.next;
1182 if (res == codecvt_base::ok && __from_next != __from_end)
1183 res = codecvt_base::error;
1184 return res;
1188 __codecvt_utf16_base<char16_t>::do_encoding() const throw()
1189 { return 0; } // UTF-16 is not a fixed-width encoding
1191 bool
1192 __codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
1193 { return false; }
1196 __codecvt_utf16_base<char16_t>::
1197 do_length(state_type&, const extern_type* __from,
1198 const extern_type* __end, size_t __max) const
1200 range<const char16_t, false> from{ __from, __end };
1201 const char16_t* next = ucs2_span(from, __max, _M_maxcode, _M_mode);
1202 return reinterpret_cast<const char*>(next) - __from;
1206 __codecvt_utf16_base<char16_t>::do_max_length() const throw()
1208 // A single UCS-2 character requires one UTF-16 code unit (so two chars).
1209 // (UCS-2 cannot represent characters that use multiple UTF-16 code units).
1210 int max = 2;
1211 if (_M_mode & consume_header)
1212 max += sizeof(utf16_bom);
1213 return max;
1216 // Define members of codecvt_utf16<char32_t> base class implementation.
1217 // Converts from UTF-16 to UTF-32 (aka UCS-4).
1219 __codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
1221 codecvt_base::result
1222 __codecvt_utf16_base<char32_t>::
1223 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1224 const intern_type*& __from_next,
1225 extern_type* __to, extern_type* __to_end,
1226 extern_type*& __to_next) const
1228 range<const char32_t> from{ __from, __from_end };
1229 range<char16_t, false> to{ __to, __to_end };
1230 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1231 __from_next = from.next;
1232 __to_next = reinterpret_cast<char*>(to.next);
1233 return res;
1236 codecvt_base::result
1237 __codecvt_utf16_base<char32_t>::
1238 do_unshift(state_type&, extern_type* __to, extern_type*,
1239 extern_type*& __to_next) const
1241 __to_next = __to;
1242 return noconv;
1245 codecvt_base::result
1246 __codecvt_utf16_base<char32_t>::
1247 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1248 const extern_type*& __from_next,
1249 intern_type* __to, intern_type* __to_end,
1250 intern_type*& __to_next) const
1252 range<const char16_t, false> from{ __from, __from_end };
1253 range<char32_t> to{ __to, __to_end };
1254 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1255 __from_next = reinterpret_cast<const char*>(from.next);
1256 __to_next = to.next;
1257 if (res == codecvt_base::ok && __from_next != __from_end)
1258 res = codecvt_base::error;
1259 return res;
1263 __codecvt_utf16_base<char32_t>::do_encoding() const throw()
1264 { return 0; } // UTF-16 is not a fixed-width encoding
1266 bool
1267 __codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
1268 { return false; }
1271 __codecvt_utf16_base<char32_t>::
1272 do_length(state_type&, const extern_type* __from,
1273 const extern_type* __end, size_t __max) const
1275 range<const char16_t, false> from{ __from, __end };
1276 const char16_t* next = ucs4_span(from, __max, _M_maxcode, _M_mode);
1277 return reinterpret_cast<const char*>(next) - __from;
1281 __codecvt_utf16_base<char32_t>::do_max_length() const throw()
1283 // A single UCS-4 character requires one or two UTF-16 code units
1284 // (so up to four chars).
1285 int max = 4;
1286 if (_M_mode & consume_header)
1287 max += sizeof(utf16_bom);
1288 return max;
1291 #ifdef _GLIBCXX_USE_WCHAR_T
// Define members of codecvt_utf16<wchar_t> base class implementation.
// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1295 __codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
1297 codecvt_base::result
1298 __codecvt_utf16_base<wchar_t>::
1299 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1300 const intern_type*& __from_next,
1301 extern_type* __to, extern_type* __to_end,
1302 extern_type*& __to_next) const
1304 range<char16_t, false> to{ __to, __to_end };
1305 #if __SIZEOF_WCHAR_T__ == 2
1306 range<const char16_t> from{
1307 reinterpret_cast<const char16_t*>(__from),
1308 reinterpret_cast<const char16_t*>(__from_end),
1310 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1311 #elif __SIZEOF_WCHAR_T__ == 4
1312 range<const char32_t> from{
1313 reinterpret_cast<const char32_t*>(__from),
1314 reinterpret_cast<const char32_t*>(__from_end),
1316 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1317 #else
1318 return codecvt_base::error;
1319 #endif
1320 __from_next = reinterpret_cast<const wchar_t*>(from.next);
1321 __to_next = reinterpret_cast<char*>(to.next);
1322 return res;
1325 codecvt_base::result
1326 __codecvt_utf16_base<wchar_t>::
1327 do_unshift(state_type&, extern_type* __to, extern_type*,
1328 extern_type*& __to_next) const
1330 __to_next = __to;
1331 return noconv;
1334 codecvt_base::result
1335 __codecvt_utf16_base<wchar_t>::
1336 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1337 const extern_type*& __from_next,
1338 intern_type* __to, intern_type* __to_end,
1339 intern_type*& __to_next) const
1341 range<const char16_t, false> from{ __from, __from_end };
1342 #if __SIZEOF_WCHAR_T__ == 2
1343 range<char16_t> to{
1344 reinterpret_cast<char16_t*>(__to),
1345 reinterpret_cast<char16_t*>(__to_end),
1347 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1348 #elif __SIZEOF_WCHAR_T__ == 4
1349 range<char32_t> to{
1350 reinterpret_cast<char32_t*>(__to),
1351 reinterpret_cast<char32_t*>(__to_end),
1353 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1354 #else
1355 return codecvt_base::error;
1356 #endif
1357 __from_next = reinterpret_cast<const char*>(from.next);
1358 __to_next = reinterpret_cast<wchar_t*>(to.next);
1359 if (res == codecvt_base::ok && __from_next != __from_end)
1360 res = codecvt_base::error;
1361 return res;
1365 __codecvt_utf16_base<wchar_t>::do_encoding() const throw()
1366 { return 0; } // UTF-16 is not a fixed-width encoding
1368 bool
1369 __codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
1370 { return false; }
1373 __codecvt_utf16_base<wchar_t>::
1374 do_length(state_type&, const extern_type* __from,
1375 const extern_type* __end, size_t __max) const
1377 range<const char16_t, false> from{ __from, __end };
1378 #if __SIZEOF_WCHAR_T__ == 2
1379 const char16_t* next = ucs2_span(from, __max, _M_maxcode, _M_mode);
1380 #elif __SIZEOF_WCHAR_T__ == 4
1381 const char16_t* next = ucs4_span(from, __max, _M_maxcode, _M_mode);
1382 #endif
1383 return reinterpret_cast<const char*>(next) - __from;
1387 __codecvt_utf16_base<wchar_t>::do_max_length() const throw()
1389 #if __SIZEOF_WCHAR_T__ == 2
1390 int max = 2; // See __codecvt_utf16_base<char16_t>::do_max_length()
1391 #else
1392 int max = 4; // See __codecvt_utf16_base<char32_t>::do_max_length()
1393 #endif
1394 if (_M_mode & consume_header)
1395 max += sizeof(utf16_bom);
1396 return max;
1398 #endif
1400 // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1401 // Converts from UTF-8 to UTF-16.
1403 __codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
1405 codecvt_base::result
1406 __codecvt_utf8_utf16_base<char16_t>::
1407 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1408 const intern_type*& __from_next,
1409 extern_type* __to, extern_type* __to_end,
1410 extern_type*& __to_next) const
1412 range<const char16_t> from{ __from, __from_end };
1413 range<char> to{ __to, __to_end };
1414 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1415 __from_next = from.next;
1416 __to_next = to.next;
1417 return res;
1420 codecvt_base::result
1421 __codecvt_utf8_utf16_base<char16_t>::
1422 do_unshift(state_type&, extern_type* __to, extern_type*,
1423 extern_type*& __to_next) const
1425 __to_next = __to;
1426 return noconv;
1429 codecvt_base::result
1430 __codecvt_utf8_utf16_base<char16_t>::
1431 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1432 const extern_type*& __from_next,
1433 intern_type* __to, intern_type* __to_end,
1434 intern_type*& __to_next) const
1436 range<const char> from{ __from, __from_end };
1437 range<char16_t> to{ __to, __to_end };
1438 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1439 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1440 mode = codecvt_mode(mode | little_endian);
1441 #endif
1442 auto res = utf16_in(from, to, _M_maxcode, mode);
1443 __from_next = from.next;
1444 __to_next = to.next;
1445 return res;
1449 __codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
1450 { return 0; } // UTF-8 is not a fixed-width encoding
1452 bool
1453 __codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
1454 { return false; }
1457 __codecvt_utf8_utf16_base<char16_t>::
1458 do_length(state_type&, const extern_type* __from,
1459 const extern_type* __end, size_t __max) const
1461 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1462 return __end - __from;
1466 __codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
1468 // A single character can be 1 or 2 UTF-16 code units,
1469 // requiring up to 4 UTF-8 code units.
1470 int max = 4;
1471 if (_M_mode & consume_header)
1472 max += sizeof(utf8_bom);
1473 return max;
1476 // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
1477 // Converts from UTF-8 to UTF-16.
1479 __codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
1481 codecvt_base::result
1482 __codecvt_utf8_utf16_base<char32_t>::
1483 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1484 const intern_type*& __from_next,
1485 extern_type* __to, extern_type* __to_end,
1486 extern_type*& __to_next) const
1488 range<const char32_t> from{ __from, __from_end };
1489 range<char> to{ __to, __to_end };
1490 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1491 __from_next = from.next;
1492 __to_next = to.next;
1493 return res;
1496 codecvt_base::result
1497 __codecvt_utf8_utf16_base<char32_t>::
1498 do_unshift(state_type&, extern_type* __to, extern_type*,
1499 extern_type*& __to_next) const
1501 __to_next = __to;
1502 return noconv;
1505 codecvt_base::result
1506 __codecvt_utf8_utf16_base<char32_t>::
1507 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1508 const extern_type*& __from_next,
1509 intern_type* __to, intern_type* __to_end,
1510 intern_type*& __to_next) const
1512 range<const char> from{ __from, __from_end };
1513 range<char32_t> to{ __to, __to_end };
1514 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1515 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1516 mode = codecvt_mode(mode | little_endian);
1517 #endif
1518 auto res = utf16_in(from, to, _M_maxcode, mode);
1519 __from_next = from.next;
1520 __to_next = to.next;
1521 return res;
1525 __codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
1526 { return 0; } // UTF-8 is not a fixed-width encoding
1528 bool
1529 __codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
1530 { return false; }
1533 __codecvt_utf8_utf16_base<char32_t>::
1534 do_length(state_type&, const extern_type* __from,
1535 const extern_type* __end, size_t __max) const
1537 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1538 return __end - __from;
1542 __codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
1544 // A single character can be 1 or 2 UTF-16 code units,
1545 // requiring up to 4 UTF-8 code units.
1546 int max = 4;
1547 if (_M_mode & consume_header)
1548 max += sizeof(utf8_bom);
1549 return max;
1552 #ifdef _GLIBCXX_USE_WCHAR_T
1553 // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
1554 // Converts from UTF-8 to UTF-16.
1556 __codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
1558 codecvt_base::result
1559 __codecvt_utf8_utf16_base<wchar_t>::
1560 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1561 const intern_type*& __from_next,
1562 extern_type* __to, extern_type* __to_end,
1563 extern_type*& __to_next) const
1565 range<const wchar_t> from{ __from, __from_end };
1566 range<char> to{ __to, __to_end };
1567 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1568 __from_next = from.next;
1569 __to_next = to.next;
1570 return res;
1573 codecvt_base::result
1574 __codecvt_utf8_utf16_base<wchar_t>::
1575 do_unshift(state_type&, extern_type* __to, extern_type*,
1576 extern_type*& __to_next) const
1578 __to_next = __to;
1579 return noconv;
1582 codecvt_base::result
1583 __codecvt_utf8_utf16_base<wchar_t>::
1584 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1585 const extern_type*& __from_next,
1586 intern_type* __to, intern_type* __to_end,
1587 intern_type*& __to_next) const
1589 range<const char> from{ __from, __from_end };
1590 range<wchar_t> to{ __to, __to_end };
1591 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1592 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1593 mode = codecvt_mode(mode | little_endian);
1594 #endif
1595 auto res = utf16_in(from, to, _M_maxcode, mode);
1596 __from_next = from.next;
1597 __to_next = to.next;
1598 return res;
1602 __codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
1603 { return 0; } // UTF-8 is not a fixed-width encoding
1605 bool
1606 __codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
1607 { return false; }
1610 __codecvt_utf8_utf16_base<wchar_t>::
1611 do_length(state_type&, const extern_type* __from,
1612 const extern_type* __end, size_t __max) const
1614 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1615 return __end - __from;
1619 __codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
1621 // A single character can be 1 or 2 UTF-16 code units,
1622 // requiring up to 4 UTF-8 code units.
1623 int max = 4;
1624 if (_M_mode & consume_header)
1625 max += sizeof(utf8_bom);
1626 return max;
1628 #endif
1630 inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
1631 inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
1632 template class codecvt_byname<char16_t, char, mbstate_t>;
1633 template class codecvt_byname<char32_t, char, mbstate_t>;
1635 _GLIBCXX_END_NAMESPACE_VERSION
1637 #endif // _GLIBCXX_USE_C99_STDINT_TR1