Fix warning with -Wsign-compare -Wsystem-headers
[official-gcc.git] / libstdc++-v3 / src / c++11 / codecvt.cc
blob503f2fe1ff34d4de7c5bd2338141e4e39beab7e8
1 // Locale support (codecvt) -*- C++ -*-
3 // Copyright (C) 2015-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 #include <codecvt>
26 #include <cstring> // std::memcpy, std::memcmp
27 #include <bits/stl_algobase.h> // std::min
29 namespace std _GLIBCXX_VISIBILITY(default)
31 _GLIBCXX_BEGIN_NAMESPACE_VERSION
// The standard doesn't define these operators, which is annoying.

// Convert a codecvt_mode to its underlying integer type so that bitwise
// arithmetic can be done on it.
static underlying_type<codecvt_mode>::type
to_integer(codecvt_mode m)
{ return static_cast<underlying_type<codecvt_mode>::type>(m); }

// Keep in m only the flags that are also set in n.
static codecvt_mode& operator&=(codecvt_mode& m, codecvt_mode n)
{ return m = codecvt_mode(to_integer(m) & to_integer(n)); }

// Add to m every flag that is set in n.
static codecvt_mode& operator|=(codecvt_mode& m, codecvt_mode n)
{ return m = codecvt_mode(to_integer(m) | to_integer(n)); }

// Bitwise complement of m, typically used to build a mask for operator&=.
static codecvt_mode operator~(codecvt_mode m)
{ return codecvt_mode(~to_integer(m)); }
47 namespace
// Largest code point that fits in a single UTF-16 code unit.
const char32_t max_single_utf16_unit = 0xFFFF;

// Largest code point accepted by the conversions below when the caller
// does not supply a smaller limit.
const char32_t max_code_point = 0x10FFFF;

// The functions below rely on maxcode < incomplete_mb_character
// (which is enforced by the codecvt_utf* classes on construction).
// Both sentinels therefore compare greater than any permitted maxcode.
const char32_t incomplete_mb_character = char32_t(-2);
const char32_t invalid_mb_sequence = char32_t(-1);
// Utility type for reading and writing code units of type Elem from
// a range defined by a pair of pointers.
template<typename Elem, bool Aligned = true>
  struct range
  {
    Elem* next;
    Elem* end;

    // Write a code unit and advance past it.
    range& operator=(Elem e)
    {
      *next++ = e;
      return *this;
    }

    // Read the next code unit.
    Elem operator*() const { return *next; }

    // Read the Nth code unit.
    Elem operator[](size_t n) const { return next[n]; }

    // Move to the next code unit.
    range& operator++()
    {
      ++next;
      return *this;
    }

    // Move to the Nth code unit.
    range& operator+=(size_t n)
    {
      next += n;
      return *this;
    }

    // The number of code units remaining.
    size_t size() const { return end - next; }

    // The number of bytes remaining.
    size_t nbytes() const
    {
      return reinterpret_cast<const char*>(end)
        - reinterpret_cast<const char*>(next);
    }
  };

// This specialization is used when accessing char16_t values through
// pointers to char, which might not be correctly aligned for char16_t.
// All accesses go through memcpy so no misaligned load or store is made.
template<typename Elem>
  struct range<Elem, false>
  {
    using value_type = typename remove_const<Elem>::type;

    using char_pointer = typename
      conditional<is_const<Elem>::value, const char*, char*>::type;

    char_pointer next;
    char_pointer end;

    // Write a code unit and advance past it.
    range& operator=(Elem e)
    {
      memcpy(next, &e, sizeof(Elem));
      ++*this;
      return *this;
    }

    // Read the next code unit.
    Elem operator*() const
    {
      value_type e;
      memcpy(&e, next, sizeof(Elem));
      return e;
    }

    // Read the Nth code unit.
    Elem operator[](size_t n) const
    {
      value_type e;
      memcpy(&e, next + n * sizeof(Elem), sizeof(Elem));
      return e;
    }

    // Move to the next code unit.
    range& operator++()
    {
      next += sizeof(Elem);
      return *this;
    }

    // Move to the Nth code unit.
    range& operator+=(size_t n)
    {
      next += n * sizeof(Elem);
      return *this;
    }

    // The number of whole code units remaining (a trailing partial unit
    // is not counted).
    size_t size() const { return nbytes() / sizeof(Elem); }

    // The number of bytes remaining.
    size_t nbytes() const { return end - next; }
  };
// Multibyte sequences can have "header" consisting of Byte Order Mark
const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
const unsigned char utf16_bom[2] = { 0xFE, 0xFF };   // big-endian order
const unsigned char utf16le_bom[2] = { 0xFF, 0xFE }; // little-endian order
164 // Write a BOM (space permitting).
165 template<typename C, bool A, size_t N>
166 bool
167 write_bom(range<C, A>& to, const unsigned char (&bom)[N])
169 static_assert( (N / sizeof(C)) != 0, "" );
170 static_assert( (N % sizeof(C)) == 0, "" );
172 if (to.nbytes() < N)
173 return false;
174 memcpy(to.next, bom, N);
175 to += (N / sizeof(C));
176 return true;
179 // Try to read a BOM.
180 template<typename C, bool A, size_t N>
181 bool
182 read_bom(range<C, A>& from, const unsigned char (&bom)[N])
184 static_assert( (N / sizeof(C)) != 0, "" );
185 static_assert( (N % sizeof(C)) == 0, "" );
187 if (from.nbytes() >= N && !memcmp(from.next, bom, N))
189 from += (N / sizeof(C));
190 return true;
192 return false;
195 // If generate_header is set in mode write out UTF-8 BOM.
196 bool
197 write_utf8_bom(range<char>& to, codecvt_mode mode)
199 if (mode & generate_header)
200 return write_bom(to, utf8_bom);
201 return true;
204 // If generate_header is set in mode write out the UTF-16 BOM indicated
205 // by whether little_endian is set in mode.
206 template<bool Aligned>
207 bool
208 write_utf16_bom(range<char16_t, Aligned>& to, codecvt_mode mode)
210 if (mode & generate_header)
212 if (mode & little_endian)
213 return write_bom(to, utf16le_bom);
214 else
215 return write_bom(to, utf16_bom);
217 return true;
220 // If consume_header is set in mode update from.next to after any BOM.
221 void
222 read_utf8_bom(range<const char>& from, codecvt_mode mode)
224 if (mode & consume_header)
225 read_bom(from, utf8_bom);
228 // If consume_header is not set in mode, no effects.
229 // Otherwise, if *from.next is a UTF-16 BOM increment from.next and then:
230 // - if the UTF-16BE BOM was found unset little_endian in mode, or
231 // - if the UTF-16LE BOM was found set little_endian in mode.
232 template<bool Aligned>
233 void
234 read_utf16_bom(range<const char16_t, Aligned>& from, codecvt_mode& mode)
236 if (mode & consume_header)
238 if (read_bom(from, utf16_bom))
239 mode &= ~little_endian;
240 else if (read_bom(from, utf16le_bom))
241 mode |= little_endian;
245 // Read a codepoint from a UTF-8 multibyte sequence.
246 // Updates from.next if the codepoint is not greater than maxcode.
247 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
248 char32_t
249 read_utf8_code_point(range<const char>& from, unsigned long maxcode)
251 const size_t avail = from.size();
252 if (avail == 0)
253 return incomplete_mb_character;
254 unsigned char c1 = from[0];
255 // https://en.wikipedia.org/wiki/UTF-8#Sample_code
256 if (c1 < 0x80)
258 ++from;
259 return c1;
261 else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
262 return invalid_mb_sequence;
263 else if (c1 < 0xE0) // 2-byte sequence
265 if (avail < 2)
266 return incomplete_mb_character;
267 unsigned char c2 = from[1];
268 if ((c2 & 0xC0) != 0x80)
269 return invalid_mb_sequence;
270 char32_t c = (c1 << 6) + c2 - 0x3080;
271 if (c <= maxcode)
272 from += 2;
273 return c;
275 else if (c1 < 0xF0) // 3-byte sequence
277 if (avail < 3)
278 return incomplete_mb_character;
279 unsigned char c2 = from[1];
280 if ((c2 & 0xC0) != 0x80)
281 return invalid_mb_sequence;
282 if (c1 == 0xE0 && c2 < 0xA0) // overlong
283 return invalid_mb_sequence;
284 unsigned char c3 = from[2];
285 if ((c3 & 0xC0) != 0x80)
286 return invalid_mb_sequence;
287 char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
288 if (c <= maxcode)
289 from += 3;
290 return c;
292 else if (c1 < 0xF5) // 4-byte sequence
294 if (avail < 4)
295 return incomplete_mb_character;
296 unsigned char c2 = from[1];
297 if ((c2 & 0xC0) != 0x80)
298 return invalid_mb_sequence;
299 if (c1 == 0xF0 && c2 < 0x90) // overlong
300 return invalid_mb_sequence;
301 if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
302 return invalid_mb_sequence;
303 unsigned char c3 = from[2];
304 if ((c3 & 0xC0) != 0x80)
305 return invalid_mb_sequence;
306 unsigned char c4 = from[3];
307 if ((c4 & 0xC0) != 0x80)
308 return invalid_mb_sequence;
309 char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
310 if (c <= maxcode)
311 from += 4;
312 return c;
314 else // > U+10FFFF
315 return invalid_mb_sequence;
318 bool
319 write_utf8_code_point(range<char>& to, char32_t code_point)
321 if (code_point < 0x80)
323 if (to.size() < 1)
324 return false;
325 to = code_point;
327 else if (code_point <= 0x7FF)
329 if (to.size() < 2)
330 return false;
331 to = (code_point >> 6) + 0xC0;
332 to = (code_point & 0x3F) + 0x80;
334 else if (code_point <= 0xFFFF)
336 if (to.size() < 3)
337 return false;
338 to = (code_point >> 12) + 0xE0;
339 to = ((code_point >> 6) & 0x3F) + 0x80;
340 to = (code_point & 0x3F) + 0x80;
342 else if (code_point <= 0x10FFFF)
344 if (to.size() < 4)
345 return false;
346 to = (code_point >> 18) + 0xF0;
347 to = ((code_point >> 12) & 0x3F) + 0x80;
348 to = ((code_point >> 6) & 0x3F) + 0x80;
349 to = (code_point & 0x3F) + 0x80;
351 else
352 return false;
353 return true;
// Convert a UTF-16 code unit between native byte order and the byte order
// selected by mode (little-endian if little_endian is set, big-endian
// otherwise).  The bytes are swapped only when the two orders differ, so
// the same call serves for both reading and writing.
inline char16_t
adjust_byte_order(char16_t c, codecvt_mode mode)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  return (mode & little_endian) ? __builtin_bswap16(c) : c;
#else
  return (mode & little_endian) ? c : __builtin_bswap16(c);
#endif
}
// Return true if c is a high-surrogate (aka leading) code point,
// i.e. lies in the range U+D800..U+DBFF.
inline bool
is_high_surrogate(char32_t c)
{
  return c >= 0xD800 && c <= 0xDBFF;
}
// Return true if c is a low-surrogate (aka trailing) code point,
// i.e. lies in the range U+DC00..U+DFFF.
inline bool
is_low_surrogate(char32_t c)
{
  return c >= 0xDC00 && c <= 0xDFFF;
}
// Combine a UTF-16 surrogate pair into the code point it encodes.
// The constant is (0xD800 << 10) + 0xDC00 - 0x10000, so for a valid
// high/low pair the result lies in U+10000..U+10FFFF.  The arguments are
// not checked; callers must have verified them already.
inline char32_t
surrogate_pair_to_code_point(char32_t high, char32_t low)
{
  return (high << 10) + low - 0x35FDC00;
}
386 // Read a codepoint from a UTF-16 multibyte sequence.
387 // The sequence's endianness is indicated by (mode & little_endian).
388 // Updates from.next if the codepoint is not greater than maxcode.
389 // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
390 template<bool Aligned>
391 char32_t
392 read_utf16_code_point(range<const char16_t, Aligned>& from,
393 unsigned long maxcode, codecvt_mode mode)
395 const size_t avail = from.size();
396 if (avail == 0)
397 return incomplete_mb_character;
398 int inc = 1;
399 char32_t c = adjust_byte_order(from[0], mode);
400 if (is_high_surrogate(c))
402 if (avail < 2)
403 return incomplete_mb_character;
404 const char16_t c2 = adjust_byte_order(from[1], mode);
405 if (is_low_surrogate(c2))
407 c = surrogate_pair_to_code_point(c, c2);
408 inc = 2;
410 else
411 return invalid_mb_sequence;
413 else if (is_low_surrogate(c))
414 return invalid_mb_sequence;
415 if (c <= maxcode)
416 from += inc;
417 return c;
420 template<typename C, bool A>
421 bool
422 write_utf16_code_point(range<C, A>& to, char32_t codepoint, codecvt_mode mode)
424 static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
426 if (codepoint <= max_single_utf16_unit)
428 if (to.size() > 0)
430 to = adjust_byte_order(codepoint, mode);
431 return true;
434 else if (to.size() > 1)
436 // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
437 const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
438 char16_t lead = LEAD_OFFSET + (codepoint >> 10);
439 char16_t trail = 0xDC00 + (codepoint & 0x3FF);
440 to = adjust_byte_order(lead, mode);
441 to = adjust_byte_order(trail, mode);
442 return true;
444 return false;
447 // utf8 -> ucs4
448 codecvt_base::result
449 ucs4_in(range<const char>& from, range<char32_t>& to,
450 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
452 read_utf8_bom(from, mode);
453 while (from.size() && to.size())
455 const char32_t codepoint = read_utf8_code_point(from, maxcode);
456 if (codepoint == incomplete_mb_character)
457 return codecvt_base::partial;
458 if (codepoint > maxcode)
459 return codecvt_base::error;
460 to = codepoint;
462 return from.size() ? codecvt_base::partial : codecvt_base::ok;
465 // ucs4 -> utf8
466 codecvt_base::result
467 ucs4_out(range<const char32_t>& from, range<char>& to,
468 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
470 if (!write_utf8_bom(to, mode))
471 return codecvt_base::partial;
472 while (from.size())
474 const char32_t c = from[0];
475 if (c > maxcode)
476 return codecvt_base::error;
477 if (!write_utf8_code_point(to, c))
478 return codecvt_base::partial;
479 ++from;
481 return codecvt_base::ok;
484 // utf16 -> ucs4
485 codecvt_base::result
486 ucs4_in(range<const char16_t, false>& from, range<char32_t>& to,
487 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
489 read_utf16_bom(from, mode);
490 while (from.size() && to.size())
492 const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
493 if (codepoint == incomplete_mb_character)
494 return codecvt_base::partial;
495 if (codepoint > maxcode)
496 return codecvt_base::error;
497 to = codepoint;
499 return from.size() ? codecvt_base::partial : codecvt_base::ok;
502 // ucs4 -> utf16
503 codecvt_base::result
504 ucs4_out(range<const char32_t>& from, range<char16_t, false>& to,
505 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
507 if (!write_utf16_bom(to, mode))
508 return codecvt_base::partial;
509 while (from.size())
511 const char32_t c = from[0];
512 if (c > maxcode)
513 return codecvt_base::error;
514 if (!write_utf16_code_point(to, c, mode))
515 return codecvt_base::partial;
516 ++from;
518 return codecvt_base::ok;
// Flag indicating whether to process UTF-16 or UCS2
// (UCS-2 has no surrogate pairs, so they are disallowed there).
enum class surrogates { allowed, disallowed };
524 // utf8 -> utf16 (or utf8 -> ucs2 if s == surrogates::disallowed)
525 template<typename C>
526 codecvt_base::result
527 utf16_in(range<const char>& from, range<C>& to,
528 unsigned long maxcode = max_code_point, codecvt_mode mode = {},
529 surrogates s = surrogates::allowed)
531 read_utf8_bom(from, mode);
532 while (from.size() && to.size())
534 auto orig = from;
535 const char32_t codepoint = read_utf8_code_point(from, maxcode);
536 if (codepoint == incomplete_mb_character)
538 if (s == surrogates::allowed)
539 return codecvt_base::partial;
540 else
541 return codecvt_base::error; // No surrogates in UCS2
543 if (codepoint > maxcode)
544 return codecvt_base::error;
545 if (!write_utf16_code_point(to, codepoint, mode))
547 from = orig; // rewind to previous position
548 return codecvt_base::partial;
551 return codecvt_base::ok;
554 // utf16 -> utf8 (or ucs2 -> utf8 if s == surrogates::disallowed)
555 template<typename C>
556 codecvt_base::result
557 utf16_out(range<const C>& from, range<char>& to,
558 unsigned long maxcode = max_code_point, codecvt_mode mode = {},
559 surrogates s = surrogates::allowed)
561 if (!write_utf8_bom(to, mode))
562 return codecvt_base::partial;
563 while (from.size())
565 char32_t c = from[0];
566 int inc = 1;
567 if (is_high_surrogate(c))
569 if (s == surrogates::disallowed)
570 return codecvt_base::error; // No surrogates in UCS-2
572 if (from.size() < 2)
573 return codecvt_base::ok; // stop converting at this point
575 const char32_t c2 = from[1];
576 if (is_low_surrogate(c2))
578 c = surrogate_pair_to_code_point(c, c2);
579 inc = 2;
581 else
582 return codecvt_base::error;
584 else if (is_low_surrogate(c))
585 return codecvt_base::error;
586 if (c > maxcode)
587 return codecvt_base::error;
588 if (!write_utf8_code_point(to, c))
589 return codecvt_base::partial;
590 from += inc;
592 return codecvt_base::ok;
595 // return pos such that [begin,pos) is valid UTF-16 string no longer than max
596 const char*
597 utf16_span(const char* begin, const char* end, size_t max,
598 char32_t maxcode = max_code_point, codecvt_mode mode = {})
600 range<const char> from{ begin, end };
601 read_utf8_bom(from, mode);
602 size_t count = 0;
603 while (count+1 < max)
605 char32_t c = read_utf8_code_point(from, maxcode);
606 if (c > maxcode)
607 return from.next;
608 else if (c > max_single_utf16_unit)
609 ++count;
610 ++count;
612 if (count+1 == max) // take one more character if it fits in a single unit
613 read_utf8_code_point(from, std::min(max_single_utf16_unit, maxcode));
614 return from.next;
617 // utf8 -> ucs2
618 codecvt_base::result
619 ucs2_in(range<const char>& from, range<char16_t>& to,
620 char32_t maxcode = max_code_point, codecvt_mode mode = {})
622 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
623 maxcode = std::min(max_single_utf16_unit, maxcode);
624 return utf16_in(from, to, maxcode, mode, surrogates::disallowed);
627 // ucs2 -> utf8
628 codecvt_base::result
629 ucs2_out(range<const char16_t>& from, range<char>& to,
630 char32_t maxcode = max_code_point, codecvt_mode mode = {})
632 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
633 maxcode = std::min(max_single_utf16_unit, maxcode);
634 return utf16_out(from, to, maxcode, mode, surrogates::disallowed);
637 // ucs2 -> utf16
638 codecvt_base::result
639 ucs2_out(range<const char16_t>& from, range<char16_t, false>& to,
640 char32_t maxcode = max_code_point, codecvt_mode mode = {})
642 if (!write_utf16_bom(to, mode))
643 return codecvt_base::partial;
644 while (from.size() && to.size())
646 char16_t c = from[0];
647 if (is_high_surrogate(c))
648 return codecvt_base::error;
649 if (c > maxcode)
650 return codecvt_base::error;
651 to = adjust_byte_order(c, mode);
652 ++from;
654 return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
657 // utf16 -> ucs2
658 codecvt_base::result
659 ucs2_in(range<const char16_t, false>& from, range<char16_t>& to,
660 char32_t maxcode = max_code_point, codecvt_mode mode = {})
662 read_utf16_bom(from, mode);
663 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
664 maxcode = std::min(max_single_utf16_unit, maxcode);
665 while (from.size() && to.size())
667 const char32_t c = read_utf16_code_point(from, maxcode, mode);
668 if (c == incomplete_mb_character)
669 return codecvt_base::error; // UCS-2 only supports single units.
670 if (c > maxcode)
671 return codecvt_base::error;
672 to = c;
674 return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
677 const char16_t*
678 ucs2_span(range<const char16_t, false>& from, size_t max,
679 char32_t maxcode, codecvt_mode mode)
681 read_utf16_bom(from, mode);
682 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
683 maxcode = std::min(max_single_utf16_unit, maxcode);
684 char32_t c = 0;
685 while (max-- && c <= maxcode)
686 c = read_utf16_code_point(from, maxcode, mode);
687 return reinterpret_cast<const char16_t*>(from.next);
690 const char*
691 ucs2_span(const char* begin, const char* end, size_t max,
692 char32_t maxcode, codecvt_mode mode)
694 range<const char> from{ begin, end };
695 read_utf8_bom(from, mode);
696 // UCS-2 only supports characters in the BMP, i.e. one UTF-16 code unit:
697 maxcode = std::min(max_single_utf16_unit, maxcode);
698 char32_t c = 0;
699 while (max-- && c <= maxcode)
700 c = read_utf8_code_point(from, maxcode);
701 return from.next;
704 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
705 const char*
706 ucs4_span(const char* begin, const char* end, size_t max,
707 char32_t maxcode = max_code_point, codecvt_mode mode = {})
709 range<const char> from{ begin, end };
710 read_utf8_bom(from, mode);
711 char32_t c = 0;
712 while (max-- && c <= maxcode)
713 c = read_utf8_code_point(from, maxcode);
714 return from.next;
717 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
718 const char16_t*
719 ucs4_span(range<const char16_t, false>& from, size_t max,
720 char32_t maxcode = max_code_point, codecvt_mode mode = {})
722 read_utf16_bom(from, mode);
723 char32_t c = 0;
724 while (max-- && c <= maxcode)
725 c = read_utf16_code_point(from, maxcode, mode);
726 return reinterpret_cast<const char16_t*>(from.next);
730 // Define members of codecvt<char16_t, char, mbstate_t> specialization.
731 // Converts from UTF-8 to UTF-16.
733 locale::id codecvt<char16_t, char, mbstate_t>::id;
735 codecvt<char16_t, char, mbstate_t>::~codecvt() { }
737 codecvt_base::result
738 codecvt<char16_t, char, mbstate_t>::
739 do_out(state_type&,
740 const intern_type* __from,
741 const intern_type* __from_end, const intern_type*& __from_next,
742 extern_type* __to, extern_type* __to_end,
743 extern_type*& __to_next) const
745 range<const char16_t> from{ __from, __from_end };
746 range<char> to{ __to, __to_end };
747 auto res = utf16_out(from, to);
748 __from_next = from.next;
749 __to_next = to.next;
750 return res;
753 codecvt_base::result
754 codecvt<char16_t, char, mbstate_t>::
755 do_unshift(state_type&, extern_type* __to, extern_type*,
756 extern_type*& __to_next) const
758 __to_next = __to;
759 return noconv; // we don't use mbstate_t for the unicode facets
762 codecvt_base::result
763 codecvt<char16_t, char, mbstate_t>::
764 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
765 const extern_type*& __from_next,
766 intern_type* __to, intern_type* __to_end,
767 intern_type*& __to_next) const
769 range<const char> from{ __from, __from_end };
770 range<char16_t> to{ __to, __to_end };
771 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
772 codecvt_mode mode = {};
773 #else
774 codecvt_mode mode = little_endian;
775 #endif
776 auto res = utf16_in(from, to, max_code_point, mode);
777 __from_next = from.next;
778 __to_next = to.next;
779 return res;
783 codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
784 { return 0; } // UTF-8 is not a fixed-width encoding
786 bool
787 codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
788 { return false; }
791 codecvt<char16_t, char, mbstate_t>::
792 do_length(state_type&, const extern_type* __from,
793 const extern_type* __end, size_t __max) const
795 __end = utf16_span(__from, __end, __max);
796 return __end - __from;
800 codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
802 // A single character (one or two UTF-16 code units) requires
803 // up to four UTF-8 code units.
804 return 4;
807 // Define members of codecvt<char32_t, char, mbstate_t> specialization.
808 // Converts from UTF-8 to UTF-32 (aka UCS-4).
810 locale::id codecvt<char32_t, char, mbstate_t>::id;
812 codecvt<char32_t, char, mbstate_t>::~codecvt() { }
814 codecvt_base::result
815 codecvt<char32_t, char, mbstate_t>::
816 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
817 const intern_type*& __from_next,
818 extern_type* __to, extern_type* __to_end,
819 extern_type*& __to_next) const
821 range<const char32_t> from{ __from, __from_end };
822 range<char> to{ __to, __to_end };
823 auto res = ucs4_out(from, to);
824 __from_next = from.next;
825 __to_next = to.next;
826 return res;
829 codecvt_base::result
830 codecvt<char32_t, char, mbstate_t>::
831 do_unshift(state_type&, extern_type* __to, extern_type*,
832 extern_type*& __to_next) const
834 __to_next = __to;
835 return noconv;
838 codecvt_base::result
839 codecvt<char32_t, char, mbstate_t>::
840 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
841 const extern_type*& __from_next,
842 intern_type* __to, intern_type* __to_end,
843 intern_type*& __to_next) const
845 range<const char> from{ __from, __from_end };
846 range<char32_t> to{ __to, __to_end };
847 auto res = ucs4_in(from, to);
848 __from_next = from.next;
849 __to_next = to.next;
850 return res;
854 codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
855 { return 0; } // UTF-8 is not a fixed-width encoding
857 bool
858 codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
859 { return false; }
862 codecvt<char32_t, char, mbstate_t>::
863 do_length(state_type&, const extern_type* __from,
864 const extern_type* __end, size_t __max) const
866 __end = ucs4_span(__from, __end, __max);
867 return __end - __from;
871 codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
873 // A single character (one UTF-32 code unit) requires
874 // up to 4 UTF-8 code units.
875 return 4;
878 // Define members of codecvt_utf8<char16_t> base class implementation.
879 // Converts from UTF-8 to UCS-2.
881 __codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
883 codecvt_base::result
884 __codecvt_utf8_base<char16_t>::
885 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
886 const intern_type*& __from_next,
887 extern_type* __to, extern_type* __to_end,
888 extern_type*& __to_next) const
890 range<const char16_t> from{ __from, __from_end };
891 range<char> to{ __to, __to_end };
892 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
893 __from_next = from.next;
894 __to_next = to.next;
895 return res;
898 codecvt_base::result
899 __codecvt_utf8_base<char16_t>::
900 do_unshift(state_type&, extern_type* __to, extern_type*,
901 extern_type*& __to_next) const
903 __to_next = __to;
904 return noconv;
907 codecvt_base::result
908 __codecvt_utf8_base<char16_t>::
909 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
910 const extern_type*& __from_next,
911 intern_type* __to, intern_type* __to_end,
912 intern_type*& __to_next) const
914 range<const char> from{ __from, __from_end };
915 range<char16_t> to{ __to, __to_end };
916 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
917 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
918 mode = codecvt_mode(mode | little_endian);
919 #endif
920 auto res = ucs2_in(from, to, _M_maxcode, mode);
921 __from_next = from.next;
922 __to_next = to.next;
923 return res;
927 __codecvt_utf8_base<char16_t>::do_encoding() const throw()
928 { return 0; } // UTF-8 is not a fixed-width encoding
930 bool
931 __codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
932 { return false; }
935 __codecvt_utf8_base<char16_t>::
936 do_length(state_type&, const extern_type* __from,
937 const extern_type* __end, size_t __max) const
939 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
940 return __end - __from;
944 __codecvt_utf8_base<char16_t>::do_max_length() const throw()
946 // A single UCS-2 character requires up to three UTF-8 code units.
947 // (UCS-2 cannot represent characters that use four UTF-8 code units).
948 int max = 3;
949 if (_M_mode & consume_header)
950 max += sizeof(utf8_bom);
951 return max;
954 // Define members of codecvt_utf8<char32_t> base class implementation.
955 // Converts from UTF-8 to UTF-32 (aka UCS-4).
957 __codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
959 codecvt_base::result
960 __codecvt_utf8_base<char32_t>::
961 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
962 const intern_type*& __from_next,
963 extern_type* __to, extern_type* __to_end,
964 extern_type*& __to_next) const
966 range<const char32_t> from{ __from, __from_end };
967 range<char> to{ __to, __to_end };
968 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
969 __from_next = from.next;
970 __to_next = to.next;
971 return res;
974 codecvt_base::result
975 __codecvt_utf8_base<char32_t>::
976 do_unshift(state_type&, extern_type* __to, extern_type*,
977 extern_type*& __to_next) const
979 __to_next = __to;
980 return noconv;
983 codecvt_base::result
984 __codecvt_utf8_base<char32_t>::
985 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
986 const extern_type*& __from_next,
987 intern_type* __to, intern_type* __to_end,
988 intern_type*& __to_next) const
990 range<const char> from{ __from, __from_end };
991 range<char32_t> to{ __to, __to_end };
992 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
993 __from_next = from.next;
994 __to_next = to.next;
995 return res;
999 __codecvt_utf8_base<char32_t>::do_encoding() const throw()
1000 { return 0; } // UTF-8 is not a fixed-width encoding
1002 bool
1003 __codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
1004 { return false; }
1007 __codecvt_utf8_base<char32_t>::
1008 do_length(state_type&, const extern_type* __from,
1009 const extern_type* __end, size_t __max) const
1011 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
1012 return __end - __from;
1016 __codecvt_utf8_base<char32_t>::do_max_length() const throw()
1018 // A single UCS-4 character requires up to four UTF-8 code units.
1019 int max = 4;
1020 if (_M_mode & consume_header)
1021 max += sizeof(utf8_bom);
1022 return max;
1025 #ifdef _GLIBCXX_USE_WCHAR_T
1027 #if __SIZEOF_WCHAR_T__ == 2
1028 static_assert(sizeof(wchar_t) == sizeof(char16_t), "");
1029 #elif __SIZEOF_WCHAR_T__ == 4
1030 static_assert(sizeof(wchar_t) == sizeof(char32_t), "");
1031 #endif
1033 // Define members of codecvt_utf8<wchar_t> base class implementation.
1034 // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1036 __codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
1038 codecvt_base::result
1039 __codecvt_utf8_base<wchar_t>::
1040 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1041 const intern_type*& __from_next,
1042 extern_type* __to, extern_type* __to_end,
1043 extern_type*& __to_next) const
1045 range<char> to{ __to, __to_end };
1046 #if __SIZEOF_WCHAR_T__ == 2
1047 range<const char16_t> from{
1048 reinterpret_cast<const char16_t*>(__from),
1049 reinterpret_cast<const char16_t*>(__from_end)
1051 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1052 #elif __SIZEOF_WCHAR_T__ == 4
1053 range<const char32_t> from{
1054 reinterpret_cast<const char32_t*>(__from),
1055 reinterpret_cast<const char32_t*>(__from_end)
1057 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1058 #else
1059 return codecvt_base::error;
1060 #endif
1061 __from_next = reinterpret_cast<const wchar_t*>(from.next);
1062 __to_next = to.next;
1063 return res;
1066 codecvt_base::result
1067 __codecvt_utf8_base<wchar_t>::
1068 do_unshift(state_type&, extern_type* __to, extern_type*,
1069 extern_type*& __to_next) const
1071 __to_next = __to;
1072 return noconv;
1075 codecvt_base::result
1076 __codecvt_utf8_base<wchar_t>::
1077 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1078 const extern_type*& __from_next,
1079 intern_type* __to, intern_type* __to_end,
1080 intern_type*& __to_next) const
1082 range<const char> from{ __from, __from_end };
1083 #if __SIZEOF_WCHAR_T__ == 2
1084 range<char16_t> to{
1085 reinterpret_cast<char16_t*>(__to),
1086 reinterpret_cast<char16_t*>(__to_end)
1088 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1089 codecvt_mode mode = {};
1090 #else
1091 codecvt_mode mode = little_endian;
1092 #endif
1093 auto res = ucs2_in(from, to, _M_maxcode, mode);
1094 #elif __SIZEOF_WCHAR_T__ == 4
1095 range<char32_t> to{
1096 reinterpret_cast<char32_t*>(__to),
1097 reinterpret_cast<char32_t*>(__to_end)
1099 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1100 #else
1101 return codecvt_base::error;
1102 #endif
1103 __from_next = from.next;
1104 __to_next = reinterpret_cast<wchar_t*>(to.next);
1105 return res;
1109 __codecvt_utf8_base<wchar_t>::do_encoding() const throw()
1110 { return 0; } // UTF-8 is not a fixed-width encoding
1112 bool
1113 __codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
1114 { return false; }
// Returns the distance in chars from __from to the pointer produced by
// ucs2_span / ucs4_span (selected by __SIZEOF_WCHAR_T__). __max is
// forwarded to the span helper (presumably the maximum number of wide
// characters to count — confirm). When wchar_t is neither 2 nor 4 bytes
// __end is set to __from, so the result is 0.
1117 __codecvt_utf8_base<wchar_t>::
1118 do_length(state_type&, const extern_type* __from,
1119 const extern_type* __end, size_t __max) const
1121 #if __SIZEOF_WCHAR_T__ == 2
1122 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
1123 #elif __SIZEOF_WCHAR_T__ == 4
1124 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
1125 #else
1126 __end = __from;
1127 #endif
1128 return __end - __from;
// Returns 3 when wchar_t is 2 bytes and 4 otherwise, plus sizeof(utf8_bom)
// when consume_header is set in _M_mode.
1132 __codecvt_utf8_base<wchar_t>::do_max_length() const throw()
1134 #if __SIZEOF_WCHAR_T__ == 2
1135 int max = 3; // See __codecvt_utf8_base<char16_t>::do_max_length()
1136 #else
1137 int max = 4; // See __codecvt_utf8_base<char32_t>::do_max_length()
1138 #endif
1139 if (_M_mode & consume_header)
1140 max += sizeof(utf8_bom);
1141 return max;
1143 #endif
1145 // Define members of codecvt_utf16<char16_t> base class implementation.
1146 // Converts from UTF-16 to UCS-2.
// Out-of-line destructor definition; the body is empty.
1148 __codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
// Converts char16_t input in [__from, __from_end) to external bytes in
// [__to, __to_end) through ucs2_out, using _M_maxcode and _M_mode.
// The output buffer is wrapped as range<char16_t, false> (the `false`
// argument presumably selects an unaligned-access variant — confirm).
// __from_next / __to_next are set from the ranges' next positions and the
// helper's result is returned.
1150 codecvt_base::result
1151 __codecvt_utf16_base<char16_t>::
1152 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1153 const intern_type*& __from_next,
1154 extern_type* __to, extern_type* __to_end,
1155 extern_type*& __to_next) const
1157 range<const char16_t> from{ __from, __from_end };
1158 range<char16_t, false> to{ __to, __to_end };
1159 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1160 __from_next = from.next;
1161 __to_next = reinterpret_cast<char*>(to.next);
1162 return res;
// Writes nothing: sets __to_next to __to and returns noconv. The state
// argument and the output end pointer are unnamed and unused.
1165 codecvt_base::result
1166 __codecvt_utf16_base<char16_t>::
1167 do_unshift(state_type&, extern_type* __to, extern_type*,
1168 extern_type*& __to_next) const
1170 __to_next = __to;
1171 return noconv;
// Converts external bytes in [__from, __from_end) to char16_t output in
// [__to, __to_end) through ucs2_in, using _M_maxcode and _M_mode.
// __from_next / __to_next are set from the ranges' next positions.
// A helper result of ok is downgraded to error if input remains unconsumed.
1174 codecvt_base::result
1175 __codecvt_utf16_base<char16_t>::
1176 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1177 const extern_type*& __from_next,
1178 intern_type* __to, intern_type* __to_end,
1179 intern_type*& __to_next) const
1181 range<const char16_t, false> from{ __from, __from_end };
1182 range<char16_t> to{ __to, __to_end };
1183 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1184 __from_next = reinterpret_cast<const char*>(from.next);
1185 __to_next = to.next;
// ok with leftover input (presumably a trailing partial code unit —
// confirm) is reported as an error.
1186 if (res == codecvt_base::ok && __from_next != __from_end)
1187 res = codecvt_base::error;
1188 return res;
// Always returns 0 for this facet.
1192 __codecvt_utf16_base<char16_t>::do_encoding() const throw()
1193 { return 0; } // UTF-16 is not a fixed-width encoding
// Always returns false for this facet.
1195 bool
1196 __codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
1197 { return false; }
// Wraps [__from, __end) as a UTF-16 input range, calls ucs2_span with
// __max, _M_maxcode and _M_mode, and returns the distance in chars from
// __from to the pointer the helper returns.
1200 __codecvt_utf16_base<char16_t>::
1201 do_length(state_type&, const extern_type* __from,
1202 const extern_type* __end, size_t __max) const
1204 range<const char16_t, false> from{ __from, __end };
1205 const char16_t* next = ucs2_span(from, __max, _M_maxcode, _M_mode);
1206 return reinterpret_cast<const char*>(next) - __from;
// Returns 2, plus sizeof(utf16_bom) when consume_header is set in _M_mode.
1210 __codecvt_utf16_base<char16_t>::do_max_length() const throw()
1212 // A single UCS-2 character requires one UTF-16 code unit (so two chars).
1213 // (UCS-2 cannot represent characters that use multiple UTF-16 code units).
1214 int max = 2;
1215 if (_M_mode & consume_header)
1216 max += sizeof(utf16_bom);
1217 return max;
1220 // Define members of codecvt_utf16<char32_t> base class implementation.
1221 // Converts from UTF-16 to UTF-32 (aka UCS-4).
// Out-of-line destructor definition; the body is empty.
1223 __codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
// Converts char32_t input in [__from, __from_end) to external bytes in
// [__to, __to_end) through ucs4_out, using _M_maxcode and _M_mode.
// __from_next / __to_next are set from the ranges' next positions and the
// helper's result is returned.
1225 codecvt_base::result
1226 __codecvt_utf16_base<char32_t>::
1227 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1228 const intern_type*& __from_next,
1229 extern_type* __to, extern_type* __to_end,
1230 extern_type*& __to_next) const
1232 range<const char32_t> from{ __from, __from_end };
1233 range<char16_t, false> to{ __to, __to_end };
1234 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1235 __from_next = from.next;
1236 __to_next = reinterpret_cast<char*>(to.next);
1237 return res;
// Writes nothing: sets __to_next to __to and returns noconv. The state
// argument and the output end pointer are unnamed and unused.
1240 codecvt_base::result
1241 __codecvt_utf16_base<char32_t>::
1242 do_unshift(state_type&, extern_type* __to, extern_type*,
1243 extern_type*& __to_next) const
1245 __to_next = __to;
1246 return noconv;
// Converts external bytes in [__from, __from_end) to char32_t output in
// [__to, __to_end) through ucs4_in, using _M_maxcode and _M_mode.
// __from_next / __to_next are set from the ranges' next positions.
// A helper result of ok is downgraded to error if input remains unconsumed.
1249 codecvt_base::result
1250 __codecvt_utf16_base<char32_t>::
1251 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1252 const extern_type*& __from_next,
1253 intern_type* __to, intern_type* __to_end,
1254 intern_type*& __to_next) const
1256 range<const char16_t, false> from{ __from, __from_end };
1257 range<char32_t> to{ __to, __to_end };
1258 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1259 __from_next = reinterpret_cast<const char*>(from.next);
1260 __to_next = to.next;
// ok with leftover input (presumably a trailing partial code unit —
// confirm) is reported as an error.
1261 if (res == codecvt_base::ok && __from_next != __from_end)
1262 res = codecvt_base::error;
1263 return res;
// Always returns 0 for this facet.
1267 __codecvt_utf16_base<char32_t>::do_encoding() const throw()
1268 { return 0; } // UTF-16 is not a fixed-width encoding
// Always returns false for this facet.
1270 bool
1271 __codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
1272 { return false; }
// Wraps [__from, __end) as a UTF-16 input range, calls ucs4_span with
// __max, _M_maxcode and _M_mode, and returns the distance in chars from
// __from to the pointer the helper returns.
1275 __codecvt_utf16_base<char32_t>::
1276 do_length(state_type&, const extern_type* __from,
1277 const extern_type* __end, size_t __max) const
1279 range<const char16_t, false> from{ __from, __end };
1280 const char16_t* next = ucs4_span(from, __max, _M_maxcode, _M_mode);
1281 return reinterpret_cast<const char*>(next) - __from;
// Returns 4, plus sizeof(utf16_bom) when consume_header is set in _M_mode.
1285 __codecvt_utf16_base<char32_t>::do_max_length() const throw()
1287 // A single UCS-4 character requires one or two UTF-16 code units
1288 // (so up to four chars).
1289 int max = 4;
1290 if (_M_mode & consume_header)
1291 max += sizeof(utf16_bom);
1292 return max;
1295 #ifdef _GLIBCXX_USE_WCHAR_T
1296 // Define members of codecvt_utf16<wchar_t> base class implementation.
1297 // Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
// Out-of-line destructor definition; the body is empty.
1299 __codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
// Converts wchar_t input in [__from, __from_end) to external bytes in
// [__to, __to_end). The wchar_t input is viewed as char16_t or char32_t
// depending on __SIZEOF_WCHAR_T__ and passed to ucs2_out / ucs4_out with
// _M_maxcode and _M_mode; any other size returns codecvt_base::error.
// __from_next / __to_next are set from the ranges' next positions and the
// helper's result is returned.
1301 codecvt_base::result
1302 __codecvt_utf16_base<wchar_t>::
1303 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1304 const intern_type*& __from_next,
1305 extern_type* __to, extern_type* __to_end,
1306 extern_type*& __to_next) const
1308 range<char16_t, false> to{ __to, __to_end };
1309 #if __SIZEOF_WCHAR_T__ == 2
1310 range<const char16_t> from{
1311 reinterpret_cast<const char16_t*>(__from),
1312 reinterpret_cast<const char16_t*>(__from_end),
1314 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1315 #elif __SIZEOF_WCHAR_T__ == 4
1316 range<const char32_t> from{
1317 reinterpret_cast<const char32_t*>(__from),
1318 reinterpret_cast<const char32_t*>(__from_end),
1320 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1321 #else
1322 return codecvt_base::error;
1323 #endif
1324 __from_next = reinterpret_cast<const wchar_t*>(from.next);
1325 __to_next = reinterpret_cast<char*>(to.next);
1326 return res;
// Writes nothing: sets __to_next to __to and returns noconv. The state
// argument and the output end pointer are unnamed and unused.
1329 codecvt_base::result
1330 __codecvt_utf16_base<wchar_t>::
1331 do_unshift(state_type&, extern_type* __to, extern_type*,
1332 extern_type*& __to_next) const
1334 __to_next = __to;
1335 return noconv;
// Converts external bytes in [__from, __from_end) to wchar_t output in
// [__to, __to_end). The wchar_t buffer is viewed as char16_t or char32_t
// depending on __SIZEOF_WCHAR_T__ and passed to ucs2_in / ucs4_in with
// _M_maxcode and _M_mode; any other size returns codecvt_base::error.
// __from_next / __to_next are set from the ranges' next positions.
// A helper result of ok is downgraded to error if input remains unconsumed.
1338 codecvt_base::result
1339 __codecvt_utf16_base<wchar_t>::
1340 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1341 const extern_type*& __from_next,
1342 intern_type* __to, intern_type* __to_end,
1343 intern_type*& __to_next) const
1345 range<const char16_t, false> from{ __from, __from_end };
1346 #if __SIZEOF_WCHAR_T__ == 2
1347 range<char16_t> to{
1348 reinterpret_cast<char16_t*>(__to),
1349 reinterpret_cast<char16_t*>(__to_end),
1351 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1352 #elif __SIZEOF_WCHAR_T__ == 4
1353 range<char32_t> to{
1354 reinterpret_cast<char32_t*>(__to),
1355 reinterpret_cast<char32_t*>(__to_end),
1357 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1358 #else
1359 return codecvt_base::error;
1360 #endif
1361 __from_next = reinterpret_cast<const char*>(from.next);
1362 __to_next = reinterpret_cast<wchar_t*>(to.next);
// ok with leftover input (presumably a trailing partial code unit —
// confirm) is reported as an error.
1363 if (res == codecvt_base::ok && __from_next != __from_end)
1364 res = codecvt_base::error;
1365 return res;
// Always returns 0 for this facet.
1369 __codecvt_utf16_base<wchar_t>::do_encoding() const throw()
1370 { return 0; } // UTF-16 is not a fixed-width encoding
// Always returns false for this facet.
1372 bool
1373 __codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
1374 { return false; }
// Wraps [__from, __end) as a UTF-16 input range, calls ucs2_span or
// ucs4_span (selected by __SIZEOF_WCHAR_T__) with __max, _M_maxcode and
// _M_mode, and returns the distance in chars from __from to the pointer
// the helper returns.
// NOTE(review): there is no #else branch, so `next` is not declared when
// __SIZEOF_WCHAR_T__ is neither 2 nor 4 — confirm such targets are not
// expected to build this code.
1377 __codecvt_utf16_base<wchar_t>::
1378 do_length(state_type&, const extern_type* __from,
1379 const extern_type* __end, size_t __max) const
1381 range<const char16_t, false> from{ __from, __end };
1382 #if __SIZEOF_WCHAR_T__ == 2
1383 const char16_t* next = ucs2_span(from, __max, _M_maxcode, _M_mode);
1384 #elif __SIZEOF_WCHAR_T__ == 4
1385 const char16_t* next = ucs4_span(from, __max, _M_maxcode, _M_mode);
1386 #endif
1387 return reinterpret_cast<const char*>(next) - __from;
// Returns 2 when wchar_t is 2 bytes and 4 otherwise, plus
// sizeof(utf16_bom) when consume_header is set in _M_mode.
1391 __codecvt_utf16_base<wchar_t>::do_max_length() const throw()
1393 #if __SIZEOF_WCHAR_T__ == 2
1394 int max = 2; // See __codecvt_utf16_base<char16_t>::do_max_length()
1395 #else
1396 int max = 4; // See __codecvt_utf16_base<char32_t>::do_max_length()
1397 #endif
1398 if (_M_mode & consume_header)
1399 max += sizeof(utf16_bom);
1400 return max;
1402 #endif
1404 // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1405 // Converts from UTF-8 to UTF-16.
// Out-of-line destructor definition; the body is empty.
1407 __codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
// Converts char16_t input in [__from, __from_end) to external chars in
// [__to, __to_end) through utf16_out, using _M_maxcode and _M_mode.
// __from_next / __to_next are set from the ranges' next positions and the
// helper's result is returned.
1409 codecvt_base::result
1410 __codecvt_utf8_utf16_base<char16_t>::
1411 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1412 const intern_type*& __from_next,
1413 extern_type* __to, extern_type* __to_end,
1414 extern_type*& __to_next) const
1416 range<const char16_t> from{ __from, __from_end };
1417 range<char> to{ __to, __to_end };
1418 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1419 __from_next = from.next;
1420 __to_next = to.next;
1421 return res;
// Writes nothing: sets __to_next to __to and returns noconv. The state
// argument and the output end pointer are unnamed and unused.
1424 codecvt_base::result
1425 __codecvt_utf8_utf16_base<char16_t>::
1426 do_unshift(state_type&, extern_type* __to, extern_type*,
1427 extern_type*& __to_next) const
1429 __to_next = __to;
1430 return noconv;
// Converts external chars in [__from, __from_end) to char16_t output in
// [__to, __to_end) through utf16_in with _M_maxcode.
// The mode passed to the helper keeps only the consume_header and
// generate_header bits of _M_mode, and adds little_endian when the host is
// not big-endian. __from_next / __to_next are set from the ranges' next
// positions and the helper's result is returned.
1433 codecvt_base::result
1434 __codecvt_utf8_utf16_base<char16_t>::
1435 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1436 const extern_type*& __from_next,
1437 intern_type* __to, intern_type* __to_end,
1438 intern_type*& __to_next) const
1440 range<const char> from{ __from, __from_end };
1441 range<char16_t> to{ __to, __to_end };
1442 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1443 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1444 mode = codecvt_mode(mode | little_endian);
1445 #endif
1446 auto res = utf16_in(from, to, _M_maxcode, mode);
1447 __from_next = from.next;
1448 __to_next = to.next;
1449 return res;
// Always returns 0 for this facet.
1453 __codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
1454 { return 0; } // UTF-8 is not a fixed-width encoding
// Always returns false for this facet.
1456 bool
1457 __codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
1458 { return false; }
// Returns the distance in chars from __from to the pointer produced by
// utf16_span, which is called with __max, _M_maxcode and _M_mode.
1461 __codecvt_utf8_utf16_base<char16_t>::
1462 do_length(state_type&, const extern_type* __from,
1463 const extern_type* __end, size_t __max) const
1465 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1466 return __end - __from;
// Returns 4, plus sizeof(utf8_bom) when consume_header is set in _M_mode.
1470 __codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
1472 // A single character can be 1 or 2 UTF-16 code units,
1473 // requiring up to 4 UTF-8 code units.
1474 int max = 4;
1475 if (_M_mode & consume_header)
1476 max += sizeof(utf8_bom);
1477 return max;
1480 // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
1481 // Converts from UTF-8 to UTF-16.
// Out-of-line destructor definition; the body is empty.
1483 __codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
// Converts char32_t input in [__from, __from_end) to external chars in
// [__to, __to_end) through utf16_out, using _M_maxcode and _M_mode.
// __from_next / __to_next are set from the ranges' next positions and the
// helper's result is returned.
1485 codecvt_base::result
1486 __codecvt_utf8_utf16_base<char32_t>::
1487 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1488 const intern_type*& __from_next,
1489 extern_type* __to, extern_type* __to_end,
1490 extern_type*& __to_next) const
1492 range<const char32_t> from{ __from, __from_end };
1493 range<char> to{ __to, __to_end };
1494 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1495 __from_next = from.next;
1496 __to_next = to.next;
1497 return res;
// Writes nothing: sets __to_next to __to and returns noconv. The state
// argument and the output end pointer are unnamed and unused.
1500 codecvt_base::result
1501 __codecvt_utf8_utf16_base<char32_t>::
1502 do_unshift(state_type&, extern_type* __to, extern_type*,
1503 extern_type*& __to_next) const
1505 __to_next = __to;
1506 return noconv;
// Converts external chars in [__from, __from_end) to char32_t output in
// [__to, __to_end) through utf16_in with _M_maxcode.
// The mode passed to the helper keeps only the consume_header and
// generate_header bits of _M_mode, and adds little_endian when the host is
// not big-endian. __from_next / __to_next are set from the ranges' next
// positions and the helper's result is returned.
1509 codecvt_base::result
1510 __codecvt_utf8_utf16_base<char32_t>::
1511 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1512 const extern_type*& __from_next,
1513 intern_type* __to, intern_type* __to_end,
1514 intern_type*& __to_next) const
1516 range<const char> from{ __from, __from_end };
1517 range<char32_t> to{ __to, __to_end };
1518 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1519 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1520 mode = codecvt_mode(mode | little_endian);
1521 #endif
1522 auto res = utf16_in(from, to, _M_maxcode, mode);
1523 __from_next = from.next;
1524 __to_next = to.next;
1525 return res;
// Always returns 0 for this facet.
1529 __codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
1530 { return 0; } // UTF-8 is not a fixed-width encoding
// Always returns false for this facet.
1532 bool
1533 __codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
1534 { return false; }
// Returns the distance in chars from __from to the pointer produced by
// utf16_span, which is called with __max, _M_maxcode and _M_mode.
1537 __codecvt_utf8_utf16_base<char32_t>::
1538 do_length(state_type&, const extern_type* __from,
1539 const extern_type* __end, size_t __max) const
1541 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1542 return __end - __from;
// Returns 4, plus sizeof(utf8_bom) when consume_header is set in _M_mode.
1546 __codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
1548 // A single character can be 1 or 2 UTF-16 code units,
1549 // requiring up to 4 UTF-8 code units.
1550 int max = 4;
1551 if (_M_mode & consume_header)
1552 max += sizeof(utf8_bom);
1553 return max;
1556 #ifdef _GLIBCXX_USE_WCHAR_T
1557 // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
1558 // Converts from UTF-8 to UTF-16.
// Out-of-line destructor definition; the body is empty.
1560 __codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
// Converts wchar_t input in [__from, __from_end) to external chars in
// [__to, __to_end) through utf16_out, using _M_maxcode and _M_mode.
// The input is wrapped as range<const wchar_t> directly, with no cast.
// __from_next / __to_next are set from the ranges' next positions and the
// helper's result is returned.
1562 codecvt_base::result
1563 __codecvt_utf8_utf16_base<wchar_t>::
1564 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1565 const intern_type*& __from_next,
1566 extern_type* __to, extern_type* __to_end,
1567 extern_type*& __to_next) const
1569 range<const wchar_t> from{ __from, __from_end };
1570 range<char> to{ __to, __to_end };
1571 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1572 __from_next = from.next;
1573 __to_next = to.next;
1574 return res;
// Writes nothing: sets __to_next to __to and returns noconv. The state
// argument and the output end pointer are unnamed and unused.
1577 codecvt_base::result
1578 __codecvt_utf8_utf16_base<wchar_t>::
1579 do_unshift(state_type&, extern_type* __to, extern_type*,
1580 extern_type*& __to_next) const
1582 __to_next = __to;
1583 return noconv;
// Converts external chars in [__from, __from_end) to wchar_t output in
// [__to, __to_end) through utf16_in with _M_maxcode.
// The mode passed to the helper keeps only the consume_header and
// generate_header bits of _M_mode, and adds little_endian when the host is
// not big-endian. __from_next / __to_next are set from the ranges' next
// positions and the helper's result is returned.
1586 codecvt_base::result
1587 __codecvt_utf8_utf16_base<wchar_t>::
1588 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1589 const extern_type*& __from_next,
1590 intern_type* __to, intern_type* __to_end,
1591 intern_type*& __to_next) const
1593 range<const char> from{ __from, __from_end };
1594 range<wchar_t> to{ __to, __to_end };
1595 codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1596 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1597 mode = codecvt_mode(mode | little_endian);
1598 #endif
1599 auto res = utf16_in(from, to, _M_maxcode, mode);
1600 __from_next = from.next;
1601 __to_next = to.next;
1602 return res;
// Always returns 0 for this facet.
1606 __codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
1607 { return 0; } // UTF-8 is not a fixed-width encoding
// Always returns false for this facet.
1609 bool
1610 __codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
1611 { return false; }
// Returns the distance in chars from __from to the pointer produced by
// utf16_span, which is called with __max, _M_maxcode and _M_mode.
1614 __codecvt_utf8_utf16_base<wchar_t>::
1615 do_length(state_type&, const extern_type* __from,
1616 const extern_type* __end, size_t __max) const
1618 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1619 return __end - __from;
// Returns 4, plus sizeof(utf8_bom) when consume_header is set in _M_mode.
1623 __codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
1625 // A single character can be 1 or 2 UTF-16 code units,
1626 // requiring up to 4 UTF-8 code units.
1627 int max = 4;
1628 if (_M_mode & consume_header)
1629 max += sizeof(utf8_bom);
1630 return max;
1632 #endif
// Explicit instantiations of the char16_t / char32_t variants of
// __codecvt_abstract_base and codecvt_byname (external type char, state
// type mbstate_t).
// NOTE(review): `inline template class` is not standard explicit-
// instantiation syntax; presumably the GNU extension form — confirm.
1634 inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
1635 inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
1636 template class codecvt_byname<char16_t, char, mbstate_t>;
1637 template class codecvt_byname<char32_t, char, mbstate_t>;
1639 _GLIBCXX_END_NAMESPACE_VERSION