libstdc++-v3/src/c++11/codecvt.cc

   1 // Locale support (codecvt) -*- C++ -*-
   2
   3 // Copyright (C) 2015-2016 Free Software Foundation, Inc.
   4 //
   5 // This file is part of the GNU ISO C++ Library.  This library is free
   6 // software; you can redistribute it and/or modify it under the
   7 // terms of the GNU General Public License as published by the
   8 // Free Software Foundation; either version 3, or (at your option)
   9 // any later version.
  10
  11 // This library is distributed in the hope that it will be useful,
  12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 // GNU General Public License for more details.
  15
  16 // Under Section 7 of GPL version 3, you are granted additional
  17 // permissions described in the GCC Runtime Library Exception, version
  18 // 3.1, as published by the Free Software Foundation.
  19
  20 // You should have received a copy of the GNU General Public License and
  21 // a copy of the GCC Runtime Library Exception along with this program;
  22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23 // <http://www.gnu.org/licenses/>.
  24
  25 #include <codecvt>
  26 #include <cstring>              // std::memcpy, std::memcmp
  27 #include <bits/stl_algobase.h>  // std::max
  28
  29 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
  30 namespace std _GLIBCXX_VISIBILITY(default)
  31 {
  32 _GLIBCXX_BEGIN_NAMESPACE_VERSION
  33
  34 namespace
  35 {
  // Highest code point that can be stored in one UTF-16 code unit.
  constexpr char32_t max_single_utf16_unit{0xFFFF};

  // Highest code point used as the default conversion limit.
  constexpr char32_t max_code_point{0x10FFFF};

  // Sentinel results of the read_*_code_point functions.  Those functions
  // rely on maxcode < incomplete_mb_character (which is enforced by the
  // codecvt_utf* classes on construction), so both sentinels always
  // compare greater than any acceptable code point.
  constexpr char32_t incomplete_mb_character = static_cast<char32_t>(-2);
  constexpr char32_t invalid_mb_sequence = static_cast<char32_t>(-1);
  45
  // A half-open span [next, end) of code units.  `next` is the current
  // position; it moves towards `end` as units are consumed or produced.
  template<typename Elem>
    struct range
    {
      Elem* next;
      Elem* end;

      // Value of the current element (does not advance).
      Elem operator*() const
      { return next[0]; }

      // Step to the following element.
      range& operator++()
      {
        next += 1;
        return *this;
      }

      // Number of elements left between next and end.
      size_t size() const
      { return static_cast<size_t>(end - next); }
    };
  58
  // Multibyte sequences can begin with a "header" consisting of a
  // Byte Order Mark.  Each array is sized to exactly the bytes of its BOM
  // so that code deducing the array length (read_bom/write_bom) sees the
  // true BOM size and never compares or writes padding bytes.
  const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
  const unsigned char utf16_bom[2] = { 0xFE, 0xFF };
  const unsigned char utf16le_bom[2] = { 0xFF, 0xFE };
  63
  64   template<size_t N>
  65     inline bool
  66     write_bom(range<char>& to, const unsigned char (&bom)[N])
  67     {
  68       if (to.size() < N)
  69         return false;
  70       memcpy(to.next, bom, N);
  71       to.next += N;
  72       return true;
  73     }
  74
  75   // If generate_header is set in mode write out UTF-8 BOM.
  76   bool
  77   write_utf8_bom(range<char>& to, codecvt_mode mode)
  78   {
  79     if (mode & generate_header)
  80       return write_bom(to, utf8_bom);
  81     return true;
  82   }
  83
  84   // If generate_header is set in mode write out the UTF-16 BOM indicated
  85   // by whether little_endian is set in mode.
  86   bool
  87   write_utf16_bom(range<char16_t>& to, codecvt_mode mode)
  88   {
  89     if (mode & generate_header)
  90     {
  91       if (!to.size())
  92         return false;
  93       auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom;
  94       std::memcpy(to.next, bom, 2);
  95       ++to.next;
  96     }
  97     return true;
  98   }
  99
 100   template<size_t N>
 101     inline bool
 102     read_bom(range<const char>& from, const unsigned char (&bom)[N])
 103     {
 104       if (from.size() >= N && !memcmp(from.next, bom, N))
 105         {
 106           from.next += N;
 107           return true;
 108         }
 109       return false;
 110     }
 111
 112   // If consume_header is set in mode update from.next to after any BOM.
 113   void
 114   read_utf8_bom(range<const char>& from, codecvt_mode mode)
 115   {
 116     if (mode & consume_header)
 117       read_bom(from, utf8_bom);
 118   }
 119
 120   // If consume_header is set in mode update from.next to after any BOM.
 121   // Return little_endian iff the UTF-16LE BOM was present.
 122   codecvt_mode
 123   read_utf16_bom(range<const char16_t>& from, codecvt_mode mode)
 124   {
 125     if (mode & consume_header && from.size())
 126       {
 127         if (*from.next == 0xFEFF)
 128           ++from.next;
 129         else if (*from.next == 0xFFFE)
 130           {
 131             ++from.next;
 132             return little_endian;
 133           }
 134       }
 135     return {};
 136   }
 137
 138   // Read a codepoint from a UTF-8 multibyte sequence.
 139   // Updates from.next if the codepoint is not greater than maxcode.
 140   // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
 141   char32_t
 142   read_utf8_code_point(range<const char>& from, unsigned long maxcode)
 143   {
 144     const size_t avail = from.size();
 145     if (avail == 0)
 146       return incomplete_mb_character;
 147     unsigned char c1 = from.next[0];
 148     // https://en.wikipedia.org/wiki/UTF-8#Sample_code
 149     if (c1 < 0x80)
 150     {
 151       ++from.next;
 152       return c1;
 153     }
 154     else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
 155       return invalid_mb_sequence;
 156     else if (c1 < 0xE0) // 2-byte sequence
 157     {
 158       if (avail < 2)
 159         return incomplete_mb_character;
 160       unsigned char c2 = from.next[1];
 161       if ((c2 & 0xC0) != 0x80)
 162         return invalid_mb_sequence;
 163       char32_t c = (c1 << 6) + c2 - 0x3080;
 164       if (c <= maxcode)
 165         from.next += 2;
 166       return c;
 167     }
 168     else if (c1 < 0xF0) // 3-byte sequence
 169     {
 170       if (avail < 3)
 171         return incomplete_mb_character;
 172       unsigned char c2 = from.next[1];
 173       if ((c2 & 0xC0) != 0x80)
 174         return invalid_mb_sequence;
 175       if (c1 == 0xE0 && c2 < 0xA0) // overlong
 176         return invalid_mb_sequence;
 177       unsigned char c3 = from.next[2];
 178       if ((c3 & 0xC0) != 0x80)
 179         return invalid_mb_sequence;
 180       char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
 181       if (c <= maxcode)
 182         from.next += 3;
 183       return c;
 184     }
 185     else if (c1 < 0xF5) // 4-byte sequence
 186     {
 187       if (avail < 4)
 188         return incomplete_mb_character;
 189       unsigned char c2 = from.next[1];
 190       if ((c2 & 0xC0) != 0x80)
 191         return invalid_mb_sequence;
 192       if (c1 == 0xF0 && c2 < 0x90) // overlong
 193         return invalid_mb_sequence;
 194       if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
 195       return invalid_mb_sequence;
 196       unsigned char c3 = from.next[2];
 197       if ((c3 & 0xC0) != 0x80)
 198         return invalid_mb_sequence;
 199       unsigned char c4 = from.next[3];
 200       if ((c4 & 0xC0) != 0x80)
 201         return invalid_mb_sequence;
 202       char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
 203       if (c <= maxcode)
 204         from.next += 4;
 205       return c;
 206     }
 207     else // > U+10FFFF
 208       return invalid_mb_sequence;
 209   }
 210
 211   bool
 212   write_utf8_code_point(range<char>& to, char32_t code_point)
 213   {
 214     if (code_point < 0x80)
 215       {
 216         if (to.size() < 1)
 217           return false;
 218         *to.next++ = code_point;
 219       }
 220     else if (code_point <= 0x7FF)
 221       {
 222         if (to.size() < 2)
 223           return false;
 224         *to.next++ = (code_point >> 6) + 0xC0;
 225         *to.next++ = (code_point & 0x3F) + 0x80;
 226       }
 227     else if (code_point <= 0xFFFF)
 228       {
 229         if (to.size() < 3)
 230           return false;
 231         *to.next++ = (code_point >> 12) + 0xE0;
 232         *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
 233         *to.next++ = (code_point & 0x3F) + 0x80;
 234       }
 235     else if (code_point <= 0x10FFFF)
 236       {
 237         if (to.size() < 4)
 238           return false;
 239         *to.next++ = (code_point >> 18) + 0xF0;
 240         *to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
 241         *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
 242         *to.next++ = (code_point & 0x3F) + 0x80;
 243       }
 244     else
 245       return false;
 246     return true;
 247   }
 248
 249   inline char16_t
 250   adjust_byte_order(char16_t c, codecvt_mode mode)
 251   {
 252 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 253     return (mode & little_endian) ? __builtin_bswap16(c) : c;
 254 #else
 255     return (mode & little_endian) ? c : __builtin_bswap16(c);
 256 #endif
 257   }
 258
 259   // Return true if c is a high-surrogate (aka leading) code point.
 260   inline bool
 261   is_high_surrogate(char32_t c)
 262   {
 263     return c >= 0xD800 && c <= 0xDBFF;
 264   }
 265
 266   // Return true if c is a low-surrogate (aka trailing) code point.
 267   inline bool
 268   is_low_surrogate(char32_t c)
 269   {
 270     return c >= 0xDC00 && c <= 0xDFFF;
 271   }
 272
 273   inline char32_t
 274   surrogate_pair_to_code_point(char32_t high, char32_t low)
 275   {
 276     return (high << 10) + low - 0x35FDC00;
 277   }
 278
 279   // Read a codepoint from a UTF-16 multibyte sequence.
 280   // The sequence's endianness is indicated by (mode & little_endian).
 281   // Updates from.next if the codepoint is not greater than maxcode.
 282   // Returns invalid_mb_sequence, incomplete_mb_character or the code point.
 283   char32_t
 284   read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode,
 285                         codecvt_mode mode)
 286   {
 287     const size_t avail = from.size();
 288     if (avail == 0)
 289       return incomplete_mb_character;
 290     int inc = 1;
 291     char32_t c = adjust_byte_order(from.next[0], mode);
 292     if (is_high_surrogate(c))
 293       {
 294         if (avail < 2)
 295           return incomplete_mb_character;
 296         const char16_t c2 = adjust_byte_order(from.next[1], mode);
 297         if (is_low_surrogate(c2))
 298           {
 299             c = surrogate_pair_to_code_point(c, c2);
 300             inc = 2;
 301           }
 302         else
 303           return invalid_mb_sequence;
 304       }
 305     else if (is_low_surrogate(c))
 306       return invalid_mb_sequence;
 307     if (c <= maxcode)
 308       from.next += inc;
 309     return c;
 310   }
 311
 312   template<typename C>
 313   bool
 314   write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode)
 315   {
 316     static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
 317
 318     if (codepoint < max_single_utf16_unit)
 319       {
 320         if (to.size() > 0)
 321           {
 322             *to.next = adjust_byte_order(codepoint, mode);
 323             ++to.next;
 324             return true;
 325           }
 326       }
 327     else if (to.size() > 1)
 328       {
 329         // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
 330         const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
 331         char16_t lead = LEAD_OFFSET + (codepoint >> 10);
 332         char16_t trail = 0xDC00 + (codepoint & 0x3FF);
 333         to.next[0] = adjust_byte_order(lead, mode);
 334         to.next[1] = adjust_byte_order(trail, mode);
 335         to.next += 2;
 336         return true;
 337       }
 338     return false;
 339   }
 340
 341   // utf8 -> ucs4
 342   codecvt_base::result
 343   ucs4_in(range<const char>& from, range<char32_t>& to,
 344           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 345   {
 346     read_utf8_bom(from, mode);
 347     while (from.size() && to.size())
 348       {
 349         const char32_t codepoint = read_utf8_code_point(from, maxcode);
 350         if (codepoint == incomplete_mb_character)
 351           return codecvt_base::partial;
 352         if (codepoint > maxcode)
 353           return codecvt_base::error;
 354         *to.next++ = codepoint;
 355       }
 356     return from.size() ? codecvt_base::partial : codecvt_base::ok;
 357   }
 358
 359   // ucs4 -> utf8
 360   codecvt_base::result
 361   ucs4_out(range<const char32_t>& from, range<char>& to,
 362            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 363   {
 364     if (!write_utf8_bom(to, mode))
 365       return codecvt_base::partial;
 366     while (from.size())
 367       {
 368         const char32_t c = from.next[0];
 369         if (c > maxcode)
 370           return codecvt_base::error;
 371         if (!write_utf8_code_point(to, c))
 372           return codecvt_base::partial;
 373         ++from.next;
 374       }
 375     return codecvt_base::ok;
 376   }
 377
 378   // utf16 -> ucs4
 379   codecvt_base::result
 380   ucs4_in(range<const char16_t>& from, range<char32_t>& to,
 381           unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 382   {
 383     if (read_utf16_bom(from, mode) == little_endian)
 384       mode = codecvt_mode(mode & little_endian);
 385     while (from.size() && to.size())
 386       {
 387         const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
 388         if (codepoint == incomplete_mb_character)
 389           return codecvt_base::partial;
 390         if (codepoint > maxcode)
 391           return codecvt_base::error;
 392         *to.next++ = codepoint;
 393       }
 394     return from.size() ? codecvt_base::partial : codecvt_base::ok;
 395   }
 396
 397   // ucs4 -> utf16
 398   codecvt_base::result
 399   ucs4_out(range<const char32_t>& from, range<char16_t>& to,
 400            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 401   {
 402     if (!write_utf16_bom(to, mode))
 403       return codecvt_base::partial;
 404     while (from.size())
 405       {
 406         const char32_t c = from.next[0];
 407         if (c > maxcode)
 408           return codecvt_base::error;
 409         if (!write_utf16_code_point(to, c, mode))
 410           return codecvt_base::partial;
 411         ++from.next;
 412       }
 413     return codecvt_base::ok;
 414   }
 415
 416   // utf8 -> utf16
 417   template<typename C>
 418   codecvt_base::result
 419   utf16_in(range<const char>& from, range<C>& to,
 420            unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 421   {
 422     read_utf8_bom(from, mode);
 423     while (from.size() && to.size())
 424       {
 425         const char* const first = from.next;
 426         const char32_t codepoint = read_utf8_code_point(from, maxcode);
 427         if (codepoint == incomplete_mb_character)
 428           return codecvt_base::partial;
 429         if (codepoint > maxcode)
 430           return codecvt_base::error;
 431         if (!write_utf16_code_point(to, codepoint, mode))
 432           {
 433             from.next = first;
 434             return codecvt_base::partial;
 435           }
 436       }
 437     return codecvt_base::ok;
 438   }
 439
 440   // utf16 -> utf8
 441   template<typename C>
 442   codecvt_base::result
 443   utf16_out(range<const C>& from, range<char>& to,
 444             unsigned long maxcode = max_code_point, codecvt_mode mode = {})
 445   {
 446     if (!write_utf8_bom(to, mode))
 447       return codecvt_base::partial;
 448     while (from.size())
 449       {
 450         char32_t c = from.next[0];
 451         int inc = 1;
 452         if (is_high_surrogate(c))
 453           {
 454             if (from.size() < 2)
 455               return codecvt_base::ok; // stop converting at this point
 456
 457             const char32_t c2 = from.next[1];
 458             if (is_low_surrogate(c2))
 459               {
 460                 c = surrogate_pair_to_code_point(c, c2);
 461                 inc = 2;
 462               }
 463             else
 464               return codecvt_base::error;
 465           }
 466         else if (is_low_surrogate(c))
 467           return codecvt_base::error;
 468         if (c > maxcode)
 469           return codecvt_base::error;
 470         if (!write_utf8_code_point(to, c))
 471           return codecvt_base::partial;
 472         from.next += inc;
 473       }
 474     return codecvt_base::ok;
 475   }
 476
 477   // return pos such that [begin,pos) is valid UTF-16 string no longer than max
 478   const char*
 479   utf16_span(const char* begin, const char* end, size_t max,
 480              char32_t maxcode = max_code_point, codecvt_mode mode = {})
 481   {
 482     range<const char> from{ begin, end };
 483     read_utf8_bom(from, mode);
 484     size_t count = 0;
 485     while (count+1 < max)
 486       {
 487         char32_t c = read_utf8_code_point(from, maxcode);
 488         if (c > maxcode)
 489           return from.next;
 490         else if (c > max_single_utf16_unit)
 491           ++count;
 492         ++count;
 493       }
 494     if (count+1 == max) // take one more character if it fits in a single unit
 495       read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode));
 496     return from.next;
 497   }
 498
 499   // utf8 -> ucs2
 500   codecvt_base::result
 501   ucs2_in(range<const char>& from, range<char16_t>& to,
 502           char32_t maxcode = max_code_point, codecvt_mode mode = {})
 503   {
 504     return utf16_in(from, to, std::max(max_single_utf16_unit, maxcode), mode);
 505   }
 506
 507   // ucs2 -> utf8
 508   codecvt_base::result
 509   ucs2_out(range<const char16_t>& from, range<char>& to,
 510            char32_t maxcode = max_code_point, codecvt_mode mode = {})
 511   {
 512     return utf16_out(from, to, std::max(max_single_utf16_unit, maxcode), mode);
 513   }
 514
 515   // ucs2 -> utf16
 516   codecvt_base::result
 517   ucs2_out(range<const char16_t>& from, range<char16_t>& to,
 518            char32_t maxcode = max_code_point, codecvt_mode mode = {})
 519   {
 520     if (!write_utf16_bom(to, mode))
 521       return codecvt_base::partial;
 522     while (from.size() && to.size())
 523       {
 524         char16_t c = from.next[0];
 525         if (is_high_surrogate(c))
 526           return codecvt_base::error;
 527         if (c > maxcode)
 528           return codecvt_base::error;
 529         *to.next++ = adjust_byte_order(c, mode);
 530         ++from.next;
 531       }
 532     return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
 533   }
 534
 535   // utf16 -> ucs2
 536   codecvt_base::result
 537   ucs2_in(range<const char16_t>& from, range<char16_t>& to,
 538           char32_t maxcode = max_code_point, codecvt_mode mode = {})
 539   {
 540     if (read_utf16_bom(from, mode) == little_endian)
 541       mode = codecvt_mode(mode & little_endian);
 542     maxcode = std::max(max_single_utf16_unit, maxcode);
 543     while (from.size() && to.size())
 544       {
 545         const char32_t c = read_utf16_code_point(from, maxcode, mode);
 546         if (c == incomplete_mb_character)
 547           return codecvt_base::partial;
 548         if (c > maxcode)
 549           return codecvt_base::error;
 550         *to.next++ = c;
 551       }
 552     return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
 553   }
 554
 555   const char16_t*
 556   ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
 557             char32_t maxcode, codecvt_mode mode)
 558   {
 559     range<const char16_t> from{ begin, end };
 560     if (read_utf16_bom(from, mode) == little_endian)
 561       mode = codecvt_mode(mode & little_endian);
 562     maxcode = std::max(max_single_utf16_unit, maxcode);
 563     char32_t c = 0;
 564     while (max-- && c <= maxcode)
 565       c = read_utf16_code_point(from, maxcode, mode);
 566     return from.next;
 567   }
 568
 569   const char*
 570   ucs2_span(const char* begin, const char* end, size_t max,
 571             char32_t maxcode, codecvt_mode mode)
 572   {
 573     range<const char> from{ begin, end };
 574     read_utf8_bom(from, mode);
 575     maxcode = std::max(max_single_utf16_unit, maxcode);
 576     char32_t c = 0;
 577     while (max-- && c <= maxcode)
 578       c = read_utf8_code_point(from, maxcode);
 579     return from.next;
 580   }
 581
 582   // return pos such that [begin,pos) is valid UCS-4 string no longer than max
 583   const char*
 584   ucs4_span(const char* begin, const char* end, size_t max,
 585             char32_t maxcode = max_code_point, codecvt_mode mode = {})
 586   {
 587     range<const char> from{ begin, end };
 588     read_utf8_bom(from, mode);
 589     char32_t c = 0;
 590     while (max-- && c <= maxcode)
 591       c = read_utf8_code_point(from, maxcode);
 592     return from.next;
 593   }
 594
 595   // return pos such that [begin,pos) is valid UCS-4 string no longer than max
 596   const char16_t*
 597   ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
 598             char32_t maxcode = max_code_point, codecvt_mode mode = {})
 599   {
 600     range<const char16_t> from{ begin, end };
 601     if (read_utf16_bom(from, mode) == little_endian)
 602       mode = codecvt_mode(mode & little_endian);
 603     char32_t c = 0;
 604     while (max-- && c <= maxcode)
 605       c = read_utf16_code_point(from, maxcode, mode);
 606     return from.next;
 607   }
 608 }
 609
// Define members of codecvt<char16_t, char, mbstate_t> specialization.
// Converts between UTF-8 (external) and UTF-16 (internal).

// Definition of the facet's static locale::id member.
locale::id codecvt<char16_t, char, mbstate_t>::id;

// Out-of-line destructor definition; the body is empty.
codecvt<char16_t, char, mbstate_t>::~codecvt() { }
 616
 617 codecvt_base::result
 618 codecvt<char16_t, char, mbstate_t>::
 619 do_out(state_type&,
 620        const intern_type* __from,
 621        const intern_type* __from_end, const intern_type*& __from_next,
 622        extern_type* __to, extern_type* __to_end,
 623        extern_type*& __to_next) const
 624 {
 625   range<const char16_t> from{ __from, __from_end };
 626   range<char> to{ __to, __to_end };
 627   auto res = utf16_out(from, to);
 628   __from_next = from.next;
 629   __to_next = to.next;
 630   return res;
 631 }
 632
 633 codecvt_base::result
 634 codecvt<char16_t, char, mbstate_t>::
 635 do_unshift(state_type&, extern_type* __to, extern_type*,
 636            extern_type*& __to_next) const
 637 {
 638   __to_next = __to;
 639   return noconv; // we don't use mbstate_t for the unicode facets
 640 }
 641
 642 codecvt_base::result
 643 codecvt<char16_t, char, mbstate_t>::
 644 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 645       const extern_type*& __from_next,
 646       intern_type* __to, intern_type* __to_end,
 647       intern_type*& __to_next) const
 648 {
 649   range<const char> from{ __from, __from_end };
 650   range<char16_t> to{ __to, __to_end };
 651 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 652   codecvt_mode mode = {};
 653 #else
 654   codecvt_mode mode = little_endian;
 655 #endif
 656   auto res = utf16_in(from, to, max_code_point, mode);
 657   __from_next = from.next;
 658   __to_next = to.next;
 659   return res;
 660 }
 661
 662 int
 663 codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
 664 { return 0; }
 665
 666 bool
 667 codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
 668 { return false; }
 669
 670 int
 671 codecvt<char16_t, char, mbstate_t>::
 672 do_length(state_type&, const extern_type* __from,
 673           const extern_type* __end, size_t __max) const
 674 {
 675   __end = utf16_span(__from, __end, __max);
 676   return __end - __from;
 677 }
 678
 679 int
 680 codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
 681 {
 682   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
 683   // whereas 4 byte sequences require two 16-bit code units.
 684   return 3;
 685 }
 686
// Define members of codecvt<char32_t, char, mbstate_t> specialization.
// Converts between UTF-8 (external) and UTF-32, aka UCS-4 (internal).

// Definition of the facet's static locale::id member.
locale::id codecvt<char32_t, char, mbstate_t>::id;

// Out-of-line destructor definition; the body is empty.
codecvt<char32_t, char, mbstate_t>::~codecvt() { }
 693
 694 codecvt_base::result
 695 codecvt<char32_t, char, mbstate_t>::
 696 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 697        const intern_type*& __from_next,
 698        extern_type* __to, extern_type* __to_end,
 699        extern_type*& __to_next) const
 700 {
 701   range<const char32_t> from{ __from, __from_end };
 702   range<char> to{ __to, __to_end };
 703   auto res = ucs4_out(from, to);
 704   __from_next = from.next;
 705   __to_next = to.next;
 706   return res;
 707 }
 708
 709 codecvt_base::result
 710 codecvt<char32_t, char, mbstate_t>::
 711 do_unshift(state_type&, extern_type* __to, extern_type*,
 712            extern_type*& __to_next) const
 713 {
 714   __to_next = __to;
 715   return noconv;
 716 }
 717
 718 codecvt_base::result
 719 codecvt<char32_t, char, mbstate_t>::
 720 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 721       const extern_type*& __from_next,
 722       intern_type* __to, intern_type* __to_end,
 723       intern_type*& __to_next) const
 724 {
 725   range<const char> from{ __from, __from_end };
 726   range<char32_t> to{ __to, __to_end };
 727   auto res = ucs4_in(from, to);
 728   __from_next = from.next;
 729   __to_next = to.next;
 730   return res;
 731 }
 732
 733 int
 734 codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
 735 { return 0; }
 736
 737 bool
 738 codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
 739 { return false; }
 740
 741 int
 742 codecvt<char32_t, char, mbstate_t>::
 743 do_length(state_type&, const extern_type* __from,
 744           const extern_type* __end, size_t __max) const
 745 {
 746   __end = ucs4_span(__from, __end, __max);
 747   return __end - __from;
 748 }
 749
 750 int
 751 codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
 752 { return 4; }
 753
// Define members of codecvt_utf8<char16_t> base class implementation.
// Converts between UTF-8 (external) and UCS-2 (internal).

// Out-of-line destructor definition; the body is empty.
__codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
 758
 759 codecvt_base::result
 760 __codecvt_utf8_base<char16_t>::
 761 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 762        const intern_type*& __from_next,
 763        extern_type* __to, extern_type* __to_end,
 764        extern_type*& __to_next) const
 765 {
 766   range<const char16_t> from{ __from, __from_end };
 767   range<char> to{ __to, __to_end };
 768   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
 769   __from_next = from.next;
 770   __to_next = to.next;
 771   return res;
 772 }
 773
 774 codecvt_base::result
 775 __codecvt_utf8_base<char16_t>::
 776 do_unshift(state_type&, extern_type* __to, extern_type*,
 777            extern_type*& __to_next) const
 778 {
 779   __to_next = __to;
 780   return noconv;
 781 }
 782
 783 codecvt_base::result
 784 __codecvt_utf8_base<char16_t>::
 785 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 786       const extern_type*& __from_next,
 787       intern_type* __to, intern_type* __to_end,
 788       intern_type*& __to_next) const
 789 {
 790   range<const char> from{ __from, __from_end };
 791   range<char16_t> to{ __to, __to_end };
 792   codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
 793 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
 794   mode = codecvt_mode(mode | little_endian);
 795 #endif
 796   auto res = ucs2_in(from, to, _M_maxcode, mode);
 797   __from_next = from.next;
 798   __to_next = to.next;
 799   return res;
 800 }
 801
 802 int
 803 __codecvt_utf8_base<char16_t>::do_encoding() const throw()
 804 { return 0; }
 805
 806 bool
 807 __codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
 808 { return false; }
 809
 810 int
 811 __codecvt_utf8_base<char16_t>::
 812 do_length(state_type&, const extern_type* __from,
 813           const extern_type* __end, size_t __max) const
 814 {
 815   __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
 816   return __end - __from;
 817 }
 818
 819 int
 820 __codecvt_utf8_base<char16_t>::do_max_length() const throw()
 821 { return 3; }
 822
// Define members of codecvt_utf8<char32_t> base class implementation.
// Converts between UTF-8 (external) and UTF-32, aka UCS-4 (internal).

// Out-of-line destructor definition; the body is empty.
__codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
 827
 828 codecvt_base::result
 829 __codecvt_utf8_base<char32_t>::
 830 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 831        const intern_type*& __from_next,
 832        extern_type* __to, extern_type* __to_end,
 833        extern_type*& __to_next) const
 834 {
 835   range<const char32_t> from{ __from, __from_end };
 836   range<char> to{ __to, __to_end };
 837   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
 838   __from_next = from.next;
 839   __to_next = to.next;
 840   return res;
 841 }
 842
 843 codecvt_base::result
 844 __codecvt_utf8_base<char32_t>::
 845 do_unshift(state_type&, extern_type* __to, extern_type*,
 846            extern_type*& __to_next) const
 847 {
 848   __to_next = __to;
 849   return noconv;
 850 }
 851
 852 codecvt_base::result
 853 __codecvt_utf8_base<char32_t>::
 854 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 855       const extern_type*& __from_next,
 856       intern_type* __to, intern_type* __to_end,
 857       intern_type*& __to_next) const
 858 {
 859   range<const char> from{ __from, __from_end };
 860   range<char32_t> to{ __to, __to_end };
 861   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
 862   __from_next = from.next;
 863   __to_next = to.next;
 864   return res;
 865 }
 866
 867 int
 868 __codecvt_utf8_base<char32_t>::do_encoding() const throw()
 869 { return 0; }
 870
 871 bool
 872 __codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
 873 { return false; }
 874
 875 int
 876 __codecvt_utf8_base<char32_t>::
 877 do_length(state_type&, const extern_type* __from,
 878           const extern_type* __end, size_t __max) const
 879 {
 880   __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
 881   return __end - __from;
 882 }
 883
 884 int
 885 __codecvt_utf8_base<char32_t>::do_max_length() const throw()
 886 { return 4; }
 887
 888 #ifdef _GLIBCXX_USE_WCHAR_T
// Define members of codecvt_utf8<wchar_t> base class implementation.
// Converts between UTF-8 (external) and UCS-2 or UCS-4 (internal),
// depending on sizeof(wchar_t).

// Out-of-line destructor definition; the body is empty.
__codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
 893
 894 codecvt_base::result
 895 __codecvt_utf8_base<wchar_t>::
 896 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 897        const intern_type*& __from_next,
 898        extern_type* __to, extern_type* __to_end,
 899        extern_type*& __to_next) const
 900 {
 901   range<char> to{ __to, __to_end };
 902 #if __SIZEOF_WCHAR_T__ == 2
 903   range<const char16_t> from{
 904     reinterpret_cast<const char16_t*>(__from),
 905     reinterpret_cast<const char16_t*>(__from_end)
 906   };
 907   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
 908 #elif __SIZEOF_WCHAR_T__ == 4
 909   range<const char32_t> from{
 910     reinterpret_cast<const char32_t*>(__from),
 911     reinterpret_cast<const char32_t*>(__from_end)
 912   };
 913   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
 914 #else
 915   return codecvt_base::error;
 916 #endif
 917   __from_next = reinterpret_cast<const wchar_t*>(from.next);
 918   __to_next = to.next;
 919   return res;
 920 }
 921
 922 codecvt_base::result
 923 __codecvt_utf8_base<wchar_t>::
 924 do_unshift(state_type&, extern_type* __to, extern_type*,
 925            extern_type*& __to_next) const
 926 {
 927   __to_next = __to;
 928   return noconv;
 929 }
 930
 931 codecvt_base::result
 932 __codecvt_utf8_base<wchar_t>::
 933 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
 934       const extern_type*& __from_next,
 935       intern_type* __to, intern_type* __to_end,
 936       intern_type*& __to_next) const
 937 {
 938   range<const char> from{ __from, __from_end };
 939 #if __SIZEOF_WCHAR_T__ == 2
 940   range<char16_t> to{
 941     reinterpret_cast<char16_t*>(__to),
 942     reinterpret_cast<char16_t*>(__to_end)
 943   };
 944   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
 945 #elif __SIZEOF_WCHAR_T__ == 4
 946   range<char32_t> to{
 947     reinterpret_cast<char32_t*>(__to),
 948     reinterpret_cast<char32_t*>(__to_end)
 949   };
 950   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
 951 #else
 952   return codecvt_base::error;
 953 #endif
 954   __from_next = from.next;
 955   __to_next = reinterpret_cast<wchar_t*>(to.next);
 956   return res;
 957 }
 958
 959 int
 960 __codecvt_utf8_base<wchar_t>::do_encoding() const throw()
 961 { return 0; }
 962
 963 bool
 964 __codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
 965 { return false; }
 966
 967 int
 968 __codecvt_utf8_base<wchar_t>::
 969 do_length(state_type&, const extern_type* __from,
 970           const extern_type* __end, size_t __max) const
 971 {
 972 #if __SIZEOF_WCHAR_T__ == 2
 973   __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
 974 #elif __SIZEOF_WCHAR_T__ == 4
 975   __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
 976 #else
 977   __end = __from;
 978 #endif
 979   return __end - __from;
 980 }
 981
 982 int
 983 __codecvt_utf8_base<wchar_t>::do_max_length() const throw()
 984 { return 4; }
 985 #endif
 986
 987 // Define members of codecvt_utf16<char16_t> base class implementation.
 988 // Converts from UTF-16 to UCS-2.
 989
 990 __codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
 991
 992 codecvt_base::result
 993 __codecvt_utf16_base<char16_t>::
 994 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
 995        const intern_type*& __from_next,
 996        extern_type* __to, extern_type* __to_end,
 997        extern_type*& __to_next) const
 998 {
 999   range<const char16_t> from{ __from, __from_end };
1000   range<char16_t> to{
1001     reinterpret_cast<char16_t*>(__to),
1002     reinterpret_cast<char16_t*>(__to_end)
1003   };
1004   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1005   __from_next = from.next;
1006   __to_next = reinterpret_cast<char*>(to.next);
1007   return res;
1008 }
1009
1010 codecvt_base::result
1011 __codecvt_utf16_base<char16_t>::
1012 do_unshift(state_type&, extern_type* __to, extern_type*,
1013            extern_type*& __to_next) const
1014 {
1015   __to_next = __to;
1016   return noconv;
1017 }
1018
1019 codecvt_base::result
1020 __codecvt_utf16_base<char16_t>::
1021 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1022       const extern_type*& __from_next,
1023       intern_type* __to, intern_type* __to_end,
1024       intern_type*& __to_next) const
1025 {
1026   range<const char16_t> from{
1027     reinterpret_cast<const char16_t*>(__from),
1028     reinterpret_cast<const char16_t*>(__from_end)
1029   };
1030   range<char16_t> to{ __to, __to_end };
1031   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1032   __from_next = reinterpret_cast<const char*>(from.next);
1033   __to_next = to.next;
1034   return res;
1035 }
1036
1037 int
1038 __codecvt_utf16_base<char16_t>::do_encoding() const throw()
1039 { return 1; }
1040
1041 bool
1042 __codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
1043 { return false; }
1044
1045 int
1046 __codecvt_utf16_base<char16_t>::
1047 do_length(state_type&, const extern_type* __from,
1048           const extern_type* __end, size_t __max) const
1049 {
1050   auto next = reinterpret_cast<const char16_t*>(__from);
1051   next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1052                    _M_maxcode, _M_mode);
1053   return reinterpret_cast<const char*>(next) - __from;
1054 }
1055
1056 int
1057 __codecvt_utf16_base<char16_t>::do_max_length() const throw()
1058 { return 3; }
1059
1060 // Define members of codecvt_utf16<char32_t> base class implementation.
1061 // Converts from UTF-16 to UTF-32 (aka UCS-4).
1062
1063 __codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
1064
1065 codecvt_base::result
1066 __codecvt_utf16_base<char32_t>::
1067 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1068        const intern_type*& __from_next,
1069        extern_type* __to, extern_type* __to_end,
1070        extern_type*& __to_next) const
1071 {
1072   range<const char32_t> from{ __from, __from_end };
1073   range<char16_t> to{
1074     reinterpret_cast<char16_t*>(__to),
1075     reinterpret_cast<char16_t*>(__to_end)
1076   };
1077   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1078   __from_next = from.next;
1079   __to_next = reinterpret_cast<char*>(to.next);
1080   return res;
1081 }
1082
1083 codecvt_base::result
1084 __codecvt_utf16_base<char32_t>::
1085 do_unshift(state_type&, extern_type* __to, extern_type*,
1086            extern_type*& __to_next) const
1087 {
1088   __to_next = __to;
1089   return noconv;
1090 }
1091
1092 codecvt_base::result
1093 __codecvt_utf16_base<char32_t>::
1094 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1095       const extern_type*& __from_next,
1096       intern_type* __to, intern_type* __to_end,
1097       intern_type*& __to_next) const
1098 {
1099   range<const char16_t> from{
1100     reinterpret_cast<const char16_t*>(__from),
1101     reinterpret_cast<const char16_t*>(__from_end)
1102   };
1103   range<char32_t> to{ __to, __to_end };
1104   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1105   __from_next = reinterpret_cast<const char*>(from.next);
1106   __to_next = to.next;
1107   return res;
1108 }
1109
1110 int
1111 __codecvt_utf16_base<char32_t>::do_encoding() const throw()
1112 { return 0; }
1113
1114 bool
1115 __codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
1116 { return false; }
1117
1118 int
1119 __codecvt_utf16_base<char32_t>::
1120 do_length(state_type&, const extern_type* __from,
1121           const extern_type* __end, size_t __max) const
1122 {
1123   auto next = reinterpret_cast<const char16_t*>(__from);
1124   next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1125                    _M_maxcode, _M_mode);
1126   return reinterpret_cast<const char*>(next) - __from;
1127 }
1128
1129 int
1130 __codecvt_utf16_base<char32_t>::do_max_length() const throw()
1131 { return 4; }
1132
1133 #ifdef _GLIBCXX_USE_WCHAR_T
1134 // Define members of codecvt_utf16<wchar_t> base class implementation.
// Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1136
1137 __codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
1138
1139 codecvt_base::result
1140 __codecvt_utf16_base<wchar_t>::
1141 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1142        const intern_type*& __from_next,
1143        extern_type* __to, extern_type* __to_end,
1144        extern_type*& __to_next) const
1145 {
1146   range<char> to{ __to, __to_end };
1147 #if __SIZEOF_WCHAR_T__ == 2
1148   range<const char16_t> from{
1149     reinterpret_cast<const char16_t*>(__from),
1150     reinterpret_cast<const char16_t*>(__from_end)
1151   };
1152   auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1153 #elif __SIZEOF_WCHAR_T__ == 4
1154   range<const char32_t> from{
1155     reinterpret_cast<const char32_t*>(__from),
1156     reinterpret_cast<const char32_t*>(__from_end)
1157   };
1158   auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1159 #else
1160   return codecvt_base::error;
1161 #endif
1162   __from_next = reinterpret_cast<const wchar_t*>(from.next);
1163   __to_next = to.next;
1164   return res;
1165 }
1166
1167 codecvt_base::result
1168 __codecvt_utf16_base<wchar_t>::
1169 do_unshift(state_type&, extern_type* __to, extern_type*,
1170            extern_type*& __to_next) const
1171 {
1172   __to_next = __to;
1173   return noconv;
1174 }
1175
1176 codecvt_base::result
1177 __codecvt_utf16_base<wchar_t>::
1178 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1179       const extern_type*& __from_next,
1180       intern_type* __to, intern_type* __to_end,
1181       intern_type*& __to_next) const
1182 {
1183   range<const char> from{ __from, __from_end };
1184 #if __SIZEOF_WCHAR_T__ == 2
1185   range<char16_t> to{
1186     reinterpret_cast<char16_t*>(__to),
1187     reinterpret_cast<char16_t*>(__to_end)
1188   };
1189   auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1190 #elif __SIZEOF_WCHAR_T__ == 4
1191   range<char32_t> to{
1192     reinterpret_cast<char32_t*>(__to),
1193     reinterpret_cast<char32_t*>(__to_end)
1194   };
1195   auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1196 #else
1197   return codecvt_base::error;
1198 #endif
1199   __from_next = from.next;
1200   __to_next = reinterpret_cast<wchar_t*>(to.next);
1201   return res;
1202 }
1203
1204 int
1205 __codecvt_utf16_base<wchar_t>::do_encoding() const throw()
1206 { return 0; }
1207
1208 bool
1209 __codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
1210 { return false; }
1211
1212 int
1213 __codecvt_utf16_base<wchar_t>::
1214 do_length(state_type&, const extern_type* __from,
1215           const extern_type* __end, size_t __max) const
1216 {
1217   auto next = reinterpret_cast<const char16_t*>(__from);
1218 #if __SIZEOF_WCHAR_T__ == 2
1219   next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1220                    _M_maxcode, _M_mode);
1221 #elif __SIZEOF_WCHAR_T__ == 4
1222   next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1223                    _M_maxcode, _M_mode);
1224 #endif
1225   return reinterpret_cast<const char*>(next) - __from;
1226 }
1227
1228 int
1229 __codecvt_utf16_base<wchar_t>::do_max_length() const throw()
1230 { return 4; }
1231 #endif
1232
1233 // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1234 // Converts from UTF-8 to UTF-16.
1235
1236 __codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
1237
1238 codecvt_base::result
1239 __codecvt_utf8_utf16_base<char16_t>::
1240 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1241        const intern_type*& __from_next,
1242        extern_type* __to, extern_type* __to_end,
1243        extern_type*& __to_next) const
1244 {
1245   range<const char16_t> from{ __from, __from_end };
1246   range<char> to{ __to, __to_end };
1247   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1248   __from_next = from.next;
1249   __to_next = to.next;
1250   return res;
1251 }
1252
1253 codecvt_base::result
1254 __codecvt_utf8_utf16_base<char16_t>::
1255 do_unshift(state_type&, extern_type* __to, extern_type*,
1256            extern_type*& __to_next) const
1257 {
1258   __to_next = __to;
1259   return noconv;
1260 }
1261
1262 codecvt_base::result
1263 __codecvt_utf8_utf16_base<char16_t>::
1264 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1265       const extern_type*& __from_next,
1266       intern_type* __to, intern_type* __to_end,
1267       intern_type*& __to_next) const
1268 {
1269   range<const char> from{ __from, __from_end };
1270   range<char16_t> to{ __to, __to_end };
1271   codecvt_mode mode = codecvt_mode(_M_mode & (consume_header|generate_header));
1272 #if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
1273   mode = codecvt_mode(mode | little_endian);
1274 #endif
1275   auto res = utf16_in(from, to, _M_maxcode, mode);
1276   __from_next = from.next;
1277   __to_next = to.next;
1278   return res;
1279 }
1280
1281 int
1282 __codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
1283 { return 0; }
1284
1285 bool
1286 __codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
1287 { return false; }
1288
1289 int
1290 __codecvt_utf8_utf16_base<char16_t>::
1291 do_length(state_type&, const extern_type* __from,
1292           const extern_type* __end, size_t __max) const
1293 {
1294   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1295   return __end - __from;
1296 }
1297
1298 int
1299 __codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
1300 {
1301   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1302   // whereas 4 byte sequences require two 16-bit code units.
1303   return 3;
1304 }
1305
1306 // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
1307 // Converts from UTF-8 to UTF-16.
1308
1309 __codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
1310
1311 codecvt_base::result
1312 __codecvt_utf8_utf16_base<char32_t>::
1313 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1314        const intern_type*& __from_next,
1315        extern_type* __to, extern_type* __to_end,
1316        extern_type*& __to_next) const
1317 {
1318   range<const char32_t> from{ __from, __from_end };
1319   range<char> to{ __to, __to_end };
1320   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1321   __from_next = from.next;
1322   __to_next = to.next;
1323   return res;
1324 }
1325
1326 codecvt_base::result
1327 __codecvt_utf8_utf16_base<char32_t>::
1328 do_unshift(state_type&, extern_type* __to, extern_type*,
1329            extern_type*& __to_next) const
1330 {
1331   __to_next = __to;
1332   return noconv;
1333 }
1334
1335 codecvt_base::result
1336 __codecvt_utf8_utf16_base<char32_t>::
1337 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1338       const extern_type*& __from_next,
1339       intern_type* __to, intern_type* __to_end,
1340       intern_type*& __to_next) const
1341 {
1342   range<const char> from{ __from, __from_end };
1343   range<char32_t> to{ __to, __to_end };
1344   auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1345   __from_next = from.next;
1346   __to_next = to.next;
1347   return res;
1348 }
1349
1350 int
1351 __codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
1352 { return 0; }
1353
1354 bool
1355 __codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
1356 { return false; }
1357
1358 int
1359 __codecvt_utf8_utf16_base<char32_t>::
1360 do_length(state_type&, const extern_type* __from,
1361           const extern_type* __end, size_t __max) const
1362 {
1363   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1364   return __end - __from;
1365 }
1366
1367 int
1368 __codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
1369 {
1370   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1371   // whereas 4 byte sequences require two 16-bit code units.
1372   return 3;
1373 }
1374
1375 #ifdef _GLIBCXX_USE_WCHAR_T
1376 // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
1377 // Converts from UTF-8 to UTF-16.
1378
1379 __codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
1380
1381 codecvt_base::result
1382 __codecvt_utf8_utf16_base<wchar_t>::
1383 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1384        const intern_type*& __from_next,
1385        extern_type* __to, extern_type* __to_end,
1386        extern_type*& __to_next) const
1387 {
1388   range<const wchar_t> from{ __from, __from_end };
1389   range<char> to{ __to, __to_end };
1390   auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1391   __from_next = from.next;
1392   __to_next = to.next;
1393   return res;
1394 }
1395
1396 codecvt_base::result
1397 __codecvt_utf8_utf16_base<wchar_t>::
1398 do_unshift(state_type&, extern_type* __to, extern_type*,
1399            extern_type*& __to_next) const
1400 {
1401   __to_next = __to;
1402   return noconv;
1403 }
1404
1405 codecvt_base::result
1406 __codecvt_utf8_utf16_base<wchar_t>::
1407 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1408       const extern_type*& __from_next,
1409       intern_type* __to, intern_type* __to_end,
1410       intern_type*& __to_next) const
1411 {
1412   range<const char> from{ __from, __from_end };
1413   range<wchar_t> to{ __to, __to_end };
1414   auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1415   __from_next = from.next;
1416   __to_next = to.next;
1417   return res;
1418 }
1419
1420 int
1421 __codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
1422 { return 0; }
1423
1424 bool
1425 __codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
1426 { return false; }
1427
1428 int
1429 __codecvt_utf8_utf16_base<wchar_t>::
1430 do_length(state_type&, const extern_type* __from,
1431           const extern_type* __end, size_t __max) const
1432 {
1433   __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1434   return __end - __from;
1435 }
1436
1437 int
1438 __codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
1439 {
1440   // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1441   // whereas 4 byte sequences require two 16-bit code units.
1442   return 3;
1443 }
1444 #endif
1445
// Explicit instantiations for the char16_t and char32_t conversions.
// "inline template class" is a GNU extension: per the GCC manual it emits
// only the compiler support data for the class (such as its vtable) and
// does not instantiate the members.
inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
// Ordinary explicit instantiations of the by-name facets.
template class codecvt_byname<char16_t, char, mbstate_t>;
template class codecvt_byname<char32_t, char, mbstate_t>;
1450
1451 _GLIBCXX_END_NAMESPACE_VERSION
1452 }
1453 #endif // _GLIBCXX_USE_C99_STDINT_TR1