gcc/d/dmd/root/utf.d

   1 /**
   2  * Functions related to UTF encoding.
   3  *
   4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
   5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
   6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
   7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/utf.d, _utf.d)
   8  * Documentation:  https://dlang.org/phobos/dmd_root_utf.html
   9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/utf.d
  10  */
  11
  12 module dmd.root.utf;
  13
  14 @nogc nothrow pure @safe:
  15
  16 /// The Unicode code space is the range of code points [0x000000,0x10FFFF]
  17 /// except the UTF-16 surrogate pairs in the range [0xD800,0xDFFF]
  18 bool utf_isValidDchar(dchar c)
  19 {
  20     // TODO: Whether non-char code points should be rejected is pending review.
  21     // 0xFFFE and 0xFFFF are valid for internal use, like Phobos std.utf.isValidDChar
  22     // See also https://issues.dlang.org/show_bug.cgi?id=1357
  23     if (c < 0xD800) // Almost all characters in a typical document.
  24         return true;
  25     if (c > 0xDFFF && c <= 0x10FFFF)
  26         return true;
  27     return false;
  28 }
  29
  30 /*******************************
  31  * Return !=0 if unicode alpha.
  32  * Use table from C99 Appendix D.
  33  */
  34 bool isUniAlpha(dchar c)
  35 {
  36     static immutable wchar[2][] ALPHA_TABLE =
  37     [
  38         [0x00AA, 0x00AA],
  39         [0x00B5, 0x00B5],
  40         [0x00B7, 0x00B7],
  41         [0x00BA, 0x00BA],
  42         [0x00C0, 0x00D6],
  43         [0x00D8, 0x00F6],
  44         [0x00F8, 0x01F5],
  45         [0x01FA, 0x0217],
  46         [0x0250, 0x02A8],
  47         [0x02B0, 0x02B8],
  48         [0x02BB, 0x02BB],
  49         [0x02BD, 0x02C1],
  50         [0x02D0, 0x02D1],
  51         [0x02E0, 0x02E4],
  52         [0x037A, 0x037A],
  53         [0x0386, 0x0386],
  54         [0x0388, 0x038A],
  55         [0x038C, 0x038C],
  56         [0x038E, 0x03A1],
  57         [0x03A3, 0x03CE],
  58         [0x03D0, 0x03D6],
  59         [0x03DA, 0x03DA],
  60         [0x03DC, 0x03DC],
  61         [0x03DE, 0x03DE],
  62         [0x03E0, 0x03E0],
  63         [0x03E2, 0x03F3],
  64         [0x0401, 0x040C],
  65         [0x040E, 0x044F],
  66         [0x0451, 0x045C],
  67         [0x045E, 0x0481],
  68         [0x0490, 0x04C4],
  69         [0x04C7, 0x04C8],
  70         [0x04CB, 0x04CC],
  71         [0x04D0, 0x04EB],
  72         [0x04EE, 0x04F5],
  73         [0x04F8, 0x04F9],
  74         [0x0531, 0x0556],
  75         [0x0559, 0x0559],
  76         [0x0561, 0x0587],
  77         [0x05B0, 0x05B9],
  78         [0x05BB, 0x05BD],
  79         [0x05BF, 0x05BF],
  80         [0x05C1, 0x05C2],
  81         [0x05D0, 0x05EA],
  82         [0x05F0, 0x05F2],
  83         [0x0621, 0x063A],
  84         [0x0640, 0x0652],
  85         [0x0660, 0x0669],
  86         [0x0670, 0x06B7],
  87         [0x06BA, 0x06BE],
  88         [0x06C0, 0x06CE],
  89         [0x06D0, 0x06DC],
  90         [0x06E5, 0x06E8],
  91         [0x06EA, 0x06ED],
  92         [0x06F0, 0x06F9],
  93         [0x0901, 0x0903],
  94         [0x0905, 0x0939],
  95         [0x093D, 0x094D],
  96         [0x0950, 0x0952],
  97         [0x0958, 0x0963],
  98         [0x0966, 0x096F],
  99         [0x0981, 0x0983],
 100         [0x0985, 0x098C],
 101         [0x098F, 0x0990],
 102         [0x0993, 0x09A8],
 103         [0x09AA, 0x09B0],
 104         [0x09B2, 0x09B2],
 105         [0x09B6, 0x09B9],
 106         [0x09BE, 0x09C4],
 107         [0x09C7, 0x09C8],
 108         [0x09CB, 0x09CD],
 109         [0x09DC, 0x09DD],
 110         [0x09DF, 0x09E3],
 111         [0x09E6, 0x09F1],
 112         [0x0A02, 0x0A02],
 113         [0x0A05, 0x0A0A],
 114         [0x0A0F, 0x0A10],
 115         [0x0A13, 0x0A28],
 116         [0x0A2A, 0x0A30],
 117         [0x0A32, 0x0A33],
 118         [0x0A35, 0x0A36],
 119         [0x0A38, 0x0A39],
 120         [0x0A3E, 0x0A42],
 121         [0x0A47, 0x0A48],
 122         [0x0A4B, 0x0A4D],
 123         [0x0A59, 0x0A5C],
 124         [0x0A5E, 0x0A5E],
 125         [0x0A66, 0x0A6F],
 126         [0x0A74, 0x0A74],
 127         [0x0A81, 0x0A83],
 128         [0x0A85, 0x0A8B],
 129         [0x0A8D, 0x0A8D],
 130         [0x0A8F, 0x0A91],
 131         [0x0A93, 0x0AA8],
 132         [0x0AAA, 0x0AB0],
 133         [0x0AB2, 0x0AB3],
 134         [0x0AB5, 0x0AB9],
 135         [0x0ABD, 0x0AC5],
 136         [0x0AC7, 0x0AC9],
 137         [0x0ACB, 0x0ACD],
 138         [0x0AD0, 0x0AD0],
 139         [0x0AE0, 0x0AE0],
 140         [0x0AE6, 0x0AEF],
 141         [0x0B01, 0x0B03],
 142         [0x0B05, 0x0B0C],
 143         [0x0B0F, 0x0B10],
 144         [0x0B13, 0x0B28],
 145         [0x0B2A, 0x0B30],
 146         [0x0B32, 0x0B33],
 147         [0x0B36, 0x0B39],
 148         [0x0B3D, 0x0B43],
 149         [0x0B47, 0x0B48],
 150         [0x0B4B, 0x0B4D],
 151         [0x0B5C, 0x0B5D],
 152         [0x0B5F, 0x0B61],
 153         [0x0B66, 0x0B6F],
 154         [0x0B82, 0x0B83],
 155         [0x0B85, 0x0B8A],
 156         [0x0B8E, 0x0B90],
 157         [0x0B92, 0x0B95],
 158         [0x0B99, 0x0B9A],
 159         [0x0B9C, 0x0B9C],
 160         [0x0B9E, 0x0B9F],
 161         [0x0BA3, 0x0BA4],
 162         [0x0BA8, 0x0BAA],
 163         [0x0BAE, 0x0BB5],
 164         [0x0BB7, 0x0BB9],
 165         [0x0BBE, 0x0BC2],
 166         [0x0BC6, 0x0BC8],
 167         [0x0BCA, 0x0BCD],
 168         [0x0BE7, 0x0BEF],
 169         [0x0C01, 0x0C03],
 170         [0x0C05, 0x0C0C],
 171         [0x0C0E, 0x0C10],
 172         [0x0C12, 0x0C28],
 173         [0x0C2A, 0x0C33],
 174         [0x0C35, 0x0C39],
 175         [0x0C3E, 0x0C44],
 176         [0x0C46, 0x0C48],
 177         [0x0C4A, 0x0C4D],
 178         [0x0C60, 0x0C61],
 179         [0x0C66, 0x0C6F],
 180         [0x0C82, 0x0C83],
 181         [0x0C85, 0x0C8C],
 182         [0x0C8E, 0x0C90],
 183         [0x0C92, 0x0CA8],
 184         [0x0CAA, 0x0CB3],
 185         [0x0CB5, 0x0CB9],
 186         [0x0CBE, 0x0CC4],
 187         [0x0CC6, 0x0CC8],
 188         [0x0CCA, 0x0CCD],
 189         [0x0CDE, 0x0CDE],
 190         [0x0CE0, 0x0CE1],
 191         [0x0CE6, 0x0CEF],
 192         [0x0D02, 0x0D03],
 193         [0x0D05, 0x0D0C],
 194         [0x0D0E, 0x0D10],
 195         [0x0D12, 0x0D28],
 196         [0x0D2A, 0x0D39],
 197         [0x0D3E, 0x0D43],
 198         [0x0D46, 0x0D48],
 199         [0x0D4A, 0x0D4D],
 200         [0x0D60, 0x0D61],
 201         [0x0D66, 0x0D6F],
 202         [0x0E01, 0x0E3A],
 203         [0x0E40, 0x0E5B],
 204         [0x0E81, 0x0E82],
 205         [0x0E84, 0x0E84],
 206         [0x0E87, 0x0E88],
 207         [0x0E8A, 0x0E8A],
 208         [0x0E8D, 0x0E8D],
 209         [0x0E94, 0x0E97],
 210         [0x0E99, 0x0E9F],
 211         [0x0EA1, 0x0EA3],
 212         [0x0EA5, 0x0EA5],
 213         [0x0EA7, 0x0EA7],
 214         [0x0EAA, 0x0EAB],
 215         [0x0EAD, 0x0EAE],
 216         [0x0EB0, 0x0EB9],
 217         [0x0EBB, 0x0EBD],
 218         [0x0EC0, 0x0EC4],
 219         [0x0EC6, 0x0EC6],
 220         [0x0EC8, 0x0ECD],
 221         [0x0ED0, 0x0ED9],
 222         [0x0EDC, 0x0EDD],
 223         [0x0F00, 0x0F00],
 224         [0x0F18, 0x0F19],
 225         [0x0F20, 0x0F33],
 226         [0x0F35, 0x0F35],
 227         [0x0F37, 0x0F37],
 228         [0x0F39, 0x0F39],
 229         [0x0F3E, 0x0F47],
 230         [0x0F49, 0x0F69],
 231         [0x0F71, 0x0F84],
 232         [0x0F86, 0x0F8B],
 233         [0x0F90, 0x0F95],
 234         [0x0F97, 0x0F97],
 235         [0x0F99, 0x0FAD],
 236         [0x0FB1, 0x0FB7],
 237         [0x0FB9, 0x0FB9],
 238         [0x10A0, 0x10C5],
 239         [0x10D0, 0x10F6],
 240         [0x1E00, 0x1E9B],
 241         [0x1EA0, 0x1EF9],
 242         [0x1F00, 0x1F15],
 243         [0x1F18, 0x1F1D],
 244         [0x1F20, 0x1F45],
 245         [0x1F48, 0x1F4D],
 246         [0x1F50, 0x1F57],
 247         [0x1F59, 0x1F59],
 248         [0x1F5B, 0x1F5B],
 249         [0x1F5D, 0x1F5D],
 250         [0x1F5F, 0x1F7D],
 251         [0x1F80, 0x1FB4],
 252         [0x1FB6, 0x1FBC],
 253         [0x1FBE, 0x1FBE],
 254         [0x1FC2, 0x1FC4],
 255         [0x1FC6, 0x1FCC],
 256         [0x1FD0, 0x1FD3],
 257         [0x1FD6, 0x1FDB],
 258         [0x1FE0, 0x1FEC],
 259         [0x1FF2, 0x1FF4],
 260         [0x1FF6, 0x1FFC],
 261         [0x203F, 0x2040],
 262         [0x207F, 0x207F],
 263         [0x2102, 0x2102],
 264         [0x2107, 0x2107],
 265         [0x210A, 0x2113],
 266         [0x2115, 0x2115],
 267         [0x2118, 0x211D],
 268         [0x2124, 0x2124],
 269         [0x2126, 0x2126],
 270         [0x2128, 0x2128],
 271         [0x212A, 0x2131],
 272         [0x2133, 0x2138],
 273         [0x2160, 0x2182],
 274         [0x3005, 0x3007],
 275         [0x3021, 0x3029],
 276         [0x3041, 0x3093],
 277         [0x309B, 0x309C],
 278         [0x30A1, 0x30F6],
 279         [0x30FB, 0x30FC],
 280         [0x3105, 0x312C],
 281         [0x4E00, 0x9FA5],
 282         [0xAC00, 0xD7A3]
 283     ];
 284
 285     size_t high = ALPHA_TABLE.length - 1;
 286     // Shortcut search if c is out of range
 287     size_t low = (c < ALPHA_TABLE[0][0] || ALPHA_TABLE[high][1] < c) ? high + 1 : 0;
 288     // Binary search
 289     while (low <= high)
 290     {
 291         const size_t mid = low + ((high - low) >> 1);
 292         if (c < ALPHA_TABLE[mid][0])
 293             high = mid - 1;
 294         else if (ALPHA_TABLE[mid][1] < c)
 295             low = mid + 1;
 296         else
 297         {
 298             assert(ALPHA_TABLE[mid][0] <= c && c <= ALPHA_TABLE[mid][1]);
 299             return true;
 300         }
 301     }
 302     return false;
 303 }
 304
 305 /**
 306  * Returns the code length of c in code units.
 307  */
 308 int utf_codeLengthChar(dchar c)
 309 {
 310     if (c <= 0x7F)
 311         return 1;
 312     if (c <= 0x7FF)
 313         return 2;
 314     if (c <= 0xFFFF)
 315         return 3;
 316     if (c <= 0x10FFFF)
 317         return 4;
 318     assert(false);
 319 }
 320
 321 int utf_codeLengthWchar(dchar c)
 322 {
 323     return c <= 0xFFFF ? 1 : 2;
 324 }
 325
 326 /**
 327  * Returns the code length of c in code units for the encoding.
 328  * sz is the encoding: 1 = utf8, 2 = utf16, 4 = utf32.
 329  */
 330 int utf_codeLength(int sz, dchar c)
 331 {
 332     if (sz == 1)
 333         return utf_codeLengthChar(c);
 334     if (sz == 2)
 335         return utf_codeLengthWchar(c);
 336     assert(sz == 4);
 337     return 1;
 338 }
 339
 340 void utf_encodeChar(char* s, dchar c) @system
 341 {
 342     assert(s !is null);
 343     assert(utf_isValidDchar(c));
 344     if (c <= 0x7F)
 345     {
 346         s[0] = cast(char)c;
 347     }
 348     else if (c <= 0x07FF)
 349     {
 350         s[0] = cast(char)(0xC0 | (c >> 6));
 351         s[1] = cast(char)(0x80 | (c & 0x3F));
 352     }
 353     else if (c <= 0xFFFF)
 354     {
 355         s[0] = cast(char)(0xE0 | (c >> 12));
 356         s[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
 357         s[2] = cast(char)(0x80 | (c & 0x3F));
 358     }
 359     else if (c <= 0x10FFFF)
 360     {
 361         s[0] = cast(char)(0xF0 | (c >> 18));
 362         s[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
 363         s[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
 364         s[3] = cast(char)(0x80 | (c & 0x3F));
 365     }
 366     else
 367         assert(0);
 368 }
 369
 370 void utf_encodeWchar(wchar* s, dchar c) @system
 371 {
 372     assert(s !is null);
 373     assert(utf_isValidDchar(c));
 374     if (c <= 0xFFFF)
 375     {
 376         s[0] = cast(wchar)c;
 377     }
 378     else
 379     {
 380         s[0] = cast(wchar)((((c - 0x010000) >> 10) & 0x03FF) + 0xD800);
 381         s[1] = cast(wchar)(((c - 0x010000) & 0x03FF) + 0xDC00);
 382     }
 383 }
 384
 385 void utf_encode(int sz, void* s, dchar c) @system
 386 {
 387     if (sz == 1)
 388         utf_encodeChar(cast(char*)s, c);
 389     else if (sz == 2)
 390         utf_encodeWchar(cast(wchar*)s, c);
 391     else
 392     {
 393         assert(sz == 4);
 394         *(cast(dchar*)s) = c;
 395     }
 396 }
 397
 398 /********************************************
 399  * Checks whether an Unicode code point is a bidirectional
 400  * control character.
 401  */
 402 bool isBidiControl(dchar c)
 403 {
 404     // Source: https://www.unicode.org/versions/Unicode15.0.0, table 23-3.
 405     switch(c)
 406     {
 407         case '\u061C':
 408         case '\u200E':
 409         case '\u200F':
 410         case '\u202A': .. case '\u202E':
 411         case '\u2066': .. case '\u2069':
 412             return true;
 413         default:
 414             return false;
 415     }
 416 }
 417
 418 /********************************************
 419  * Decode a UTF-8 sequence as a single UTF-32 code point.
 420  * Params:
 421  *      s = UTF-8 sequence
 422  *      ridx = starting index in s[], updated to reflect number of code units decoded
 423  *      rresult = set to character decoded
 424  * Returns:
 425  *      null on success, otherwise error message string
 426  */
 427 string utf_decodeChar(const(char)[] s, ref size_t ridx, out dchar rresult)
 428 {
 429     // UTF-8 decoding errors
 430     static immutable string UTF8_DECODE_OK = null; // no error
 431     static immutable string UTF8_DECODE_OUTSIDE_CODE_SPACE = "Outside Unicode code space";
 432     static immutable string UTF8_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-8 sequence";
 433     static immutable string UTF8_DECODE_OVERLONG = "Overlong UTF-8 sequence";
 434     static immutable string UTF8_DECODE_INVALID_TRAILER = "Invalid trailing code unit";
 435     static immutable string UTF8_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";
 436
 437     /* The following encodings are valid, except for the 5 and 6 byte
 438      * combinations:
 439      *      0xxxxxxx
 440      *      110xxxxx 10xxxxxx
 441      *      1110xxxx 10xxxxxx 10xxxxxx
 442      *      11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 443      *      111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 444      *      1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 445      */
 446     static immutable ubyte[256] UTF8_STRIDE =
 447     [
 448         1,1,1,1, 1,1,1,1,
 449         1,1,1,1, 1,1,1,1,
 450         1,1,1,1, 1,1,1,1,
 451         1,1,1,1, 1,1,1,1,
 452         1,1,1,1, 1,1,1,1,
 453         1,1,1,1, 1,1,1,1,
 454         1,1,1,1, 1,1,1,1,
 455         1,1,1,1, 1,1,1,1,
 456
 457         1,1,1,1, 1,1,1,1,
 458         1,1,1,1, 1,1,1,1,
 459         1,1,1,1, 1,1,1,1,
 460         1,1,1,1, 1,1,1,1,
 461         1,1,1,1, 1,1,1,1,
 462         1,1,1,1, 1,1,1,1,
 463         1,1,1,1, 1,1,1,1,
 464         1,1,1,1, 1,1,1,1,
 465
 466         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 467         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 468         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 469         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 470         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 471         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 472         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 473         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
 474
 475         2,2,2,2, 2,2,2,2,
 476         2,2,2,2, 2,2,2,2,
 477         2,2,2,2, 2,2,2,2,
 478         2,2,2,2, 2,2,2,2,
 479
 480         3,3,3,3, 3,3,3,3,
 481         3,3,3,3, 3,3,3,3,
 482
 483         4,4,4,4, 4,4,4,4,
 484         5,5,5,5, 6,6,0xFF,0xFF
 485     ];
 486
 487     assert(s !is null);
 488     size_t i = ridx++;
 489
 490     const char u = s[i];
 491     // Pre-stage results for ASCII and error cases
 492     rresult = u;
 493     //printf("utf_decodeChar(s = %02x, %02x, %02x len = %d)\n", u, s[1], s[2], len);
 494     // Get expected sequence length
 495     const size_t n = UTF8_STRIDE[u];
 496     switch (n)
 497     {
 498     case 1:
 499         // ASCII
 500         return UTF8_DECODE_OK;
 501     case 2:
 502     case 3:
 503     case 4:
 504         // multi-byte UTF-8
 505         break;
 506     default:
 507         // 5- or 6-byte sequence
 508         return UTF8_DECODE_OUTSIDE_CODE_SPACE;
 509     }
 510     if (s.length < i + n) // source too short
 511         return UTF8_DECODE_TRUNCATED_SEQUENCE;
 512     // Pick off 7 - n low bits from first code unit
 513     dchar c = u & ((1 << (7 - n)) - 1);
 514     /* The following combinations are overlong, and illegal:
 515      *      1100000x (10xxxxxx)
 516      *      11100000 100xxxxx (10xxxxxx)
 517      *      11110000 1000xxxx (10xxxxxx 10xxxxxx)
 518      *      11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
 519      *      11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
 520      */
 521     const char u2 = s[++i];
 522     // overlong combination
 523     if ((u & 0xFE) == 0xC0 || (u == 0xE0 && (u2 & 0xE0) == 0x80) || (u == 0xF0 && (u2 & 0xF0) == 0x80) || (u == 0xF8 && (u2 & 0xF8) == 0x80) || (u == 0xFC && (u2 & 0xFC) == 0x80))
 524         return UTF8_DECODE_OVERLONG;
 525     // Decode remaining bits
 526     for (const m = n + i - 1; i != m; ++i)
 527     {
 528         const u3 = s[i];
 529         if ((u3 & 0xC0) != 0x80) // trailing bytes are 10xxxxxx
 530             return UTF8_DECODE_INVALID_TRAILER;
 531         c = (c << 6) | (u3 & 0x3F);
 532     }
 533     if (!utf_isValidDchar(c))
 534         return UTF8_DECODE_INVALID_CODE_POINT;
 535     ridx = i;
 536     rresult = c;
 537     return UTF8_DECODE_OK;
 538 }
 539
 540 /********************************************
 541  * Decode a UTF-16 sequence as a single UTF-32 code point.
 542  * Params:
 543  *      s = UTF-16 sequence
 544  *      ridx = starting index in s[], updated to reflect number of code units decoded
 545  *      rresult = set to character decoded
 546  * Returns:
 547  *      null on success, otherwise error message string
 548  */
 549 string utf_decodeWchar(const(wchar)[] s, ref size_t ridx, out dchar rresult)
 550 {
 551     // UTF-16 decoding errors
 552     static immutable string UTF16_DECODE_OK = null; // no error
 553     static immutable string UTF16_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-16 sequence";
 554     static immutable string UTF16_DECODE_INVALID_SURROGATE = "Invalid low surrogate";
 555     static immutable string UTF16_DECODE_UNPAIRED_SURROGATE = "Unpaired surrogate";
 556     static immutable string UTF16_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";
 557
 558     assert(s !is null);
 559     size_t i = ridx++;
 560
 561     // Pre-stage results for single wchar and error cases
 562     dchar u = rresult = s[i];
 563     if (u < 0xD800) // Single wchar codepoint
 564         return UTF16_DECODE_OK;
 565     if (0xD800 <= u && u <= 0xDBFF) // Surrogate pair
 566     {
 567         if (s.length <= i + 1)
 568             return UTF16_DECODE_TRUNCATED_SEQUENCE;
 569         wchar u2 = s[i + 1];
 570         if (u2 < 0xDC00 || 0xDFFF < u)
 571             return UTF16_DECODE_INVALID_SURROGATE;
 572         u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
 573         ++ridx;
 574     }
 575     else if (0xDC00 <= u && u <= 0xDFFF)
 576         return UTF16_DECODE_UNPAIRED_SURROGATE;
 577     if (!utf_isValidDchar(u))
 578         return UTF16_DECODE_INVALID_CODE_POINT;
 579     rresult = u;
 580     return UTF16_DECODE_OK;
 581 }