netcore/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 using System.Buffers;
   6 using System.Buffers.Binary;
   7 using System.Diagnostics;
   8 using System.Numerics;
   9 using System.Runtime.Intrinsics.X86;
  10 using Internal.Runtime.CompilerServices;
  11
  12 #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
  13 #if BIT64
  14 using nint = System.Int64;
  15 using nuint = System.UInt64;
  16 #else // BIT64
  17 using nint = System.Int32;
  18 using nuint = System.UInt32;
  19 #endif // BIT64
  20
  21 namespace System.Text.Unicode
  22 {
  23     internal static unsafe partial class Utf8Utility
  24     {
  25 #if DEBUG
  26         static Utf8Utility() // Type initializer (inside the enclosing #if DEBUG): sanity-checks the nint / nuint aliases this file declares via 'using'.
  27         {
  28             Debug.Assert(sizeof(nint) == IntPtr.Size && nint.MinValue < 0, "nint is defined incorrectly."); // nint must be pointer-sized and signed
  29             Debug.Assert(sizeof(nuint) == IntPtr.Size && nuint.MinValue == 0, "nuint is defined incorrectly."); // nuint must be pointer-sized and unsigned
  30             // NOTE(review): the helper below is not defined in this part of the file; presumably it performs the same alias checks for the other parts of this partial class - confirm.
  31             _ValidateAdditionalNIntDefinitions();
  32         }
  33 #endif // DEBUG
  34
  35         // On method return, pInputBufferRemaining points to where the next byte would have been consumed from,
  36         // and pOutputBufferRemaining points to where the next char would have been written to.
  37         // inputLength is measured in bytes; outputCharsRemaining is measured in chars.
  38         public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLength, char* pOutputBuffer, int outputCharsRemaining, out byte* pInputBufferRemaining, out char* pOutputBufferRemaining)
  39         {
  40             Debug.Assert(inputLength >= 0, "Input length must not be negative.");
  41             Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null.");
  42
  43             Debug.Assert(outputCharsRemaining >= 0, "Destination length must not be negative.");
  44             Debug.Assert(pOutputBuffer != null || outputCharsRemaining == 0, "Destination length must be zero if destination buffer pointer is null.");
  45
  46             // First, try vectorized conversion.
  47
  48             {
  49                 nuint numElementsConverted = ASCIIUtility.WidenAsciiToUtf16(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputCharsRemaining));
  50
  51                 pInputBuffer += numElementsConverted;
  52                 pOutputBuffer += numElementsConverted;
  53
  54                 // Quick check - did we just end up consuming the entire input buffer?
  55                 // If so, short-circuit the remainder of the method.
  56
  57                 if ((int)numElementsConverted == inputLength)
  58                 {
  59                     pInputBufferRemaining = pInputBuffer;
  60                     pOutputBufferRemaining = pOutputBuffer;
  61                     return OperationStatus.Done;
  62                 }
  63
  64                 inputLength -= (int)numElementsConverted;
  65                 outputCharsRemaining -= (int)numElementsConverted;
  66             }
  67
  68             if (inputLength < sizeof(uint))
  69             {
  70                 goto ProcessInputOfLessThanDWordSize;
  71             }
  72
  73             byte* pFinalPosWhereCanReadDWordFromInputBuffer = pInputBuffer + (uint)inputLength - 4;
  74
  75             // Begin the main loop.
  76
  77 #if DEBUG
  78             byte* pLastBufferPosProcessed = null; // used for invariant checking in debug builds
  79 #endif
  80
  81             while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer)
  82             {
  83                 // Read 32 bits at a time. This is enough to hold any possible UTF8-encoded scalar.
  84
  85                 uint thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
  86
  87             AfterReadDWord:
  88
  89 #if DEBUG
  90                 Debug.Assert(pLastBufferPosProcessed < pInputBuffer, "Algorithm should've made forward progress since last read.");
  91                 pLastBufferPosProcessed = pInputBuffer;
  92 #endif
  93                 // First, check for the common case of all-ASCII bytes.
  94
  95                 if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord))
  96                 {
  97                     // We read an all-ASCII sequence.
  98
  99                     if (outputCharsRemaining < sizeof(uint))
 100                     {
 101                         goto ProcessRemainingBytesSlow; // running out of space, but may be able to write some data
 102                     }
 103
 104                     Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord);
 105                     pInputBuffer += 4;
 106                     pOutputBuffer += 4;
 107                     outputCharsRemaining -= 4;
 108
 109                     // If we saw a sequence of all ASCII, there's a good chance a significant amount of following data is also ASCII.
 110                     // Below is basically unrolled loops with poor man's vectorization.
 111
 112                     uint remainingInputBytes = (uint)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) + 4;
 113                     uint maxIters = Math.Min(remainingInputBytes, (uint)outputCharsRemaining) / (2 * sizeof(uint));
 114                     uint secondDWord;
 115                     int i;
 116                     for (i = 0; (uint)i < maxIters; i++)
 117                     {
 118                         // Reading two DWORDs in parallel benchmarked faster than reading a single QWORD.
 119
 120                         thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
 121                         secondDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer + sizeof(uint));
 122
 123                         if (!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord | secondDWord))
 124                         {
 125                             goto LoopTerminatedEarlyDueToNonAsciiData;
 126                         }
 127
 128                         pInputBuffer += 8;
 129
 130                         Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[0], thisDWord);
 131                         Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[4], secondDWord);
 132
 133                         pOutputBuffer += 8;
 134                     }
 135
 136                     outputCharsRemaining -= 8 * i;
 137
 138                     continue; // need to perform a bounds check because we might be running out of data
 139
 140                 LoopTerminatedEarlyDueToNonAsciiData:
 141
 142                     if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord))
 143                     {
 144                         // The first DWORD contained all-ASCII bytes, so expand it.
 145
 146                         Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord);
 147
 148                         // continue the outer loop from the second DWORD
 149
 150                         Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(secondDWord));
 151                         thisDWord = secondDWord;
 152
 153                         pInputBuffer += 4;
 154                         pOutputBuffer += 4;
 155                         outputCharsRemaining -= 4;
 156                     }
 157
 158                     outputCharsRemaining -= 8 * i;
 159
 160                     // We know that there's *at least* one DWORD of data remaining in the buffer.
 161                     // We also know that it's not all-ASCII. We can skip the logic at the beginning of the main loop.
 162
 163                     goto AfterReadDWordSkipAllBytesAsciiCheck;
 164                 }
 165
 166             AfterReadDWordSkipAllBytesAsciiCheck:
 167
 168                 Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)); // this should have been handled earlier
 169
 170                 // Next, try stripping off ASCII bytes one at a time.
 171                 // We only handle up to three ASCII bytes here since we handled the four ASCII byte case above.
 172
 173                 if (UInt32FirstByteIsAscii(thisDWord))
 174                 {
 175                     if (outputCharsRemaining >= 3)
 176                     {
 177                         // Fast-track: we don't need to check the destination length for subsequent
 178                         // ASCII bytes since we know we can write them all now.
 179
 180                         uint thisDWordLittleEndian = ToLittleEndian(thisDWord);
 181
 182                         nuint adjustment = 1;
 183                         pOutputBuffer[0] = (char)(byte)thisDWordLittleEndian;
 184
 185                         if (UInt32SecondByteIsAscii(thisDWord))
 186                         {
 187                             adjustment++;
 188                             thisDWordLittleEndian >>= 8;
 189                             pOutputBuffer[1] = (char)(byte)thisDWordLittleEndian;
 190
 191                             if (UInt32ThirdByteIsAscii(thisDWord))
 192                             {
 193                                 adjustment++;
 194                                 thisDWordLittleEndian >>= 8;
 195                                 pOutputBuffer[2] = (char)(byte)thisDWordLittleEndian;
 196                             }
 197                         }
 198
 199                         pInputBuffer += adjustment;
 200                         pOutputBuffer += adjustment;
 201                         outputCharsRemaining -= (int)adjustment;
 202                     }
 203                     else
 204                     {
 205                         // Slow-track: we need to make sure each individual write has enough
 206                         // of a buffer so that we don't overrun the destination.
 207
 208                         if (outputCharsRemaining == 0)
 209                         {
 210                             goto OutputBufferTooSmall;
 211                         }
 212
 213                         uint thisDWordLittleEndian = ToLittleEndian(thisDWord);
 214
 215                         pInputBuffer++;
 216                         *pOutputBuffer++ = (char)(byte)thisDWordLittleEndian;
 217                         outputCharsRemaining--;
 218
 219                         if (UInt32SecondByteIsAscii(thisDWord))
 220                         {
 221                             if (outputCharsRemaining == 0)
 222                             {
 223                                 goto OutputBufferTooSmall;
 224                             }
 225
 226                             pInputBuffer++;
 227                             thisDWordLittleEndian >>= 8;
 228                             *pOutputBuffer++ = (char)(byte)thisDWordLittleEndian;
 229
 230                             // We can perform a small optimization here. We know at this point that
 231                             // the output buffer is fully consumed (we read two ASCII bytes and wrote
 232                             // two ASCII chars, and we checked earlier that the destination buffer
 233                             // can't store a third byte). If the next byte is ASCII, we can jump straight
 234                             // to the return statement since the end-of-method logic only relies on the
 235                             // destination buffer pointer -- NOT the output chars remaining count -- being
 236                             // correct. If the next byte is not ASCII, we'll need to continue with the
 237                             // rest of the main loop, but we can set the buffer length directly to zero
 238                             // rather than decrementing it from 1 to 0.
 239
 240                             Debug.Assert(outputCharsRemaining == 1);
 241
 242                             if (UInt32ThirdByteIsAscii(thisDWord))
 243                             {
 244                                 goto OutputBufferTooSmall;
 245                             }
 246                             else
 247                             {
 248                                 outputCharsRemaining = 0;
 249                             }
 250                         }
 251                     }
 252
 253                     if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
 254                     {
 255                         goto ProcessRemainingBytesSlow; // input buffer doesn't contain enough data to read a DWORD
 256                     }
 257                     else
 258                     {
 259                         // The input buffer at the current offset contains a non-ASCII byte.
 260                         // Read an entire DWORD and fall through to multi-byte consumption logic.
 261                         thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
 262                     }
 263                 }
 264
 265             BeforeProcessTwoByteSequence:
 266
 267                 // At this point, we know we're working with a multi-byte code unit,
 268                 // but we haven't yet validated it.
 269
 270                 // The masks and comparands are derived from the Unicode Standard, Table 3-6.
 271                 // Additionally, we need to check for valid byte sequences per Table 3-7.
 272
 273                 // Check the 2-byte case.
 274
 275                 if (UInt32BeginsWithUtf8TwoByteMask(thisDWord))
 276                 {
 277                     // Per Table 3-7, valid sequences are:
 278                     // [ C2..DF ] [ 80..BF ]
 279
 280                     if (UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord))
 281                     {
 282                         goto Error;
 283                     }
 284
 285                 ProcessTwoByteSequenceSkipOverlongFormCheck:
 286
 287                     // Optimization: If this is a two-byte-per-character language like Cyrillic or Hebrew,
 288                     // there's a good chance that if we see one two-byte run then there's another two-byte
 289                     // run immediately after. Let's check that now.
 290
 291                     // On little-endian platforms, we can check for the two-byte UTF8 mask *and* validate that
 292                     // the value isn't overlong using a single comparison. On big-endian platforms, we'll need
 293                     // to validate the mask and validate that the sequence isn't overlong as two separate comparisons.
 294
 295                     if ((BitConverter.IsLittleEndian && UInt32EndsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord))
 296                         || (!BitConverter.IsLittleEndian && (UInt32EndsWithUtf8TwoByteMask(thisDWord) && !UInt32EndsWithOverlongUtf8TwoByteSequence(thisDWord))))
 297                     {
 298                         // We have two runs of two bytes each.
 299
 300                         if (outputCharsRemaining < 2)
 301                         {
 302                             goto ProcessRemainingBytesSlow; // running out of output buffer
 303                         }
 304
 305                         Unsafe.WriteUnaligned<uint>(pOutputBuffer, ExtractTwoCharsPackedFromTwoAdjacentTwoByteSequences(thisDWord));
 306
 307                         pInputBuffer += 4;
 308                         pOutputBuffer += 2;
 309                         outputCharsRemaining -= 2;
 310
 311                         if (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer)
 312                         {
 313                             // Optimization: If we read a long run of two-byte sequences, the next sequence is probably
 314                             // also two bytes. Check for that first before going back to the beginning of the loop.
 315
 316                             thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
 317
 318                             if (BitConverter.IsLittleEndian)
 319                             {
 320                                 if (UInt32BeginsWithValidUtf8TwoByteSequenceLittleEndian(thisDWord))
 321                                 {
 322                                     // The next sequence is a valid two-byte sequence.
 323                                     goto ProcessTwoByteSequenceSkipOverlongFormCheck;
 324                                 }
 325                             }
 326                             else
 327                             {
 328                                 if (UInt32BeginsWithUtf8TwoByteMask(thisDWord))
 329                                 {
 330                                     if (UInt32BeginsWithOverlongUtf8TwoByteSequence(thisDWord))
 331                                     {
 332                                         goto Error; // The next sequence purports to be a 2-byte sequence but is overlong.
 333                                     }
 334
 335                                     goto ProcessTwoByteSequenceSkipOverlongFormCheck;
 336                                 }
 337                             }
 338
 339                             // If we reached this point, the next sequence is something other than a valid
 340                             // two-byte sequence, so go back to the beginning of the loop.
 341                             goto AfterReadDWord;
 342                         }
 343                         else
 344                         {
 345                             goto ProcessRemainingBytesSlow; // Running out of data - go down slow path
 346                         }
 347                     }
 348
 349                     // The buffer contains a 2-byte sequence followed by 2 bytes that aren't a 2-byte sequence.
 350                     // Unlikely that a 3-byte sequence would follow a 2-byte sequence, so perhaps remaining
 351                     // bytes are ASCII?
 352
 353                     uint charToWrite = ExtractCharFromFirstTwoByteSequence(thisDWord); // optimistically compute this now, but don't store until we know dest is large enough
 354
 355                     if (UInt32ThirdByteIsAscii(thisDWord))
 356                     {
 357                         if (UInt32FourthByteIsAscii(thisDWord))
 358                         {
 359                             if (outputCharsRemaining < 3)
 360                             {
 361                                 goto ProcessRemainingBytesSlow; // running out of output buffer
 362                             }
 363
 364                             pOutputBuffer[0] = (char)charToWrite;
 365                             if (BitConverter.IsLittleEndian)
 366                             {
 367                                 thisDWord >>= 16;
 368                                 pOutputBuffer[1] = (char)(byte)thisDWord;
 369                                 thisDWord >>= 8;
 370                                 pOutputBuffer[2] = (char)thisDWord;
 371                             }
 372                             else
 373                             {
 374                                 pOutputBuffer[2] = (char)(byte)thisDWord;
 375                                 pOutputBuffer[1] = (char)(byte)(thisDWord >> 8);
 376                             }
 377                             pInputBuffer += 4;
 378                             pOutputBuffer += 3;
 379                             outputCharsRemaining -= 3;
 380
 381                             continue; // go back to original bounds check and check for ASCII
 382                         }
 383                         else
 384                         {
 385                             if (outputCharsRemaining < 2)
 386                             {
 387                                 goto ProcessRemainingBytesSlow; // running out of output buffer
 388                             }
 389
 390                             pOutputBuffer[0] = (char)charToWrite;
 391                             pOutputBuffer[1] = (char)(byte)(thisDWord >> (BitConverter.IsLittleEndian ? 16 : 8));
 392                             pInputBuffer += 3;
 393                             pOutputBuffer += 2;
 394                             outputCharsRemaining -= 2;
 395
 396                             // A two-byte sequence followed by an ASCII byte followed by a non-ASCII byte.
 397                             // Read in the next DWORD and jump directly to the start of the multi-byte processing block.
 398
 399                             if (pFinalPosWhereCanReadDWordFromInputBuffer < pInputBuffer)
 400                             {
 401                                 goto ProcessRemainingBytesSlow; // Running out of data - go down slow path
 402                             }
 403                             else
 404                             {
 405                                 thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
 406                                 goto BeforeProcessTwoByteSequence;
 407                             }
 408                         }
 409                     }
 410                     else
 411                     {
 412                         if (outputCharsRemaining == 0)
 413                         {
 414                             goto ProcessRemainingBytesSlow; // running out of output buffer
 415                         }
 416
 417                         pOutputBuffer[0] = (char)charToWrite;
 418                         pInputBuffer += 2;
 419                         pOutputBuffer++;
 420                         outputCharsRemaining--;
 421
 422                         if (pFinalPosWhereCanReadDWordFromInputBuffer < pInputBuffer)
 423                         {
 424                             goto ProcessRemainingBytesSlow; // Running out of data - go down slow path
 425                         }
 426                         else
 427                         {
 428                             thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
 429                             goto BeforeProcessThreeByteSequence; // we know the next byte isn't ASCII, and it's not the start of a 2-byte sequence (this was checked above)
 430                         }
 431                     }
 432                 }
 433
 434             // Check the 3-byte case.
 435
 436             BeforeProcessThreeByteSequence:
 437
 438                 if (UInt32BeginsWithUtf8ThreeByteMask(thisDWord))
 439                 {
 440                 ProcessThreeByteSequenceWithCheck:
 441
 442                     // We need to check for overlong or surrogate three-byte sequences.
 443                     //
 444                     // Per Table 3-7, valid sequences are:
 445                     // [   E0   ] [ A0..BF ] [ 80..BF ]
 446                     // [ E1..EC ] [ 80..BF ] [ 80..BF ]
 447                     // [   ED   ] [ 80..9F ] [ 80..BF ]
 448                     // [ EE..EF ] [ 80..BF ] [ 80..BF ]
 449                     //
 450                     // Big-endian examples of using the above validation table:
 451                     // E0A0 = 1110 0000 1010 0000 => invalid (overlong ) patterns are 1110 0000 100# ####
 452                     // ED9F = 1110 1101 1001 1111 => invalid (surrogate) patterns are 1110 1101 101# ####
 453                     // If using the bitmask ......................................... 0000 1111 0010 0000 (=0F20),
 454                     // Then invalid (overlong) patterns match the comparand ......... 0000 0000 0000 0000 (=0000),
 455                     // And invalid (surrogate) patterns match the comparand ......... 0000 1101 0010 0000 (=0D20).
 456
 457                     if (BitConverter.IsLittleEndian)
 458                     {
 459                         // The "overlong or surrogate" check can be implemented using a single jump, but there's
 460                         // some overhead to moving the bits into the correct locations in order to perform the
 461                         // correct comparison, and in practice the processor's branch prediction capability is
 462                         // good enough that we shouldn't bother. So we'll use two jumps instead.
 463
 464                         // Can't extract this check into its own helper method because JITter produces suboptimal
 465                         // assembly, even with aggressive inlining.
 466
 467                         // Code below becomes 5 instructions: test, jz, lea, test, jz
 468
 469                         if (((thisDWord & 0x0000_200Fu) == 0) || (((thisDWord - 0x0000_200Du) & 0x0000_200Fu) == 0))
 470                         {
 471                             goto Error; // overlong or surrogate
 472                         }
 473                     }
 474                     else
 475                     {
 476                         if (((thisDWord & 0x0F20_0000u) == 0) || (((thisDWord - 0x0D20_0000u) & 0x0F20_0000u) == 0))
 477                         {
 478                             goto Error; // overlong or surrogate
 479                         }
 480                     }
 481
 482                     // At this point, we know the incoming scalar is well-formed.
 483
 484                     if (outputCharsRemaining == 0)
 485                     {
 486                         goto OutputBufferTooSmall; // not enough space in the destination buffer to write
 487                     }
 488
 489                     // As an optimization, on compatible platforms check if a second three-byte sequence immediately
 490                     // follows the one we just read, and if so use BSWAP and BMI2 to extract them together.
 491
 492                     if (Bmi2.X64.IsSupported)
 493                     {
 494                         Debug.Assert(BitConverter.IsLittleEndian, "BMI2 requires little-endian.");
 495
 496                         // First, check that the leftover byte from the original DWORD is in the range [ E0..EF ], which
 497                         // would indicate the potential start of a second three-byte sequence.
 498
 499                         if (((thisDWord - 0xE000_0000u) & 0xF000_0000u) == 0)
 500                         {
 501                             // The const '3' below is correct because pFinalPosWhereCanReadDWordFromInputBuffer represents
 502                             // the final place where we can safely perform a DWORD read, and we want to probe whether it's
 503                             // safe to read a DWORD beginning at address &pInputBuffer[3].
 504
 505                             if (outputCharsRemaining > 1 && (nint)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) >= 3)
 506                             {
 507                                 // We're going to attempt to read a second 3-byte sequence and write them both out simultaneously using PEXT.
 508                                 // We need to check the continuation bit mask on the remaining two bytes (and we may as well check the leading
 509                                 // byte mask again since it's free), then perform overlong + surrogate checks. If the overlong or surrogate
 510                                 // checks fail, we'll fall through to the remainder of the logic which will transcode the original valid
 511                                 // 3-byte UTF-8 sequence we read; and on the next iteration of the loop the validation routine will run again,
 512                                 // fail, and redirect control flow to the error handling logic at the very end of this method.
 513
 514                                 uint secondDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer + 3);
 515
 516                                 if (UInt32BeginsWithUtf8ThreeByteMask(secondDWord)
 517                                     && ((secondDWord & 0x0000_200Fu) != 0)
 518                                     && (((secondDWord - 0x0000_200Du) & 0x0000_200Fu) != 0))
 519                                 {
 520                                     // combinedQWord = [ 1110ZZZZ 10YYYYYY 10XXXXXX ######## | 1110zzzz 10yyyyyy 10xxxxxx ######## ], where xyz are from first DWORD, XYZ are from second DWORD
 521                                     ulong combinedQWord = ((ulong)BinaryPrimitives.ReverseEndianness(secondDWord) << 32) | BinaryPrimitives.ReverseEndianness(thisDWord);
 522                                     thisDWord = secondDWord; // store this value in the correct local for the ASCII drain logic
 523
 524                                     // extractedQWord = [ 00000000 00000000 00000000 00000000 | ZZZZYYYYYYXXXXXX zzzzyyyyyyxxxxxx ]
 525                                     ulong extractedQWord = Bmi2.X64.ParallelBitExtract(combinedQWord, 0x0F3F3F00_0F3F3F00ul);
 526
 527                                     Unsafe.WriteUnaligned<uint>(pOutputBuffer, (uint)extractedQWord);
 528                                     pInputBuffer += 6;
 529                                     pOutputBuffer += 2;
 530                                     outputCharsRemaining -= 2;
 531
 532                                     // Drain any ASCII data following the second three-byte sequence.
 533
 534                                     goto CheckForAsciiByteAfterThreeByteSequence;
 535                                 }
 536                             }
 537                         }
 538                     }
 539
 540                     // Couldn't extract 2x three-byte sequences together, just do this one by itself.
 541
 542                     *pOutputBuffer = (char)ExtractCharFromFirstThreeByteSequence(thisDWord);
 543                     pInputBuffer += 3;
 544                     pOutputBuffer++;
 545                     outputCharsRemaining--;
 546
 547                 CheckForAsciiByteAfterThreeByteSequence:
 548
 549                     // Occasionally one-off ASCII characters like spaces, periods, or newlines will make their way
 550                     // into the text. If this happens, strip the ASCII byte off now before seeing if the next character
 551                     // consists of three code units.
 552
 553                     if (UInt32FourthByteIsAscii(thisDWord))
 554                     {
 555                         if (outputCharsRemaining == 0)
 556                         {
 557                             goto OutputBufferTooSmall;
 558                         }
 559
 560                         if (BitConverter.IsLittleEndian)
 561                         {
 562                             *pOutputBuffer = (char)(thisDWord >> 24);
 563                         }
 564                         else
 565                         {
 566                             *pOutputBuffer = (char)(byte)thisDWord;
 567                         }
 568
 569                         pInputBuffer++;
 570                         pOutputBuffer++;
 571                         outputCharsRemaining--;
 572                     }
 573
 574                     if (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer)
 575                     {
 576                         thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
 577
 578                         // Optimization: A three-byte character could indicate CJK text, which makes it likely
 579                         // that the character following this one is also CJK. We'll check for a three-byte sequence
 580                         // marker now and jump directly to three-byte sequence processing if we see one, skipping
 581                         // all of the logic at the beginning of the loop.
 582
 583                         if (UInt32BeginsWithUtf8ThreeByteMask(thisDWord))
 584                         {
 585                             goto ProcessThreeByteSequenceWithCheck; // found a three-byte sequence marker; validate and consume
 586                         }
 587                         else
 588                         {
 589                             goto AfterReadDWord; // probably ASCII punctuation or whitespace
 590                         }
 591                     }
 592                     else
 593                     {
 594                         goto ProcessRemainingBytesSlow; // Running out of data - go down slow path
 595                     }
 596                 }
 597
 598                 // Assume the 4-byte case, but we need to validate.
 599
 600                 {
 601                     // We need to check for overlong or invalid (over U+10FFFF) four-byte sequences.
 602                     //
 603                     // Per Table 3-7, valid sequences are:
 604                     // [   F0   ] [ 90..BF ] [ 80..BF ] [ 80..BF ]
 605                     // [ F1..F3 ] [ 80..BF ] [ 80..BF ] [ 80..BF ]
 606                     // [   F4   ] [ 80..8F ] [ 80..BF ] [ 80..BF ]
 607
 608                     if (!UInt32BeginsWithUtf8FourByteMask(thisDWord))
 609                     {
 610                         goto Error;
 611                     }
 612
 613                     // Now check for overlong / out-of-range sequences.
 614
 615                     if (BitConverter.IsLittleEndian)
 616                     {
 617                         // The DWORD we read is [ 10xxxxxx 10yyyyyy 10zzzzzz 11110www ].
 618                         // We want to get the 'w' byte in front of the 'z' byte so that we can perform
 619                         // a single range comparison. We'll take advantage of the fact that the JITter
 620                         // can detect a ROR / ROL operation, then we'll just zero out the bytes that
 621                         // aren't involved in the range check.
 622
 623                         uint toCheck = thisDWord & 0x0000_FFFFu;
 624
 625                         // At this point, toCheck = [ 00000000 00000000 10zzzzzz 11110www ].
 626
 627                         toCheck = BitOperations.RotateRight(toCheck, 8);
 628
 629                         // At this point, toCheck = [ 11110www 00000000 00000000 10zzzzzz ].
 630
 631                         if (!UnicodeUtility.IsInRangeInclusive(toCheck, 0xF000_0090u, 0xF400_008Fu))
 632                         {
 633                             goto Error;
 634                         }
 635                     }
 636                     else
 637                     {
 638                         if (!UnicodeUtility.IsInRangeInclusive(thisDWord, 0xF090_0000u, 0xF48F_FFFFu))
 639                         {
 640                             goto Error;
 641                         }
 642                     }
 643
 644                     // Validation complete.
 645
 646                     if (outputCharsRemaining < 2)
 647                     {
 648                         // There's no point to falling back to the "drain the input buffer" logic, since we know
 649                         // we can't write anything to the destination. So we'll just exit immediately.
 650                         goto OutputBufferTooSmall;
 651                     }
 652
 653                     Unsafe.WriteUnaligned<uint>(pOutputBuffer, ExtractCharsFromFourByteSequence(thisDWord));
 654
 655                     pInputBuffer += 4;
 656                     pOutputBuffer += 2;
 657                     outputCharsRemaining -= 2;
 658
 659                     continue; // go back to beginning of loop for processing
 660                 }
 661             }
 662
 663         ProcessRemainingBytesSlow:
 664             inputLength = (int)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) + 4;
 665
 666         ProcessInputOfLessThanDWordSize:
 667             while (inputLength > 0)
 668             {
 669                 uint firstByte = pInputBuffer[0];
 670                 if (firstByte <= 0x7Fu)
 671                 {
 672                     if (outputCharsRemaining == 0)
 673                     {
 674                         goto OutputBufferTooSmall; // we have no hope of writing anything to the output
 675                     }
 676
 677                     // 1-byte (ASCII) case
 678                     *pOutputBuffer = (char)firstByte;
 679
 680                     pInputBuffer++;
 681                     pOutputBuffer++;
 682                     inputLength--;
 683                     outputCharsRemaining--;
 684                     continue;
 685                 }
 686
 687                 // Potentially the start of a multi-byte sequence?
 688
 689                 firstByte -= 0xC2u;
 690                 if ((byte)firstByte <= (0xDFu - 0xC2u))
 691                 {
 692                     // Potentially a 2-byte sequence?
 693                     if (inputLength < 2)
 694                     {
 695                         goto InputBufferTooSmall; // out of data
 696                     }
 697
 698                     uint secondByte = pInputBuffer[1];
 699                     if (!IsLowByteUtf8ContinuationByte(secondByte))
 700                     {
 701                         goto Error; // 2-byte marker not followed by continuation byte
 702                     }
 703
 704                     if (outputCharsRemaining == 0)
 705                     {
 706                         goto OutputBufferTooSmall; // we have no hope of writing anything to the output
 707                     }
 708
 709                     uint asChar = (firstByte << 6) + secondByte + ((0xC2u - 0xC0u) << 6) - 0x80u; // remove UTF-8 markers from scalar
 710                     *pOutputBuffer = (char)asChar;
 711
 712                     pInputBuffer += 2;
 713                     pOutputBuffer++;
 714                     inputLength -= 2;
 715                     outputCharsRemaining--;
 716                     continue;
 717                 }
 718                 else if ((byte)firstByte <= (0xEFu - 0xC2u))
 719                 {
 720                     // Potentially a 3-byte sequence?
 721                     if (inputLength >= 3)
 722                     {
 723                         uint secondByte = pInputBuffer[1];
 724                         uint thirdByte = pInputBuffer[2];
 725                         if (!IsLowByteUtf8ContinuationByte(secondByte) || !IsLowByteUtf8ContinuationByte(thirdByte))
 726                         {
 727                             goto Error; // 3-byte marker not followed by 2 continuation bytes
 728                         }
 729
 730                         // To speed up the validation logic below, we're not going to remove the UTF-8 markers from the partial char just yet.
 731                         // We account for this in the comparisons below.
 732
 733                         uint partialChar = (firstByte << 12) + (secondByte << 6);
 734                         if (partialChar < ((0xE0u - 0xC2u) << 12) + (0xA0u << 6))
 735                         {
 736                             goto Error; // this is an overlong encoding; fail
 737                         }
 738
 739                         partialChar -= ((0xEDu - 0xC2u) << 12) + (0xA0u << 6); // if partialChar = 0, we're at beginning of UTF-16 surrogate code point range
 740                         if (partialChar < 0x0800u /* number of code points in UTF-16 surrogate code point range */)
 741                         {
 742                             goto Error; // attempted to encode a UTF-16 surrogate code point; fail
 743                         }
 744
 745                         if (outputCharsRemaining == 0)
 746                         {
 747                             goto OutputBufferTooSmall; // we have no hope of writing anything to the output
 748                         }
 749
 750                         // Now restore the full scalar value.
 751
 752                         partialChar += thirdByte;
 753                         partialChar += 0xD800; // undo "move to beginning of UTF-16 surrogate code point range" from earlier, fold it with later adds
 754                         partialChar -= 0x80u; // remove third byte continuation marker
 755
 756                         *pOutputBuffer = (char)partialChar;
 757
 758                         pInputBuffer += 3;
 759                         pOutputBuffer++;
 760                         inputLength -= 3;
 761                         outputCharsRemaining--;
 762                         continue;
 763                     }
 764                     else if (inputLength >= 2)
 765                     {
 766                         uint secondByte = pInputBuffer[1];
 767                         if (!IsLowByteUtf8ContinuationByte(secondByte))
 768                         {
 769                             goto Error; // 3-byte marker not followed by continuation byte
 770                         }
 771
 772                         // We can't build up the entire scalar value now, but we can check for overlong / surrogate representations
 773                         // from just the first two bytes.
 774
 775                         uint partialChar = (firstByte << 6) + secondByte; // don't worry about fixing up the UTF-8 markers; we'll account for it in the below comparison
 776                         if (partialChar < ((0xE0u - 0xC2u) << 6) + 0xA0u)
 777                         {
 778                             goto Error; // failed overlong check
 779                         }
 780                         if (UnicodeUtility.IsInRangeInclusive(partialChar, ((0xEDu - 0xC2u) << 6) + 0xA0u, ((0xEEu - 0xC2u) << 6) + 0x7Fu))
 781                         {
 782                             goto Error; // failed surrogate check
 783                         }
 784                     }
 785
 786                     goto InputBufferTooSmall; // out of data
 787                 }
 788                 else if ((byte)firstByte <= (0xF4u - 0xC2u))
 789                 {
 790                     // Potentially a 4-byte sequence?
 791
 792                     if (inputLength < 2)
 793                     {
 794                         goto InputBufferTooSmall; // ran out of data
 795                     }
 796
 797                     uint nextByte = pInputBuffer[1];
 798                     if (!IsLowByteUtf8ContinuationByte(nextByte))
 799                     {
 800                         goto Error; // 4-byte marker not followed by a continuation byte
 801                     }
 802
 803                     uint asPartialChar = (firstByte << 6) + nextByte; // don't worry about fixing up the UTF-8 markers; we'll account for it in the below comparison
 804                     if (!UnicodeUtility.IsInRangeInclusive(asPartialChar, ((0xF0u - 0xC2u) << 6) + 0x90u, ((0xF4u - 0xC2u) << 6) + 0x8Fu))
 805                     {
 806                         goto Error; // failed overlong / out-of-range check
 807                     }
 808
 809                     if (inputLength < 3)
 810                     {
 811                         goto InputBufferTooSmall; // ran out of data
 812                     }
 813
 814                     if (!IsLowByteUtf8ContinuationByte(pInputBuffer[2]))
 815                     {
 816                         goto Error; // third byte in 4-byte sequence not a continuation byte
 817                     }
 818
 819                     if (inputLength < 4)
 820                     {
 821                         goto InputBufferTooSmall; // ran out of data
 822                     }
 823
 824                     if (!IsLowByteUtf8ContinuationByte(pInputBuffer[3]))
 825                     {
 826                         goto Error; // fourth byte in 4-byte sequence not a continuation byte
 827                     }
 828
 829                     // If we read a valid astral scalar value, the only way we could've fallen down this code path
 830                     // is that we didn't have enough output buffer to write the result.
 831
 832                     goto OutputBufferTooSmall;
 833                 }
 834                 else
 835                 {
 836                     goto Error; // didn't begin with [ C2 .. F4 ], so invalid multi-byte sequence header byte
 837                 }
 838             }
 839
 840             OperationStatus retVal = OperationStatus.Done;
 841             goto ReturnCommon;
 842
 843         InputBufferTooSmall:
 844             retVal = OperationStatus.NeedMoreData;
 845             goto ReturnCommon;
 846
 847         OutputBufferTooSmall:
 848             retVal = OperationStatus.DestinationTooSmall;
 849             goto ReturnCommon;
 850
 851         Error:
 852             retVal = OperationStatus.InvalidData;
 853             goto ReturnCommon;
 854
 855         ReturnCommon:
 856             pInputBufferRemaining = pInputBuffer;
 857             pOutputBufferRemaining = pOutputBuffer;
 858             return retVal;
 859         }
 860
 861         // On method return, pInputBufferRemaining and pOutputBufferRemaining will both point to where
 862         // the next char would have been consumed from / the next byte would have been written to.
 863         // inputLength in chars, outputBytesRemaining in bytes.
 864         public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLength, byte* pOutputBuffer, int outputBytesRemaining, out char* pInputBufferRemaining, out byte* pOutputBufferRemaining)
 865         {
 866             const int CharsPerDWord = sizeof(uint) / sizeof(char);
 867
 868             Debug.Assert(inputLength >= 0, "Input length must not be negative.");
 869             Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null.");
 870
 871             Debug.Assert(outputBytesRemaining >= 0, "Destination length must not be negative.");
 872             Debug.Assert(pOutputBuffer != null || outputBytesRemaining == 0, "Destination length must be zero if destination buffer pointer is null.");
 873
 874             // First, try vectorized conversion.
 875
 876             {
 877                 nuint numElementsConverted = ASCIIUtility.NarrowUtf16ToAscii(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputBytesRemaining));
 878
 879                 pInputBuffer += numElementsConverted;
 880                 pOutputBuffer += numElementsConverted;
 881
 882                 // Quick check - did we just end up consuming the entire input buffer?
 883                 // If so, short-circuit the remainder of the method.
 884
 885                 if ((int)numElementsConverted == inputLength)
 886                 {
 887                     pInputBufferRemaining = pInputBuffer;
 888                     pOutputBufferRemaining = pOutputBuffer;
 889                     return OperationStatus.Done;
 890                 }
 891
 892                 inputLength -= (int)numElementsConverted;
 893                 outputBytesRemaining -= (int)numElementsConverted;
 894             }
 895
 896             if (inputLength < CharsPerDWord)
 897             {
 898                 goto ProcessInputOfLessThanDWordSize;
 899             }
 900
 901             char* pFinalPosWhereCanReadDWordFromInputBuffer = pInputBuffer + (uint)inputLength - CharsPerDWord;
 902
 903             // Begin the main loop.
 904
 905 #if DEBUG
 906             char* pLastBufferPosProcessed = null; // used for invariant checking in debug builds
 907 #endif
 908
 909             uint thisDWord;
 910
 911             while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer)
 912             {
 913                 // Read 32 bits at a time. This is enough to hold any possible UTF16-encoded scalar.
 914
 915                 thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
 916
 917             AfterReadDWord:
 918
 919 #if DEBUG
 920                 Debug.Assert(pLastBufferPosProcessed < pInputBuffer, "Algorithm should've made forward progress since last read.");
 921                 pLastBufferPosProcessed = pInputBuffer;
 922 #endif
 923
 924                 // First, check for the common case of all-ASCII chars.
 925
 926                 if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord))
 927                 {
 928                     // We read an all-ASCII sequence (2 chars).
 929
 930                     if (outputBytesRemaining < 2)
 931                     {
 932                         goto ProcessOneCharFromCurrentDWordAndFinish; // running out of space, but may be able to write some data
 933                     }
 934
 935                     // The high WORD of the local declared below might be populated with garbage
 936                     // as a result of our shifts below, but that's ok since we're only going to
 937                     // write the low WORD.
 938                     //
 939                     // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ]
 940                     // (Same logic works regardless of endianness.)
 941                     uint valueToWrite = thisDWord | (thisDWord >> 8);
 942
 943                     Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)valueToWrite);
 944
 945                     pInputBuffer += 2;
 946                     pOutputBuffer += 2;
 947                     outputBytesRemaining -= 2;
 948
 949                     // If we saw a sequence of all ASCII, there's a good chance a significant amount of following data is also ASCII.
 950                     // Below is basically unrolled loops with poor man's vectorization.
 951
 952                     uint inputCharsRemaining = (uint)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + 2;
 953                     uint minElementsRemaining = (uint)Math.Min(inputCharsRemaining, outputBytesRemaining);
 954
 955                     if (Bmi2.X64.IsSupported)
 956                     {
 957                         Debug.Assert(BitConverter.IsLittleEndian, "BMI2 requires little-endian.");
 958                         const ulong PEXT_MASK = 0x00FF00FF_00FF00FFul;
 959
 960                         // Try reading and writing 8 elements per iteration.
 961                         uint maxIters = minElementsRemaining / 8;
 962                         ulong firstQWord, secondQWord;
 963                         int i;
 964                         for (i = 0; (uint)i < maxIters; i++)
 965                         {
 966                             firstQWord = Unsafe.ReadUnaligned<ulong>(pInputBuffer);
 967                             secondQWord = Unsafe.ReadUnaligned<ulong>(pInputBuffer + 4);
 968
 969                             if (!Utf16Utility.AllCharsInUInt64AreAscii(firstQWord | secondQWord))
 970                             {
 971                                 goto LoopTerminatedDueToNonAsciiData;
 972                             }
 973
 974                             Unsafe.WriteUnaligned<uint>(pOutputBuffer, (uint)Bmi2.X64.ParallelBitExtract(firstQWord, PEXT_MASK));
 975                             Unsafe.WriteUnaligned<uint>(pOutputBuffer + 4, (uint)Bmi2.X64.ParallelBitExtract(secondQWord, PEXT_MASK));
 976
 977                             pInputBuffer += 8;
 978                             pOutputBuffer += 8;
 979                         }
 980
 981                         outputBytesRemaining -= 8 * i;
 982
 983                         // Can we perform one more iteration, but reading & writing 4 elements instead of 8?
 984
 985                         if ((minElementsRemaining & 4) != 0)
 986                         {
 987                             secondQWord = Unsafe.ReadUnaligned<ulong>(pInputBuffer);
 988
 989                             if (!Utf16Utility.AllCharsInUInt64AreAscii(secondQWord))
 990                             {
 991                                 goto LoopTerminatedDueToNonAsciiDataInSecondQWord;
 992                             }
 993
 994                             Unsafe.WriteUnaligned<uint>(pOutputBuffer, (uint)Bmi2.X64.ParallelBitExtract(secondQWord, PEXT_MASK));
 995
 996                             pInputBuffer += 4;
 997                             pOutputBuffer += 4;
 998                             outputBytesRemaining -= 4;
 999                         }
1000
1001                         continue; // Go back to beginning of main loop, read data, check for ASCII
1002
1003                     LoopTerminatedDueToNonAsciiData:
1004
1005                         outputBytesRemaining -= 8 * i;
1006
1007                         // First, see if we can drain any ASCII data from the first QWORD.
1008
1009                         if (Utf16Utility.AllCharsInUInt64AreAscii(firstQWord))
1010                         {
1011                             Unsafe.WriteUnaligned<uint>(pOutputBuffer, (uint)Bmi2.X64.ParallelBitExtract(firstQWord, PEXT_MASK));
1012                             pInputBuffer += 4;
1013                             pOutputBuffer += 4;
1014                             outputBytesRemaining -= 4;
1015                         }
1016                         else
1017                         {
1018                             secondQWord = firstQWord;
1019                         }
1020
1021                     LoopTerminatedDueToNonAsciiDataInSecondQWord:
1022
1023                         Debug.Assert(!Utf16Utility.AllCharsInUInt64AreAscii(secondQWord)); // this condition should've been checked earlier
1024
1025                         thisDWord = (uint)secondQWord;
1026                         if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord))
1027                         {
1028                             // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ]
1029                             Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)(thisDWord | (thisDWord >> 8)));
1030                             pInputBuffer += 2;
1031                             pOutputBuffer += 2;
1032                             outputBytesRemaining -= 2;
1033                             thisDWord = (uint)(secondQWord >> 32);
1034                         }
1035
1036                         goto AfterReadDWordSkipAllCharsAsciiCheck;
1037                     }
1038                     else
1039                     {
1040                         // Can't use BMI2 x64, so we'll only read and write 4 elements per iteration.
1041                         uint maxIters = minElementsRemaining / 4;
1042                         uint secondDWord;
1043                         int i;
1044                         for (i = 0; (uint)i < maxIters; i++)
1045                         {
1046                             thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
1047                             secondDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer + 2);
1048
1049                             if (!Utf16Utility.AllCharsInUInt32AreAscii(thisDWord | secondDWord))
1050                             {
1051                                 goto LoopTerminatedDueToNonAsciiData;
1052                             }
1053
1054                             // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ]
1055                             // (Same logic works regardless of endianness.)
1056                             Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)(thisDWord | (thisDWord >> 8)));
1057                             Unsafe.WriteUnaligned<ushort>(pOutputBuffer + 2, (ushort)(secondDWord | (secondDWord >> 8)));
1058
1059                             pInputBuffer += 4;
1060                             pOutputBuffer += 4;
1061                         }
1062
1063                         outputBytesRemaining -= 4 * i;
1064
1065                         continue; // Go back to beginning of main loop, read data, check for ASCII
1066
1067                     LoopTerminatedDueToNonAsciiData:
1068
1069                         outputBytesRemaining -= 4 * i;
1070
1071                         // First, see if we can drain any ASCII data from the first DWORD.
1072
1073                         if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord))
1074                         {
1075                             // [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ]
1076                             // (Same logic works regardless of endianness.)
1077                             Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)(thisDWord | (thisDWord >> 8)));
1078                             pInputBuffer += 2;
1079                             pOutputBuffer += 2;
1080                             outputBytesRemaining -= 2;
1081                             thisDWord = secondDWord;
1082                         }
1083
1084                         goto AfterReadDWordSkipAllCharsAsciiCheck;
1085                     }
1086                 }
1087
1088             AfterReadDWordSkipAllCharsAsciiCheck:
1089
1090                 Debug.Assert(!Utf16Utility.AllCharsInUInt32AreAscii(thisDWord)); // this should have been handled earlier
1091
1092                 // Next, try stripping off the first ASCII char if it exists.
1093                 // We don't check for a second ASCII char since that should have been handled above.
1094
1095                 if (IsFirstCharAscii(thisDWord))
1096                 {
1097                     if (outputBytesRemaining == 0)
1098                     {
1099                         goto OutputBufferTooSmall;
1100                     }
1101
1102                     if (BitConverter.IsLittleEndian)
1103                     {
1104                         pOutputBuffer[0] = (byte)thisDWord; // extract [ ## ## 00 AA ]
1105                     }
1106                     else
1107                     {
1108                         pOutputBuffer[0] = (byte)(thisDWord >> 24); // extract [ AA 00 ## ## ]
1109                     }
1110
1111                     pInputBuffer++;
1112                     pOutputBuffer++;
1113                     outputBytesRemaining--;
1114
1115                     if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
1116                     {
1117                         goto ProcessNextCharAndFinish; // input buffer doesn't contain enough data to read a DWORD
1118                     }
1119                     else
1120                     {
1121                         // The input buffer at the current offset contains a non-ASCII char.
1122                         // Read an entire DWORD and fall through to non-ASCII consumption logic.
1123                         thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
1124                     }
1125                 }
1126
1127                 // At this point, we know the first char in the buffer is non-ASCII, but we haven't yet validated it.
1128
1129                 if (!IsFirstCharAtLeastThreeUtf8Bytes(thisDWord))
1130                 {
1131                 TryConsumeMultipleTwoByteSequences:
1132
1133                     // For certain text (Greek, Cyrillic, ...), 2-byte sequences tend to be clustered. We'll try transcoding them in
1134                     // a tight loop without falling back to the main loop.
1135
1136                     if (IsSecondCharTwoUtf8Bytes(thisDWord))
1137                     {
1138                         // We have two runs of two bytes each.
1139
1140                         if (outputBytesRemaining < 4)
1141                         {
1142                             goto ProcessOneCharFromCurrentDWordAndFinish; // running out of output buffer
1143                         }
1144
1145                         Unsafe.WriteUnaligned<uint>(pOutputBuffer, ExtractTwoUtf8TwoByteSequencesFromTwoPackedUtf16Chars(thisDWord));
1146
1147                         pInputBuffer += 2;
1148                         pOutputBuffer += 4;
1149                         outputBytesRemaining -= 4;
1150
1151                         if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
1152                         {
1153                             goto ProcessNextCharAndFinish; // Running out of data - go down slow path
1154                         }
1155                         else
1156                         {
1157                             // Optimization: If we read a long run of two-byte sequences, the next sequence is probably
1158                             // also two bytes. Check for that first before going back to the beginning of the loop.
1159
1160                             thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
1161
1162                             if (IsFirstCharTwoUtf8Bytes(thisDWord))
1163                             {
1164                                 // Validated we have a two-byte sequence coming up
1165                                 goto TryConsumeMultipleTwoByteSequences;
1166                             }
1167
1168                             // If we reached this point, the next sequence is something other than a valid
1169                             // two-byte sequence, so go back to the beginning of the loop.
1170                             goto AfterReadDWord;
1171                         }
1172                     }
1173
1174                     if (outputBytesRemaining < 2)
1175                     {
1176                         goto OutputBufferTooSmall;
1177                     }
1178
1179                     Unsafe.WriteUnaligned<ushort>(pOutputBuffer, (ushort)ExtractUtf8TwoByteSequenceFromFirstUtf16Char(thisDWord));
1180
1181                     // The buffer contains a 2-byte sequence followed by 2 bytes that aren't a 2-byte sequence.
1182                     // Unlikely that a 3-byte sequence would follow a 2-byte sequence, so perhaps remaining
1183                     // char is ASCII?
1184
1185                     if (IsSecondCharAscii(thisDWord))
1186                     {
1187                         if (outputBytesRemaining >= 3)
1188                         {
1189                             if (BitConverter.IsLittleEndian)
1190                             {
1191                                 thisDWord >>= 16;
1192                             }
1193                             pOutputBuffer[2] = (byte)thisDWord;
1194
1195                             pInputBuffer += 2;
1196                             pOutputBuffer += 3;
1197                             outputBytesRemaining -= 3;
1198
1199                             continue; // go back to original bounds check and check for ASCII
1200                         }
1201                         else
1202                         {
1203                             pInputBuffer++;
1204                             pOutputBuffer += 2;
1205                             goto OutputBufferTooSmall;
1206                         }
1207                     }
1208                     else
1209                     {
1210                         pInputBuffer++;
1211                         pOutputBuffer += 2;
1212                         outputBytesRemaining -= 2;
1213
1214                         if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
1215                         {
1216                             goto ProcessNextCharAndFinish; // Running out of data - go down slow path
1217                         }
1218                         else
1219                         {
1220                             thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
1221                             goto BeforeProcessThreeByteSequence; // we know the next char isn't ASCII, and it doesn't transcode to a 2-byte sequence (this was checked above)
1222                         }
1223                     }
1224                 }
1225
1226             // Check the 3-byte case.
1227
1228             BeforeProcessThreeByteSequence:
1229
1230                 if (!IsFirstCharSurrogate(thisDWord))
1231                 {
1232                     // Optimization: A three-byte character could indicate CJK text, which makes it likely
1233                     // that the character following this one is also CJK. We'll perform the check now
1234                     // rather than jumping to the beginning of the main loop.
1235
1236                     if (IsSecondCharAtLeastThreeUtf8Bytes(thisDWord))
1237                     {
1238                         if (!IsSecondCharSurrogate(thisDWord))
1239                         {
1240                             if (outputBytesRemaining < 6)
1241                             {
1242                                 goto ConsumeSingleThreeByteRun; // not enough space - try consuming as much as we can
1243                             }
1244
1245                             WriteTwoUtf16CharsAsTwoUtf8ThreeByteSequences(ref *pOutputBuffer, thisDWord);
1246
1247                             pInputBuffer += 2;
1248                             pOutputBuffer += 6;
1249                             outputBytesRemaining -= 6;
1250
1251                             // Try to remain in the 3-byte processing loop if at all possible.
1252
1253                             if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
1254                             {
1255                                 goto ProcessNextCharAndFinish; // Running out of data - go down slow path
1256                             }
1257                             else
1258                             {
1259                                 thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
1260
1261                                 if (IsFirstCharAtLeastThreeUtf8Bytes(thisDWord))
1262                                 {
1263                                     goto BeforeProcessThreeByteSequence;
1264                                 }
1265                                 else
1266                                 {
1267                                     // Fall back to standard processing loop since we don't know how to optimize this.
1268                                     goto AfterReadDWord;
1269                                 }
1270                             }
1271                         }
1272                     }
1273
1274                 ConsumeSingleThreeByteRun:
1275
1276                     if (outputBytesRemaining < 3)
1277                     {
1278                         goto OutputBufferTooSmall;
1279                     }
1280
1281                     WriteFirstUtf16CharAsUtf8ThreeByteSequence(ref *pOutputBuffer, thisDWord);
1282
1283                     pInputBuffer++;
1284                     pOutputBuffer += 3;
1285                     outputBytesRemaining -= 3;
1286
1287                     // Occasionally one-off ASCII characters like spaces, periods, or newlines will make their way
1288                     // into the text. If this happens, strip it off now before seeing if the next character
1289                     // consists of three code units.
1290
1291                     if (IsSecondCharAscii(thisDWord))
1292                     {
1293                         if (outputBytesRemaining == 0)
1294                         {
1295                             goto OutputBufferTooSmall;
1296                         }
1297
1298                         if (BitConverter.IsLittleEndian)
1299                         {
1300                             *pOutputBuffer = (byte)(thisDWord >> 16);
1301                         }
1302                         else
1303                         {
1304                             *pOutputBuffer = (byte)(thisDWord);
1305                         }
1306
1307                         pInputBuffer++;
1308                         pOutputBuffer++;
1309                         outputBytesRemaining--;
1310
1311                         if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
1312                         {
1313                             goto ProcessNextCharAndFinish; // Running out of data - go down slow path
1314                         }
1315                         else
1316                         {
1317                             thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
1318
1319                             if (IsFirstCharAtLeastThreeUtf8Bytes(thisDWord))
1320                             {
1321                                 goto BeforeProcessThreeByteSequence;
1322                             }
1323                             else
1324                             {
1325                                 // Fall back to standard processing loop since we don't know how to optimize this.
1326                                 goto AfterReadDWord;
1327                             }
1328                         }
1329                     }
1330
1331                     if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer)
1332                     {
1333                         goto ProcessNextCharAndFinish; // Running out of data - go down slow path
1334                     }
1335                     else
1336                     {
1337                         thisDWord = Unsafe.ReadUnaligned<uint>(pInputBuffer);
1338                         goto AfterReadDWordSkipAllCharsAsciiCheck; // we just checked above that this value isn't ASCII
1339                     }
1340                 }
1341
1342                 // Four byte sequence processing
1343
1344                 if (IsWellFormedUtf16SurrogatePair(thisDWord))
1345                 {
1346                     if (outputBytesRemaining < 4)
1347                     {
1348                         goto OutputBufferTooSmall;
1349                     }
1350
1351                     Unsafe.WriteUnaligned<uint>(pOutputBuffer, ExtractFourUtf8BytesFromSurrogatePair(thisDWord));
1352
1353                     pInputBuffer += 2;
1354                     pOutputBuffer += 4;
1355                     outputBytesRemaining -= 4;
1356
1357                     continue; // go back to beginning of loop for processing
1358                 }
1359
1360                 goto Error; // an ill-formed surrogate sequence: high not followed by low, or low not preceded by high
1361             }
1362
1363         ProcessNextCharAndFinish:
1364             inputLength = (int)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + CharsPerDWord;
1365
1366         ProcessInputOfLessThanDWordSize:
1367             Debug.Assert(inputLength < CharsPerDWord);
1368
1369             if (inputLength == 0)
1370             {
1371                 goto InputBufferFullyConsumed;
1372             }
1373
1374             uint thisChar = *pInputBuffer;
1375             goto ProcessFinalChar;
1376
1377         ProcessOneCharFromCurrentDWordAndFinish:
1378             if (BitConverter.IsLittleEndian)
1379             {
1380                 thisChar = thisDWord & 0xFFFFu; // preserve only the first char
1381             }
1382             else
1383             {
1384                 thisChar = thisDWord >> 16; // preserve only the first char
1385             }
1386
1387         ProcessFinalChar:
1388             {
1389                 if (thisChar <= 0x7Fu)
1390                 {
1391                     if (outputBytesRemaining == 0)
1392                     {
1393                         goto OutputBufferTooSmall; // we have no hope of writing anything to the output
1394                     }
1395
1396                     // 1-byte (ASCII) case
1397                     *pOutputBuffer = (byte)thisChar;
1398
1399                     pInputBuffer++;
1400                     pOutputBuffer++;
1401                 }
1402                 else if (thisChar < 0x0800u)
1403                 {
1404                     if (outputBytesRemaining < 2)
1405                     {
1406                         goto OutputBufferTooSmall; // we have no hope of writing anything to the output
1407                     }
1408
1409                     // 2-byte case
1410                     pOutputBuffer[1] = (byte)((thisChar & 0x3Fu) | unchecked((uint)(sbyte)0x80)); // [ 10xxxxxx ]
1411                     pOutputBuffer[0] = (byte)((thisChar >> 6) | unchecked((uint)(sbyte)0xC0)); // [ 110yyyyy ]
1412
1413                     pInputBuffer++;
1414                     pOutputBuffer += 2;
1415                 }
1416                 else if (!UnicodeUtility.IsSurrogateCodePoint(thisChar))
1417                 {
1418                     if (outputBytesRemaining < 3)
1419                     {
1420                         goto OutputBufferTooSmall; // we have no hope of writing anything to the output
1421                     }
1422
1423                     // 3-byte case
1424                     pOutputBuffer[2] = (byte)((thisChar & 0x3Fu) | unchecked((uint)(sbyte)0x80)); // [ 10xxxxxx ]
1425                     pOutputBuffer[1] = (byte)(((thisChar >> 6) & 0x3Fu) | unchecked((uint)(sbyte)0x80)); // [ 10yyyyyy ]
1426                     pOutputBuffer[0] = (byte)((thisChar >> 12) | unchecked((uint)(sbyte)0xE0)); // [ 1110zzzz ]
1427
1428                     pInputBuffer++;
1429                     pOutputBuffer += 3;
1430                 }
1431                 else if (thisChar <= 0xDBFFu)
1432                 {
1433                     // UTF-16 high surrogate code point with no trailing data, report incomplete input buffer
1434                     goto InputBufferTooSmall;
1435                 }
1436                 else
1437                 {
1438                     // UTF-16 low surrogate code point with no leading data, report error
1439                     goto Error;
1440                 }
1441             }
1442
1443             // There are two ways we can end up here. Either we were running low on input data,
1444             // or we were running low on space in the destination buffer. If we're running low on
1445             // input data (label targets ProcessInputOfLessThanDWordSize and ProcessNextCharAndFinish),
1446             // then the inputLength value is guaranteed to be between 0 and 1, and we should return Done.
1447             // If we're running low on destination buffer space (label target ProcessOneCharFromCurrentDWordAndFinish),
1448             // then we didn't modify inputLength since entering the main loop, which means it should
1449             // still have a value of >= 2. So checking the value of inputLength is all we need to do to determine
1450             // which of the two scenarios we're in.
1451
1452             if (inputLength > 1)
1453             {
1454                 goto OutputBufferTooSmall;
1455             }
1456
1457         InputBufferFullyConsumed:
1458             OperationStatus retVal = OperationStatus.Done;
1459             goto ReturnCommon;
1460
1461         InputBufferTooSmall:
1462             retVal = OperationStatus.NeedMoreData;
1463             goto ReturnCommon;
1464
1465         OutputBufferTooSmall:
1466             retVal = OperationStatus.DestinationTooSmall;
1467             goto ReturnCommon;
1468
1469         Error:
1470             retVal = OperationStatus.InvalidData;
1471             goto ReturnCommon;
1472
1473         ReturnCommon:
1474             pInputBufferRemaining = pInputBuffer;
1475             pOutputBufferRemaining = pOutputBuffer;
1476             return retVal;
1477         }
1478     }
1479 }