netcore/System.Private.CoreLib/shared/System/Text/UnicodeEncoding.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 //
   6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
   7 //
   8
   9 // This define can be used to turn off the fast loops. Useful for finding whether
  10 // the problem is fastloop-specific.
  11 #define FASTLOOP
  12
  13 using System;
  14 using System.Globalization;
  15 using System.Diagnostics;
  16 using System.Runtime.InteropServices;
  17
  18 using Internal.Runtime.CompilerServices;
  19
  20 namespace System.Text
  21 {
  22     public class UnicodeEncoding : Encoding
  23     {
        // Used by Encoding.BigEndianUnicode/Unicode for lazy initialization
        // The initialization code will not be run until a static member of the class is referenced
        internal static readonly UnicodeEncoding s_bigEndianDefault = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
        internal static readonly UnicodeEncoding s_littleEndianDefault = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);

        // Set from the throwOnInvalidBytes constructor argument. When true, SetDefaultFallbacks
        // installs the exception fallbacks instead of the U+FFFD replacement fallbacks.
        private readonly bool isThrowException = false;

        // Byte order requested at construction. It selects the code page handed to the base
        // constructor (1201 when true, 1200 when false) and gates the fast path in GetByteCount.
        private readonly bool bigEndian = false;
        // Stored from the constructor's byteOrderMark argument.
        // NOTE(review): presumably controls whether a preamble is emitted - confirm against GetPreamble.
        private readonly bool byteOrderMark = false;

        // Unicode version 2.0 character size in bytes
        public const int CharSize = 2;
  36
  37         public UnicodeEncoding()
  38             : this(false, true)
  39         {
  40         }
  41
  42
  43         public UnicodeEncoding(bool bigEndian, bool byteOrderMark)
  44             : base(bigEndian ? 1201 : 1200)  //Set the data item.
  45         {
  46             this.bigEndian = bigEndian;
  47             this.byteOrderMark = byteOrderMark;
  48         }
  49
  50
  51         public UnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
  52             : this(bigEndian, byteOrderMark)
  53         {
  54             this.isThrowException = throwOnInvalidBytes;
  55
  56             // Encoding constructor already did this, but it'll be wrong if we're throwing exceptions
  57             if (this.isThrowException)
  58                 SetDefaultFallbacks();
  59         }
  60
  61         internal sealed override void SetDefaultFallbacks()
  62         {
  63             // For UTF-X encodings, we use a replacement fallback with an empty string
  64             if (this.isThrowException)
  65             {
  66                 this.encoderFallback = EncoderFallback.ExceptionFallback;
  67                 this.decoderFallback = DecoderFallback.ExceptionFallback;
  68             }
  69             else
  70             {
  71                 this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
  72                 this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
  73             }
  74         }
  75
  76         // The following methods are copied from EncodingNLS.cs.
  77         // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
  78         // These should be kept in sync for the following classes:
  79         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  80         //
  81
  82         // Returns the number of bytes required to encode a range of characters in
  83         // a character array.
  84         //
  85         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  86         // So if you fix this, fix the others.  Currently those include:
  87         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  88         // parent method is safe
  89
  90         public override unsafe int GetByteCount(char[] chars, int index, int count)
  91         {
  92             // Validate input parameters
  93             if (chars == null)
  94                 throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
  95
  96             if (index < 0 || count < 0)
  97                 throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
  98
  99             if (chars.Length - index < count)
 100                 throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
 101
 102             // If no input, return 0, avoid fixed empty array problem
 103             if (count == 0)
 104                 return 0;
 105
 106             // Just call the pointer version
 107             fixed (char* pChars = chars)
 108                 return GetByteCount(pChars + index, count, null);
 109         }
 110
 111         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 112         // So if you fix this, fix the others.  Currently those include:
 113         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 114         // parent method is safe
 115
 116         public override unsafe int GetByteCount(string s)
 117         {
 118             // Validate input
 119             if (s == null)
 120                 throw new ArgumentNullException(nameof(s));
 121
 122             fixed (char* pChars = s)
 123                 return GetByteCount(pChars, s.Length, null);
 124         }
 125
 126         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 127         // So if you fix this, fix the others.  Currently those include:
 128         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 129
 130         [CLSCompliant(false)]
 131         public override unsafe int GetByteCount(char* chars, int count)
 132         {
 133             // Validate Parameters
 134             if (chars == null)
 135                 throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
 136
 137             if (count < 0)
 138                 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
 139
 140             // Call it with empty encoder
 141             return GetByteCount(chars, count, null);
 142         }
 143
 144         // Parent method is safe.
 145         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 146         // So if you fix this, fix the others.  Currently those include:
 147         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 148
 149         public override unsafe int GetBytes(string s, int charIndex, int charCount,
 150                                               byte[] bytes, int byteIndex)
 151         {
 152             if (s == null || bytes == null)
 153                 throw new ArgumentNullException(s == null ? nameof(s) : nameof(bytes), SR.ArgumentNull_Array);
 154
 155             if (charIndex < 0 || charCount < 0)
 156                 throw new ArgumentOutOfRangeException(charIndex < 0 ? nameof(charIndex) : nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
 157
 158             if (s.Length - charIndex < charCount)
 159                 throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
 160
 161             if (byteIndex < 0 || byteIndex > bytes.Length)
 162                 throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
 163
 164             int byteCount = bytes.Length - byteIndex;
 165
 166             fixed (char* pChars = s) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
 167                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
 168         }
 169
 170         // Encodes a range of characters in a character array into a range of bytes
 171         // in a byte array. An exception occurs if the byte array is not large
 172         // enough to hold the complete encoding of the characters. The
 173         // GetByteCount method can be used to determine the exact number of
 174         // bytes that will be produced for a given range of characters.
 175         // Alternatively, the GetMaxByteCount method can be used to
 176         // determine the maximum number of bytes that will be produced for a given
 177         // number of characters, regardless of the actual character values.
 178         //
 179         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 180         // So if you fix this, fix the others.  Currently those include:
 181         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 182         // parent method is safe
 183
 184         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
 185                                                byte[] bytes, int byteIndex)
 186         {
 187             // Validate parameters
 188             if (chars == null || bytes == null)
 189                 throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), SR.ArgumentNull_Array);
 190
 191             if (charIndex < 0 || charCount < 0)
 192                 throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
 193
 194             if (chars.Length - charIndex < charCount)
 195                 throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
 196
 197             if (byteIndex < 0 || byteIndex > bytes.Length)
 198                 throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
 199
 200             // If nothing to encode return 0, avoid fixed problem
 201             if (charCount == 0)
 202                 return 0;
 203
 204             // Just call pointer version
 205             int byteCount = bytes.Length - byteIndex;
 206
 207             fixed (char* pChars = chars) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
 208                 // Remember that byteCount is # to decode, not size of array.
 209                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
 210         }
 211
 212         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 213         // So if you fix this, fix the others.  Currently those include:
 214         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 215
 216         [CLSCompliant(false)]
 217         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
 218         {
 219             // Validate Parameters
 220             if (bytes == null || chars == null)
 221                 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
 222
 223             if (charCount < 0 || byteCount < 0)
 224                 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
 225
 226             return GetBytes(chars, charCount, bytes, byteCount, null);
 227         }
 228
 229         // Returns the number of characters produced by decoding a range of bytes
 230         // in a byte array.
 231         //
 232         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 233         // So if you fix this, fix the others.  Currently those include:
 234         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 235         // parent method is safe
 236
 237         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
 238         {
 239             // Validate Parameters
 240             if (bytes == null)
 241                 throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
 242
 243             if (index < 0 || count < 0)
 244                 throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
 245
 246             if (bytes.Length - index < count)
 247                 throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
 248
 249             // If no input just return 0, fixed doesn't like 0 length arrays
 250             if (count == 0)
 251                 return 0;
 252
 253             // Just call pointer version
 254             fixed (byte* pBytes = bytes)
 255                 return GetCharCount(pBytes + index, count, null);
 256         }
 257
 258         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 259         // So if you fix this, fix the others.  Currently those include:
 260         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 261
 262         [CLSCompliant(false)]
 263         public override unsafe int GetCharCount(byte* bytes, int count)
 264         {
 265             // Validate Parameters
 266             if (bytes == null)
 267                 throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
 268
 269             if (count < 0)
 270                 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
 271
 272             return GetCharCount(bytes, count, null);
 273         }
 274
 275         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 276         // So if you fix this, fix the others.  Currently those include:
 277         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 278         // parent method is safe
 279
 280         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
 281                                               char[] chars, int charIndex)
 282         {
 283             // Validate Parameters
 284             if (bytes == null || chars == null)
 285                 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
 286
 287             if (byteIndex < 0 || byteCount < 0)
 288                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
 289
 290             if ( bytes.Length - byteIndex < byteCount)
 291                 throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
 292
 293             if (charIndex < 0 || charIndex > chars.Length)
 294                 throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
 295
 296             // If no input, return 0 & avoid fixed problem
 297             if (byteCount == 0)
 298                 return 0;
 299
 300             // Just call pointer version
 301             int charCount = chars.Length - charIndex;
 302
 303             fixed (byte* pBytes = bytes) fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
 304                 // Remember that charCount is # to decode, not size of array
 305                 return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
 306         }
 307
 308         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 309         // So if you fix this, fix the others.  Currently those include:
 310         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 311
 312         [CLSCompliant(false)]
 313         public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
 314         {
 315             // Validate Parameters
 316             if (bytes == null || chars == null)
 317                 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
 318
 319             if (charCount < 0 || byteCount < 0)
 320                 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
 321
 322             return GetChars(bytes, byteCount, chars, charCount, null);
 323         }
 324
 325         // Returns a string containing the decoded representation of a range of
 326         // bytes in a byte array.
 327         //
 328         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
 329         // So if you fix this, fix the others.  Currently those include:
 330         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
 331         // parent method is safe
 332
 333         public override unsafe string GetString(byte[] bytes, int index, int count)
 334         {
 335             // Validate Parameters
 336             if (bytes == null)
 337                 throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
 338
 339             if (index < 0 || count < 0)
 340                 throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
 341
 342             if (bytes.Length - index < count)
 343                 throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
 344
 345             // Avoid problems with empty input buffer
 346             if (count == 0) return string.Empty;
 347
 348             fixed (byte* pBytes = bytes)
 349                 return string.CreateStringFromEncoding(
 350                     pBytes + index, count, this);
 351         }
 352
 353         //
 354         // End of standard methods copied from EncodingNLS.cs
 355         //
        // Core byte counter behind the public GetByteCount overloads (which pass a null encoder).
        //
        // The count starts at 2 bytes per input char and is then corrected while scanning:
        //   - a char handed to the fallback has its 2 bytes removed again,
        //   - every char produced by the fallback buffer adds 2 bytes,
        //   - a trailing high surrogate is never counted; when encoder is null or
        //     encoder.MustFlush is set it is passed to the fallback and the loop is re-run once.
        //
        // chars   - first char to examine (asserted non-null).
        // count   - number of chars to examine (asserted non-negative).
        // encoder - optional encoder supplying a left-over high surrogate and a fallback buffer.
        //
        // Throws ArgumentOutOfRangeException when count * 2 does not fit in an int, and
        // ArgumentException when the encoder's fallback buffer still holds data or when the
        // fallback for a trailing high surrogate leaves yet another high surrogate pending.
        internal sealed override unsafe int GetByteCount(char* chars, int count, EncoderNLS? encoder)
        {
            Debug.Assert(chars != null, "[UnicodeEncoding.GetByteCount]chars!=null");
            Debug.Assert(count >= 0, "[UnicodeEncoding.GetByteCount]count >=0");

            // Start by assuming each char gets 2 bytes
            int byteCount = count << 1;

            // Check for overflow in byteCount
            // (If they were all invalid chars, this would actually be wrong,
            // but that's a ridiculously large # so we're not concerned about that case)
            if (byteCount < 0)
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);

            char* charStart = chars;
            char* charEnd = chars + count;
            // Pending high surrogate waiting for its low surrogate (0 means none).
            char charLeftOver = (char)0;

            // Set once the trailing left-over char has been sent to the fallback, so a second
            // trailing left-over is reported as a recursive fallback instead of looping forever.
            bool wasHereBefore = false;

            // For fallback we may need a fallback buffer
            EncoderFallbackBuffer? fallbackBuffer = null;
            char* charsForFallback;

            if (encoder != null)
            {
                charLeftOver = encoder._charLeftOver;

                // Assume extra bytes to encode charLeftOver if it existed
                if (charLeftOver > 0)
                    byteCount += 2;

                // We mustn't have left over fallback data when counting
                if (encoder.InternalHasFallbackBuffer)
                {
                    fallbackBuffer = encoder.FallbackBuffer;
                    if (fallbackBuffer.Remaining > 0)
                        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback?.GetType()));

                    // Set our internal fallback interesting things.
                    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                }
            }

            char ch;
        TryAgain:

            // Each iteration takes the next char from the fallback buffer if it has one (ch != 0),
            // otherwise from the input.
            while (((ch = (fallbackBuffer == null) ? (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
            {
                // First unwind any fallback
                if (ch == 0)
                {
                    // No fallback, maybe we can do it fast
#if FASTLOOP
                    // The fast loop reads chars as native 64-bit words, so it is only used when
                    // the encoding's byte order matches the machine's, the pointer is suitably
                    // aligned, and no high surrogate is pending.
                    if ( (bigEndian ^ BitConverter.IsLittleEndian) &&
#if BIT64
                        (unchecked((long)chars) & 7) == 0 &&
#else
                        (unchecked((int)chars) & 3) == 0 &&
#endif
                        charLeftOver == 0)
                    {
                        // Stop 3 chars short of the end: longChars < longEnd then guarantees that
                        // the 4 chars read through longChars all lie before charEnd.
                        ulong* longEnd = (ulong*)(charEnd - 3);

                        // Need new char* so we can check 4 at a time
                        ulong* longChars = (ulong*)chars;

                        while (longChars < longEnd)
                        {
                            // See if we potentially have surrogates (0x8000 bit set)
                            // (We're either big endian on a big endian machine or little endian on
                            // a little endian machine so that'll work)
                            if ((0x8000800080008000 & *longChars) != 0)
                            {
                                // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
                                // 5 bits look like 11011, then it's a high or low surrogate.
                                // We do the & f800 to filter the 5 bits, then ^ d800 so that a surrogate becomes 0.
                                // Note that we expect BMP characters to be more common than surrogates
                                // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
                                ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;

                                // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
                                // but no clue if they're high or low.
                                // If each of the 4 characters are non-zero, then none are surrogates.
                                if ((uTemp & 0xFFFF000000000000) == 0 ||
                                    (uTemp & 0x0000FFFF00000000) == 0 ||
                                    (uTemp & 0x00000000FFFF0000) == 0 ||
                                    (uTemp & 0x000000000000FFFF) == 0)
                                {
                                    // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
                                    // or if there's 1 or 4 surrogates

                                    // If they happen to be high/low/high/low, we may as well continue.  Check the next
                                    // bit to see if it's set (low) or not (high) in the right pattern
                                    if ((0xfc00fc00fc00fc00 & *longChars) !=
                                            (BitConverter.IsLittleEndian ? (ulong)0xdc00d800dc00d800 : (ulong)0xd800dc00d800dc00))
                                    {
                                        // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
                                        // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.

                                        // Drop out to the slow loop to resolve the surrogates
                                        break;
                                    }
                                    // else they are all surrogates in High/Low/High/Low order, so we can use them.
                                }
                                // else none are surrogates, so we can use them.
                            }
                            // else all < 0x8000 so we can use them

                            // We already counted these four chars, go to next long.
                            longChars++;
                        }

                        chars = (char*)longChars;

                        // The fast loop may have consumed everything that was left.
                        if (chars >= charEnd)
                            break;
                    }
#endif // FASTLOOP

                    // No fallback, just get next char
                    ch = *chars;
                    chars++;
                }
                else
                {
                    // This char came from the fallback buffer, so it was not part of the
                    // initial 2-bytes-per-input-char estimate. Count it now.
                    byteCount += 2;
                }

                // Check for high or low surrogates
                if (ch >= 0xd800 && ch <= 0xdfff)
                {
                    // Was it a high surrogate?
                    if (ch <= 0xdbff)
                    {
                        // It's a high surrogate, if we already had a high surrogate do its fallback
                        if (charLeftOver > 0)
                        {
                            // Unwind the current character, this should be safe because we
                            // don't have leftover data in the fallback, so chars must have
                            // advanced already.
                            Debug.Assert(chars > charStart,
                                "[UnicodeEncoding.GetByteCount]Expected chars to have advanced in unexpected high surrogate");
                            chars--;

                            // If previous high surrogate deallocate 2 bytes
                            byteCount -= 2;

                            // Fallback the previous surrogate
                            // Need to initialize fallback buffer?
                            if (fallbackBuffer == null)
                            {
                                if (encoder == null)
                                    fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                                else
                                    fallbackBuffer = encoder.FallbackBuffer;

                                // Set our internal fallback interesting things.
                                fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                            }

                            charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
                            fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                            chars = charsForFallback;

                            // Now no high surrogate left over
                            charLeftOver = (char)0;
                            continue;
                        }

                        // Remember this high surrogate
                        charLeftOver = ch;
                        continue;
                    }


                    // It's a low surrogate
                    if (charLeftOver == 0)
                    {
                        // Expected a previous high surrogate.
                        // Don't count this one (we'll count its fallback if necessary)
                        byteCount -= 2;

                        // fallback this one
                        // Need to initialize fallback buffer?
                        if (fallbackBuffer == null)
                        {
                            if (encoder == null)
                                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = encoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                        }
                        charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
                        fallbackBuffer.InternalFallback(ch, ref charsForFallback);
                        chars = charsForFallback;
                        continue;
                    }

                    // Valid surrogate pair: both halves are already counted, just clear charLeftOver
                    charLeftOver = (char)0;
                    continue;
                }
                else if (charLeftOver > 0)
                {
                    // Expected a low surrogate, but this char is normal

                    // Rewind the current character, fallback previous character.
                    // this should be safe because we don't have leftover data in the
                    // fallback, so chars must have advanced already.
                    Debug.Assert(chars > charStart,
                        "[UnicodeEncoding.GetByteCount]Expected chars to have advanced when expected low surrogate");
                    chars--;

                    // fallback previous chars
                    // Need to initialize fallback buffer?
                    if (fallbackBuffer == null)
                    {
                        if (encoder == null)
                            fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = encoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                    }
                    charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
                    fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                    chars = charsForFallback;

                    // Ignore charLeftOver or throw
                    byteCount -= 2;
                    charLeftOver = (char)0;

                    continue;
                }

                // Ok we had something to add (already counted)
            }

            // Don't allocate space for left over char
            if (charLeftOver > 0)
            {
                byteCount -= 2;

                // If we have to flush, stick it in fallback and try again
                if (encoder == null || encoder.MustFlush)
                {
                    if (wasHereBefore)
                    {
                        // Throw it, using our complete character
                        throw new ArgumentException(
                            SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
                    }
                    else
                    {
                        // Need to initialize fallback buffer?
                        if (fallbackBuffer == null)
                        {
                            if (encoder == null)
                                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = encoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                        }
                        charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
                        fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                        chars = charsForFallback;
                        charLeftOver = (char)0;
                        // Re-enter the loop once to count whatever the fallback produced.
                        wasHereBefore = true;
                        goto TryAgain;
                    }
                }
            }

            // Shouldn't have anything in fallback buffer for GetByteCount
            // (don't have to check _throwOnOverflow for count)
            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
                "[UnicodeEncoding.GetByteCount]Expected empty fallback buffer at end");

            // Don't remember fallbackBuffer.encoder for counting
            return byteCount;
        }
 648
 649         internal sealed override unsafe int GetBytes(
 650             char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS? encoder)
 651         {
                 // Encodes charCount UTF-16 chars starting at 'chars' into the buffer at 'bytes'
                 // (capacity byteCount), writing each char as two bytes in the order selected by
                 // 'bigEndian'.  Unpaired surrogates are handed to the encoder fallback buffer and
                 // whatever chars it yields are fed back through the same loop.
                 //
                 // Returns the number of bytes written (bytes - byteStart).
                 //
                 // When 'encoder' is non-null, a pending high surrogate is picked up from
                 // encoder._charLeftOver on entry, and encoder._charLeftOver / encoder._charsUsed
                 // are stored on exit.  When 'encoder' is null the call always behaves as a flush.
                 //
                 // If the output buffer fills up, ThrowBytesOverflow is called; if it does not
                 // throw, encoding stops early and the char(s) that did not fit are un-consumed.
 652             Debug.Assert(chars != null, "[UnicodeEncoding.GetBytes]chars!=null");
 653             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetBytes]byteCount >=0");
 654             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetBytes]charCount >=0");
 655             Debug.Assert(bytes != null, "[UnicodeEncoding.GetBytes]bytes!=null");
 656
                 // High surrogate that is still waiting for its low surrogate (0 when none)
 657             char charLeftOver = (char)0;
 658             char ch;
                 // Set once a trailing charLeftOver has been fallen back, so that a second
                 // trailing high surrogate throws instead of retrying forever
 659             bool wasHereBefore = false;
 660
 661
 662             byte* byteEnd = bytes + byteCount;
 663             char* charEnd = chars + charCount;
 664             byte* byteStart = bytes;
 665             char* charStart = chars;
 666
 667             // For fallback we may need a fallback buffer
 668             EncoderFallbackBuffer? fallbackBuffer = null;
 669             char* charsForFallback;
 670
 671             // Get our encoder, but don't clear it yet.
 672             if (encoder != null)
 673             {
 674                 charLeftOver = encoder._charLeftOver;
 675
 676                 // Leftover fallback data from an earlier call is an error only when
                     // encoder._throwOnOverflow is set (checked below)
 677                 if (encoder.InternalHasFallbackBuffer)
 678                 {
 679                     // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
 680                     fallbackBuffer = encoder.FallbackBuffer;
 681                     if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
 682                         throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback?.GetType()));
 683
 684                     // Set our internal fallback interesting things.
 685                     fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
 686                 }
 687             }
 688
 689         TryAgain:
                 // Chars queued in the fallback buffer are drained first (ch != 0); only when it
                 // has nothing to offer (ch == 0) do we read the next input char.
 690             while (((ch = (fallbackBuffer == null) ?
 691                         (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) ||
 692                     chars < charEnd)
 693             {
 694                 // First unwind any fallback
 695                 if (ch == 0)
 696                 {
 697                     // No fallback, maybe we can do it fast
 698 #if FASTLOOP
 699                     // The fast path copies raw memory, so it is only usable when the requested byte
                         // order matches the machine's; otherwise each pair of bytes would be backwards.
                         // It also requires 'chars' to be aligned and no high surrogate to be pending.
 700                     if ( (bigEndian ^ BitConverter.IsLittleEndian) &&
 701 #if BIT64
 702                         (unchecked((long)chars) & 7) == 0 &&
 703 #else
 704                         (unchecked((int)chars) & 3) == 0 &&
 705 #endif
 706                         charLeftOver == 0)
 707                     {
 708                         // Each iteration handles 4 chars (one ulong).  The -3 stops the loop once
 709                         // fewer than 4 chars remain, so a whole-ulong read/write never runs past
 710                         // the end of either buffer. (Might not get to use this)
 711                         // The limit is the shorter of the remaining char input and the remaining
                             // byte output expressed in chars ((byteEnd - bytes) >> 1).
 712                         ulong* longEnd = (ulong*)(chars - 3 +
 713                                                   (((byteEnd - bytes) >> 1 < charEnd - chars) ?
 714                                                     (byteEnd - bytes) >> 1 : charEnd - chars));
 715
 716                         // Reinterpret as ulong* so we can check and copy 4 chars at a time
 717                         ulong* longChars = (ulong*)chars;
 718                         ulong* longBytes = (ulong*)bytes;
 719
 720                         while (longChars < longEnd)
 721                         {
 722                             // See if we potentially have surrogates (0x8000 bit set)
 723                             // (We're either big endian on a big endian machine or little endian on
 724                             // a little endian machine so that'll work)
 725                             if ((0x8000800080008000 & *longChars) != 0)
 726                             {
 727                                 // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
 728                                 // 5 bits look like 11011, then it's a high or low surrogate.
 729                                 // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
 730                                 // Note that we expect BMP characters to be more common than surrogates
 731                                 // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
 732                                 ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;
 733
 734                                 // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
 735                                 // but no clue if they're high or low.
 736                                 // If each of the 4 characters are non-zero, then none are surrogates.
 737                                 if ((uTemp & 0xFFFF000000000000) == 0 ||
 738                                     (uTemp & 0x0000FFFF00000000) == 0 ||
 739                                     (uTemp & 0x00000000FFFF0000) == 0 ||
 740                                     (uTemp & 0x000000000000FFFF) == 0)
 741                                 {
 742                                     // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
 743                                     // or if there's 1 or 4 surrogates
 744
 745                                     // If they happen to be high/low/high/low, we may as well continue.  Check the next
 746                                     // bit to see if it's set (low) or not (high) in the right pattern
 747                                     if ((0xfc00fc00fc00fc00 & *longChars) !=
 748                                             (BitConverter.IsLittleEndian ? (ulong)0xdc00d800dc00d800 : (ulong)0xd800dc00d800dc00))
 749                                     {
 750                                         // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
 751                                         // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
 752
 753                                         // Drop out to the slow loop to resolve the surrogates
 754                                         break;
 755                                     }
 756                                     // else they are all surrogates in High/Low/High/Low order, so we can use them.
 757                                 }
 758                                 // else none are surrogates, so we can use them.
 759                             }
 760                             // else all < 0x8000 so we can use them
 761
 762                             // We can use these 4 chars.  Only 'chars' was checked for alignment above,
                                 // so the destination is written with an unaligned store.
 763                             Unsafe.WriteUnaligned<ulong>(longBytes, *longChars);
 764                             longChars++;
 765                             longBytes++;
 766                         }
 767
 768                         chars = (char*)longChars;
 769                         bytes = (byte*)longBytes;
 770
                             // The fast loop consumed all of the input: leave the outer loop
 771                         if (chars >= charEnd)
 772                             break;
 773                     }
 774 #endif // FASTLOOP
 775
 776                     // No fallback, just get next char
 777                     ch = *chars;
 778                     chars++;
 779                 }
 780
 781                 // Check for high or low surrogates
 782                 if (ch >= 0xd800 && ch <= 0xdfff)
 783                 {
 784                     // Was it a high surrogate?
 785                     if (ch <= 0xdbff)
 786                     {
 787                         // It's a high surrogate, see if we already had a high surrogate
 788                         if (charLeftOver > 0)
 789                         {
 790                             // Unwind the current character, this should be safe because we
 791                             // don't have leftover data in the fallback, so chars must have
 792                             // advanced already.
 793                             Debug.Assert(chars > charStart,
 794                                 "[UnicodeEncoding.GetBytes]Expected chars to have advanced in unexpected high surrogate");
 795                             chars--;
 796
 797                             // Fallback the previous surrogate
 798                             // Might need to create our fallback buffer
 799                             if (fallbackBuffer == null)
 800                             {
 801                                 if (encoder == null)
 802                                     fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 803                                 else
 804                                     fallbackBuffer = encoder.FallbackBuffer;
 805
 806                                 // Set our internal fallback interesting things.
 807                                 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
 808                             }
 809
 810                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
 811                             fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 812                             chars = charsForFallback;
 813
                                 // The current high surrogate was rewound above, so it is re-read on a later iteration
 814                             charLeftOver = (char)0;
 815                             continue;
 816                         }
 817
 818                         // Remember this high surrogate
 819                         charLeftOver = ch;
 820                         continue;
 821                     }
 822
 823                     // It's a low surrogate
 824                     if (charLeftOver == 0)
 825                     {
 826                         // No preceding high surrogate, so we'll fall back this one
 827                         // Might need to create our fallback buffer
 828                         if (fallbackBuffer == null)
 829                         {
 830                             if (encoder == null)
 831                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 832                             else
 833                                 fallbackBuffer = encoder.FallbackBuffer;
 834
 835                             // Set our internal fallback interesting things.
 836                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
 837                         }
 838
 839                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
 840                         fallbackBuffer.InternalFallback(ch, ref charsForFallback);
 841                         chars = charsForFallback;
 842                         continue;
 843                     }
 844
 845                     // Valid surrogate pair, add our charLeftOver (the pair needs 4 bytes in total)
 846                     if (bytes + 3 >= byteEnd)
 847                     {
 848                         // Not enough room to add this surrogate pair
 849                         if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
 850                         {
 851                             // These must have both been from the fallbacks.
 852                             // Both of these MUST have been from a fallback because if the 1st wasn't
 853                             // from a fallback, then a high surrogate followed by an illegal char
 854                             // would've caused the high surrogate to fall back.  If a high surrogate
 855                             // fell back, then it was consumed and both chars came from the fallback.
 856                             fallbackBuffer.MovePrevious();                     // Didn't use either fallback surrogate
 857                             fallbackBuffer.MovePrevious();
 858                         }
 859                         else
 860                         {
 861                             // If we don't have enough room, then either we should've advanced a while
 862                             // or we should have bytes==byteStart and throw below
 863                             Debug.Assert(chars > charStart + 1 || bytes == byteStart,
 864                                 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
 865                             chars -= 2;                                        // Didn't use either surrogate
 866                         }
 867                         ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
 868                         charLeftOver = (char)0;                             // we'll retry it later
 869                         break;                                               // Didn't throw, but stop 'til next time.
 870                     }
 871
                         // Write the high surrogate here; the low surrogate (ch) is written by the
                         // common "char to add" code further down.
 872                     if (bigEndian)
 873                     {
 874                         *(bytes++) = (byte)(charLeftOver >> 8);
 875                         *(bytes++) = (byte)charLeftOver;
 876                     }
 877                     else
 878                     {
 879                         *(bytes++) = (byte)charLeftOver;
 880                         *(bytes++) = (byte)(charLeftOver >> 8);
 881                     }
 882
 883                     charLeftOver = (char)0;
 884                 }
 885                 else if (charLeftOver > 0)
 886                 {
 887                     // Expected a low surrogate, but this char is normal
 888
 889                     // Rewind the current character, fallback previous character.
 890                     // this should be safe because we don't have leftover data in the
 891                     // fallback, so chars must have advanced already.
 892                     Debug.Assert(chars > charStart,
 893                         "[UnicodeEncoding.GetBytes]Expected chars to have advanced after expecting low surrogate");
 894                     chars--;
 895
 896                     // fallback previous chars
 897                     // Might need to create our fallback buffer
 898                     if (fallbackBuffer == null)
 899                     {
 900                         if (encoder == null)
 901                             fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 902                         else
 903                             fallbackBuffer = encoder.FallbackBuffer;
 904
 905                         // Set our internal fallback interesting things.
 906                         fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
 907                     }
 908
 909                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
 910                     fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 911                     chars = charsForFallback;
 912
 913                     // Ignore charLeftOver or throw
 914                     charLeftOver = (char)0;
 915                     continue;
 916                 }
 917
 918                 // Ok, we have a char to add (needs 2 bytes)
 919                 if (bytes + 1 >= byteEnd)
 920                 {
 921                     // Couldn't add this char
 922                     if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
 923                         fallbackBuffer.MovePrevious();                     // Not using this fallback char
 924                     else
 925                     {
 926                         // Lonely charLeftOver (from previous call) would've been caught up above,
 927                         // so this must be a case where we've already read an input char.
 928                         Debug.Assert(chars > charStart,
 929                             "[UnicodeEncoding.GetBytes]Expected chars to have advanced for failed fallback");
 930                         chars--;                                         // Not using this char
 931                     }
 932                     ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
 933                     break;                                               // didn't throw, just stop
 934                 }
 935
                     // Emit the char as two bytes in the requested byte order
 936                 if (bigEndian)
 937                 {
 938                     *(bytes++) = (byte)(ch >> 8);
 939                     *(bytes++) = (byte)ch;
 940                 }
 941                 else
 942                 {
 943                     *(bytes++) = (byte)ch;
 944                     *(bytes++) = (byte)(ch >> 8);
 945                 }
 946             }
 947
 948             // A high surrogate is still pending after the loop
 949             if (charLeftOver > 0)
 950             {
 951                 // If we are flushing (no encoder, or encoder.MustFlush) we need to fall this back
 952                 if (encoder == null || encoder.MustFlush)
 953                 {
 954                     if (wasHereBefore)
 955                     {
 956                         // Throw it, using our complete character
 957                         throw new ArgumentException(
 958                             SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
 959                     }
 960                     else
 961                     {
 962                         // If we have to flush, stick it in fallback and try again
 963                         // Might need to create our fallback buffer
 964                         if (fallbackBuffer == null)
 965                         {
 966                             if (encoder == null)
 967                                 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
 968                             else
 969                                 fallbackBuffer = encoder.FallbackBuffer;
 970
 971                             // Set our internal fallback interesting things.
 972                             fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
 973                         }
 974
 975                         // Fall back the lone high surrogate; any chars it queues are read by the loop at TryAgain.
 976                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
 977                         fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
 978                         chars = charsForFallback;
 979
 980                         charLeftOver = (char)0;
 981                         wasHereBefore = true;
 982                         goto TryAgain;
 983                     }
 984                 }
 985             }
 986
 987             // Store the pending high surrogate (if any) and the number of chars consumed in the encoder
 988             if (encoder != null)
 989             {
 990                 encoder._charLeftOver = charLeftOver;
 991                 encoder._charsUsed = (int)(chars - charStart);
 992             }
 993
 994             // Remember charLeftOver if we must, or clear it if we're flushing
 995             // (charLeftOver should be 0 if we're flushing)
 996             Debug.Assert((encoder != null && !encoder.MustFlush) || charLeftOver == (char)0,
 997                 "[UnicodeEncoding.GetBytes] Expected no left over characters if flushing");
 998
 999             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
1000                 encoder == null || !encoder._throwOnOverflow,
1001                 "[UnicodeEncoding.GetBytes]Expected empty fallback buffer if not converting");
1002
1003             return (int)(bytes - byteStart);
1004         }
1005
1006         internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS? baseDecoder)
1007         {
1008             Debug.Assert(bytes != null, "[UnicodeEncoding.GetCharCount]bytes!=null");
1009             Debug.Assert(count >= 0, "[UnicodeEncoding.GetCharCount]count >=0");
1010
1011             UnicodeEncoding.Decoder? decoder = (UnicodeEncoding.Decoder?)baseDecoder;
1012
1013             byte* byteEnd = bytes + count;
1014             byte* byteStart = bytes;
1015
1016             // Need last vars
1017             int lastByte = -1;
1018             char lastChar = (char)0;
1019
1020             // Start by assuming same # of chars as bytes
1021             int charCount = count >> 1;
1022
1023             // For fallback we may need a fallback buffer
1024             DecoderFallbackBuffer? fallbackBuffer = null;
1025
1026             if (decoder != null)
1027             {
1028                 lastByte = decoder.lastByte;
1029                 lastChar = decoder.lastChar;
1030
1031                 // Assume extra char if last char was around
1032                 if (lastChar > 0)
1033                     charCount++;
1034
1035                 // Assume extra char if extra last byte makes up odd # of input bytes
1036                 if (lastByte >= 0 && (count & 1) == 1)
1037                 {
1038                     charCount++;
1039                 }
1040
1041                 // Shouldn't have anything in fallback buffer for GetCharCount
1042                 // (don't have to check _throwOnOverflow for count)
1043                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1044                     "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at start");
1045             }
1046
1047             while (bytes < byteEnd)
1048             {
1049                 // If we're aligned then maybe we can do it fast
1050                 // That'll hurt if we're unaligned because we'll always test but never be aligned
1051 #if FASTLOOP
1052                 if ((bigEndian ^ BitConverter.IsLittleEndian) &&
1053 #if BIT64
1054                     (unchecked((long)bytes) & 7) == 0 &&
1055 #else
1056                     (unchecked((int)bytes) & 3) == 0 &&
1057 #endif // BIT64
1058                     lastByte == -1 && lastChar == 0)
1059                 {
1060                     // Need -1 to check 2 at a time.  If we have an even #, longBytes will go
1061                     // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longBytes
1062                     // will go from longEnd - 1 long to longEnd. (Might not get to use this)
1063                     ulong* longEnd = (ulong*)(byteEnd - 7);
1064
1065                     // Need new char* so we can check 4 at a time
1066                     ulong* longBytes = (ulong*)bytes;
1067
1068                     while (longBytes < longEnd)
1069                     {
1070                         // See if we potentially have surrogates (0x8000 bit set)
1071                         // (We're either big endian on a big endian machine or little endian on
1072                         // a little endian machine so that'll work)
1073                         if ((0x8000800080008000 & *longBytes) != 0)
1074                         {
1075                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1076                             // 5 bits looks like 11011, then its a high or low surrogate.
1077                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1078                             // Note that we expect BMP characters to be more common than surrogates
1079                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1080                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1081
1082                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1083                             // but no clue if they're high or low.
1084                             // If each of the 4 characters are non-zero, then none are surrogates.
1085                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1086                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1087                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1088                                 (uTemp & 0x000000000000FFFF) == 0)
1089                             {
1090                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1091                                 // or if there's 1 or 4 surrogates
1092
1093                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1094                                 // bit to see if its set (low) or not (high) in the right pattern
1095                                 if ((0xfc00fc00fc00fc00 & *longBytes) !=
1096                                         (BitConverter.IsLittleEndian ? (ulong)0xdc00d800dc00d800 : (ulong)0xd800dc00d800dc00))
1097                                 {
1098                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1099                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1100
1101                                     // Drop out to the slow loop to resolve the surrogates
1102                                     break;
1103                                 }
1104                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1105                             }
1106                             // else none are surrogates, so we can use them.
1107                         }
1108                         // else all < 0x8000 so we can use them
1109
1110                         // We can use these 4 chars.
1111                         longBytes++;
1112                     }
1113
1114                     bytes = (byte*)longBytes;
1115
1116                     if (bytes >= byteEnd)
1117                         break;
1118                 }
1119 #endif // FASTLOOP
1120
1121                 // Get 1st byte
1122                 if (lastByte < 0)
1123                 {
1124                     lastByte = *bytes++;
1125                     if (bytes >= byteEnd) break;
1126                 }
1127
1128                 // Get full char
1129                 char ch;
1130                 if (bigEndian)
1131                 {
1132                     ch = (char)(lastByte << 8 | *(bytes++));
1133                 }
1134                 else
1135                 {
1136                     ch = (char)(*(bytes++) << 8 | lastByte);
1137                 }
1138                 lastByte = -1;
1139
1140                 // See if the char's valid
1141                 if (ch >= 0xd800 && ch <= 0xdfff)
1142                 {
1143                     // Was it a high surrogate?
1144                     if (ch <= 0xdbff)
1145                     {
1146                         // Its a high surrogate, if we had one then do fallback for previous one
1147                         if (lastChar > 0)
1148                         {
1149                             // Ignore previous bad high surrogate
1150                             charCount--;
1151
1152                             // Get fallback for previous high surrogate
1153                             // Note we have to reconstruct bytes because some may have been in decoder
1154                             byte[]? byteBuffer = null;
1155                             if (bigEndian)
1156                             {
1157                                 byteBuffer = new byte[]
1158                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1159                             }
1160                             else
1161                             {
1162                                 byteBuffer = new byte[]
1163                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1164                             }
1165
1166                             if (fallbackBuffer == null)
1167                             {
1168                                 if (decoder == null)
1169                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1170                                 else
1171                                     fallbackBuffer = decoder.FallbackBuffer;
1172
1173                                 // Set our internal fallback interesting things.
1174                                 fallbackBuffer.InternalInitialize(byteStart, null);
1175                             }
1176
1177                             // Get fallback.
1178                             charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1179                         }
1180
1181                         // Ignore the last one which fell back already,
1182                         // and remember the new high surrogate
1183                         lastChar = ch;
1184                         continue;
1185                     }
1186
1187                     // Its a low surrogate
1188                     if (lastChar == 0)
1189                     {
1190                         // Expected a previous high surrogate
1191                         charCount--;
1192
1193                         // Get fallback for this low surrogate
1194                         // Note we have to reconstruct bytes because some may have been in decoder
1195                         byte[]? byteBuffer = null;
1196                         if (bigEndian)
1197                         {
1198                             byteBuffer = new byte[]
1199                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1200                         }
1201                         else
1202                         {
1203                             byteBuffer = new byte[]
1204                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1205                         }
1206
1207                         if (fallbackBuffer == null)
1208                         {
1209                             if (decoder == null)
1210                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1211                             else
1212                                 fallbackBuffer = decoder.FallbackBuffer;
1213
1214                             // Set our internal fallback interesting things.
1215                             fallbackBuffer.InternalInitialize(byteStart, null);
1216                         }
1217
1218                         charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1219
1220                         // Ignore this one (we already did its fallback)
1221                         continue;
1222                     }
1223
1224                     // Valid surrogate pair, already counted.
1225                     lastChar = (char)0;
1226                 }
1227                 else if (lastChar > 0)
1228                 {
1229                     // Had a high surrogate, expected a low surrogate
1230                     // Un-count the last high surrogate
1231                     charCount--;
1232
1233                     // fall back the high surrogate.
1234                     byte[]? byteBuffer = null;
1235                     if (bigEndian)
1236                     {
1237                         byteBuffer = new byte[]
1238                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1239                     }
1240                     else
1241                     {
1242                         byteBuffer = new byte[]
1243                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1244                     }
1245
1246                     if (fallbackBuffer == null)
1247                     {
1248                         if (decoder == null)
1249                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1250                         else
1251                             fallbackBuffer = decoder.FallbackBuffer;
1252
1253                         // Set our internal fallback interesting things.
1254                         fallbackBuffer.InternalInitialize(byteStart, null);
1255                     }
1256
1257                     // Already subtracted high surrogate
1258                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1259
1260                     // Not left over now, clear previous high surrogate and continue to add current char
1261                     lastChar = (char)0;
1262                 }
1263
1264                 // Valid char, already counted
1265             }
1266
1267             // Extra space if we can't use decoder
1268             if (decoder == null || decoder.MustFlush)
1269             {
1270                 if (lastChar > 0)
1271                 {
1272                     // No hanging high surrogates allowed, do fallback and remove count for it
1273                     charCount--;
1274                     byte[]? byteBuffer = null;
1275                     if (bigEndian)
1276                     {
1277                         byteBuffer = new byte[]
1278                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1279                     }
1280                     else
1281                     {
1282                         byteBuffer = new byte[]
1283                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1284                     }
1285
1286                     if (fallbackBuffer == null)
1287                     {
1288                         if (decoder == null)
1289                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1290                         else
1291                             fallbackBuffer = decoder.FallbackBuffer;
1292
1293                         // Set our internal fallback interesting things.
1294                         fallbackBuffer.InternalInitialize(byteStart, null);
1295                     }
1296
1297                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1298
1299                     lastChar = (char)0;
1300                 }
1301
1302                 if (lastByte >= 0)
1303                 {
1304                     if (fallbackBuffer == null)
1305                     {
1306                         if (decoder == null)
1307                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1308                         else
1309                             fallbackBuffer = decoder.FallbackBuffer;
1310
1311                         // Set our internal fallback interesting things.
1312                         fallbackBuffer.InternalInitialize(byteStart, null);
1313                     }
1314
1315                     // No hanging odd bytes allowed if must flush
1316                     charCount += fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes);
1317                     lastByte = -1;
1318                 }
1319             }
1320
1321             // If we had a high surrogate left over, we can't count it
1322             if (lastChar > 0)
1323                 charCount--;
1324
1325             // Shouldn't have anything in fallback buffer for GetCharCount
1326             // (don't have to check _throwOnOverflow for count)
1327             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1328                 "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at end");
1329
1330             return charCount;
1331         }
1332
1333         internal sealed override unsafe int GetChars(
1334             byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS? baseDecoder)
1335         {
                 // Decodes up to byteCount bytes starting at 'bytes' as 16-bit code units (byte order
                 // selected by the bigEndian field) into the buffer at 'chars', which has room for
                 // charCount chars, and returns the number of chars written.
                 //
                 // Unpaired surrogates, and a trailing odd byte when flushing, are handed to the decoder
                 // fallback buffer. When baseDecoder is non-null, a pending odd byte / high surrogate is
                 // read from it on entry, and the leftover state plus the number of bytes consumed
                 // (_bytesUsed) are stored back into it on exit.
                 //
                 // When output space runs out inside the main loop, the bytes of the current char are
                 // un-consumed and ThrowCharsOverflow is called; if it returns rather than throwing,
                 // decoding stops at that point.
1336             Debug.Assert(chars != null, "[UnicodeEncoding.GetChars]chars!=null");
1337             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetChars]byteCount >=0");
1338             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetChars]charCount >=0");
1339             Debug.Assert(bytes != null, "[UnicodeEncoding.GetChars]bytes!=null");
1340
1341             UnicodeEncoding.Decoder? decoder = (UnicodeEncoding.Decoder?)baseDecoder;
1342
1343             // Carry-over state: lastByte is a pending first byte of a pair (-1 = none),
                 // lastChar is a pending high surrogate (0 = none)
1344             int lastByte = -1;
1345             char lastChar = (char)0;
1346
1347             // Get our decoder (but don't clear it yet)
1348             if (decoder != null)
1349             {
1350                 lastByte = decoder.lastByte;
1351                 lastChar = decoder.lastChar;
1352
1353                 // Shouldn't have anything in fallback buffer for GetChars
1354                 // (don't have to check _throwOnOverflow for chars)
1355                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1356                     "[UnicodeEncoding.GetChars]Expected empty fallback buffer at start");
1357             }
1358
1359             // For fallback we may need a fallback buffer (created lazily, on the first invalid sequence)
1360             DecoderFallbackBuffer? fallbackBuffer = null;
1361             char* charsForFallback;
1362
                 // Remember the buffer bounds; bytes and chars themselves are advanced as cursors
1363             byte* byteEnd = bytes + byteCount;
1364             char* charEnd = chars + charCount;
1365             byte* byteStart = bytes;
1366             char* charStart = chars;
1367
1368             while (bytes < byteEnd)
1369             {
1370                 // If we're aligned then maybe we can do it fast
1371                 // That'll hurt if we're unaligned because we'll always test but never be aligned
1372 #if FASTLOOP
                     // The fast loop is only entered when the encoding's byte order matches the machine's
                     // (bigEndian XOR IsLittleEndian), the output pointer is aligned, and there is no
                     // pending odd byte or high surrogate.
1373                 if ((bigEndian ^ BitConverter.IsLittleEndian) &&
1374 #if BIT64
1375                     (unchecked((long)chars) & 7) == 0 &&
1376 #else
1377                     (unchecked((int)chars) & 3) == 0 &&
1378 #endif
1379                     lastByte == -1 && lastChar == 0)
1380                 {
1381                     // Blocks of 8 bytes (4 chars) may only start at addresses below longEnd.
1382                     // The limit added here is the smaller of the bytes remaining and twice the
1383                     // char slots remaining; subtracting 7 keeps each whole 8-byte read and write
1384                     // inside that limit while longBytes < longEnd.
1385                     ulong* longEnd = (ulong*)(bytes - 7 +
1386                                                 (((byteEnd - bytes) >> 1 < charEnd - chars) ?
1387                                                   (byteEnd - bytes) : (charEnd - chars) << 1));
1388
1389                     // Need new char* so we can check 4 at a time
1390                     ulong* longBytes = (ulong*)bytes;
1391                     ulong* longChars = (ulong*)chars;
1392
1393                     while (longBytes < longEnd)
1394                     {
1395                         // See if we potentially have surrogates (0x8000 bit set)
1396                         // (We're either big endian on a big endian machine or little endian on
1397                         // a little endian machine so that'll work)
1398                         if ((0x8000800080008000 & *longBytes) != 0)
1399                         {
1400                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1401                             // 5 bits looks like 11011, then it's a high or low surrogate.
1402                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1403                             // Note that we expect BMP characters to be more common than surrogates
1404                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1405                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1406
1407                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1408                             // but no clue if they're high or low.
1409                             // If each of the 4 characters are non-zero, then none are surrogates.
1410                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1411                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1412                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1413                                 (uTemp & 0x000000000000FFFF) == 0)
1414                             {
1415                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1416                                 // or if there's 1 or 4 surrogates
1417
1418                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1419                                 // bit to see if it's set (low) or not (high) in the right pattern
1420                                 if ((0xfc00fc00fc00fc00 & *longBytes) !=
1421                                         (BitConverter.IsLittleEndian ? (ulong)0xdc00d800dc00d800 : (ulong)0xd800dc00d800dc00))
1422                                 {
1423                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1424                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1425
1426                                     // Drop out to the slow loop to resolve the surrogates
1427                                     break;
1428                                 }
1429                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1430                             }
1431                             // else none are surrogates, so we can use them.
1432                         }
1433                         // else all < 0x8000 so we can use them
1434
1435                         // We can use these 4 chars: copy the 8 bytes unchanged and advance both cursors.
1436                         Unsafe.WriteUnaligned<ulong>(longChars, *longBytes);
1437                         longBytes++;
1438                         longChars++;
1439                     }
1440
                         // Hand the cursors back to the byte-at-a-time path, which resumes where the fast loop stopped
1441                     chars = (char*)longChars;
1442                     bytes = (byte*)longBytes;
1443
1444                     if (bytes >= byteEnd)
1445                         break;
1446                 }
1447 #endif // FASTLOOP
1448
1449                 // Get 1st byte of the pair and go around again for the 2nd
1450                 if (lastByte < 0)
1451                 {
1452                     lastByte = *bytes++;
1453                     continue;
1454                 }
1455
1456                 // Get full char: combine the pending byte with the next one in the encoding's byte order
1457                 char ch;
1458                 if (bigEndian)
1459                 {
1460                     ch = (char)(lastByte << 8 | *(bytes++));
1461                 }
1462                 else
1463                 {
1464                     ch = (char)(*(bytes++) << 8 | lastByte);
1465                 }
1466                 lastByte = -1;
1467
1468                 // See if the char's valid
1469                 if (ch >= 0xd800 && ch <= 0xdfff)
1470                 {
1471                     // Was it a high surrogate?
1472                     if (ch <= 0xdbff)
1473                     {
1474                         // It's a high surrogate, if we had one then do fallback for previous one
1475                         if (lastChar > 0)
1476                         {
1477                             // Get fallback for previous high surrogate
1478                             // Note we have to reconstruct bytes because some may have been in decoder
1479                             byte[]? byteBuffer = null;
1480                             if (bigEndian)
1481                             {
1482                                 byteBuffer = new byte[]
1483                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1484                             }
1485                             else
1486                             {
1487                                 byteBuffer = new byte[]
1488                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1489                             }
1490
1491                             if (fallbackBuffer == null)
1492                             {
1493                                 if (decoder == null)
1494                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1495                                 else
1496                                     fallbackBuffer = decoder.FallbackBuffer;
1497
1498                                 // Set our internal fallback interesting things.
1499                                 fallbackBuffer.InternalInitialize(byteStart, charEnd);
1500                             }
1501
1502                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1503                             bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1504                             chars = charsForFallback;
1505
1506                             if (!fallbackResult)
1507                             {
1508                                 // couldn't fall back lonely surrogate
1509                                 // We either advanced bytes or chars should == charStart and throw below
1510                                 Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1511                                     "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (bad surrogate)");
1512                                 bytes -= 2;                                       // didn't use these 2 bytes
1513                                 fallbackBuffer.InternalReset();
1514                                 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1515                                 break;                                          // couldn't fallback but didn't throw
1516                             }
1517                         }
1518
1519                         // Ignore the previous high surrogate which fell back already,
1520                         // yet remember the current high surrogate for next time.
1521                         lastChar = ch;
1522                         continue;
1523                     }
1524
1525                     // It's a low surrogate
1526                     if (lastChar == 0)
1527                     {
1528                         // Expected a previous high surrogate
1529                         // Get fallback for this low surrogate
1530                         // Note we have to reconstruct bytes because some may have been in decoder
1531                         byte[]? byteBuffer = null;
1532                         if (bigEndian)
1533                         {
1534                             byteBuffer = new byte[]
1535                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1536                         }
1537                         else
1538                         {
1539                             byteBuffer = new byte[]
1540                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1541                         }
1542
1543                         if (fallbackBuffer == null)
1544                         {
1545                             if (decoder == null)
1546                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1547                             else
1548                                 fallbackBuffer = decoder.FallbackBuffer;
1549
1550                             // Set our internal fallback interesting things.
1551                             fallbackBuffer.InternalInitialize(byteStart, charEnd);
1552                         }
1553
1554                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1555                         bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1556                         chars = charsForFallback;
1557
1558                         if (!fallbackResult)
1559                         {
1560                             // couldn't fall back lonely surrogate
1561                             // We either advanced bytes or chars should == charStart and throw below
1562                             Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1563                                 "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (lonely surrogate)");
1564                             bytes -= 2;                                       // didn't use these 2 bytes
1565                             fallbackBuffer.InternalReset();
1566                             ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1567                             break;                                          // couldn't fallback but didn't throw
1568                         }
1569
1570                         // Didn't throw, ignore this one (we already did its fallback)
1571                         continue;
1572                     }
1573
1574                     // Valid surrogate pair, add our lastChar (will need 2 chars)
1575                     if (chars >= charEnd - 1)
1576                     {
1577                         // couldn't find room for this surrogate pair
1578                         // We either advanced bytes or chars should == charStart and throw below
1579                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1580                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (surrogate pair)");
1581                         bytes -= 2;                                       // didn't use these 2 bytes
1582                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1583                         // Leave lastChar for next call to Convert()
1584                         break;                                          // couldn't fallback but didn't throw
1585                     }
1586
                         // Write the high surrogate here; the low surrogate (ch) is written by the
                         // common "add it" path at the bottom of the loop.
1587                     *chars++ = lastChar;
1588                     lastChar = (char)0;
1589                 }
1590                 else if (lastChar > 0)
1591                 {
1592                     // Had a high surrogate, expected a low surrogate, fall back the high surrogate.
1593                     byte[]? byteBuffer = null;
1594                     if (bigEndian)
1595                     {
1596                         byteBuffer = new byte[]
1597                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1598                     }
1599                     else
1600                     {
1601                         byteBuffer = new byte[]
1602                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1603                     }
1604
1605                     if (fallbackBuffer == null)
1606                     {
1607                         if (decoder == null)
1608                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1609                         else
1610                             fallbackBuffer = decoder.FallbackBuffer;
1611
1612                         // Set our internal fallback interesting things.
1613                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1614                     }
1615
1616                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1617                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1618                     chars = charsForFallback;
1619
1620                     if (!fallbackResult)
1621                     {
1622                         // couldn't fall back high surrogate, or char that would be next
1623                         // We either advanced bytes or chars should == charStart and throw below
1624                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1625                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (no low surrogate)");
1626                         bytes -= 2;                                       // didn't use these 2 bytes
1627                         fallbackBuffer.InternalReset();
1628                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1629                         break;                                          // couldn't fallback but didn't throw
1630                     }
1631
1632                     // Not left over now, clear previous high surrogate and continue to add current char
1633                     lastChar = (char)0;
1634                 }
1635
1636                 // Valid char, room for it?
1637                 if (chars >= charEnd)
1638                 {
1639                     // No room left in the output for this char
1640                     // We either advanced bytes or chars should == charStart and throw below
1641                     Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1642                         "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (normal)");
1643                     bytes -= 2;                                       // didn't use these bytes
1644                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1645                     break;                                          // couldn't fallback but didn't throw
1646                 }
1647
1648                 // add it
1649                 *chars++ = ch;
1650             }
1651
1652             // With no decoder, or when the decoder must flush, leftover state cannot be carried over,
                 // so a hanging high surrogate and/or odd byte is sent to the fallback now
1653             if (decoder == null || decoder.MustFlush)
1654             {
1655                 if (lastChar > 0)
1656                 {
1657                     // No hanging high surrogates allowed, do fallback for it
1658                     byte[]? byteBuffer = null;
1659                     if (bigEndian)
1660                     {
1661                         byteBuffer = new byte[]
1662                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1663                     }
1664                     else
1665                     {
1666                         byteBuffer = new byte[]
1667                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1668                     }
1669
1670                     if (fallbackBuffer == null)
1671                     {
1672                         if (decoder == null)
1673                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1674                         else
1675                             fallbackBuffer = decoder.FallbackBuffer;
1676
1677                         // Set our internal fallback interesting things.
1678                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1679                     }
1680
1681                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1682                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1683                     chars = charsForFallback;
1684
1685                     if (!fallbackResult)
1686                     {
1687                         // 2 bytes couldn't fall back
1688                         // We either advanced bytes or chars should == charStart and throw below
1689                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1690                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (decoder)");
                             // Rewind bytes only for the ThrowCharsOverflow call; it is restored right after
1691                         bytes -= 2;                                       // didn't use these bytes
1692                         if (lastByte >= 0)
1693                             bytes--;                                    // had an extra last byte hanging around
1694                         fallbackBuffer.InternalReset();
1695                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1696                         // We'll remember these in our decoder though
1697                         bytes += 2;
1698                         if (lastByte >= 0)
1699                             bytes++;
1700                         goto End;
1701                     }
1702
1703                     // done with this one
1704                     lastChar = (char)0;
1705                 }
1706
1707                 if (lastByte >= 0)
1708                 {
1709                     if (fallbackBuffer == null)
1710                     {
1711                         if (decoder == null)
1712                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1713                         else
1714                             fallbackBuffer = decoder.FallbackBuffer;
1715
1716                         // Set our internal fallback interesting things.
1717                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1718                     }
1719
1720                     // No hanging odd bytes allowed if must flush
1721                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1722                     bool fallbackResult = fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref charsForFallback);
1723                     chars = charsForFallback;
1724
1725                     if (!fallbackResult)
1726                     {
1727                         // odd byte couldn't fall back
1728                         bytes--;                                        // didn't use this byte
1729                         fallbackBuffer.InternalReset();
1730                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1731                         // didn't throw, but we'll remember it in the decoder
1732                         bytes++;
1733                         goto End;
1734                     }
1735
1736                     // Didn't fail, clear buffer
1737                     lastByte = -1;
1738                 }
1739             }
1740
1741         End:
1742
1743             // Store the bytes consumed and any leftover state back into the decoder, if there is one
1744             if (decoder != null)
1745             {
1746                 Debug.Assert((decoder.MustFlush == false) || ((lastChar == (char)0) && (lastByte == -1)),
1747                     "[UnicodeEncoding.GetChars] Expected no left over chars or bytes if flushing"
1748                     //                    + " " + ((int)lastChar).ToString("X4") + " " + lastByte.ToString("X2")
1749                     );
1750
1751                 decoder._bytesUsed = (int)(bytes - byteStart);
1752                 decoder.lastChar = lastChar;
1753                 decoder.lastByte = lastByte;
1754             }
1755
1756             // Shouldn't have anything in fallback buffer for GetChars
1757             // (don't have to check _throwOnOverflow for count or chars)
1758             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1759                 "[UnicodeEncoding.GetChars]Expected empty fallback buffer at end");
1760
                 // The result is the number of chars written to the output buffer
1761             return (int)(chars - charStart);
1762         }
1763
1764
1765         public override System.Text.Encoder GetEncoder()
1766             // Each call constructs a new EncoderNLS bound to this encoding instance;
1767             // nothing is cached or shared between calls here.
1768             => new EncoderNLS(this);
1769
1770
1771         public override System.Text.Decoder GetDecoder()
1772             // Each call constructs a new nested UnicodeEncoding.Decoder bound to this
1773             // encoding instance; nothing is cached or shared between calls here.
1774             => new UnicodeEncoding.Decoder(this);
1775
1776
1777         public override byte[] GetPreamble()
1778         {
1779             // No byte order mark was requested: the preamble is empty, and the shared
1780             // empty array can be returned because it has no elements to modify.
1781             if (!byteOrderMark)
1782                 return Array.Empty<byte>();
1783
1784             // Always hand back a freshly allocated array so that a caller who
1785             // modifies the result cannot affect what later callers receive.
1786             return bigEndian
1787                 ? new byte[2] { 0xfe, 0xff }   // high byte first
1788                 : new byte[2] { 0xff, 0xfe };  // low byte first
1789         }
1790
1791         public override ReadOnlySpan<byte> Preamble => GetType() == typeof(UnicodeEncoding)
1792             ? (byteOrderMark
1793                 ? (bigEndian ? (ReadOnlySpan<byte>)new byte[2] { 0xfe, 0xff } : (ReadOnlySpan<byte>)new byte[2] { 0xff, 0xfe }) // constant byte[] data cast straight to a span, relying on the C# compiler's optimization for static byte[] data
1794                 : default) // no byte order mark requested: empty span
1795             : new ReadOnlySpan<byte>(GetPreamble()); // a derived UnicodeEncoding may have overridden GetPreamble, so defer to it
1796
1797         public override int GetMaxByteCount(int charCount)
1798         {
1799             // Negative counts are rejected before any arithmetic is attempted.
1800             if (charCount < 0)
1801                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
1802
1803             // Worst case in chars: every input char plus one more for a left-over high
1804             // surrogate, widened by the encoder fallback's MaxCharCount when that exceeds 1.
1805             // The math is done in 64 bits so it cannot wrap before the range check below.
1806             long fallbackWidth = EncoderFallback.MaxCharCount > 1 ? EncoderFallback.MaxCharCount : 1;
1807             long worstCaseChars = ((long)charCount + 1) * fallbackWidth;
1808
1809             // Every char is encoded as CharSize (2) bytes.
1810             long worstCaseBytes = worstCaseChars * CharSize;
1811
1812             // A total beyond int.MaxValue cannot be reported through the int return type.
1813             if (worstCaseBytes > int.MaxValue)
1814                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
1815             return (int)worstCaseBytes;
1816         }
1817
1818
1819         // Worst-case number of chars that decoding byteCount bytes can produce,
1820         // allowing for state a decoder may still hold from earlier input.
1821         public override int GetMaxCharCount(int byteCount)
1822         {
1823             if (byteCount < 0)
1824                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
1825
1826             // byteCount is non-negative from here on, so / 2 and % 2 match >> 1 and & 1.
1827             long pairs = byteCount / 2;             // one char per two bytes
1828             long unpaired = byteCount % 2;          // rounds up without computing byteCount + 1, which could overflow an int
1829             long maxChars = pairs + unpaired + 1;   // + 1 for a high surrogate left over in the decoder
1830
1831             // Fallbacks (e.g. for lone surrogates) may emit several chars per replacement.
1832             if (DecoderFallback.MaxCharCount > 1)
1833                 maxChars *= DecoderFallback.MaxCharCount;
1834
1835             if (maxChars > int.MaxValue)
1836                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
1837
1838             return (int)maxChars;
1839         }
1840
1841
1842         // Two UnicodeEncoding instances are equal when their code page, BOM flag,
1843         // endianness and both fallbacks match; any other object is unequal.
1844         public override bool Equals(object? value)
1845         {
1846             if (!(value is UnicodeEncoding other))
1847                 return false;
1848
1849             // Big-endian (1201) and little-endian (1200) use different code pages,
1850             // and the code page is still compared explicitly.
1851             if (CodePage != other.CodePage || byteOrderMark != other.byteOrderMark || bigEndian != other.bigEndian)
1852                 return false;
1853
1854             // isThrowException is not compared: per the original note it is the
1855             // same as the encoder/decoder fallbacks being exception fallbacks.
1856             return EncoderFallback.Equals(other.EncoderFallback) &&
1857                    DecoderFallback.Equals(other.DecoderFallback);
1858         }
1859
1860         // Sums the code page, both fallback hashes, and a distinct constant per flag
1861         // (4 for the BOM, 8 for big-endian) -- the same members Equals compares.
1862         public override int GetHashCode() =>
1863             CodePage + EncoderFallback.GetHashCode() + DecoderFallback.GetHashCode()
1864             + (byteOrderMark ? 4 : 0) + (bigEndian ? 8 : 0);
1865
1866         // Decoder for this encoding. lastByte == -1 and lastChar == '\0' together
1867         // mean nothing is left over; HasState reports any other combination.
1868         private sealed class Decoder : System.Text.DecoderNLS
1869         {
1870             internal int lastByte = -1;
1871             internal char lastChar = '\0';
1872
1873             // Nothing to do in the body: the base constructor already calls Reset().
1874             public Decoder(UnicodeEncoding encoding) : base(encoding) { }
1875
1876             // Restores the initial field values and resets the fallback buffer, if any.
1877             public override void Reset()
1878             {
1879                 lastByte = -1;
1880                 lastChar = '\0';
1881                 _fallbackBuffer?.Reset();
1882             }
1883
1884             // Is anything left over in this decoder?
1885             internal override bool HasState => !(lastByte == -1 && lastChar == '\0');
1886         }
1887     }
1888 }