Fix StyleCop warning SA1206 (modifier ordering)
[mono-project.git] / netcore / System.Private.CoreLib / shared / System / Text / UTF32Encoding.cs
blobe1d2115b58582e2afb3f3d53ddd6b85233c6498a
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 //
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
7 //
9 using System;
10 using System.Diagnostics;
11 using System.Globalization;
12 using System.Runtime.InteropServices;
14 namespace System.Text
16 // Encodes text into and out of UTF-32. UTF-32 is a way of writing
17 // Unicode characters with a single storage unit (32 bits) per character.
19 // The UTF-32 byte order mark is simply the Unicode byte order mark
20 // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000). The byte order
21 // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
22 // switch the byte orderings.
24 public sealed class UTF32Encoding : Encoding
27 words bits UTF-32 representation
28 ----- ---- -----------------------------------
29 1 16 00000000 00000000 xxxxxxxx xxxxxxxx
30 2 21 00000000 000xxxxx hhhhhhll llllllll
31 ----- ---- -----------------------------------
33 Surrogate:
34 Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
37 // Used by Encoding.UTF32/BigEndianUTF32 for lazy initialization
38 // The initialization code will not be run until a static member of the class is referenced
// Shared little-endian instance, constructed with byteOrderMark: true.
39 internal static readonly UTF32Encoding s_default = new UTF32Encoding(bigEndian: false, byteOrderMark: true);
// Shared big-endian instance, constructed with byteOrderMark: true.
40 internal static readonly UTF32Encoding s_bigEndianDefault = new UTF32Encoding(bigEndian: true, byteOrderMark: true);
// Set from the constructor's byteOrderMark argument.
42 private readonly bool _emitUTF32ByteOrderMark = false;
// Set from the constructor's throwOnInvalidCharacters argument; when true,
// SetDefaultFallbacks installs the exception fallbacks instead of the U+FFFD replacement fallbacks.
43 private readonly bool _isThrowException = false;
// Set from the constructor's bigEndian argument; selects the byte order used when
// writing code units in GetBytes and when assembling them in GetCharCount/GetChars.
44 private readonly bool _bigEndian = false;
// Default constructor: equivalent to UTF32Encoding(bigEndian: false, byteOrderMark: true).
public UTF32Encoding()
    : this(bigEndian: false, byteOrderMark: true)
{
}
// Creates an encoding with the requested byte order and byte-order-mark setting.
// The base constructor receives 12001 for big-endian and 12000 for little-endian.
public UTF32Encoding(bool bigEndian, bool byteOrderMark)
    : base(bigEndian ? 12001 : 12000)
{
    _emitUTF32ByteOrderMark = byteOrderMark;
    _bigEndian = bigEndian;
}
// Creates an encoding with the requested byte order and byte-order-mark setting, and
// optionally switches invalid-data handling from replacement to throwing.
public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters)
    : this(bigEndian, byteOrderMark)
{
    _isThrowException = throwOnInvalidCharacters;

    // The fallbacks were already set up before _isThrowException was assigned, so they
    // must be rebuilt when exceptions were requested.
    if (throwOnInvalidCharacters)
    {
        SetDefaultFallbacks();
    }
}
// Installs the encoder/decoder fallbacks that match _isThrowException:
// the U+FFFD replacement fallbacks by default, or the exception fallbacks when throwing was requested.
internal override void SetDefaultFallbacks()
{
    if (!_isThrowException)
    {
        encoderFallback = new EncoderReplacementFallback("\xFFFD");
        decoderFallback = new DecoderReplacementFallback("\xFFFD");
        return;
    }

    encoderFallback = EncoderFallback.ExceptionFallback;
    decoderFallback = DecoderFallback.ExceptionFallback;
}
86 // The following methods are copied from EncodingNLS.cs.
87 // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
88 // These should be kept in sync for the following classes:
89 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
91 // Returns the number of bytes required to encode a range of characters in
92 // a character array.
94 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
95 // So if you fix this, fix the others. Currently those include:
96 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
97 // parent method is safe
// Returns the number of bytes needed to encode `count` chars of `chars`, starting at `index`.
// Throws ArgumentNullException for a null array and ArgumentOutOfRangeException for a
// negative index/count or a range that does not fit inside the array.
public override unsafe int GetByteCount(char[] chars, int index, int count)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > chars.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Nothing to count; also avoids pinning an empty array.
    if (count == 0)
    {
        return 0;
    }

    // Delegate to the pointer-based worker with no encoder state.
    fixed (char* source = chars)
    {
        return GetByteCount(source + index, count, null);
    }
}
120 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
121 // So if you fix this, fix the others. Currently those include:
122 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
123 // parent method is safe
// Returns the number of bytes needed to encode the whole string `s`.
// Throws ArgumentNullException when `s` is null.
public override unsafe int GetByteCount(string s)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    int length = s.Length;

    // Delegate to the pointer-based worker with no encoder state.
    fixed (char* source = s)
    {
        return GetByteCount(source, length, null);
    }
}
135 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
136 // So if you fix this, fix the others. Currently those include:
137 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload: validates the arguments, then counts with no encoder state.
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a negative count.
[CLSCompliant(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetByteCount(chars, count, null);
}
153 // Parent method is safe.
154 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
155 // So if you fix this, fix the others. Currently those include:
156 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Encodes `charCount` chars of `s`, starting at `charIndex`, into `bytes` starting at `byteIndex`.
// Returns the value produced by the pointer-based worker.
// Throws ArgumentNullException for a null string/array and ArgumentOutOfRangeException
// for negative or out-of-range indices and counts.
public override unsafe int GetBytes(string s, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount > s.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    // Space left in the destination after byteIndex.
    int available = bytes.Length - byteIndex;

    fixed (char* source = s)
    fixed (byte* destination = &MemoryMarshal.GetReference((Span<byte>)bytes))
    {
        return GetBytes(source + charIndex, charCount, destination + byteIndex, available, null);
    }
}
179 // Encodes a range of characters in a character array into a range of bytes
180 // in a byte array. An exception occurs if the byte array is not large
181 // enough to hold the complete encoding of the characters. The
182 // GetByteCount method can be used to determine the exact number of
183 // bytes that will be produced for a given range of characters.
184 // Alternatively, the GetMaxByteCount method can be used to
185 // determine the maximum number of bytes that will be produced for a given
186 // number of characters, regardless of the actual character values.
188 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
189 // So if you fix this, fix the others. Currently those include:
190 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
191 // parent method is safe
// Encodes `charCount` chars of `chars`, starting at `charIndex`, into `bytes` starting at `byteIndex`.
// Returns 0 when there is nothing to encode, otherwise the value produced by the pointer-based worker.
// Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
// for negative or out-of-range indices and counts.
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount > chars.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    // Nothing to encode; also avoids pinning an empty char array.
    if (charCount == 0)
    {
        return 0;
    }

    // The worker is given the space remaining after byteIndex, not the full array length.
    int available = bytes.Length - byteIndex;

    fixed (char* source = chars)
    fixed (byte* destination = &MemoryMarshal.GetReference((Span<byte>)bytes))
    {
        return GetBytes(source + charIndex, charCount, destination + byteIndex, available, null);
    }
}
221 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
222 // So if you fix this, fix the others. Currently those include:
223 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload: validates the arguments, then encodes with no encoder state.
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a negative count.
[CLSCompliant(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetBytes(chars, charCount, bytes, byteCount, null);
}
238 // Returns the number of characters produced by decoding a range of bytes
239 // in a byte array.
241 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
242 // So if you fix this, fix the others. Currently those include:
243 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
244 // parent method is safe
// Returns the number of chars produced by decoding `count` bytes of `bytes`, starting at `index`.
// Throws ArgumentNullException for a null array and ArgumentOutOfRangeException for a
// negative index/count or a range that does not fit inside the array.
public override unsafe int GetCharCount(byte[] bytes, int index, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > bytes.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Nothing to decode; also avoids pinning an empty array.
    if (count == 0)
    {
        return 0;
    }

    // Delegate to the pointer-based worker with no decoder state.
    fixed (byte* source = bytes)
    {
        return GetCharCount(source + index, count, null);
    }
}
267 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
268 // So if you fix this, fix the others. Currently those include:
269 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload: validates the arguments, then counts with no decoder state.
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a negative count.
[CLSCompliant(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetCharCount(bytes, count, null);
}
284 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
285 // So if you fix this, fix the others. Currently those include:
286 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
287 // parent method is safe
// Decodes `byteCount` bytes of `bytes`, starting at `byteIndex`, into `chars` starting at `charIndex`.
// Returns 0 when there is nothing to decode, otherwise the value produced by the pointer-based worker.
// Throws ArgumentNullException for a null array and ArgumentOutOfRangeException
// for negative or out-of-range indices and counts.
public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                    char[] chars, int charIndex)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (byteIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount > bytes.Length - byteIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (charIndex < 0 || charIndex > chars.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
    }

    // Nothing to decode; also avoids pinning an empty byte array.
    if (byteCount == 0)
    {
        return 0;
    }

    // The worker is given the space remaining after charIndex, not the full array length.
    int available = chars.Length - charIndex;

    fixed (byte* source = bytes)
    fixed (char* destination = &MemoryMarshal.GetReference((Span<char>)chars))
    {
        return GetChars(source + byteIndex, byteCount, destination + charIndex, available, null);
    }
}
317 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
318 // So if you fix this, fix the others. Currently those include:
319 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload: validates the arguments, then decodes with no decoder state.
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a negative count.
[CLSCompliant(false)]
public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetChars(bytes, byteCount, chars, charCount, null);
}
334 // Returns a string containing the decoded representation of a range of
335 // bytes in a byte array.
337 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
338 // So if you fix this, fix the others. Currently those include:
339 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
340 // parent method is safe
// Decodes `count` bytes of `bytes`, starting at `index`, into a string.
// Returns string.Empty for an empty range.
// Throws ArgumentNullException for a null array and ArgumentOutOfRangeException for a
// negative index/count or a range that does not fit inside the array.
public override unsafe string GetString(byte[] bytes, int index, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > bytes.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Empty range: no need to pin anything.
    if (count == 0)
    {
        return string.Empty;
    }

    fixed (byte* source = bytes)
    {
        return string.CreateStringFromEncoding(source + index, count, this);
    }
}
363 // End of standard methods copied from EncodingNLS.cs
// Counts the bytes needed to encode `count` UTF-16 chars starting at `chars` as UTF-32.
// Every accepted unit (a non-surrogate char or a high/low surrogate pair) adds 4 bytes.
// An unpaired surrogate is handed to the encoder fallback buffer; the loop then reads chars
// back from that buffer (InternalGetNextChar) before consuming more input.
// When `encoder` is non-null its _charLeftOver seeds the pending high surrogate, and a
// trailing high surrogate is only fallen back when encoder.MustFlush is set.
// Throws ArgumentException if the encoder's fallback buffer is not empty on entry, and
// ArgumentOutOfRangeException if the running total has gone negative (int overflow).
365 internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS? encoder)
367 Debug.Assert(chars != null, "[UTF32Encoding.GetByteCount]chars!=null");
368 Debug.Assert(count >= 0, "[UTF32Encoding.GetByteCount]count >=0");
370 char* end = chars + count;
371 char* charStart = chars;
372 int byteCount = 0;
// Pending high surrogate still waiting for its low half ('\0' means none).
374 char highSurrogate = '\0';
376 // For fallback we may need a fallback buffer
377 EncoderFallbackBuffer? fallbackBuffer = null;
// Scratch copy of `chars` passed by ref to InternalFallback and copied back afterwards.
378 char* charsForFallback;
380 if (encoder != null)
382 highSurrogate = encoder._charLeftOver;
383 fallbackBuffer = encoder.FallbackBuffer;
385 // We mustn't have left over fallback data when counting
386 if (fallbackBuffer.Remaining > 0)
387 throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback?.GetType().ToString() ?? string.Empty));
389 else
391 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
394 // Set our internal fallback interesting things.
395 fallbackBuffer.InternalInitialize(charStart, end, encoder, false);
397 char ch;
// Re-entry point used after the final lonely high surrogate has been fallen back.
398 TryAgain:
// Fallback chars take priority over input: ch == 0 means the fallback buffer had nothing to give.
400 while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < end)
402 // First unwind any fallback
403 if (ch == 0)
405 // No fallback, just get next char
406 ch = *chars;
407 chars++;
410 // Do we need a low surrogate?
411 if (highSurrogate != '\0')
414 // In previous char, we encounter a high surrogate, so we are expecting a low surrogate here.
416 if (char.IsLowSurrogate(ch))
418 // They're all legal
419 highSurrogate = '\0';
422 // One surrogate pair will be translated into 4 bytes UTF32.
425 byteCount += 4;
426 continue;
429 // We are missing our low surrogate, decrement chars and fallback the high surrogate
430 // The high surrogate may have come from the encoder, but nothing else did.
431 Debug.Assert(chars > charStart,
432 "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
433 chars--;
435 // Do the fallback
436 charsForFallback = chars;
437 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
438 chars = charsForFallback;
440 // We're going to fallback the old high surrogate.
441 highSurrogate = '\0';
442 continue;
445 // Do we have another high surrogate?
446 if (char.IsHighSurrogate(ch))
449 // We'll have a high surrogate to check next time.
451 highSurrogate = ch;
452 continue;
455 // Check for illegal characters
456 if (char.IsLowSurrogate(ch))
458 // We have a leading low surrogate, do the fallback
459 charsForFallback = chars;
460 fallbackBuffer.InternalFallback(ch, ref charsForFallback);
461 chars = charsForFallback;
463 // Try again with fallback buffer
464 continue;
467 // We get to add the character (4 bytes UTF32)
468 byteCount += 4;
471 // May have to do our last surrogate
// Without an encoder (or when flushing) a dangling high surrogate cannot be carried over, so fall it back now.
472 if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
474 // We have to do the fallback for the lonely high surrogate
475 charsForFallback = chars;
476 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
477 chars = charsForFallback;
479 highSurrogate = (char)0;
480 goto TryAgain;
483 // Check for overflows.
484 if (byteCount < 0)
485 throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
487 // Shouldn't have anything in fallback buffer for GetByteCount
488 // (don't have to check _throwOnOverflow for count)
489 Debug.Assert(fallbackBuffer.Remaining == 0,
490 "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");
492 // Return our count
493 return byteCount;
// Encodes `charCount` UTF-16 chars starting at `chars` into UTF-32 at `bytes`, writing at most
// `byteCount` bytes, and returns the number of bytes written (bytes - byteStart).
// Each accepted unit (a non-surrogate char or a high/low surrogate pair) is written as 4 bytes
// in the order selected by _bigEndian. Unpaired surrogates go through the encoder fallback buffer.
// When fewer than 4 bytes remain, the input (or fallback buffer) is backed up to the start of
// the unit, ThrowBytesOverflow is called, and the loop stops if that call did not throw.
// When `encoder` is non-null, its _charLeftOver seeds the pending high surrogate on entry and
// _charLeftOver / _charsUsed are updated on exit.
// Throws ArgumentException if encoder._throwOnOverflow is set and its fallback buffer is not empty on entry.
496 internal override unsafe int GetBytes(char* chars, int charCount,
497 byte* bytes, int byteCount, EncoderNLS? encoder)
499 Debug.Assert(chars != null, "[UTF32Encoding.GetBytes]chars!=null");
500 Debug.Assert(bytes != null, "[UTF32Encoding.GetBytes]bytes!=null");
501 Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetBytes]byteCount >=0");
502 Debug.Assert(charCount >= 0, "[UTF32Encoding.GetBytes]charCount >=0");
504 char* charStart = chars;
505 char* charEnd = chars + charCount;
506 byte* byteStart = bytes;
507 byte* byteEnd = bytes + byteCount;
// Pending high surrogate still waiting for its low half ('\0' means none).
509 char highSurrogate = '\0';
511 // For fallback we may need a fallback buffer
512 EncoderFallbackBuffer? fallbackBuffer = null;
// Scratch copy of `chars` passed by ref to InternalFallback and copied back afterwards.
513 char* charsForFallback;
515 if (encoder != null)
517 highSurrogate = encoder._charLeftOver;
518 fallbackBuffer = encoder.FallbackBuffer;
520 // We mustn't have left over fallback data when not converting
521 if (encoder._throwOnOverflow && fallbackBuffer.Remaining > 0)
522 throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback?.GetType()));
524 else
526 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
529 // Set our internal fallback interesting things.
530 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
532 char ch;
// Re-entry point used after the final lonely high surrogate has been fallen back.
533 TryAgain:
// Fallback chars take priority over input: ch == 0 means the fallback buffer had nothing to give.
535 while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
537 // First unwind any fallback
538 if (ch == 0)
540 // No fallback, just get next char
541 ch = *chars;
542 chars++;
545 // Do we need a low surrogate?
546 if (highSurrogate != '\0')
549 // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
551 if (char.IsLowSurrogate(ch))
553 // Is it a legal one?
554 uint iTemp = GetSurrogate(highSurrogate, ch);
555 highSurrogate = '\0';
558 // One surrogate pair will be translated into 4 bytes UTF32.
// bytes + 3 >= byteEnd: fewer than 4 bytes of output space remain.
560 if (bytes + 3 >= byteEnd)
562 // Don't have 4 bytes
563 if (fallbackBuffer.bFallingBack)
565 fallbackBuffer.MovePrevious(); // Aren't using these 2 fallback chars
566 fallbackBuffer.MovePrevious();
568 else
570 // If we don't have enough room, then either we should've advanced a while
571 // or we should have bytes==byteStart and throw below
// NOTE(review): the assert message below is tagged "[UnicodeEncoding.GetBytes]" although this is
// UTF32Encoding - looks copy-pasted; confirm before changing the string.
572 Debug.Assert(chars > charStart + 1 || bytes == byteStart,
573 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
574 chars -= 2; // Aren't using those 2 chars
576 ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
577 highSurrogate = (char)0; // Nothing left over (we backed up to start of pair if supplementary)
578 break;
581 if (_bigEndian)
583 *(bytes++) = (byte)(0x00);
584 *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
585 *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
586 *(bytes++) = (byte)(iTemp); // Implies & 0xFF
588 else
590 *(bytes++) = (byte)(iTemp); // Implies & 0xFF
591 *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
592 *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
593 *(bytes++) = (byte)(0x00);
595 continue;
598 // We are missing our low surrogate, decrement chars and fallback the high surrogate
599 // The high surrogate may have come from the encoder, but nothing else did.
600 Debug.Assert(chars > charStart,
601 "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
602 chars--;
604 // Do the fallback
605 charsForFallback = chars;
606 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
607 chars = charsForFallback;
609 // We're going to fallback the old high surrogate.
610 highSurrogate = '\0';
611 continue;
614 // Do we have another high surrogate?, if so remember it
615 if (char.IsHighSurrogate(ch))
618 // We'll have a high surrogate to check next time.
620 highSurrogate = ch;
621 continue;
624 // Check for illegal characters (low surrogate)
625 if (char.IsLowSurrogate(ch))
627 // We have a leading low surrogate, do the fallback
628 charsForFallback = chars;
629 fallbackBuffer.InternalFallback(ch, ref charsForFallback);
630 chars = charsForFallback;
632 // Try again with fallback buffer
633 continue;
636 // We get to add the character, yippee.
// bytes + 3 >= byteEnd: fewer than 4 bytes of output space remain.
637 if (bytes + 3 >= byteEnd)
639 // Don't have 4 bytes
640 if (fallbackBuffer.bFallingBack)
641 fallbackBuffer.MovePrevious(); // Aren't using this fallback char
642 else
644 // Must've advanced already
645 Debug.Assert(chars > charStart,
646 "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
647 chars--; // Aren't using this char
649 ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
650 break; // Didn't throw, stop
653 if (_bigEndian)
655 *(bytes++) = (byte)(0x00);
656 *(bytes++) = (byte)(0x00);
657 *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
658 *(bytes++) = (byte)(ch); // Implies & 0xFF
660 else
662 *(bytes++) = (byte)(ch); // Implies & 0xFF
663 *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
664 *(bytes++) = (byte)(0x00);
665 *(bytes++) = (byte)(0x00);
669 // May have to do our last surrogate
// Without an encoder (or when flushing) a dangling high surrogate cannot be carried over, so fall it back now.
670 if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
672 // We have to do the fallback for the lonely high surrogate
673 charsForFallback = chars;
674 fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
675 chars = charsForFallback;
677 highSurrogate = (char)0;
678 goto TryAgain;
681 // Fix our encoder if we have one
682 Debug.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
683 "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");
685 if (encoder != null)
687 // Remember our left over surrogate (or 0 if flushing)
688 encoder._charLeftOver = highSurrogate;
690 // Need # chars used
691 encoder._charsUsed = (int)(chars - charStart);
694 // return the new length
695 return (int)(bytes - byteStart);
// Counts the UTF-16 chars produced by decoding `count` bytes starting at `bytes` as UTF-32.
// Bytes are assembled four at a time into a code unit, in the order selected by _bigEndian.
// A valid value adds 1 char, or 2 chars when it is >= 0x10000 (surrogate pair). Values above
// 0x10FFFF or inside 0xD800-0xDFFF are handed to the decoder fallback buffer, whose result is
// added to the count.
// When `baseDecoder` is non-null, its readByteCount/iChar seed the partially read code unit,
// and leftover bytes (fewer than 4) are only fallen back when MustFlush is set.
// Throws ArgumentOutOfRangeException if the running char count has gone negative (int overflow).
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS? baseDecoder)
{
    Debug.Assert(bytes != null, "[UTF32Encoding.GetCharCount]bytes!=null");
    Debug.Assert(count >= 0, "[UTF32Encoding.GetCharCount]count >=0");

    UTF32Decoder? decoder = (UTF32Decoder?)baseDecoder;

    // None so far!
    int charCount = 0;
    byte* end = bytes + count;
    byte* byteStart = bytes;

    // Set up decoder
    int readCount = 0;
    uint iChar = 0;

    // For fallback we may need a fallback buffer
    DecoderFallbackBuffer? fallbackBuffer = null;

    // See if there's anything in our decoder
    if (decoder != null)
    {
        readCount = decoder.readByteCount;
        iChar = (uint)decoder.iChar;
        fallbackBuffer = decoder.FallbackBuffer;

        // Shouldn't have anything in fallback buffer for GetCharCount
        // (don't have to check _throwOnOverflow for chars or count)
        Debug.Assert(fallbackBuffer.Remaining == 0,
            "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
    }
    else
    {
        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
    }

    // Set our internal fallback interesting things.
    fallbackBuffer.InternalInitialize(byteStart, null);

    // Loop through our input, 4 bytes at a time; stop early once the count has overflowed.
    while (bytes < end && charCount >= 0)
    {
        // Get our next byte
        if (_bigEndian)
        {
            // Scoot left and add it to the bottom
            iChar <<= 8;
            iChar += *(bytes++);
        }
        else
        {
            // Scoot right and add it to the top
            iChar >>= 8;
            iChar += (uint)(*(bytes++)) << 24;
        }

        readCount++;

        // See if we have all the bytes yet
        if (readCount < 4)
            continue;

        // Have the bytes
        readCount = 0;

        // See if its valid to encode
        if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
        {
            // Need to fall back these 4 bytes, presented in their original input order
            byte[] fallbackBytes;
            if (_bigEndian)
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar >> 24)), unchecked((byte)(iChar >> 16)),
                    unchecked((byte)(iChar >> 8)), unchecked((byte)(iChar)) };
            }
            else
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar)), unchecked((byte)(iChar >> 8)),
                    unchecked((byte)(iChar >> 16)), unchecked((byte)(iChar >> 24)) };
            }

            charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);

            // Ignore the illegal character
            iChar = 0;
            continue;
        }

        // Ok, we have something we can add to our output
        if (iChar >= 0x10000)
        {
            // Surrogates take 2
            charCount++;
        }

        // Add the rest of the surrogate or our normal character
        charCount++;

        // iChar is back to 0
        iChar = 0;
    }

    // See if we have something left over that has to be decoded
    if (readCount > 0 && (decoder == null || decoder.MustFlush))
    {
        // Oops, there's something left over with no place to go.
        byte[] fallbackBytes = new byte[readCount];
        if (_bigEndian)
        {
            while (readCount > 0)
            {
                fallbackBytes[--readCount] = unchecked((byte)iChar);
                iChar >>= 8;
            }
        }
        else
        {
            while (readCount > 0)
            {
                fallbackBytes[--readCount] = unchecked((byte)(iChar >> 24));
                iChar <<= 8;
            }
        }

        charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
    }

    // Check for overflows. This is a char-count overflow, so report it with the
    // char-count message rather than the byte-count one.
    if (charCount < 0)
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetCharCountOverflow);

    // Shouldn't have anything in fallback buffer for GetCharCount
    // (don't have to check _throwOnOverflow for chars or count)
    Debug.Assert(fallbackBuffer.Remaining == 0,
        "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");

    // Return our count
    return charCount;
}
840 internal override unsafe int GetChars(byte* bytes, int byteCount,
841 char* chars, int charCount, DecoderNLS? baseDecoder)
843 Debug.Assert(chars != null, "[UTF32Encoding.GetChars]chars!=null");
844 Debug.Assert(bytes != null, "[UTF32Encoding.GetChars]bytes!=null");
845 Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetChars]byteCount >=0");
846 Debug.Assert(charCount >= 0, "[UTF32Encoding.GetChars]charCount >=0");
848 UTF32Decoder? decoder = (UTF32Decoder?)baseDecoder;
850 // None so far!
851 char* charStart = chars;
852 char* charEnd = chars + charCount;
854 byte* byteStart = bytes;
855 byte* byteEnd = bytes + byteCount;
857 // See if there's anything in our decoder (but don't clear it yet)
858 int readCount = 0;
859 uint iChar = 0;
861 // For fallback we may need a fallback buffer
862 DecoderFallbackBuffer? fallbackBuffer = null;
863 char* charsForFallback;
865 // See if there's anything in our decoder
866 if (decoder != null)
868 readCount = decoder.readByteCount;
869 iChar = (uint)decoder.iChar;
870 Debug.Assert(baseDecoder != null);
871 fallbackBuffer = baseDecoder.FallbackBuffer;
873 // Shouldn't have anything in fallback buffer for GetChars
874 // (don't have to check _throwOnOverflow for chars)
875 Debug.Assert(fallbackBuffer.Remaining == 0,
876 "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
878 else
880 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
883 // Set our internal fallback interesting things.
884 fallbackBuffer.InternalInitialize(bytes, chars + charCount);
886 // Loop through our input, 4 characters at a time!
887 while (bytes < byteEnd)
889 // Get our next character
890 if (_bigEndian)
892 // Scoot left and add it to the bottom
893 iChar <<= 8;
894 iChar += *(bytes++);
896 else
898 // Scoot right and add it to the top
899 iChar >>= 8;
900 iChar += (uint)(*(bytes++)) << 24;
903 readCount++;
905 // See if we have all the bytes yet
906 if (readCount < 4)
907 continue;
909 // Have the bytes
910 readCount = 0;
912 // See if its valid to encode
913 if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
915 // Need to fall back these 4 bytes
916 byte[] fallbackBytes;
917 if (_bigEndian)
919 fallbackBytes = new byte[] {
920 unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
921 unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
923 else
925 fallbackBytes = new byte[] {
926 unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
927 unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
930 // Chars won't be updated unless this works.
931 charsForFallback = chars;
932 bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
933 chars = charsForFallback;
935 if (!fallbackResult)
937 // Couldn't fallback, throw or wait til next time
938 // We either read enough bytes for bytes-=4 to work, or we're
939 // going to throw in ThrowCharsOverflow because chars == charStart
940 Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
941 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
942 bytes -= 4; // get back to where we were
943 iChar = 0; // Remembering nothing
944 fallbackBuffer.InternalReset();
945 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
946 break; // Stop here, didn't throw
949 // Ignore the illegal character
950 iChar = 0;
951 continue;
955 // Ok, we have something we can add to our output
956 if (iChar >= 0x10000)
958 // Surrogates take 2
959 if (chars >= charEnd - 1)
961 // Throwing or stopping
962 // We either read enough bytes for bytes-=4 to work, or we're
963 // going to throw in ThrowCharsOverflow because chars == charStart
964 Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
965 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
966 bytes -= 4; // get back to where we were
967 iChar = 0; // Remembering nothing
968 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
969 break; // Stop here, didn't throw
972 *(chars++) = GetHighSurrogate(iChar);
973 iChar = GetLowSurrogate(iChar);
975 // Bounds check for normal character
976 else if (chars >= charEnd)
978 // Throwing or stopping
979 // We either read enough bytes for bytes-=4 to work, or we're
980 // going to throw in ThrowCharsOverflow because chars == charStart
981 Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
982 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
983 bytes -= 4; // get back to where we were
984 iChar = 0; // Remembering nothing
985 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
986 break; // Stop here, didn't throw
989 // Add the rest of the surrogate or our normal character
990 *(chars++) = (char)iChar;
992 // iChar is back to 0
993 iChar = 0;
996 // See if we have something left over that has to be decoded
997 if (readCount > 0 && (decoder == null || decoder.MustFlush))
999 // Oops, there's something left over with no place to go.
1000 byte[] fallbackBytes = new byte[readCount];
1001 int tempCount = readCount;
1002 if (_bigEndian)
1004 while (tempCount > 0)
1006 fallbackBytes[--tempCount] = unchecked((byte)iChar);
1007 iChar >>= 8;
1010 else
1012 while (tempCount > 0)
1014 fallbackBytes[--tempCount] = unchecked((byte)(iChar >> 24));
1015 iChar <<= 8;
1019 charsForFallback = chars;
1020 bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
1021 chars = charsForFallback;
1023 if (!fallbackResult)
1025 // Couldn't fallback.
1026 fallbackBuffer.InternalReset();
1027 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1028 // Stop here, didn't throw, backed up, so still nothing in buffer
1030 else
1032 // Don't clear our decoder unless we could fall it back.
1033 // If we caught the if above, then we're a convert() and will catch this next time.
1034 readCount = 0;
1035 iChar = 0;
1039 // Remember any left over stuff, clearing buffer as well for MustFlush
1040 if (decoder != null)
1042 decoder.iChar = (int)iChar;
1043 decoder.readByteCount = readCount;
1044 decoder._bytesUsed = (int)(bytes - byteStart);
1047 // Shouldn't have anything in fallback buffer for GetChars
1048 // (don't have to check _throwOnOverflow for chars)
1049 Debug.Assert(fallbackBuffer.Remaining == 0,
1050 "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
1052 // Return our count
1053 return (int)(chars - charStart);
// Combines a UTF-16 surrogate pair into the single code point it encodes:
//   (cHigh - 0xD800) * 0x400 + (cLow - 0xDC00) + 0x10000
// No validation is done here; the arguments are used as given.
private uint GetSurrogate(char cHigh, char cLow)
{
    // Each surrogate carries 10 payload bits; the high one supplies the upper 10.
    uint upperBits = ((uint)cHigh - 0xD800) << 10;
    uint lowerBits = (uint)cLow - 0xDC00;

    return upperBits + lowerBits + 0x10000;
}
// Returns the high (leading) UTF-16 surrogate for a supplementary code point.
// Expects iChar >= 0x10000; the value is not validated here.
private char GetHighSurrogate(uint iChar)
{
    uint offset = iChar - 0x10000;

    // The upper 10 bits of the 20-bit offset select the high surrogate.
    return (char)((offset >> 10) + 0xD800);
}
// Returns the low (trailing) UTF-16 surrogate for a supplementary code point.
// Expects iChar >= 0x10000; the value is not validated here.
private char GetLowSurrogate(uint iChar)
{
    uint offset = iChar - 0x10000;

    // The lower 10 bits of the 20-bit offset select the low surrogate.
    return (char)((offset & 0x3FF) + 0xDC00);
}
// Creates a new UTF32Decoder bound to this encoding instance.
public override Decoder GetDecoder() => new UTF32Decoder(this);
// Creates a new EncoderNLS bound to this encoding instance.
public override Encoder GetEncoder() => new EncoderNLS(this);
// Returns an upper bound on the number of bytes needed to encode charCount chars.
// Throws ArgumentOutOfRangeException when charCount is negative or when the
// computed bound does not fit in an Int32.
public override int GetMaxByteCount(int charCount)
{
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // One extra unit covers a high surrogate left over from a previous call.
    long unitCount = charCount + 1L;

    // Each input char may be replaced by up to MaxCharCount fallback chars.
    int fallbackWidth = EncoderFallback.MaxCharCount;
    if (fallbackWidth > 1)
    {
        unitCount *= fallbackWidth;
    }

    // Every UTF-32 code unit occupies 4 bytes.
    long totalBytes = unitCount * 4;

    if (totalBytes > 0x7fffffff)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
    }

    return (int)totalBytes;
}
// Returns an upper bound on the number of chars produced by decoding byteCount bytes.
// Throws ArgumentOutOfRangeException when byteCount is negative or when the
// computed bound does not fit in an Int32.
public override int GetMaxCharCount(int byteCount)
{
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // A supplementary character becomes 2 surrogate chars, so 4 input bytes become 2 chars,
    // plus we may have 1 surrogate char left over if the decoder already holds 3 bytes of a
    // non-BMP char. One more is added because 1/2 == 0, yet 3 left-over bytes could still
    // complete a 2-char surrogate pair.
    //
    // The arithmetic is done in 64 bits: with 32-bit math the multiplication below could
    // wrap silently and the overflow check at the end could never fire.
    long charCount = ((long)byteCount / 2) + 2;

    // Also consider fallback because the input bytes could be outside the Unicode range.
    // Fallback happens 4 bytes at a time, so only half of MaxCharCount applies per 2 chars.
    if (DecoderFallback.MaxCharCount > 2)
    {
        // Scale by the fallback size...
        charCount *= DecoderFallback.MaxCharCount;

        // ...and undo the "2 chars per 4 bytes" assumption already baked in above.
        charCount /= 2;
    }

    if (charCount > 0x7fffffff)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
    }

    return (int)charCount;
}
// Returns the UTF-32 byte order mark for this instance's byte order,
// or an empty array when this instance does not emit one.
public override byte[] GetPreamble()
{
    if (!_emitUTF32ByteOrderMark)
    {
        return Array.Empty<byte>();
    }

    // A new array is allocated on every call to prevent users from modifying it.
    return _bigEndian
        ? new byte[4] { 0x00, 0x00, 0xFE, 0xFF }   // big endian
        : new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };  // little endian
}
// Span-based counterpart of GetPreamble().
public override ReadOnlySpan<byte> Preamble
{
    get
    {
        // In case a derived UTF32Encoding overrode GetPreamble, defer to it.
        if (GetType() != typeof(UTF32Encoding))
        {
            return new ReadOnlySpan<byte>(GetPreamble());
        }

        if (!_emitUTF32ByteOrderMark)
        {
            return default;
        }

        // Kept as direct casts of constant arrays: this form uses the C# compiler's
        // optimization for static byte[] data.
        return _bigEndian
            ? (ReadOnlySpan<byte>)new byte[4] { 0x00, 0x00, 0xFE, 0xFF }
            : (ReadOnlySpan<byte>)new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };
    }
}
// Two UTF32Encoding instances are equal when they agree on BOM emission,
// byte order, and both the encoder and decoder fallbacks.
public override bool Equals(object? value)
{
    if (!(value is UTF32Encoding other))
    {
        return false;
    }

    return _emitUTF32ByteOrderMark == other._emitUTF32ByteOrderMark
        && _bigEndian == other._bigEndian
        && EncoderFallback.Equals(other.EncoderFallback)
        && DecoderFallback.Equals(other.DecoderFallback);
}
// Hash built from the fallbacks, the code page, and the BOM / byte-order flags.
// Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
public override int GetHashCode()
{
    int hash = EncoderFallback.GetHashCode() + DecoderFallback.GetHashCode() + CodePage;

    if (_emitUTF32ByteOrderMark)
    {
        hash += 4;
    }

    if (_bigEndian)
    {
        hash += 8;
    }

    return hash;
}
1181 private sealed class UTF32Decoder : DecoderNLS
1183 // Need a place to store any extra bytes we may have picked up
1184 internal int iChar = 0;
1185 internal int readByteCount = 0;
1187 public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
1189 // base calls reset
1192 public override void Reset()
1194 this.iChar = 0;
1195 this.readByteCount = 0;
1196 if (_fallbackBuffer != null)
1197 _fallbackBuffer.Reset();
1200 // Anything left in our decoder?
1201 internal override bool HasState
1205 // ReadByteCount is our flag. (iChar==0 doesn't mean much).
1206 return (this.readByteCount != 0);