1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
10 using System
.Diagnostics
;
11 using System
.Globalization
;
12 using System
.Runtime
.InteropServices
;
// Encodes text into and out of UTF-32. UTF-32 is a way of writing
// Unicode characters with a single storage unit (32 bits) per character.
//
// The UTF-32 byte order mark is simply the Unicode byte order mark
// (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000). The byte order
// mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
// switch the byte orderings.
24 public sealed class UTF32Encoding
: Encoding
/*
    words   bits    UTF-32 representation
    -----   ----    -----------------------------------
    1       16      00000000 00000000 xxxxxxxx xxxxxxxx
    2       21      00000000 000xxxxx hhhhhhll llllllll
    -----   ----    -----------------------------------

    Surrogate:
    Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
*/
// Shared instances used by Encoding.UTF32 / Encoding.BigEndianUTF32 for lazy initialization.
// The initializers do not run until a static member of this class is referenced.
// Both instances emit a byte order mark.
internal static readonly UTF32Encoding s_default =
    new UTF32Encoding(bigEndian: false, byteOrderMark: true);

internal static readonly UTF32Encoding s_bigEndianDefault =
    new UTF32Encoding(bigEndian: true, byteOrderMark: true);

// Copied from the constructor's byteOrderMark argument.
private readonly bool _emitUTF32ByteOrderMark;

// Copied from the constructor's throwOnInvalidCharacters argument; when true,
// SetDefaultFallbacks installs the exception fallbacks.
private readonly bool _isThrowException;

// True selects big-endian byte order (code page 12001), false little-endian (12000).
private readonly bool _bigEndian;
// Creates a little-endian UTF-32 encoding that emits a byte order mark.
public UTF32Encoding()
    : this(bigEndian: false, byteOrderMark: true)
{
}
// Creates a UTF-32 encoding with the requested byte order and byte-order-mark setting.
// The base class receives code page 12001 for big-endian and 12000 for little-endian.
public UTF32Encoding(bool bigEndian, bool byteOrderMark)
    : base(bigEndian ? 12001 : 12000)
{
    _emitUTF32ByteOrderMark = byteOrderMark;
    _bigEndian = bigEndian;
}
// Creates a UTF-32 encoding with the requested byte order and byte-order-mark setting,
// optionally throwing on invalid data instead of substituting a replacement character.
public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters)
    : this(bigEndian, byteOrderMark)
{
    _isThrowException = throwOnInvalidCharacters;

    // The base constructor already installed fallbacks, but it did so before
    // _isThrowException was set, so they are wrong when exceptions were requested.
    if (throwOnInvalidCharacters)
    {
        SetDefaultFallbacks();
    }
}
// Installs the encoder/decoder fallbacks that match _isThrowException:
// exception fallbacks when it is set, otherwise U+FFFD replacement fallbacks.
internal override void SetDefaultFallbacks()
{
    if (!_isThrowException)
    {
        // Replace anything invalid with U+FFFD (REPLACEMENT CHARACTER).
        this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
        this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
        return;
    }

    this.encoderFallback = EncoderFallback.ExceptionFallback;
    this.decoderFallback = DecoderFallback.ExceptionFallback;
}
86 // The following methods are copied from EncodingNLS.cs.
87 // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
88 // These should be kept in sync for the following classes:
89 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
91 // Returns the number of bytes required to encode a range of characters in
94 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
95 // So if you fix this, fix the others. Currently those include:
96 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
97 // parent method is safe
// Returns the number of bytes needed to encode count chars of chars starting at index.
// Validates the arguments, then defers to the pointer-based overload with no encoder.
public override unsafe int GetByteCount(char[] chars, int index, int count)
{
    if (chars is null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > chars.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Nothing to count; this also avoids pinning an empty array.
    if (count == 0)
    {
        return 0;
    }

    fixed (char* pChars = chars)
    {
        return GetByteCount(pChars + index, count, null);
    }
}
120 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
121 // So if you fix this, fix the others. Currently those include:
122 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
123 // parent method is safe
// Returns the number of bytes needed to encode the whole string s.
public override unsafe int GetByteCount(string s)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    fixed (char* pChars = s)
    {
        return GetByteCount(pChars, s.Length, null);
    }
}
135 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
136 // So if you fix this, fix the others. Currently those include:
137 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Returns the number of bytes needed to encode count chars starting at the pointer chars.
[CLSCompliant(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // No encoder state is involved for the public overload.
    return GetByteCount(chars, count, null);
}
153 // Parent method is safe.
154 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
155 // So if you fix this, fix the others. Currently those include:
156 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Encodes charCount chars of s, starting at charIndex, into bytes starting at byteIndex.
// All space from byteIndex to the end of bytes is offered to the pointer-based overload,
// whose result (the number of bytes written) is returned.
public override unsafe int GetBytes(string s, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s), SR.ArgumentNull_Array);
    }

    if (bytes is null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount > s.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    int availableBytes = bytes.Length - byteIndex;

    // The output is pinned through MemoryMarshal.GetReference rather than directly,
    // so the span's element reference is what gets fixed (bytes may be empty here).
    fixed (char* pChars = s)
    fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
    {
        return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, availableBytes, null);
    }
}
179 // Encodes a range of characters in a character array into a range of bytes
180 // in a byte array. An exception occurs if the byte array is not large
181 // enough to hold the complete encoding of the characters. The
182 // GetByteCount method can be used to determine the exact number of
183 // bytes that will be produced for a given range of characters.
184 // Alternatively, the GetMaxByteCount method can be used to
185 // determine the maximum number of bytes that will be produced for a given
186 // number of characters, regardless of the actual character values.
188 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
189 // So if you fix this, fix the others. Currently those include:
190 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
191 // parent method is safe
// Encodes charCount chars of chars, starting at charIndex, into bytes starting at byteIndex.
// Returns the number of bytes written.
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (chars is null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (bytes is null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount > chars.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    // Nothing to encode; this also avoids pinning an empty char array.
    if (charCount == 0)
    {
        return 0;
    }

    // The space handed to the pointer overload is what remains after byteIndex,
    // not the size of the whole array.
    int availableBytes = bytes.Length - byteIndex;

    fixed (char* pChars = chars)
    fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
    {
        return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, availableBytes, null);
    }
}
221 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
222 // So if you fix this, fix the others. Currently those include:
223 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Encodes charCount chars from the pointer chars into at most byteCount bytes at bytes.
// Returns the number of bytes written.
[CLSCompliant(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
    // bytes is checked first so that it is the reported parameter when both are null.
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetBytes(chars, charCount, bytes, byteCount, null);
}
238 // Returns the number of characters produced by decoding a range of bytes
241 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
242 // So if you fix this, fix the others. Currently those include:
243 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
244 // parent method is safe
// Returns the number of chars produced by decoding count bytes of bytes starting at index.
public override unsafe int GetCharCount(byte[] bytes, int index, int count)
{
    if (bytes is null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > bytes.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // No input means no output; this also avoids pinning a zero-length array.
    if (count == 0)
    {
        return 0;
    }

    fixed (byte* pBytes = bytes)
    {
        return GetCharCount(pBytes + index, count, null);
    }
}
267 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
268 // So if you fix this, fix the others. Currently those include:
269 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Returns the number of chars produced by decoding count bytes starting at the pointer bytes.
[CLSCompliant(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // No decoder state is involved for the public overload.
    return GetCharCount(bytes, count, null);
}
284 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
285 // So if you fix this, fix the others. Currently those include:
286 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
287 // parent method is safe
// Decodes byteCount bytes of bytes, starting at byteIndex, into chars starting at charIndex.
// Returns the number of chars written.
public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                    char[] chars, int charIndex)
{
    // bytes is checked first so that it is the reported parameter when both are null.
    if (bytes is null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars is null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (byteIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount > bytes.Length - byteIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (charIndex < 0 || charIndex > chars.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
    }

    // No input means no output; this also avoids pinning an empty byte array.
    if (byteCount == 0)
    {
        return 0;
    }

    // The space handed to the pointer overload is what remains after charIndex,
    // not the size of the whole array.
    int availableChars = chars.Length - charIndex;

    fixed (byte* pBytes = bytes)
    fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
    {
        return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, availableChars, null);
    }
}
317 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
318 // So if you fix this, fix the others. Currently those include:
319 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Decodes byteCount bytes from the pointer bytes into at most charCount chars at chars.
// Returns the number of chars written.
[CLSCompliant(false)]
public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
    // bytes is checked first so that it is the reported parameter when both are null.
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    // charCount is checked first so that it is the reported parameter when both are negative.
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetChars(bytes, byteCount, chars, charCount, null);
}
334 // Returns a string containing the decoded representation of a range of
335 // bytes in a byte array.
337 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
338 // So if you fix this, fix the others. Currently those include:
339 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
340 // parent method is safe
// Decodes count bytes of bytes, starting at index, and returns them as a string.
public override unsafe string GetString(byte[] bytes, int index, int count)
{
    if (bytes is null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > bytes.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // An empty range decodes to the empty string; this also avoids pinning an empty array.
    if (count == 0)
    {
        return string.Empty;
    }

    fixed (byte* pBytes = bytes)
    {
        return string.CreateStringFromEncoding(pBytes + index, count, this);
    }
}
363 // End of standard methods copied from EncodingNLS.cs
// Counts the bytes needed to encode count UTF-16 chars as UTF-32.
// When an encoder is supplied, its left-over high surrogate and fallback buffer are used;
// otherwise a fresh fallback buffer is created from this encoding's encoder fallback.
// Every emitted code point costs 4 bytes; unpaired surrogates are routed through the fallback.
internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS? encoder)
{
    Debug.Assert(chars != null, "[UTF32Encoding.GetByteCount]chars!=null");
    Debug.Assert(count >= 0, "[UTF32Encoding.GetByteCount]count >=0");

    char* charStart = chars;
    char* end = chars + count;
    int byteCount = 0;

    // High surrogate still waiting for its low partner ('\0' when none is pending).
    char highSurrogate = '\0';

    EncoderFallbackBuffer? fallbackBuffer;
    char* charsForFallback;

    if (encoder == null)
    {
        fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
    }
    else
    {
        highSurrogate = encoder._charLeftOver;
        fallbackBuffer = encoder.FallbackBuffer;

        // Counting must not start with fallback data still pending.
        if (fallbackBuffer.Remaining > 0)
        {
            throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback?.GetType().ToString() ?? string.Empty));
        }
    }

    // Give the fallback buffer the input bounds and the encoder (counting mode: false).
    fallbackBuffer.InternalInitialize(charStart, end, encoder, false);

    char ch;
TryAgain:

    // Drain pending fallback chars first; once there are none, read from the input.
    while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < end)
    {
        if (ch == 0)
        {
            // The fallback buffer had nothing, so take the next input char.
            ch = *chars;
            chars++;
        }

        if (highSurrogate != '\0')
        {
            // The previous char was a high surrogate, so a low surrogate is expected now.
            if (char.IsLowSurrogate(ch))
            {
                // A complete surrogate pair becomes one 4-byte UTF-32 unit.
                highSurrogate = '\0';
                byteCount += 4;
                continue;
            }

            // No low surrogate followed: un-read the current char and fall back the
            // high surrogate. The high surrogate may have come from the encoder,
            // but the current char did not.
            Debug.Assert(chars > charStart,
                "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
            chars--;

            charsForFallback = chars;
            fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
            chars = charsForFallback;

            highSurrogate = '\0';
            continue;
        }

        if (char.IsHighSurrogate(ch))
        {
            // Remember it and look for its partner on the next iteration.
            highSurrogate = ch;
            continue;
        }

        if (char.IsLowSurrogate(ch))
        {
            // A low surrogate with no preceding high surrogate is invalid: fall back.
            charsForFallback = chars;
            fallbackBuffer.InternalFallback(ch, ref charsForFallback);
            chars = charsForFallback;
            continue;
        }

        // An ordinary char is one 4-byte UTF-32 unit.
        byteCount += 4;
    }

    // When flushing (or when there is no encoder), a trailing high surrogate is invalid.
    if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
    {
        charsForFallback = chars;
        fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
        chars = charsForFallback;

        highSurrogate = (char)0;

        // Re-enter the loop to count whatever the fallback produced.
        goto TryAgain;
    }

    // A negative total means the int counter wrapped around.
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
    }

    // Shouldn't have anything in fallback buffer for GetByteCount
    // (don't have to check _throwOnOverflow for count)
    Debug.Assert(fallbackBuffer.Remaining == 0,
        "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");

    return byteCount;
}
// Encodes charCount UTF-16 chars as UTF-32 into at most byteCount bytes.
// Returns the number of bytes written. When an encoder is supplied, its left-over high
// surrogate and fallback buffer are used, and on exit its _charLeftOver and _charsUsed
// are updated. Each code point is written as 4 bytes, most significant byte first when
// _bigEndian is set and least significant byte first otherwise. Unpaired surrogates are
// routed through the fallback buffer. If the output runs out, ThrowBytesOverflow is
// called with a flag saying whether nothing was written yet; if it returns, encoding stops.
internal override unsafe int GetBytes(char* chars, int charCount,
                                      byte* bytes, int byteCount, EncoderNLS? encoder)
{
    Debug.Assert(chars != null, "[UTF32Encoding.GetBytes]chars!=null");
    Debug.Assert(bytes != null, "[UTF32Encoding.GetBytes]bytes!=null");
    Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetBytes]byteCount >=0");
    Debug.Assert(charCount >= 0, "[UTF32Encoding.GetBytes]charCount >=0");

    char* charStart = chars;
    char* charEnd = chars + charCount;
    byte* byteStart = bytes;
    byte* byteEnd = bytes + byteCount;

    // High surrogate still waiting for its low partner ('\0' when none is pending).
    char highSurrogate = '\0';

    // For fallback we may need a fallback buffer
    EncoderFallbackBuffer? fallbackBuffer = null;
    char* charsForFallback;

    if (encoder != null)
    {
        highSurrogate = encoder._charLeftOver;
        fallbackBuffer = encoder.FallbackBuffer;

        // We mustn't have left over fallback data when not converting
        if (encoder._throwOnOverflow && fallbackBuffer.Remaining > 0)
            throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback?.GetType()));
    }
    else
    {
        fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
    }

    // Give the fallback buffer the input bounds and the encoder (encoding mode: true).
    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);

    char ch;
TryAgain:

    // Drain pending fallback chars first; once there are none, read from the input.
    while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
    {
        // First unwind any fallback
        if (ch == 0)
        {
            // No fallback, just get next char
            ch = *chars;
            chars++;
        }

        // Do we need a low surrogate?
        if (highSurrogate != '\0')
        {
            // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
            if (char.IsLowSurrogate(ch))
            {
                // Combine the pair into a single code point.
                uint iTemp = GetSurrogate(highSurrogate, ch);
                highSurrogate = '\0';

                // One surrogate pair will be translated into 4 bytes UTF32.
                if (bytes + 3 >= byteEnd)
                {
                    // Don't have 4 bytes
                    if (fallbackBuffer.bFallingBack)
                    {
                        fallbackBuffer.MovePrevious(); // Aren't using these 2 fallback chars
                        fallbackBuffer.MovePrevious();
                    }
                    else
                    {
                        // If we don't have enough room, then either we should've advanced a while
                        // or we should have bytes==byteStart and throw below
                        Debug.Assert(chars > charStart + 1 || bytes == byteStart,
                            "[UTF32Encoding.GetBytes]Expected chars to have advanced when no room to add surrogate pair");
                        chars -= 2; // Aren't using those 2 chars
                    }

                    ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
                    highSurrogate = (char)0; // Nothing left over (we backed up to start of pair if supplementary)
                    break;
                }

                if (_bigEndian)
                {
                    *(bytes++) = (byte)(0x00);
                    *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
                    *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp); // Implies & 0xFF
                }
                else
                {
                    *(bytes++) = (byte)(iTemp); // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
                    *(bytes++) = (byte)(0x00);
                }

                continue;
            }

            // We are missing our low surrogate, decrement chars and fallback the high surrogate
            // The high surrogate may have come from the encoder, but nothing else did.
            Debug.Assert(chars > charStart,
                "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
            chars--;

            // Do the fallback
            charsForFallback = chars;
            fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
            chars = charsForFallback;

            // We're going to fallback the old high surrogate.
            highSurrogate = '\0';
            continue;
        }

        // Do we have another high surrogate?, if so remember it
        if (char.IsHighSurrogate(ch))
        {
            // We'll have a high surrogate to check next time.
            highSurrogate = ch;
            continue;
        }

        // Check for illegal characters (low surrogate)
        if (char.IsLowSurrogate(ch))
        {
            // We have a leading low surrogate, do the fallback
            charsForFallback = chars;
            fallbackBuffer.InternalFallback(ch, ref charsForFallback);
            chars = charsForFallback;

            // Try again with fallback buffer
            continue;
        }

        // Ordinary char: needs 4 bytes of output.
        if (bytes + 3 >= byteEnd)
        {
            // Don't have 4 bytes
            if (fallbackBuffer.bFallingBack)
            {
                fallbackBuffer.MovePrevious(); // Aren't using this fallback char
            }
            else
            {
                // Must've advanced already
                Debug.Assert(chars > charStart,
                    "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
                chars--; // Aren't using this char
            }

            ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
            break; // Didn't throw, stop
        }

        if (_bigEndian)
        {
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
            *(bytes++) = (byte)(ch); // Implies & 0xFF
        }
        else
        {
            *(bytes++) = (byte)(ch); // Implies & 0xFF
            *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)(0x00);
        }
    }

    // May have to do our last surrogate
    if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
    {
        // We have to do the fallback for the lonely high surrogate
        charsForFallback = chars;
        fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
        chars = charsForFallback;

        highSurrogate = (char)0;

        // Re-enter the loop to encode whatever the fallback produced.
        goto TryAgain;
    }

    // Fix our encoder if we have one
    Debug.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
        "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");

    if (encoder != null)
    {
        // Remember our left over surrogate (or 0 if flushing)
        encoder._charLeftOver = highSurrogate;

        // Report how many input chars were consumed.
        encoder._charsUsed = (int)(chars - charStart);
    }

    // return the new length
    return (int)(bytes - byteStart);
}
// Counts the UTF-16 chars produced by decoding count bytes of UTF-32.
// When a decoder is supplied, its partially-read code unit (iChar / readByteCount) and
// fallback buffer are used as the starting state; the decoder itself is not modified here.
// Code points >= 0x10000 count as 2 chars (a surrogate pair), all other valid ones as 1.
// Values above 0x10FFFF or inside 0xD800..0xDFFF are invalid and are counted via the fallback.
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS? baseDecoder)
{
    Debug.Assert(bytes != null, "[UTF32Encoding.GetCharCount]bytes!=null");
    Debug.Assert(count >= 0, "[UTF32Encoding.GetCharCount]count >=0");

    UTF32Decoder? decoder = (UTF32Decoder?)baseDecoder;

    // None so far!
    int charCount = 0;
    byte* end = bytes + count;
    byte* byteStart = bytes;

    // Partial code unit carried between calls: bytes read so far and their value.
    int readCount = 0;
    uint iChar = 0;

    // For fallback we may need a fallback buffer
    DecoderFallbackBuffer? fallbackBuffer = null;

    // See if there's anything in our decoder
    if (decoder != null)
    {
        readCount = decoder.readByteCount;
        iChar = (uint)decoder.iChar;
        fallbackBuffer = decoder.FallbackBuffer;

        // Shouldn't have anything in fallback buffer for GetCharCount
        // (don't have to check _throwOnOverflow for chars or count)
        Debug.Assert(fallbackBuffer.Remaining == 0,
            "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
    }
    else
    {
        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
    }

    // Set our internal fallback interesting things.
    fallbackBuffer.InternalInitialize(byteStart, null);

    // Consume the input one byte at a time, completing a code unit every 4 bytes.
    // The loop also stops once charCount has overflowed to a negative value.
    while (bytes < end && charCount >= 0)
    {
        // Get our next character
        if (_bigEndian)
        {
            // Scoot left and add it to the bottom
            iChar <<= 8;
            iChar += *(bytes++);
        }
        else
        {
            // Scoot right and add it to the top
            iChar >>= 8;
            iChar += (uint)(*(bytes++)) << 24;
        }

        readCount++;

        // See if we have all the bytes yet
        if (readCount < 4)
            continue;

        // Have the bytes
        readCount = 0;

        // See if its valid to encode
        if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
        {
            // Need to fall back these 4 bytes, presented in their original input order.
            byte[] fallbackBytes;
            if (_bigEndian)
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar >> 24)), unchecked((byte)(iChar >> 16)),
                    unchecked((byte)(iChar >> 8)), unchecked((byte)(iChar)) };
            }
            else
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar)), unchecked((byte)(iChar >> 8)),
                    unchecked((byte)(iChar >> 16)), unchecked((byte)(iChar >> 24)) };
            }

            charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);

            // Ignore the illegal character
            iChar = 0;
            continue;
        }

        // Ok, we have something we can add to our output
        if (iChar >= 0x10000)
        {
            // Surrogates take 2
            charCount++;
        }

        // Add the rest of the surrogate or our normal character
        charCount++;

        // iChar is back to 0
        iChar = 0;
    }

    // See if we have something left over that has to be decoded
    if (readCount > 0 && (decoder == null || decoder.MustFlush))
    {
        // Oops, there's something left over with no place to go.
        // Rebuild the 1-3 trailing bytes in input order for the fallback.
        byte[] fallbackBytes = new byte[readCount];
        if (_bigEndian)
        {
            while (readCount > 0)
            {
                fallbackBytes[--readCount] = unchecked((byte)iChar);
                iChar >>= 8;
            }
        }
        else
        {
            while (readCount > 0)
            {
                fallbackBytes[--readCount] = unchecked((byte)(iChar >> 24));
                iChar <<= 8;
            }
        }

        charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
    }

    // Check for overflows. This is a char count, so report the char-count overflow
    // message rather than the byte-count one.
    if (charCount < 0)
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetCharCountOverflow);

    // Shouldn't have anything in fallback buffer for GetCharCount
    // (don't have to check _throwOnOverflow for chars or count)
    Debug.Assert(fallbackBuffer.Remaining == 0,
        "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");

    // Return our count
    return charCount;
}
// Decodes byteCount bytes of UTF-32 into at most charCount UTF-16 chars.
// Returns the number of chars written. When a decoder is supplied, its partially-read
// code unit (iChar / readByteCount) and fallback buffer are the starting state, and on
// exit iChar, readByteCount and _bytesUsed are stored back into it.
// Code points >= 0x10000 are written as a surrogate pair. Values above 0x10FFFF or inside
// 0xD800..0xDFFF are invalid and go through the fallback. If the output runs out,
// the current 4 bytes are un-read and ThrowCharsOverflow is called with a flag saying
// whether nothing was written yet; if it returns, decoding stops.
internal override unsafe int GetChars(byte* bytes, int byteCount,
                                      char* chars, int charCount, DecoderNLS? baseDecoder)
{
    Debug.Assert(chars != null, "[UTF32Encoding.GetChars]chars!=null");
    Debug.Assert(bytes != null, "[UTF32Encoding.GetChars]bytes!=null");
    Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetChars]byteCount >=0");
    Debug.Assert(charCount >= 0, "[UTF32Encoding.GetChars]charCount >=0");

    UTF32Decoder? decoder = (UTF32Decoder?)baseDecoder;

    // None so far!
    char* charStart = chars;
    char* charEnd = chars + charCount;

    byte* byteStart = bytes;
    byte* byteEnd = bytes + byteCount;

    // See if there's anything in our decoder (but don't clear it yet)
    int readCount = 0;
    uint iChar = 0;

    // For fallback we may need a fallback buffer
    DecoderFallbackBuffer? fallbackBuffer = null;
    char* charsForFallback;

    // See if there's anything in our decoder
    if (decoder != null)
    {
        readCount = decoder.readByteCount;
        iChar = (uint)decoder.iChar;
        Debug.Assert(baseDecoder != null);
        fallbackBuffer = baseDecoder.FallbackBuffer;

        // Shouldn't have anything in fallback buffer for GetChars
        // (don't have to check _throwOnOverflow for chars)
        Debug.Assert(fallbackBuffer.Remaining == 0,
            "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
    }
    else
    {
        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
    }

    // Set our internal fallback interesting things.
    fallbackBuffer.InternalInitialize(bytes, chars + charCount);

    // Consume the input one byte at a time, completing a code unit every 4 bytes.
    while (bytes < byteEnd)
    {
        // Get our next character
        if (_bigEndian)
        {
            // Scoot left and add it to the bottom
            iChar <<= 8;
            iChar += *(bytes++);
        }
        else
        {
            // Scoot right and add it to the top
            iChar >>= 8;
            iChar += (uint)(*(bytes++)) << 24;
        }

        readCount++;

        // See if we have all the bytes yet
        if (readCount < 4)
            continue;

        // Have the bytes
        readCount = 0;

        // See if its valid to encode
        if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
        {
            // Need to fall back these 4 bytes, presented in their original input order.
            byte[] fallbackBytes;
            if (_bigEndian)
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar >> 24)), unchecked((byte)(iChar >> 16)),
                    unchecked((byte)(iChar >> 8)), unchecked((byte)(iChar)) };
            }
            else
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar)), unchecked((byte)(iChar >> 8)),
                    unchecked((byte)(iChar >> 16)), unchecked((byte)(iChar >> 24)) };
            }

            // Chars won't be updated unless this works.
            charsForFallback = chars;
            bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
            chars = charsForFallback;

            if (!fallbackResult)
            {
                // Couldn't fallback, throw or wait til next time
                // We either read enough bytes for bytes-=4 to work, or we're
                // going to throw in ThrowCharsOverflow because chars == charStart
                Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
                    "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
                bytes -= 4; // get back to where we were
                iChar = 0; // Remembering nothing
                fallbackBuffer.InternalReset();
                ThrowCharsOverflow(decoder, chars == charStart); // Might throw, if no chars output
                break; // Stop here, didn't throw
            }

            // Ignore the illegal character
            iChar = 0;
            continue;
        }

        // Ok, we have something we can add to our output
        if (iChar >= 0x10000)
        {
            // Surrogates take 2
            if (chars >= charEnd - 1)
            {
                // Throwing or stopping
                // We either read enough bytes for bytes-=4 to work, or we're
                // going to throw in ThrowCharsOverflow because chars == charStart
                Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
                    "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
                bytes -= 4; // get back to where we were
                iChar = 0; // Remembering nothing
                ThrowCharsOverflow(decoder, chars == charStart); // Might throw, if no chars output
                break; // Stop here, didn't throw
            }

            // Write the high half now; the low half is written by the shared store below.
            *(chars++) = GetHighSurrogate(iChar);
            iChar = GetLowSurrogate(iChar);
        }
        // Bounds check for normal character
        else if (chars >= charEnd)
        {
            // Throwing or stopping
            // We either read enough bytes for bytes-=4 to work, or we're
            // going to throw in ThrowCharsOverflow because chars == charStart
            Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
                "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
            bytes -= 4; // get back to where we were
            iChar = 0; // Remembering nothing
            ThrowCharsOverflow(decoder, chars == charStart); // Might throw, if no chars output
            break; // Stop here, didn't throw
        }

        // Add the rest of the surrogate or our normal character
        *(chars++) = (char)iChar;

        // iChar is back to 0
        iChar = 0;
    }

    // See if we have something left over that has to be decoded
    if (readCount > 0 && (decoder == null || decoder.MustFlush))
    {
        // Oops, there's something left over with no place to go.
        // Rebuild the 1-3 trailing bytes in input order for the fallback. Work on copies
        // of readCount and iChar: if the fallback cannot be written and we do not throw,
        // both are stored back into the decoder below and must still describe the
        // pending partial code unit.
        byte[] fallbackBytes = new byte[readCount];
        int tempCount = readCount;
        uint tempChar = iChar;
        if (_bigEndian)
        {
            while (tempCount > 0)
            {
                fallbackBytes[--tempCount] = unchecked((byte)tempChar);
                tempChar >>= 8;
            }
        }
        else
        {
            while (tempCount > 0)
            {
                fallbackBytes[--tempCount] = unchecked((byte)(tempChar >> 24));
                tempChar <<= 8;
            }
        }

        charsForFallback = chars;
        bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
        chars = charsForFallback;

        if (!fallbackResult)
        {
            // Couldn't fallback.
            fallbackBuffer.InternalReset();
            ThrowCharsOverflow(decoder, chars == charStart); // Might throw, if no chars output
            // Stop here, didn't throw; the partial code unit stays pending in the decoder.
        }
        else
        {
            // Don't clear our decoder unless we could fall it back.
            // If we caught the if above, then we're a convert() and will catch this next time.
            readCount = 0;
            iChar = 0;
        }
    }

    // Remember any left over stuff, clearing buffer as well for MustFlush
    if (decoder != null)
    {
        decoder.iChar = (int)iChar;
        decoder.readByteCount = readCount;
        decoder._bytesUsed = (int)(bytes - byteStart);
    }

    // Shouldn't have anything in fallback buffer for GetChars
    // (don't have to check _throwOnOverflow for chars)
    Debug.Assert(fallbackBuffer.Remaining == 0,
        "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");

    // Return our count
    return (int)(chars - charStart);
}
// Combines a UTF-16 surrogate pair into the single 32-bit value it represents:
// (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000.
// The inputs are not validated here; callers are expected to pass a real high/low pair.
private uint GetSurrogate(char cHigh, char cLow)
{
    uint highOffset = (uint)cHigh - 0xD800;
    uint lowOffset = (uint)cLow - 0xDC00;

    return (highOffset * 0x400) + lowOffset + 0x10000;
}
// Returns the high (leading) surrogate for a value above 0xFFFF:
// the offset from 0x10000, divided by 0x400, added to 0xD800.
private char GetHighSurrogate(uint iChar)
{
    uint offset = iChar - 0x10000;
    uint highBits = offset / 0x400;

    return (char)(highBits + 0xD800);
}
// Returns the low (trailing) surrogate for a value above 0xFFFF:
// the offset from 0x10000, modulo 0x400, added to 0xDC00.
private char GetLowSurrogate(uint iChar)
{
    uint offset = iChar - 0x10000;
    uint lowBits = offset % 0x400;

    return (char)(lowBits + 0xDC00);
}
// Hands out a new UTF32Decoder bound to this encoding instance.
public override Decoder GetDecoder() => new UTF32Decoder(this);
// Hands out a new EncoderNLS bound to this encoding instance.
public override Encoder GetEncoder() => new EncoderNLS(this);
// Returns an upper bound on the number of bytes needed to encode charCount chars,
// taking the encoder fallback's maximum expansion into account.
// Throws ArgumentOutOfRangeException when charCount is negative or when the bound
// does not fit in an int.
public override int GetMaxByteCount(int charCount)
{
    // Only a negative count is an argument error; without this guard every call would throw.
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback.
    // Computed as a long so the multiplications below cannot wrap before the range check.
    long byteCount = (long)charCount + 1;

    if (EncoderFallback.MaxCharCount > 1)
    {
        byteCount *= EncoderFallback.MaxCharCount;
    }

    // UTF-32 uses one 32-bit storage unit per character: 4 bytes per char.
    byteCount *= 4;

    if (byteCount > 0x7fffffff)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
    }

    return (int)byteCount;
}
// Returns an upper bound on the number of chars that decoding byteCount bytes can produce,
// taking the decoder fallback's maximum expansion into account.
// Throws ArgumentOutOfRangeException when byteCount is negative or when the bound
// does not fit in an int.
public override int GetMaxCharCount(int byteCount)
{
    // Only a negative count is an argument error; without this guard every call would throw.
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // A supplementary character becomes 2 surrogate characters, so 4 input bytes becomes 2 chars,
    // plus we may have 1 surrogate char left over if the decoder has 3 bytes in it already for a non-bmp char.
    // Have to add another one because 1/2 == 0, but 3 bytes left over could be 2 char surrogate pair.
    //
    // Kept in a long: with an int the fallback multiplication below wraps silently and the
    // "> 0x7fffffff" check at the end can never be true.
    long charCount = ((long)byteCount / 2) + 2;

    // Also consider fallback because our input bytes could be out of range of unicode.
    // Since fallback would fallback 4 bytes at a time, we'll only fall back 1/2 of MaxCharCount.
    if (DecoderFallback.MaxCharCount > 2)
    {
        // Multiply by the fallback size
        charCount *= DecoderFallback.MaxCharCount;

        // We were already figuring 2 chars per 4 bytes, but fallback will be different #
        charCount /= 2;
    }

    if (charCount > 0x7fffffff)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
    }

    return (int)charCount;
}
// Returns the UTF-32 byte order mark for this instance's byte order, or an empty array
// when this instance was configured not to emit one.
public override byte[] GetPreamble()
{
    if (!_emitUTF32ByteOrderMark)
    {
        return Array.Empty<byte>();
    }

    // Allocate new array to prevent users from modifying it.
    if (_bigEndian)
    {
        return new byte[4] { 0x00, 0x00, 0xFE, 0xFF }; // big endian: 00 00 FE FF
    }

    return new byte[4] { 0xFF, 0xFE, 0x00, 0x00 }; // little endian: FF FE 00 00
}
// Span form of the byte order mark. Falls back to GetPreamble() when the runtime type is
// not exactly UTF32Encoding; otherwise returns an empty span or the 4-byte mark for the
// configured byte order.
public override ReadOnlySpan<byte> Preamble
{
    get
    {
        // in case a derived UTF32Encoding overrode GetPreamble
        if (GetType() != typeof(UTF32Encoding))
        {
            return new ReadOnlySpan<byte>(GetPreamble());
        }

        if (!_emitUTF32ByteOrderMark)
        {
            return default;
        }

        // uses C# compiler's optimization for static byte[] data
        return _bigEndian
            ? (ReadOnlySpan<byte>)new byte[4] { 0x00, 0x00, 0xFE, 0xFF }
            : (ReadOnlySpan<byte>)new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };
    }
}
// Two UTF32Encoding instances are equal when they agree on byte-order-mark emission,
// byte order, and both fallbacks. Anything that is not a UTF32Encoding (including null)
// compares unequal.
public override bool Equals(object? value)
{
    // The pattern match fails for null and for other types, so the expression is false
    // on that path instead of falling off the end of the method.
    return value is UTF32Encoding that &&
           (_emitUTF32ByteOrderMark == that._emitUTF32ByteOrderMark) &&
           (_bigEndian == that._bigEndian) &&
           (EncoderFallback.Equals(that.EncoderFallback)) &&
           (DecoderFallback.Equals(that.DecoderFallback));
}
// Hash built from the same inputs Equals compares (both fallbacks, BOM flag, byte order)
// plus the code page.
public override int GetHashCode()
{
    // Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
    int bomContribution = _emitUTF32ByteOrderMark ? 4 : 0;
    int endianContribution = _bigEndian ? 8 : 0;

    return this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
           CodePage + bomContribution + endianContribution;
}
// Decoder that carries a partially read UTF-32 code unit between calls.
// GetChars writes the leftover bits into iChar and the number of bytes already consumed
// for that unit into readByteCount.
private sealed class UTF32Decoder : DecoderNLS
{
    // Need a place to store any extra bytes we may have picked up
    internal int iChar = 0;
    internal int readByteCount = 0;

    public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
    {
    }

    // Discards any partially read code unit and resets the fallback buffer if one exists.
    public override void Reset()
    {
        // Clear both halves of the saved state so no stale bits survive a reset.
        this.iChar = 0;
        this.readByteCount = 0;
        _fallbackBuffer?.Reset();
    }

    // Anything left in our decoder?
    internal override bool HasState =>
        // ReadByteCount is our flag. (iChar==0 doesn't mean much).
        (this.readByteCount != 0);
}