1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
9 using System
.Diagnostics
;
10 using System
.Runtime
.InteropServices
;
14 // Encodes text into and out of UTF-32. UTF-32 is a way of writing
15 // Unicode characters with a single storage unit (32 bits) per character.
17 // The UTF-32 byte order mark is simply the Unicode byte order mark
18 // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000). The byte order
19 // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
20 // switch the byte orderings.
22 public sealed class UTF32Encoding
: Encoding
25 words bits UTF-32 representation
26 ----- ---- -----------------------------------
27 1 16 00000000 00000000 xxxxxxxx xxxxxxxx
28 2 21 00000000 000xxxxx hhhhhhll llllllll
29 ----- ---- -----------------------------------
32 Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
// Shared instances used by Encoding.UTF32 / Encoding.BigEndianUTF32 for lazy initialization.
// As static readonly fields they are not created until a static member of this class
// is referenced.
internal static readonly UTF32Encoding s_default =
    new UTF32Encoding(bigEndian: false, byteOrderMark: true);
internal static readonly UTF32Encoding s_bigEndianDefault =
    new UTF32Encoding(bigEndian: true, byteOrderMark: true);

// Copy of the byteOrderMark constructor argument.
private readonly bool _emitUTF32ByteOrderMark;

// Copy of the throwOnInvalidCharacters constructor argument; SetDefaultFallbacks
// consults it when choosing the fallbacks.
private readonly bool _isThrowException;

// Copy of the bigEndian constructor argument (the same flag that selects 12001 vs. 12000
// in the base constructor call).
private readonly bool _bigEndian;
/// <summary>
/// Initializes a new instance by forwarding to the two-argument constructor with
/// <c>bigEndian = false</c> and <c>byteOrderMark = true</c>.
/// </summary>
public UTF32Encoding()
    : this(bigEndian: false, byteOrderMark: true)
{
}
/// <summary>
/// Initializes a new instance with the requested byte order and byte-order-mark setting.
/// </summary>
/// <param name="bigEndian">
/// <c>true</c> passes 12001 to the base constructor, <c>false</c> passes 12000;
/// the flag is also remembered in <c>_bigEndian</c>.
/// </param>
/// <param name="byteOrderMark">Remembered in <c>_emitUTF32ByteOrderMark</c>.</param>
public UTF32Encoding(bool bigEndian, bool byteOrderMark)
    : base(bigEndian ? 12001 : 12000)
{
    _emitUTF32ByteOrderMark = byteOrderMark;
    _bigEndian = bigEndian;
}
/// <summary>
/// Initializes a new instance with the requested byte order and byte-order-mark setting,
/// optionally switching to exception fallbacks.
/// </summary>
/// <param name="bigEndian">Forwarded to the two-argument constructor.</param>
/// <param name="byteOrderMark">Forwarded to the two-argument constructor.</param>
/// <param name="throwOnInvalidCharacters">
/// Remembered in <c>_isThrowException</c>; when <c>true</c>, the fallbacks are set up again.
/// </param>
public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters)
    : this(bigEndian, byteOrderMark)
{
    _isThrowException = throwOnInvalidCharacters;

    // The Encoding constructor already set the fallbacks, but it did so before
    // _isThrowException was assigned, so redo it when we are throwing exceptions.
    if (throwOnInvalidCharacters)
    {
        SetDefaultFallbacks();
    }
}
65 internal override void SetDefaultFallbacks()
67 // For UTF-X encodings, we use a replacement fallback with U+FFFD ("\xFFFD"), or the exception fallbacks when _isThrowException is set
68 if (_isThrowException
)
70 this.encoderFallback
= EncoderFallback
.ExceptionFallback
;
71 this.decoderFallback
= DecoderFallback
.ExceptionFallback
;
75 this.encoderFallback
= new EncoderReplacementFallback("\xFFFD");
76 this.decoderFallback
= new DecoderReplacementFallback("\xFFFD");
80 // The following methods are copied from EncodingNLS.cs.
81 // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
82 // These should be kept in sync for the following classes:
83 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
85 // Returns the number of bytes required to encode a range of characters in a character array.
88 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
89 // So if you fix this, fix the others. Currently those include:
90 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
91 // parent method is safe
93 public override unsafe int GetByteCount(char[] chars
, int index
, int count
)
95 // Validate input parameters
97 throw new ArgumentNullException(nameof(chars
), SR
.ArgumentNull_Array
);
99 if (index
< 0 || count
< 0)
100 throw new ArgumentOutOfRangeException(index
< 0 ? nameof(index
) : nameof(count
), SR
.ArgumentOutOfRange_NeedNonNegNum
);
102 if (chars
.Length
- index
< count
)
103 throw new ArgumentOutOfRangeException(nameof(chars
), SR
.ArgumentOutOfRange_IndexCountBuffer
);
105 // If no input, return 0, avoid fixed empty array problem
109 // Just call the pointer version
110 fixed (char* pChars
= chars
)
111 return GetByteCount(pChars
+ index
, count
, null);
114 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
115 // So if you fix this, fix the others. Currently those include:
116 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
117 // parent method is safe
119 public override unsafe int GetByteCount(string s
)
123 throw new ArgumentNullException(nameof(s
));
125 fixed (char* pChars
= s
)
126 return GetByteCount(pChars
, s
.Length
, null);
129 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
130 // So if you fix this, fix the others. Currently those include:
131 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
133 [CLSCompliant(false)]
134 public override unsafe int GetByteCount(char* chars
, int count
)
136 // Validate Parameters
138 throw new ArgumentNullException(nameof(chars
), SR
.ArgumentNull_Array
);
141 throw new ArgumentOutOfRangeException(nameof(count
), SR
.ArgumentOutOfRange_NeedNonNegNum
);
143 // Call it with empty encoder
144 return GetByteCount(chars
, count
, null);
147 // Parent method is safe.
148 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
149 // So if you fix this, fix the others. Currently those include:
150 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
/// <summary>
/// Encodes <paramref name="charCount"/> characters of <paramref name="s"/>, starting at
/// <paramref name="charIndex"/>, into <paramref name="bytes"/> starting at
/// <paramref name="byteIndex"/>, by delegating to the pointer-based GetBytes overload.
/// </summary>
/// <returns>Whatever the pointer-based overload returns.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="s"/> or <paramref name="bytes"/> is null (s is reported first).
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="charIndex"/> or <paramref name="charCount"/> is negative, the range
/// exceeds the string, or <paramref name="byteIndex"/> lies outside <paramref name="bytes"/>.
/// </exception>
public override unsafe int GetBytes(string s, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (s.Length - charIndex < charCount)
    {
        throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    // Everything from byteIndex to the end of the array is available for output.
    int availableBytes = bytes.Length - byteIndex;

    // NOTE(review): the destination is pinned through MemoryMarshal.GetReference rather than
    // directly, presumably so a zero-length array still yields a usable pointer; confirm.
    fixed (char* pSource = s)
    fixed (byte* pDestination = &MemoryMarshal.GetReference((Span<byte>)bytes))
    {
        return GetBytes(pSource + charIndex, charCount, pDestination + byteIndex, availableBytes, null);
    }
}
173 // Encodes a range of characters in a character array into a range of bytes
174 // in a byte array. An exception occurs if the byte array is not large
175 // enough to hold the complete encoding of the characters. The
176 // GetByteCount method can be used to determine the exact number of
177 // bytes that will be produced for a given range of characters.
178 // Alternatively, the GetMaxByteCount method can be used to
179 // determine the maximum number of bytes that will be produced for a given
180 // number of characters, regardless of the actual character values.
182 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
183 // So if you fix this, fix the others. Currently those include:
184 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
185 // parent method is safe
187 public override unsafe int GetBytes(char[] chars
, int charIndex
, int charCount
,
188 byte[] bytes
, int byteIndex
)
190 // Validate parameters
191 if (chars
== null || bytes
== null)
192 throw new ArgumentNullException(chars
== null ? nameof(chars
) : nameof(bytes
), SR
.ArgumentNull_Array
);
194 if (charIndex
< 0 || charCount
< 0)
195 throw new ArgumentOutOfRangeException(charIndex
< 0 ? nameof(charIndex
) : nameof(charCount
), SR
.ArgumentOutOfRange_NeedNonNegNum
);
197 if (chars
.Length
- charIndex
< charCount
)
198 throw new ArgumentOutOfRangeException(nameof(chars
), SR
.ArgumentOutOfRange_IndexCountBuffer
);
200 if (byteIndex
< 0 || byteIndex
> bytes
.Length
)
201 throw new ArgumentOutOfRangeException(nameof(byteIndex
), SR
.ArgumentOutOfRange_Index
);
203 // If nothing to encode return 0, avoid fixed problem
207 // Just call pointer version
208 int byteCount
= bytes
.Length
- byteIndex
;
210 fixed (char* pChars
= chars
) fixed (byte* pBytes
= &MemoryMarshal
.GetReference((Span
<byte>)bytes
))
211 // Remember that byteCount is the space left in bytes after byteIndex, not the size of the array.
212 return GetBytes(pChars
+ charIndex
, charCount
, pBytes
+ byteIndex
, byteCount
, null);
215 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
216 // So if you fix this, fix the others. Currently those include:
217 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
/// <summary>
/// Validates the raw buffers and then delegates to the internal GetBytes overload
/// with no encoder.
/// </summary>
/// <returns>Whatever the internal overload returns.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="bytes"/> or <paramref name="chars"/> is null (bytes is reported first).
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="charCount"/> or <paramref name="byteCount"/> is negative
/// (charCount is reported first).
/// </exception>
[CLSCompliant(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
    // Validate parameters
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetBytes(chars, charCount, bytes, byteCount, null);
}
232 // Returns the number of characters produced by decoding a range of bytes in a byte array.
235 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
236 // So if you fix this, fix the others. Currently those include:
237 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
238 // parent method is safe
240 public override unsafe int GetCharCount(byte[] bytes
, int index
, int count
)
242 // Validate Parameters
244 throw new ArgumentNullException(nameof(bytes
), SR
.ArgumentNull_Array
);
246 if (index
< 0 || count
< 0)
247 throw new ArgumentOutOfRangeException(index
< 0 ? nameof(index
) : nameof(count
), SR
.ArgumentOutOfRange_NeedNonNegNum
);
249 if (bytes
.Length
- index
< count
)
250 throw new ArgumentOutOfRangeException(nameof(bytes
), SR
.ArgumentOutOfRange_IndexCountBuffer
);
252 // If no input just return 0, fixed doesn't like 0 length arrays.
256 // Just call pointer version
257 fixed (byte* pBytes
= bytes
)
258 return GetCharCount(pBytes
+ index
, count
, null);
261 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
262 // So if you fix this, fix the others. Currently those include:
263 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
265 [CLSCompliant(false)]
266 public override unsafe int GetCharCount(byte* bytes
, int count
)
268 // Validate Parameters
270 throw new ArgumentNullException(nameof(bytes
), SR
.ArgumentNull_Array
);
273 throw new ArgumentOutOfRangeException(nameof(count
), SR
.ArgumentOutOfRange_NeedNonNegNum
);
275 return GetCharCount(bytes
, count
, null);
278 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
279 // So if you fix this, fix the others. Currently those include:
280 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
281 // parent method is safe
283 public override unsafe int GetChars(byte[] bytes
, int byteIndex
, int byteCount
,
284 char[] chars
, int charIndex
)
286 // Validate Parameters
287 if (bytes
== null || chars
== null)
288 throw new ArgumentNullException(bytes
== null ? nameof(bytes
) : nameof(chars
), SR
.ArgumentNull_Array
);
290 if (byteIndex
< 0 || byteCount
< 0)
291 throw new ArgumentOutOfRangeException(byteIndex
< 0 ? nameof(byteIndex
) : nameof(byteCount
), SR
.ArgumentOutOfRange_NeedNonNegNum
);
293 if (bytes
.Length
- byteIndex
< byteCount
)
294 throw new ArgumentOutOfRangeException(nameof(bytes
), SR
.ArgumentOutOfRange_IndexCountBuffer
);
296 if (charIndex
< 0 || charIndex
> chars
.Length
)
297 throw new ArgumentOutOfRangeException(nameof(charIndex
), SR
.ArgumentOutOfRange_Index
);
299 // If no input, return 0 & avoid fixed problem
303 // Just call pointer version
304 int charCount
= chars
.Length
- charIndex
;
306 fixed (byte* pBytes
= bytes
) fixed (char* pChars
= &MemoryMarshal
.GetReference((Span
<char>)chars
))
307 // Remember that charCount is the space left in chars after charIndex, not the size of the array
308 return GetChars(pBytes
+ byteIndex
, byteCount
, pChars
+ charIndex
, charCount
, null);
311 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
312 // So if you fix this, fix the others. Currently those include:
313 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
/// <summary>
/// Validates the raw buffers and then delegates to the internal GetChars overload
/// with no decoder.
/// </summary>
/// <returns>Whatever the internal overload returns.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="bytes"/> or <paramref name="chars"/> is null (bytes is reported first).
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="charCount"/> or <paramref name="byteCount"/> is negative
/// (charCount is reported first).
/// </exception>
[CLSCompliant(false)]
public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
    // Validate parameters
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetChars(bytes, byteCount, chars, charCount, null);
}
328 // Returns a string containing the decoded representation of a range of
329 // bytes in a byte array.
331 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
332 // So if you fix this, fix the others. Currently those include:
333 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
334 // parent method is safe
336 public override unsafe string GetString(byte[] bytes
, int index
, int count
)
338 // Validate Parameters
340 throw new ArgumentNullException(nameof(bytes
), SR
.ArgumentNull_Array
);
342 if (index
< 0 || count
< 0)
343 throw new ArgumentOutOfRangeException(index
< 0 ? nameof(index
) : nameof(count
), SR
.ArgumentOutOfRange_NeedNonNegNum
);
345 if (bytes
.Length
- index
< count
)
346 throw new ArgumentOutOfRangeException(nameof(bytes
), SR
.ArgumentOutOfRange_IndexCountBuffer
);
348 // Avoid problems with empty input buffer
349 if (count
== 0) return string.Empty
;
351 fixed (byte* pBytes
= bytes
)
352 return string.CreateStringFromEncoding(
353 pBytes
+ index
, count
, this);
357 // End of standard methods copied from EncodingNLS.cs
359 internal override unsafe int GetByteCount(char* chars
, int count
, EncoderNLS
? encoder
)
361 Debug
.Assert(chars
!= null, "[UTF32Encoding.GetByteCount]chars!=null");
362 Debug
.Assert(count
>= 0, "[UTF32Encoding.GetByteCount]count >=0");
364 char* end
= chars
+ count
;
365 char* charStart
= chars
;
368 char highSurrogate
= '\0';
370 // For fallback we may need a fallback buffer
371 EncoderFallbackBuffer
? fallbackBuffer
= null;
372 char* charsForFallback
;
376 highSurrogate
= encoder
._charLeftOver
;
377 fallbackBuffer
= encoder
.FallbackBuffer
;
379 // We mustn't have left over fallback data when counting
380 if (fallbackBuffer
.Remaining
> 0)
381 throw new ArgumentException(SR
.Format(SR
.Argument_EncoderFallbackNotEmpty
, this.EncodingName
, encoder
.Fallback
?.GetType().ToString() ?? string.Empty
));
385 fallbackBuffer
= this.encoderFallback
.CreateFallbackBuffer();
388 // Set our internal fallback interesting things.
389 fallbackBuffer
.InternalInitialize(charStart
, end
, encoder
, false);
394 while (((ch
= fallbackBuffer
.InternalGetNextChar()) != 0) || chars
< end
)
396 // First unwind any fallback
399 // No fallback, just get next char
404 // Do we need a low surrogate?
405 if (highSurrogate
!= '\0')
408 // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
410 if (char.IsLowSurrogate(ch
))
413 highSurrogate
= '\0';
416 // One surrogate pair will be translated into 4 bytes UTF32.
423 // We are missing our low surrogate, decrement chars and fallback the high surrogate
424 // The high surrogate may have come from the encoder, but nothing else did.
425 Debug
.Assert(chars
> charStart
,
426 "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
430 charsForFallback
= chars
;
431 fallbackBuffer
.InternalFallback(highSurrogate
, ref charsForFallback
);
432 chars
= charsForFallback
;
434 // We're going to fallback the old high surrogate.
435 highSurrogate
= '\0';
439 // Do we have another high surrogate?
440 if (char.IsHighSurrogate(ch
))
443 // We'll have a high surrogate to check next time.
449 // Check for illegal characters
450 if (char.IsLowSurrogate(ch
))
452 // We have a leading low surrogate, do the fallback
453 charsForFallback
= chars
;
454 fallbackBuffer
.InternalFallback(ch
, ref charsForFallback
);
455 chars
= charsForFallback
;
457 // Try again with fallback buffer
461 // We get to add the character (4 bytes UTF32)
465 // May have to do our last surrogate
466 if ((encoder
== null || encoder
.MustFlush
) && highSurrogate
> 0)
468 // We have to do the fallback for the lonely high surrogate
469 charsForFallback
= chars
;
470 fallbackBuffer
.InternalFallback(highSurrogate
, ref charsForFallback
);
471 chars
= charsForFallback
;
473 highSurrogate
= (char)0;
477 // Check for overflows.
479 throw new ArgumentOutOfRangeException(nameof(count
), SR
.ArgumentOutOfRange_GetByteCountOverflow
);
481 // Shouldn't have anything in fallback buffer for GetByteCount
482 // (don't have to check _throwOnOverflow for count)
483 Debug
.Assert(fallbackBuffer
.Remaining
== 0,
484 "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");
490 internal override unsafe int GetBytes(char* chars
, int charCount
,
491 byte* bytes
, int byteCount
, EncoderNLS
? encoder
)
493 Debug
.Assert(chars
!= null, "[UTF32Encoding.GetBytes]chars!=null");
494 Debug
.Assert(bytes
!= null, "[UTF32Encoding.GetBytes]bytes!=null");
495 Debug
.Assert(byteCount
>= 0, "[UTF32Encoding.GetBytes]byteCount >=0");
496 Debug
.Assert(charCount
>= 0, "[UTF32Encoding.GetBytes]charCount >=0");
498 char* charStart
= chars
;
499 char* charEnd
= chars
+ charCount
;
500 byte* byteStart
= bytes
;
501 byte* byteEnd
= bytes
+ byteCount
;
503 char highSurrogate
= '\0';
505 // For fallback we may need a fallback buffer
506 EncoderFallbackBuffer
? fallbackBuffer
= null;
507 char* charsForFallback
;
511 highSurrogate
= encoder
._charLeftOver
;
512 fallbackBuffer
= encoder
.FallbackBuffer
;
514 // We mustn't have left over fallback data when not converting
515 if (encoder
._throwOnOverflow
&& fallbackBuffer
.Remaining
> 0)
516 throw new ArgumentException(SR
.Format(SR
.Argument_EncoderFallbackNotEmpty
, this.EncodingName
, encoder
.Fallback
?.GetType()));
520 fallbackBuffer
= this.encoderFallback
.CreateFallbackBuffer();
523 // Set our internal fallback interesting things.
524 fallbackBuffer
.InternalInitialize(charStart
, charEnd
, encoder
, true);
529 while (((ch
= fallbackBuffer
.InternalGetNextChar()) != 0) || chars
< charEnd
)
531 // First unwind any fallback
534 // No fallback, just get next char
539 // Do we need a low surrogate?
540 if (highSurrogate
!= '\0')
543 // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
545 if (char.IsLowSurrogate(ch
))
547 // Is it a legal one?
548 uint iTemp
= GetSurrogate(highSurrogate
, ch
);
549 highSurrogate
= '\0';
552 // One surrogate pair will be translated into 4 bytes UTF32.
554 if (bytes
+ 3 >= byteEnd
)
556 // Don't have 4 bytes
557 if (fallbackBuffer
.bFallingBack
)
559 fallbackBuffer
.MovePrevious(); // Aren't using these 2 fallback chars
560 fallbackBuffer
.MovePrevious();
564 // If we don't have enough room, then either we should've advanced a while
565 // or we should have bytes==byteStart and throw below
566 Debug
.Assert(chars
> charStart
+ 1 || bytes
== byteStart
,
567 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
568 chars
-= 2; // Aren't using those 2 chars
570 ThrowBytesOverflow(encoder
, bytes
== byteStart
); // Throw maybe (if no bytes written)
571 highSurrogate
= (char)0; // Nothing left over (we backed up to start of pair if supplementary)
577 *(bytes
++) = (byte)(0x00);
578 *(bytes
++) = (byte)(iTemp
>> 16); // Implies & 0xFF, which isn't needed cause high are all 0
579 *(bytes
++) = (byte)(iTemp
>> 8); // Implies & 0xFF
580 *(bytes
++) = (byte)(iTemp
); // Implies & 0xFF
584 *(bytes
++) = (byte)(iTemp
); // Implies & 0xFF
585 *(bytes
++) = (byte)(iTemp
>> 8); // Implies & 0xFF
586 *(bytes
++) = (byte)(iTemp
>> 16); // Implies & 0xFF, which isn't needed cause high are all 0
587 *(bytes
++) = (byte)(0x00);
592 // We are missing our low surrogate, decrement chars and fallback the high surrogate
593 // The high surrogate may have come from the encoder, but nothing else did.
594 Debug
.Assert(chars
> charStart
,
595 "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
599 charsForFallback
= chars
;
600 fallbackBuffer
.InternalFallback(highSurrogate
, ref charsForFallback
);
601 chars
= charsForFallback
;
603 // We're going to fallback the old high surrogate.
604 highSurrogate
= '\0';
608 // Do we have another high surrogate?, if so remember it
609 if (char.IsHighSurrogate(ch
))
612 // We'll have a high surrogate to check next time.
618 // Check for illegal characters (low surrogate)
619 if (char.IsLowSurrogate(ch
))
621 // We have a leading low surrogate, do the fallback
622 charsForFallback
= chars
;
623 fallbackBuffer
.InternalFallback(ch
, ref charsForFallback
);
624 chars
= charsForFallback
;
626 // Try again with fallback buffer
630 // We get to add the character, yippee.
631 if (bytes
+ 3 >= byteEnd
)
633 // Don't have 4 bytes
634 if (fallbackBuffer
.bFallingBack
)
635 fallbackBuffer
.MovePrevious(); // Aren't using this fallback char
638 // Must've advanced already
639 Debug
.Assert(chars
> charStart
,
640 "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
641 chars
--; // Aren't using this char
643 ThrowBytesOverflow(encoder
, bytes
== byteStart
); // Throw maybe (if no bytes written)
644 break; // Didn't throw, stop
649 *(bytes
++) = (byte)(0x00);
650 *(bytes
++) = (byte)(0x00);
651 *(bytes
++) = (byte)((uint)ch
>> 8); // Implies & 0xFF
652 *(bytes
++) = (byte)(ch
); // Implies & 0xFF
656 *(bytes
++) = (byte)(ch
); // Implies & 0xFF
657 *(bytes
++) = (byte)((uint)ch
>> 8); // Implies & 0xFF
658 *(bytes
++) = (byte)(0x00);
659 *(bytes
++) = (byte)(0x00);
663 // May have to do our last surrogate
664 if ((encoder
== null || encoder
.MustFlush
) && highSurrogate
> 0)
666 // We have to do the fallback for the lonely high surrogate
667 charsForFallback
= chars
;
668 fallbackBuffer
.InternalFallback(highSurrogate
, ref charsForFallback
);
669 chars
= charsForFallback
;
671 highSurrogate
= (char)0;
675 // Fix our encoder if we have one
676 Debug
.Assert(highSurrogate
== 0 || (encoder
!= null && !encoder
.MustFlush
),
677 "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");
681 // Remember our left over surrogate (or 0 if flushing)
682 encoder
._charLeftOver
= highSurrogate
;
685 encoder
._charsUsed
= (int)(chars
- charStart
);
688 // return the new length
689 return (int)(bytes
- byteStart
);
692 internal override unsafe int GetCharCount(byte* bytes
, int count
, DecoderNLS
? baseDecoder
)
694 Debug
.Assert(bytes
!= null, "[UTF32Encoding.GetCharCount]bytes!=null");
695 Debug
.Assert(count
>= 0, "[UTF32Encoding.GetCharCount]count >=0");
697 UTF32Decoder
? decoder
= (UTF32Decoder
?)baseDecoder
;
701 byte* end
= bytes
+ count
;
702 byte* byteStart
= bytes
;
708 // For fallback we may need a fallback buffer
709 DecoderFallbackBuffer
? fallbackBuffer
= null;
711 // See if there's anything in our decoder
714 readCount
= decoder
.readByteCount
;
715 iChar
= (uint)decoder
.iChar
;
716 fallbackBuffer
= decoder
.FallbackBuffer
;
718 // Shouldn't have anything in fallback buffer for GetCharCount
719 // (don't have to check _throwOnOverflow for chars or count)
720 Debug
.Assert(fallbackBuffer
.Remaining
== 0,
721 "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
725 fallbackBuffer
= this.decoderFallback
.CreateFallbackBuffer();
728 // Set our internal fallback interesting things.
729 fallbackBuffer
.InternalInitialize(byteStart
, null);
731 // Loop through our input one byte at a time, accumulating 4 bytes per character
732 while (bytes
< end
&& charCount
>= 0)
734 // Get our next character
737 // Scoot left and add it to the bottom
743 // Scoot right and add it to the top
745 iChar
+= (uint)(*(bytes
++)) << 24;
750 // See if we have all the bytes yet
757 // See if it's a valid Unicode scalar value (not above 0x10FFFF and not a surrogate)
758 if (iChar
> 0x10FFFF || (iChar
>= 0xD800 && iChar
<= 0xDFFF))
760 // Need to fall back these 4 bytes
761 byte[] fallbackBytes
;
764 fallbackBytes
= new byte[] {
765 unchecked((byte)(iChar
>> 24)), unchecked((byte)(iChar
>> 16)),
766 unchecked((byte)(iChar
>> 8)), unchecked((byte)(iChar
)) };
770 fallbackBytes
= new byte[] {
771 unchecked((byte)(iChar
)), unchecked((byte)(iChar
>> 8)),
772 unchecked((byte)(iChar
>> 16)), unchecked((byte)(iChar
>> 24)) };
775 charCount
+= fallbackBuffer
.InternalFallback(fallbackBytes
, bytes
);
777 // Ignore the illegal character
782 // Ok, we have something we can add to our output
783 if (iChar
>= 0x10000)
789 // Add the rest of the surrogate or our normal character
792 // iChar is back to 0
796 // See if we have something left over that has to be decoded
797 if (readCount
> 0 && (decoder
== null || decoder
.MustFlush
))
799 // Oops, there's something left over with no place to go.
800 byte[] fallbackBytes
= new byte[readCount
];
803 while (readCount
> 0)
805 fallbackBytes
[--readCount
] = unchecked((byte)iChar
);
811 while (readCount
> 0)
813 fallbackBytes
[--readCount
] = unchecked((byte)(iChar
>> 24));
818 charCount
+= fallbackBuffer
.InternalFallback(fallbackBytes
, bytes
);
821 // Check for overflows.
823 throw new ArgumentOutOfRangeException(nameof(count
), SR
.ArgumentOutOfRange_GetByteCountOverflow
);
825 // Shouldn't have anything in fallback buffer for GetCharCount
826 // (don't have to check _throwOnOverflow for chars or count)
827 Debug
.Assert(fallbackBuffer
.Remaining
== 0,
828 "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");
834 internal override unsafe int GetChars(byte* bytes
, int byteCount
,
835 char* chars
, int charCount
, DecoderNLS
? baseDecoder
)
837 Debug
.Assert(chars
!= null, "[UTF32Encoding.GetChars]chars!=null");
838 Debug
.Assert(bytes
!= null, "[UTF32Encoding.GetChars]bytes!=null");
839 Debug
.Assert(byteCount
>= 0, "[UTF32Encoding.GetChars]byteCount >=0");
840 Debug
.Assert(charCount
>= 0, "[UTF32Encoding.GetChars]charCount >=0");
842 UTF32Decoder
? decoder
= (UTF32Decoder
?)baseDecoder
;
845 char* charStart
= chars
;
846 char* charEnd
= chars
+ charCount
;
848 byte* byteStart
= bytes
;
849 byte* byteEnd
= bytes
+ byteCount
;
851 // See if there's anything in our decoder (but don't clear it yet)
855 // For fallback we may need a fallback buffer
856 DecoderFallbackBuffer
? fallbackBuffer
= null;
857 char* charsForFallback
;
859 // See if there's anything in our decoder
862 readCount
= decoder
.readByteCount
;
863 iChar
= (uint)decoder
.iChar
;
864 Debug
.Assert(baseDecoder
!= null);
865 fallbackBuffer
= baseDecoder
.FallbackBuffer
;
867 // Shouldn't have anything in fallback buffer for GetChars
868 // (don't have to check _throwOnOverflow for chars)
869 Debug
.Assert(fallbackBuffer
.Remaining
== 0,
870 "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
874 fallbackBuffer
= this.decoderFallback
.CreateFallbackBuffer();
877 // Set our internal fallback interesting things.
878 fallbackBuffer
.InternalInitialize(bytes
, chars
+ charCount
);
880 // Loop through our input one byte at a time, accumulating 4 bytes per character
881 while (bytes
< byteEnd
)
883 // Get our next character
886 // Scoot left and add it to the bottom
892 // Scoot right and add it to the top
894 iChar
+= (uint)(*(bytes
++)) << 24;
899 // See if we have all the bytes yet
906 // See if it's a valid Unicode scalar value (not above 0x10FFFF and not a surrogate)
907 if (iChar
> 0x10FFFF || (iChar
>= 0xD800 && iChar
<= 0xDFFF))
909 // Need to fall back these 4 bytes
910 byte[] fallbackBytes
;
913 fallbackBytes
= new byte[] {
914 unchecked((byte)(iChar
>> 24)), unchecked((byte)(iChar
>> 16)),
915 unchecked((byte)(iChar
>> 8)), unchecked((byte)(iChar
)) };
919 fallbackBytes
= new byte[] {
920 unchecked((byte)(iChar
)), unchecked((byte)(iChar
>> 8)),
921 unchecked((byte)(iChar
>> 16)), unchecked((byte)(iChar
>> 24)) };
924 // Chars won't be updated unless this works.
925 charsForFallback
= chars
;
926 bool fallbackResult
= fallbackBuffer
.InternalFallback(fallbackBytes
, bytes
, ref charsForFallback
);
927 chars
= charsForFallback
;
931 // Couldn't fallback, throw or wait til next time
932 // We either read enough bytes for bytes-=4 to work, or we're
933 // going to throw in ThrowCharsOverflow because chars == charStart
934 Debug
.Assert(bytes
>= byteStart
+ 4 || chars
== charStart
,
935 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
936 bytes
-= 4; // get back to where we were
937 iChar
= 0; // Remembering nothing
938 fallbackBuffer
.InternalReset();
939 ThrowCharsOverflow(decoder
, chars
== charStart
); // Might throw, if no chars output
940 break; // Stop here, didn't throw
943 // Ignore the illegal character
948 // Ok, we have something we can add to our output
949 if (iChar
>= 0x10000)
952 if (chars
>= charEnd
- 1)
954 // Throwing or stopping
955 // We either read enough bytes for bytes-=4 to work, or we're
956 // going to throw in ThrowCharsOverflow because chars == charStart
957 Debug
.Assert(bytes
>= byteStart
+ 4 || chars
== charStart
,
958 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
959 bytes
-= 4; // get back to where we were
960 iChar
= 0; // Remembering nothing
961 ThrowCharsOverflow(decoder
, chars
== charStart
); // Might throw, if no chars output
962 break; // Stop here, didn't throw
965 *(chars
++) = GetHighSurrogate(iChar
);
966 iChar
= GetLowSurrogate(iChar
);
968 // Bounds check for normal character
969 else if (chars
>= charEnd
)
971 // Throwing or stopping
972 // We either read enough bytes for bytes-=4 to work, or we're
973 // going to throw in ThrowCharsOverflow because chars == charStart
974 Debug
.Assert(bytes
>= byteStart
+ 4 || chars
== charStart
,
975 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
976 bytes
-= 4; // get back to where we were
977 iChar
= 0; // Remembering nothing
978 ThrowCharsOverflow(decoder
, chars
== charStart
); // Might throw, if no chars output
979 break; // Stop here, didn't throw
982 // Add the rest of the surrogate or our normal character
983 *(chars
++) = (char)iChar
;
985 // iChar is back to 0
989 // See if we have something left over that has to be decoded
990 if (readCount
> 0 && (decoder
== null || decoder
.MustFlush
))
992 // Oops, there's something left over with no place to go.
993 byte[] fallbackBytes
= new byte[readCount
];
994 int tempCount
= readCount
;
997 while (tempCount
> 0)
999 fallbackBytes
[--tempCount
] = unchecked((byte)iChar
);
1005 while (tempCount
> 0)
1007 fallbackBytes
[--tempCount
] = unchecked((byte)(iChar
>> 24));
1012 charsForFallback
= chars
;
1013 bool fallbackResult
= fallbackBuffer
.InternalFallback(fallbackBytes
, bytes
, ref charsForFallback
);
1014 chars
= charsForFallback
;
1016 if (!fallbackResult
)
1018 // Couldn't fallback.
1019 fallbackBuffer
.InternalReset();
1020 ThrowCharsOverflow(decoder
, chars
== charStart
); // Might throw, if no chars output
1021 // Stop here, didn't throw, backed up, so still nothing in buffer
1025 // Don't clear our decoder unless we could fall it back.
1026 // If we caught the if above, then we're a convert() and will catch this next time.
1032 // Remember any left over stuff, clearing buffer as well for MustFlush
1033 if (decoder
!= null)
1035 decoder
.iChar
= (int)iChar
;
1036 decoder
.readByteCount
= readCount
;
1037 decoder
._bytesUsed
= (int)(bytes
- byteStart
);
1040 // Shouldn't have anything in fallback buffer for GetChars
1041 // (don't have to check _throwOnOverflow for chars)
1042 Debug
.Assert(fallbackBuffer
.Remaining
== 0,
1043 "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
1046 return (int)(chars
- charStart
);
// Combines a UTF-16 surrogate pair into the single code point it represents:
//   (cHigh - 0xD800) * 0x400 + (cLow - 0xDC00) + 0x10000
// No range validation is performed here; callers pass a high and a low surrogate.
private uint GetSurrogate(char cHigh, char cLow)
{
    uint highOffset = (uint)cHigh - 0xD800;
    uint lowOffset = (uint)cLow - 0xDC00;

    // Shifting left by 10 is the same as multiplying by 0x400.
    return (highOffset << 10) + lowOffset + 0x10000;
}
// Returns the high (leading) surrogate for a supplementary code point:
//   (iChar - 0x10000) / 0x400 + 0xD800
private char GetHighSurrogate(uint iChar)
{
    uint offset = iChar - 0x10000;

    // Shifting right by 10 is the same as dividing an unsigned value by 0x400.
    return (char)((offset >> 10) + 0xD800);
}
// Returns the low (trailing) surrogate for a supplementary code point:
//   (iChar - 0x10000) % 0x400 + 0xDC00
private char GetLowSurrogate(uint iChar)
{
    uint offset = iChar - 0x10000;

    // Masking with 0x3FF is the same as taking an unsigned value modulo 0x400.
    return (char)((offset & 0x3FF) + 0xDC00);
}
// Creates a new UTF32Decoder bound to this encoding instance.
public override Decoder GetDecoder() => new UTF32Decoder(this);
// Creates a new EncoderNLS bound to this encoding instance.
public override Encoder GetEncoder() => new EncoderNLS(this);
// Computes an upper bound on the number of bytes produced when encoding charCount characters.
// Throws ArgumentOutOfRangeException when charCount is negative or when the bound
// does not fit in an Int32.
public override int GetMaxByteCount(int charCount)
{
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // One extra character accounts for a high surrogate left over from an earlier call.
    // The arithmetic is done in 64 bits so the range check at the end is meaningful.
    long worstCaseChars = (long)charCount + 1;

    // The encoder fallback may substitute several characters for each input character.
    int fallbackChars = EncoderFallback.MaxCharCount;
    if (fallbackChars > 1)
    {
        worstCaseChars *= fallbackChars;
    }

    // Each character is written as 4 bytes.
    long byteCount = worstCaseChars * 4;

    if (byteCount > 0x7fffffff)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
    }

    return (int)byteCount;
}
// Computes an upper bound on the number of chars produced when decoding byteCount bytes.
// Throws ArgumentOutOfRangeException when byteCount is negative or when the bound
// does not fit in an Int32.
public override int GetMaxCharCount(int byteCount)
{
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // A supplementary character becomes 2 surrogate characters, so 4 input bytes become 2 chars,
    // plus we may have 1 surrogate char left over if the decoder already holds 3 bytes of a non-BMP char.
    // Another one is added because 1/2 == 0, but 3 left-over bytes could still yield a 2 char surrogate pair.
    //
    // The arithmetic is done in 64 bits: with an Int32 accumulator the multiplication below
    // could wrap around silently, and the range check at the end could never fire.
    long charCount = ((long)byteCount / 2) + 2;

    // Also consider fallback because the input bytes could be outside the Unicode range.
    // Fallback consumes 4 bytes at a time, so only half of MaxCharCount applies per 2 counted chars.
    int fallbackChars = DecoderFallback.MaxCharCount;
    if (fallbackChars > 2)
    {
        // Scale by the fallback size ...
        charCount *= fallbackChars;

        // ... and undo the "2 chars per 4 bytes" assumption already baked into charCount.
        charCount /= 2;
    }

    if (charCount > 0x7fffffff)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
    }

    return (int)charCount;
}
// Returns the UTF-32 byte order mark for this instance, or an empty array when
// this instance was configured not to emit one.
public override byte[] GetPreamble()
{
    if (!_emitUTF32ByteOrderMark)
    {
        return Array.Empty<byte>();
    }

    // A new array is allocated on every call to prevent users from modifying it.
    return _bigEndian
        ? new byte[4] { 0x00, 0x00, 0xFE, 0xFF }  // big-endian order: 00 00 FE FF
        : new byte[4] { 0xFF, 0xFE, 0x00, 0x00 }; // little-endian order: FF FE 00 00
}
// Span view of the byte order mark; empty when this instance does not emit one.
public override ReadOnlySpan<byte> Preamble
{
    get
    {
        // In case a derived UTF32Encoding overrode GetPreamble, defer to it.
        if (GetType() != typeof(UTF32Encoding))
        {
            return new ReadOnlySpan<byte>(GetPreamble());
        }

        if (!_emitUTF32ByteOrderMark)
        {
            return default;
        }

        // The casts are applied directly to the array literals so that the
        // C# compiler's optimization for static byte[] data is used.
        if (_bigEndian)
        {
            return (ReadOnlySpan<byte>)new byte[4] { 0x00, 0x00, 0xFE, 0xFF };
        }

        return (ReadOnlySpan<byte>)new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };
    }
}
// Two UTF32Encoding instances are equal when they agree on byte order mark emission,
// byte order, and both the encoder and decoder fallbacks. Any other value is unequal.
public override bool Equals(object? value) =>
    value is UTF32Encoding that &&
    _emitUTF32ByteOrderMark == that._emitUTF32ByteOrderMark &&
    _bigEndian == that._bigEndian &&
    EncoderFallback.Equals(that.EncoderFallback) &&
    DecoderFallback.Equals(that.DecoderFallback);
// Hash built from the same members Equals compares, plus the code page.
// Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
public override int GetHashCode()
{
    int hash = this.EncoderFallback.GetHashCode();
    hash += this.DecoderFallback.GetHashCode();
    hash += CodePage;

    if (_emitUTF32ByteOrderMark)
    {
        hash += 4;
    }

    if (_bigEndian)
    {
        hash += 8;
    }

    return hash;
}
// Decoder that remembers bytes of an incomplete UTF-32 unit between calls.
private sealed class UTF32Decoder : DecoderNLS
{
    // Place to store any extra bytes picked up but not yet turned into a character.
    internal int iChar;

    // How many bytes are currently remembered; this is the "has state" flag.
    internal int readByteCount;

    public UTF32Decoder(UTF32Encoding encoding)
        : base(encoding)
    {
    }

    // Forgets any remembered bytes and resets the fallback buffer if one exists.
    public override void Reset()
    {
        iChar = 0;
        readByteCount = 0;
        _fallbackBuffer?.Reset();
    }

    // Anything left in our decoder?
    internal override bool HasState
    {
        get
        {
            // readByteCount is the flag (iChar == 0 doesn't mean much).
            return readByteCount != 0;
        }
    }
}