Updates referencesource to .NET 4.7
[mono-project.git] / mcs / class / referencesource / mscorlib / system / text / utf32encoding.cs
blob3a80fa65d863e94a8e4e155a4c3aafc2ca057d7e
1 // ==++==
2 //
3 // Copyright (c) Microsoft Corporation. All rights reserved.
4 //
5 // ==--==
6 //
7 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
8 //
10 #if FEATURE_UTF32
12 namespace System.Text
15 using System;
16 using System.Diagnostics.Contracts;
17 using System.Globalization;
18 // Encodes text into and out of UTF-32. UTF-32 is a way of writing
19 // Unicode characters with a single storage unit (32 bits) per character,
21 // The UTF-32 byte order mark is simply the Unicode byte order mark
22 // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000). The byte order
23 // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
24 // switch the byte orderings.
26 [Serializable]
27 public sealed class UTF32Encoding : Encoding
30 words bits UTF-32 representation
31 ----- ---- -----------------------------------
32 1 16 00000000 00000000 xxxxxxxx xxxxxxxx
33 2 21 00000000 000xxxxx hhhhhhll llllllll
34 ----- ---- -----------------------------------
36 Surrogate:
37 Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
// Set from the constructor's byteOrderMark argument.
private bool emitUTF32ByteOrderMark;

// Set from the constructor's throwOnInvalidCharacters argument; SetDefaultFallbacks
// reads it to choose between exception fallbacks and replacement fallbacks.
private bool isThrowException;

// Set from the constructor's bigEndian argument; selects the byte order used
// when reading and writing each 4-byte unit.
private bool bigEndian;
// Default configuration: little-endian, byteOrderMark enabled, and no
// exceptions on invalid characters.
public UTF32Encoding()
    : this(bigEndian: false, byteOrderMark: true, throwOnInvalidCharacters: false)
{
}
// Caller picks byte order and byteOrderMark; invalid characters do not throw.
public UTF32Encoding(bool bigEndian, bool byteOrderMark)
    : this(bigEndian, byteOrderMark, throwOnInvalidCharacters: false)
{
}
// Fully-specified constructor. Passes 12001 (big-endian) or 12000 (little-endian)
// to the base Encoding constructor and records the three options.
public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters)
    : base(bigEndian ? 12001 : 12000)
{
    this.isThrowException = throwOnInvalidCharacters;
    this.emitUTF32ByteOrderMark = byteOrderMark;
    this.bigEndian = bigEndian;

    // The base constructor has already set up fallbacks, but it did so before
    // isThrowException was assigned, so redo it when exceptions were requested.
    if (throwOnInvalidCharacters)
    {
        SetDefaultFallbacks();
    }
}
// Installs the encoder and decoder fallbacks that match isThrowException:
// the exception fallbacks when it is set, otherwise replacement fallbacks
// whose replacement string is "\xFFFD".
internal override void SetDefaultFallbacks()
{
    if (!this.isThrowException)
    {
        this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
        this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
        return;
    }

    this.encoderFallback = EncoderFallback.ExceptionFallback;
    this.decoderFallback = DecoderFallback.ExceptionFallback;
}
86 // The following methods are copied from EncodingNLS.cs.
87 // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimplement them here.
88 // These should be kept in sync for the following classes:
89 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
92 // Returns the number of bytes required to encode a range of characters in
93 // a character array.
95 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
96 // So if you fix this, fix the others. Currently those include:
97 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
98 // parent method is safe
// Array overload: validates the (chars, index, count) window, then forwards
// to the pointer-based counter with no encoder.
[System.Security.SecuritySafeCritical]  // auto-generated
public override unsafe int GetByteCount(char[] chars, int index, int count)
{
    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException("index",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (chars.Length - index < count)
    {
        throw new ArgumentOutOfRangeException("chars",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }
    Contract.EndContractBlock();

    // An empty array is answered directly so that it is never pinned with fixed.
    if (chars.Length == 0)
    {
        return 0;
    }

    fixed (char* charPtr = chars)
    {
        return GetByteCount(charPtr + index, count, null);
    }
}
126 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
127 // So if you fix this, fix the others. Currently those include:
128 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
129 // parent method is safe
// String overload: rejects null, then counts the whole string through the
// pointer-based counter with no encoder.
[System.Security.SecuritySafeCritical]  // auto-generated
public override unsafe int GetByteCount(String s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }
    Contract.EndContractBlock();

    fixed (char* charPtr = s)
    {
        return GetByteCount(charPtr, s.Length, null);
    }
}
143 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
144 // So if you fix this, fix the others. Currently those include:
145 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Public pointer overload: validates the arguments and forwards to the
// internal counter with a null encoder.
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    return GetByteCount(chars, count, null);
}
165 // Parent method is safe.
166 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
167 // So if you fix this, fix the others. Currently those include:
168 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// String overload: validates the source window and the destination index, then
// encodes through the pointer-based GetBytes with no encoder. The space offered
// to the encoder is everything from byteIndex to the end of the byte array.
[System.Security.SecuritySafeCritical]  // auto-generated
public override unsafe int GetBytes(String s, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (s == null)
    {
        throw new ArgumentNullException("s",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException("charIndex",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (s.Length - charIndex < charCount)
    {
        throw new ArgumentOutOfRangeException("s",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCount"));
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException("byteIndex",
            Environment.GetResourceString("ArgumentOutOfRange_Index"));
    }
    Contract.EndContractBlock();

    int bytesAvailable = bytes.Length - byteIndex;

    // fixed does not like zero-length arrays, so pin a one-byte stand-in instead;
    // bytesAvailable is 0 in that case, so nothing can be written to it.
    byte[] target = (bytes.Length == 0) ? new byte[1] : bytes;

    fixed (char* charPtr = s)
    fixed (byte* bytePtr = target)
    {
        return GetBytes(charPtr + charIndex, charCount,
                        bytePtr + byteIndex, bytesAvailable, null);
    }
}
203 // Encodes a range of characters in a character array into a range of bytes
204 // in a byte array. An exception occurs if the byte array is not large
205 // enough to hold the complete encoding of the characters. The
206 // GetByteCount method can be used to determine the exact number of
207 // bytes that will be produced for a given range of characters.
208 // Alternatively, the GetMaxByteCount method can be used to
209 // determine the maximum number of bytes that will be produced for a given
210 // number of characters, regardless of the actual character values.
212 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
213 // So if you fix this, fix the others. Currently those include:
214 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
215 // parent method is safe
// Array overload: validates the source window and the destination index, then
// encodes through the pointer-based GetBytes with no encoder. The space offered
// to the encoder is everything from byteIndex to the end of the byte array.
[System.Security.SecuritySafeCritical]  // auto-generated
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException("charIndex",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (chars.Length - charIndex < charCount)
    {
        throw new ArgumentOutOfRangeException("chars",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException("byteIndex",
            Environment.GetResourceString("ArgumentOutOfRange_Index"));
    }
    Contract.EndContractBlock();

    // Nothing to encode; also keeps an empty char array away from fixed.
    if (chars.Length == 0)
    {
        return 0;
    }

    // This is the room left in the destination, not the size of the array.
    int bytesAvailable = bytes.Length - byteIndex;

    // fixed does not like zero-length arrays, so pin a one-byte stand-in instead.
    byte[] target = (bytes.Length == 0) ? new byte[1] : bytes;

    fixed (char* charPtr = chars)
    fixed (byte* bytePtr = target)
    {
        return GetBytes(charPtr + charIndex, charCount,
                        bytePtr + byteIndex, bytesAvailable, null);
    }
}
257 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
258 // So if you fix this, fix the others. Currently those include:
259 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Public pointer overload: validates the arguments and forwards to the
// internal encoder routine with a null encoder.
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
    // bytes is checked first so that it is the name reported when both are null.
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    return GetBytes(chars, charCount, bytes, byteCount, null);
}
278 // Returns the number of characters produced by decoding a range of bytes
279 // in a byte array.
281 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
282 // So if you fix this, fix the others. Currently those include:
283 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
284 // parent method is safe
// Array overload: validates the (bytes, index, count) window, then forwards
// to the pointer-based counter with no decoder.
[System.Security.SecuritySafeCritical]  // auto-generated
public override unsafe int GetCharCount(byte[] bytes, int index, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException("index",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (bytes.Length - index < count)
    {
        throw new ArgumentOutOfRangeException("bytes",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }
    Contract.EndContractBlock();

    // An empty array is answered directly so that it is never pinned with fixed.
    if (bytes.Length == 0)
    {
        return 0;
    }

    fixed (byte* bytePtr = bytes)
    {
        return GetCharCount(bytePtr + index, count, null);
    }
}
312 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
313 // So if you fix this, fix the others. Currently those include:
314 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Public pointer overload: validates the arguments and forwards to the
// internal counter with a null decoder.
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    return GetCharCount(bytes, count, null);
}
333 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
334 // So if you fix this, fix the others. Currently those include:
335 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
336 // parent method is safe
// Array overload: validates the source window and the destination index, then
// decodes through the pointer-based GetChars with no decoder. The space offered
// to the decoder is everything from charIndex to the end of the char array.
[System.Security.SecuritySafeCritical]  // auto-generated
public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                    char[] chars, int charIndex)
{
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (byteIndex < 0)
    {
        throw new ArgumentOutOfRangeException("byteIndex",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (bytes.Length - byteIndex < byteCount)
    {
        throw new ArgumentOutOfRangeException("bytes",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }

    if (charIndex < 0 || charIndex > chars.Length)
    {
        throw new ArgumentOutOfRangeException("charIndex",
            Environment.GetResourceString("ArgumentOutOfRange_Index"));
    }
    Contract.EndContractBlock();

    // Nothing to decode; also keeps an empty byte array away from fixed.
    if (bytes.Length == 0)
    {
        return 0;
    }

    // This is the room left in the destination, not the size of the array.
    int charsAvailable = chars.Length - charIndex;

    // fixed does not like zero-length arrays, so pin a one-char stand-in instead.
    char[] target = (chars.Length == 0) ? new char[1] : chars;

    fixed (byte* bytePtr = bytes)
    fixed (char* charPtr = target)
    {
        return GetChars(bytePtr + byteIndex, byteCount,
                        charPtr + charIndex, charsAvailable, null);
    }
}
378 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
379 // So if you fix this, fix the others. Currently those include:
380 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Public pointer overload: validates the arguments and forwards to the
// internal decoder routine with a null decoder.
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
    // bytes is checked first so that it is the name reported when both are null.
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    return GetChars(bytes, byteCount, chars, charCount, null);
}
399 // Returns a string containing the decoded representation of a range of
400 // bytes in a byte array.
402 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
403 // So if you fix this, fix the others. Currently those include:
404 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
405 // parent method is safe
// Decodes the (bytes, index, count) window into a new string by handing the
// pinned bytes and this encoding to String.CreateStringFromEncoding.
[System.Security.SecuritySafeCritical]  // auto-generated
public override unsafe String GetString(byte[] bytes, int index, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException("index",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }

    if (bytes.Length - index < count)
    {
        throw new ArgumentOutOfRangeException("bytes",
            Environment.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
    }
    Contract.EndContractBlock();

    // An empty input buffer yields the empty string without being pinned.
    if (bytes.Length == 0)
    {
        return String.Empty;
    }

    fixed (byte* bytePtr = bytes)
    {
        return String.CreateStringFromEncoding(bytePtr + index, count, this);
    }
}
433 // End of standard methods copied from EncodingNLS.cs
// Counts the bytes needed to encode `count` UTF-16 chars starting at `chars`.
// Each non-surrogate char, and each high/low surrogate pair, adds 4 to the total.
// A surrogate without its partner is handed to the encoder fallback buffer; the
// loop drains fallbackBuffer.InternalGetNextChar() before reading further input,
// so fallback output is counted by the same code path.
//   chars   - first char to examine (asserted non-null).
//   count   - number of chars to examine (asserted non-negative).
//   encoder - optional. When non-null, its charLeftOver seeds the pending high
//             surrogate and its FallbackBuffer is used; a trailing high surrogate
//             is only fallen back when encoder.MustFlush is set.
// Returns the accumulated byte count.
// Throws ArgumentException when the encoder's fallback buffer has data remaining,
// and ArgumentOutOfRangeException when the running total has gone negative.
436 [System.Security.SecurityCritical] // auto-generated
437 internal override unsafe int GetByteCount(char *chars, int count, EncoderNLS encoder)
439 Contract.Assert(chars!=null, "[UTF32Encoding.GetByteCount]chars!=null");
440 Contract.Assert(count >=0, "[UTF32Encoding.GetByteCount]count >=0");
442 char* end = chars + count;
443 char* charStart = chars;
444 int byteCount = 0;
// Non-zero while a high surrogate is waiting for its low surrogate.
446 char highSurrogate = '\0';
448 // For fallback we may need a fallback buffer
449 EncoderFallbackBuffer fallbackBuffer = null;
450 if (encoder != null)
452 highSurrogate = encoder.charLeftOver;
453 fallbackBuffer = encoder.FallbackBuffer;
455 // We mustn't have left over fallback data when counting
456 if (fallbackBuffer.Remaining > 0)
457 throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
458 this.EncodingName, encoder.Fallback.GetType()));
460 else
462 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
465 // Set our internal fallback interesting things.
466 fallbackBuffer.InternalInitialize(charStart, end, encoder, false);
468 char ch;
// Re-entered via goto after a trailing high surrogate has been fallen back,
// so that the fallback's output is counted too.
469 TryAgain:
471 while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < end)
473 // First unwind any fallback
474 if (ch == 0)
476 // No fallback, just get next char
477 ch = *chars;
478 chars++;
481 // Do we need a low surrogate?
482 if (highSurrogate != '\0')
485 // In previous char, we encounter a high surrogate, so we are expecting a low surrogate here.
487 if (Char.IsLowSurrogate(ch))
489 // They're all legal
490 highSurrogate = '\0';
493 // One surrogate pair will be translated into 4 bytes UTF32.
496 byteCount += 4;
497 continue;
500 // We are missing our low surrogate, decrement chars and fallback the high surrogate
501 // The high surrogate may have come from the encoder, but nothing else did.
502 Contract.Assert(chars > charStart,
503 "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
// Step back so the current char is re-read after the fallback has been processed.
504 chars--;
506 // Do the fallback
507 fallbackBuffer.InternalFallback(highSurrogate, ref chars);
509 // We're going to fallback the old high surrogate.
510 highSurrogate = '\0';
511 continue;
515 // Do we have another high surrogate?
516 if (Char.IsHighSurrogate(ch))
519 // We'll have a high surrogate to check next time.
521 highSurrogate = ch;
522 continue;
525 // Check for illegal characters
526 if (Char.IsLowSurrogate(ch))
528 // We have a leading low surrogate, do the fallback
529 fallbackBuffer.InternalFallback(ch, ref chars);
531 // Try again with fallback buffer
532 continue;
535 // We get to add the character (4 bytes UTF32)
536 byteCount += 4;
539 // May have to do our last surrogate
540 if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
542 // We have to do the fallback for the lonely high surrogate
543 fallbackBuffer.InternalFallback(highSurrogate, ref chars);
544 highSurrogate = (char)0;
545 goto TryAgain;
548 // Check for overflows.
549 if (byteCount < 0)
550 throw new ArgumentOutOfRangeException("count", Environment.GetResourceString(
551 "ArgumentOutOfRange_GetByteCountOverflow"));
553 // Shouldn't have anything in fallback buffer for GetByteCount
554 // (don't have to check m_throwOnOverflow for count)
555 Contract.Assert(fallbackBuffer.Remaining == 0,
556 "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");
558 // Return our count
559 return byteCount;
// Encodes up to charCount UTF-16 chars from `chars` into at most byteCount bytes
// at `bytes`, writing 4 bytes per output unit in the order selected by bigEndian.
// A surrogate without its partner is handed to the encoder fallback buffer; the
// loop drains fallbackBuffer.InternalGetNextChar() before reading further input.
// When fewer than 4 bytes of room remain, the input position (or the fallback
// buffer position) is rewound, ThrowBytesOverflow is called, and, if that call
// returns, the loop stops.
//   encoder - optional. When non-null, its charLeftOver seeds the pending high
//             surrogate, its FallbackBuffer is used, and on exit charLeftOver
//             and m_charsUsed are updated.
// Returns the number of bytes written (bytes - byteStart).
// Throws ArgumentException when encoder.m_throwOnOverflow is set and the
// encoder's fallback buffer has data remaining.
562 [System.Security.SecurityCritical] // auto-generated
563 internal override unsafe int GetBytes(char *chars, int charCount,
564 byte* bytes, int byteCount, EncoderNLS encoder)
566 Contract.Assert(chars!=null, "[UTF32Encoding.GetBytes]chars!=null");
567 Contract.Assert(bytes!=null, "[UTF32Encoding.GetBytes]bytes!=null");
568 Contract.Assert(byteCount >=0, "[UTF32Encoding.GetBytes]byteCount >=0");
569 Contract.Assert(charCount >=0, "[UTF32Encoding.GetBytes]charCount >=0");
571 char* charStart = chars;
572 char* charEnd = chars + charCount;
573 byte* byteStart = bytes;
574 byte* byteEnd = bytes + byteCount;
// Non-zero while a high surrogate is waiting for its low surrogate.
576 char highSurrogate = '\0';
578 // For fallback we may need a fallback buffer
579 EncoderFallbackBuffer fallbackBuffer = null;
580 if (encoder != null)
582 highSurrogate = encoder.charLeftOver;
583 fallbackBuffer = encoder.FallbackBuffer;
585 // We mustn't have left over fallback data when not converting
586 if (encoder.m_throwOnOverflow && fallbackBuffer.Remaining > 0)
587 throw new ArgumentException(Environment.GetResourceString("Argument_EncoderFallbackNotEmpty",
588 this.EncodingName, encoder.Fallback.GetType()));
590 else
592 fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
595 // Set our internal fallback interesting things.
596 fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
598 char ch;
// Re-entered via goto after a trailing high surrogate has been fallen back,
// so that the fallback's output is encoded too.
599 TryAgain:
601 while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
603 // First unwind any fallback
604 if (ch == 0)
606 // No fallback, just get next char
607 ch = *chars;
608 chars++;
611 // Do we need a low surrogate?
612 if (highSurrogate != '\0')
615 // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
617 if (Char.IsLowSurrogate(ch))
619 // Is it a legal one?
// GetSurrogate is defined outside this view; presumably it combines the pair
// into a single scalar value as in the formula in the class comment. TODO confirm.
620 uint iTemp = GetSurrogate(highSurrogate, ch);
621 highSurrogate = '\0';
624 // One surrogate pair will be translated into 4 bytes UTF32.
// bytes+3 >= byteEnd means fewer than 4 bytes of room remain.
626 if (bytes+3 >= byteEnd)
628 // Don't have 4 bytes
629 if (fallbackBuffer.bFallingBack)
631 fallbackBuffer.MovePrevious(); // Aren't using these 2 fallback chars
632 fallbackBuffer.MovePrevious();
634 else
636 // If we don't have enough room, then either we should've advanced a while
637 // or we should have bytes==byteStart and throw below
// NOTE(review): the message prefix below says UnicodeEncoding although this is
// UTF32Encoding.GetBytes; looks like a copy/paste leftover.
638 Contract.Assert(chars > charStart + 1 || bytes == byteStart,
639 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
640 chars-=2; // Aren't using those 2 chars
642 ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
643 highSurrogate = (char)0; // Nothing left over (we backed up to start of pair if supplementary)
644 break;
647 if (bigEndian)
649 *(bytes++) = (byte)(0x00);
650 *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
651 *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
652 *(bytes++) = (byte)(iTemp); // Implies & 0xFF
654 else
656 *(bytes++) = (byte)(iTemp); // Implies & 0xFF
657 *(bytes++) = (byte)(iTemp >> 8); // Implies & 0xFF
658 *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
659 *(bytes++) = (byte)(0x00);
661 continue;
664 // We are missing our low surrogate, decrement chars and fallback the high surrogate
665 // The high surrogate may have come from the encoder, but nothing else did.
666 Contract.Assert(chars > charStart,
667 "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
// Step back so the current char is re-read after the fallback has been processed.
668 chars--;
670 // Do the fallback
671 fallbackBuffer.InternalFallback(highSurrogate, ref chars);
673 // We're going to fallback the old high surrogate.
674 highSurrogate = '\0';
675 continue;
678 // Do we have another high surrogate?, if so remember it
679 if (Char.IsHighSurrogate(ch))
682 // We'll have a high surrogate to check next time.
684 highSurrogate = ch;
685 continue;
688 // Check for illegal characters (low surrogate)
689 if (Char.IsLowSurrogate(ch))
691 // We have a leading low surrogate, do the fallback
692 fallbackBuffer.InternalFallback(ch, ref chars);
694 // Try again with fallback buffer
695 continue;
698 // We get to add the character, yippee.
// Same room check as for surrogate pairs: fewer than 4 bytes left.
699 if (bytes+3 >= byteEnd)
701 // Don't have 4 bytes
702 if (fallbackBuffer.bFallingBack)
703 fallbackBuffer.MovePrevious(); // Aren't using this fallback char
704 else
706 // Must've advanced already
707 Contract.Assert(chars > charStart,
708 "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
709 chars--; // Aren't using this char
711 ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
712 break; // Didn't throw, stop
// A non-surrogate char fits in 16 bits, so the two upper bytes are always zero.
715 if (bigEndian)
717 *(bytes++) = (byte)(0x00);
718 *(bytes++) = (byte)(0x00);
719 *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
720 *(bytes++) = (byte)(ch); // Implies & 0xFF
722 else
724 *(bytes++) = (byte)(ch); // Implies & 0xFF
725 *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
726 *(bytes++) = (byte)(0x00);
727 *(bytes++) = (byte)(0x00);
731 // May have to do our last surrogate
732 if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
734 // We have to do the fallback for the lonely high surrogate
735 fallbackBuffer.InternalFallback(highSurrogate, ref chars);
736 highSurrogate = (char)0;
737 goto TryAgain;
740 // Fix our encoder if we have one
741 Contract.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
742 "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");
744 if (encoder != null)
746 // Remember our left over surrogate (or 0 if flushing)
747 encoder.charLeftOver = highSurrogate;
749 // Need # chars used
750 encoder.m_charsUsed = (int)(chars-charStart);
753 // return the new length
754 return (int)(bytes - byteStart);
// Counts the UTF-16 chars that decoding `count` bytes starting at `bytes` would
// produce. Bytes are accumulated four at a time into a 32-bit value using the
// byte order selected by bigEndian. A value above 0x10FFFF or inside the
// surrogate range 0xD800-0xDFFF is handed to the decoder fallback buffer, and
// the count that InternalFallback returns is added to the total. A valid value
// of 0x10000 or more counts as two chars (a surrogate pair); anything else
// counts as one.
//   bytes       - first byte to examine (asserted non-null).
//   count       - number of bytes to examine (asserted non-negative).
//   baseDecoder - optional; cast to UTF32Decoder. When non-null, its
//                 readByteCount and iChar seed the partially read unit and its
//                 FallbackBuffer is used; left-over bytes of an incomplete unit
//                 are only fallen back when decoder.MustFlush is set.
// Returns the accumulated char count.
// Throws ArgumentOutOfRangeException when the running total has gone negative.
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    Contract.Assert(bytes!=null, "[UTF32Encoding.GetCharCount]bytes!=null");
    Contract.Assert(count >=0, "[UTF32Encoding.GetCharCount]count >=0");

    UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

    // None so far!
    int charCount = 0;
    byte* end = bytes + count;
    byte* byteStart = bytes;

    // Set up decoder
    int readCount = 0;
    uint iChar = 0;

    // For fallback we may need a fallback buffer
    DecoderFallbackBuffer fallbackBuffer = null;

    // See if there's anything in our decoder
    if (decoder != null)
    {
        readCount = decoder.readByteCount;
        iChar = (uint)decoder.iChar;
        fallbackBuffer = decoder.FallbackBuffer;

        // Shouldn't have anything in fallback buffer for GetCharCount
        // (don't have to check m_throwOnOverflow for chars or count)
        Contract.Assert(fallbackBuffer.Remaining == 0,
            "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
    }
    else
    {
        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
    }

    // Set our internal fallback interesting things.
    fallbackBuffer.InternalInitialize(byteStart, null);

    // Loop through our input, 4 bytes at a time. The charCount >= 0 test stops
    // the loop as soon as the total has overflowed.
    while (bytes < end && charCount >= 0)
    {
        // Get our next character
        if (bigEndian)
        {
            // Scoot left and add it to the bottom
            iChar <<= 8;
            iChar += *(bytes++);
        }
        else
        {
            // Scoot right and add it to the top
            iChar >>= 8;
            iChar += (uint)(*(bytes++)) << 24;
        }

        readCount++;

        // See if we have all the bytes yet
        if (readCount < 4)
            continue;

        // Have the bytes
        readCount = 0;

        // See if it's a valid value to decode
        if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
        {
            // Need to fall back these 4 bytes, presented in their original input order
            byte[] fallbackBytes;
            if (this.bigEndian)
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
                    unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
            }
            else
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
                    unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
            }

            charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);

            // Ignore the illegal character
            iChar = 0;
            continue;
        }

        // Ok, we have something we can add to our output
        if (iChar >= 0x10000)
        {
            // Surrogates take 2
            charCount++;
        }

        // Add the rest of the surrogate or our normal character
        charCount++;

        // iChar is back to 0
        iChar = 0;
    }

    // See if we have something left over that has to be decoded
    if (readCount > 0 && (decoder == null || decoder.MustFlush))
    {
        // Oops, there's something left over with no place to go.
        // Rebuild the left-over bytes in input order from the partial accumulator.
        byte[] fallbackBytes = new byte[readCount];
        if (this.bigEndian)
        {
            while (readCount > 0)
            {
                fallbackBytes[--readCount] = unchecked((byte)iChar);
                iChar >>= 8;
            }
        }
        else
        {
            while (readCount > 0)
            {
                fallbackBytes[--readCount] = unchecked((byte)(iChar>>24));
                iChar <<= 8;
            }
        }

        charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
    }

    // Check for overflows. This method counts chars, so report the char-count
    // overflow message rather than the byte-count one.
    if (charCount < 0)
        throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));

    // Shouldn't have anything in fallback buffer for GetCharCount
    // (don't have to check m_throwOnOverflow for chars or count)
    Contract.Assert(fallbackBuffer.Remaining == 0,
        "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");

    // Return our count
    return charCount;
}
900 [System.Security.SecurityCritical] // auto-generated
901 internal override unsafe int GetChars(byte* bytes, int byteCount,
902 char* chars, int charCount, DecoderNLS baseDecoder)
904 Contract.Assert(chars!=null, "[UTF32Encoding.GetChars]chars!=null");
905 Contract.Assert(bytes!=null, "[UTF32Encoding.GetChars]bytes!=null");
906 Contract.Assert(byteCount >=0, "[UTF32Encoding.GetChars]byteCount >=0");
907 Contract.Assert(charCount >=0, "[UTF32Encoding.GetChars]charCount >=0");
909 UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
911 // None so far!
912 char* charStart = chars;
913 char* charEnd = chars + charCount;
915 byte* byteStart = bytes;
916 byte* byteEnd = bytes + byteCount;
918 // See if there's anything in our decoder (but don't clear it yet)
919 int readCount = 0;
920 uint iChar = 0;
922 // For fallback we may need a fallback buffer
923 DecoderFallbackBuffer fallbackBuffer = null;
925 // See if there's anything in our decoder
926 if (decoder != null)
928 readCount = decoder.readByteCount;
929 iChar = (uint)decoder.iChar;
930 fallbackBuffer = baseDecoder.FallbackBuffer;
932 // Shouldn't have anything in fallback buffer for GetChars
933 // (don't have to check m_throwOnOverflow for chars)
934 Contract.Assert(fallbackBuffer.Remaining == 0,
935 "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
937 else
939 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
942 // Set our internal fallback interesting things.
943 fallbackBuffer.InternalInitialize(bytes, chars + charCount);
945 // Loop through our input, 4 characters at a time!
946 while (bytes < byteEnd)
948 // Get our next character
949 if(bigEndian)
951 // Scoot left and add it to the bottom
952 iChar <<= 8;
953 iChar += *(bytes++);
955 else
957 // Scoot right and add it to the top
958 iChar >>= 8;
959 iChar += (uint)(*(bytes++)) << 24;
962 readCount++;
964 // See if we have all the bytes yet
965 if (readCount < 4)
966 continue;
968 // Have the bytes
969 readCount = 0;
971 // See if its valid to encode
972 if ( iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
974 // Need to fall back these 4 bytes
975 byte[] fallbackBytes;
976 if (this.bigEndian)
978 fallbackBytes = new byte[] {
979 unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
980 unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
982 else
984 fallbackBytes = new byte[] {
985 unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
986 unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
989 // Chars won't be updated unless this works.
990 if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
992 // Couldn't fallback, throw or wait til next time
993 // We either read enough bytes for bytes-=4 to work, or we're
994 // going to throw in ThrowCharsOverflow because chars == charStart
995 Contract.Assert(bytes >= byteStart + 4 || chars == charStart,
996 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
997 bytes-=4; // get back to where we were
998 iChar=0; // Remembering nothing
999 fallbackBuffer.InternalReset();
1000 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1001 break; // Stop here, didn't throw
1004 // Ignore the illegal character
1005 iChar = 0;
1006 continue;
1010 // Ok, we have something we can add to our output
1011 if (iChar >= 0x10000)
1013 // Surrogates take 2
1014 if (chars >= charEnd - 1)
1016 // Throwing or stopping
1017 // We either read enough bytes for bytes-=4 to work, or we're
1018 // going to throw in ThrowCharsOverflow because chars == charStart
1019 Contract.Assert(bytes >= byteStart + 4 || chars == charStart,
1020 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
1021 bytes-=4; // get back to where we were
1022 iChar=0; // Remembering nothing
1023 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1024 break; // Stop here, didn't throw
1027 *(chars++) = GetHighSurrogate(iChar);
1028 iChar = GetLowSurrogate(iChar);
1030 // Bounds check for normal character
1031 else if (chars >= charEnd)
1033 // Throwing or stopping
1034 // We either read enough bytes for bytes-=4 to work, or we're
1035 // going to throw in ThrowCharsOverflow because chars == charStart
1036 Contract.Assert(bytes >= byteStart + 4 || chars == charStart,
1037 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
1038 bytes-=4; // get back to where we were
1039 iChar=0; // Remembering nothing
1040 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1041 break; // Stop here, didn't throw
1044 // Add the rest of the surrogate or our normal character
1045 *(chars++) = (char)iChar;
1047 // iChar is back to 0
1048 iChar = 0;
1051 // See if we have something left over that has to be decoded
1052 if (readCount > 0 && (decoder == null || decoder.MustFlush))
1054 // Oops, there's something left over with no place to go.
1055 byte[] fallbackBytes = new byte[readCount];
1056 int tempCount = readCount;
1057 if (this.bigEndian)
1059 while(tempCount > 0)
1061 fallbackBytes[--tempCount] = unchecked((byte)iChar);
1062 iChar >>= 8;
1065 else
1067 while (tempCount > 0)
1069 fallbackBytes[--tempCount] = unchecked((byte)(iChar>>24));
1070 iChar <<= 8;
1074 if (!fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref chars))
1076 // Couldn't fallback.
1077 fallbackBuffer.InternalReset();
1078 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1079 // Stop here, didn't throw, backed up, so still nothing in buffer
1081 else
1083 // Don't clear our decoder unless we could fall it back.
1084 // If we caught the if above, then we're a convert() and will catch this next time.
1085 readCount = 0;
1086 iChar = 0;
1090 // Remember any left over stuff, clearing buffer as well for MustFlush
1091 if (decoder != null)
1093 decoder.iChar = (int)iChar;
1094 decoder.readByteCount = readCount;
1095 decoder.m_bytesUsed = (int)(bytes - byteStart);
1098 // Shouldn't have anything in fallback buffer for GetChars
1099 // (don't have to check m_throwOnOverflow for chars)
1100 Contract.Assert(fallbackBuffer.Remaining == 0,
1101 "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
1103 // Return our count
1104 return (int)(chars - charStart);
// Combines a UTF-16 surrogate pair into the single UTF-32 value it stands for:
// (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000.
// No validation is done here; the arithmetic is applied to whatever two chars are passed in.
private uint GetSurrogate(char cHigh, char cLow)
{
    const uint HighSurrogateStart = 0xD800;
    const uint LowSurrogateStart = 0xDC00;
    const uint SupplementaryBase = 0x10000;

    // Offset of each half from the start of its surrogate range.
    uint highOffset = (uint)cHigh - HighSurrogateStart;
    uint lowOffset = (uint)cLow - LowSurrogateStart;

    // The high half supplies the upper 10 bits, the low half the lower 10 bits.
    return (highOffset * 0x400) + lowOffset + SupplementaryBase;
}
// Returns the high (leading) surrogate char for the UTF-32 value iChar.
// The caller only passes values >= 0x10000 (see the surrogate branch in GetChars).
private char GetHighSurrogate(uint iChar)
{
    // Strip the 0x10000 bias, leaving the 20 payload bits of the supplementary code point.
    uint payload = iChar - 0x10000;

    // The upper 10 payload bits select the high surrogate (unsigned >> 10 is the same as / 0x400).
    uint upperTenBits = payload >> 10;
    return (char)(upperTenBits + 0xD800);
}
// Returns the low (trailing) surrogate char for the UTF-32 value iChar.
// The caller only passes values >= 0x10000 (see the surrogate branch in GetChars).
private char GetLowSurrogate(uint iChar)
{
    // Strip the 0x10000 bias, leaving the 20 payload bits of the supplementary code point.
    uint payload = iChar - 0x10000;

    // The lower 10 payload bits select the low surrogate (unsigned & 0x3FF is the same as % 0x400).
    uint lowerTenBits = payload & 0x3FF;
    return (char)(lowerTenBits + 0xDC00);
}
// Creates a new UTF32Decoder bound to this encoding instance.
public override Decoder GetDecoder()
{
    Decoder utf32Decoder = new UTF32Decoder(this);
    return utf32Decoder;
}
// Creates a new EncoderNLS bound to this encoding instance.
public override Encoder GetEncoder()
{
    Encoder utf32Encoder = new EncoderNLS(this);
    return utf32Encoder;
}
// Returns an upper bound on the number of bytes needed to encode charCount chars.
// Throws ArgumentOutOfRangeException when charCount is negative or when the bound
// does not fit in an int.
public override int GetMaxByteCount(int charCount)
{
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    // A fallback only inflates the estimate when it can emit more than one char per input char.
    long fallbackFactor = EncoderFallback.MaxCharCount > 1 ? EncoderFallback.MaxCharCount : 1;

    // One extra char accounts for a high surrogate left over from a previous call;
    // every resulting char is then written as 4 bytes.
    long byteCount = ((long)charCount + 1) * fallbackFactor * 4;

    if (byteCount > int.MaxValue)
    {
        throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
    }

    return (int)byteCount;
}
// Returns an upper bound on the number of chars that decoding byteCount bytes can produce,
// including the case where invalid input is replaced through the decoder fallback.
//
// byteCount: number of bytes to be decoded; must not be negative.
// Throws ArgumentOutOfRangeException when byteCount is negative or when the bound
// does not fit in an int.
public override int GetMaxCharCount(int byteCount)
{
    if (byteCount < 0)
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    Contract.EndContractBlock();

    // A supplementary character becomes 2 surrogate characters, so 4 input bytes becomes 2 chars,
    // plus we may have 1 surrogate char left over if the decoder has 3 bytes in it already for a non-bmp char.
    // Have to add another one because 1/2 == 0, but 3 bytes left over could be 2 char surrogate pair.
    //
    // The running total is kept in a long (as GetMaxByteCount does): with an int the multiplication
    // below could wrap around silently and the overflow check at the end could never be true.
    long charCount = ((long)byteCount / 2) + 2;

    // Also consider fallback because our input bytes could be out of range of unicode.
    // Since fallback would fallback 4 bytes at a time, we'll only fall back 1/2 of MaxCharCount.
    if (DecoderFallback.MaxCharCount > 2)
    {
        // Multiply by the fallback size
        charCount *= DecoderFallback.MaxCharCount;

        // We were already figuring 2 chars per 4 bytes, but fallback will be a different number
        charCount /= 2;
    }

    if (charCount > 0x7fffffff)
        throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));

    return (int)charCount;
}
// Returns the UTF-32 byte order mark for this encoding, or the EmptyArray<Byte> value
// when this instance was configured not to emit one.
public override byte[] GetPreamble()
{
    if (!emitUTF32ByteOrderMark)
    {
        return EmptyArray<Byte>.Value;
    }

    // A new array is allocated on every call so that callers cannot modify a shared copy.
    // Big endian writes U+FEFF as 00 00 FE FF; little endian writes it as FF FE 00 00.
    return bigEndian
        ? new byte[4] { 0x00, 0x00, 0xFE, 0xFF }
        : new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };
}
// Two UTF32Encoding instances are equal when they agree on byte order mark emission,
// byte order, encoder fallback and decoder fallback. Anything that is not a
// UTF32Encoding (including null) is never equal.
public override bool Equals(Object value)
{
    UTF32Encoding other = value as UTF32Encoding;
    if (other == null)
    {
        return false;
    }

    if (emitUTF32ByteOrderMark != other.emitUTF32ByteOrderMark)
    {
        return false;
    }

    if (bigEndian != other.bigEndian)
    {
        return false;
    }

    // isThrowException is deliberately not compared: it is the same as the
    // encoder/decoder fallbacks being exception fallbacks, which are compared below.
    if (!EncoderFallback.Equals(other.EncoderFallback))
    {
        return false;
    }

    return DecoderFallback.Equals(other.DecoderFallback);
}
// Hash built from the same inputs Equals compares, plus the code page.
// Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
public override int GetHashCode()
{
    int hash = this.EncoderFallback.GetHashCode();
    hash += this.DecoderFallback.GetHashCode();
    hash += CodePage;

    // Fold the two flags in as distinct bit values.
    hash += emitUTF32ByteOrderMark ? 4 : 0;
    hash += bigEndian ? 8 : 0;

    return hash;
}
// Decoder that carries the bytes of an incomplete UTF-32 unit from one call to the next.
[Serializable]
internal class UTF32Decoder : DecoderNLS
{
    // Value assembled so far from the bytes already read for the current unit.
    internal int iChar = 0;

    // How many bytes of the current unit have been read.
    internal int readByteCount = 0;

    public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
    {
        // Nothing to do here: base calls reset.
    }

    // Drops any partially read unit and resets the fallback buffer if there is one.
    public override void Reset()
    {
        iChar = 0;
        readByteCount = 0;

        if (m_fallbackBuffer != null)
        {
            m_fallbackBuffer.Reset();
        }
    }

    // True when bytes of an unfinished unit are still being held.
    internal override bool HasState
    {
        get
        {
            // The byte count is the flag; iChar == 0 alone doesn't mean much.
            return readByteCount != 0;
        }
    }
}
1263 #endif // FEATURE_UTF32