3 // Copyright (c) Microsoft Corporation. All rights reserved.
7 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
16 using System
.Diagnostics
.Contracts
;
17 using System
.Globalization
;
18 // Encodes text into and out of UTF-32. UTF-32 is a way of writing
19 // Unicode characters with a single storage unit (32 bits) per character,
21 // The UTF-32 byte order mark is simply the Unicode byte order mark
22 // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000). The byte order
23 // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
24 // switch the byte orderings.
27 public sealed class UTF32Encoding
: Encoding
30 // words   bits    UTF-32 representation
31 // -----   ----    -----------------------------------
32 // 1       16      00000000 00000000 xxxxxxxx xxxxxxxx
33 // 2       21      00000000 000xxxxx hhhhhhll llllllll
34 // -----   ----    -----------------------------------
37 // Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
// Assigned from the constructor's byteOrderMark argument.
41 private bool emitUTF32ByteOrderMark
= false;
// Assigned from the constructor's throwOnInvalidCharacters argument. When true,
// SetDefaultFallbacks installs the exception fallbacks rather than the "\xFFFD" replacement fallbacks.
42 private bool isThrowException
= false;
// Assigned from the constructor's bigEndian argument; the same argument selects the
// code page passed to the base constructor (12001 when true, 12000 otherwise).
43 private bool bigEndian
= false;
// Default constructor: forwards to the three-argument constructor with
// bigEndian = false, byteOrderMark = true and throwOnInvalidCharacters = false.
public UTF32Encoding()
    : this(bigEndian: false, byteOrderMark: true, throwOnInvalidCharacters: false)
{
}
// Two-argument constructor: forwards bigEndian and byteOrderMark unchanged to the
// three-argument constructor and supplies throwOnInvalidCharacters = false.
public UTF32Encoding(bool bigEndian, bool byteOrderMark)
    : this(bigEndian, byteOrderMark, throwOnInvalidCharacters: false)
{
}
// Full constructor.
//   bigEndian                - stored on the instance; also selects the code page handed to the
//                              base constructor (12001 when true, 12000 when false).
//   byteOrderMark            - stored in emitUTF32ByteOrderMark.
//   throwOnInvalidCharacters - stored in isThrowException; when true the fallbacks are re-selected.
public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters)
    : base(bigEndian ? 12001 : 12000)
{
    this.bigEndian = bigEndian;
    emitUTF32ByteOrderMark = byteOrderMark;
    isThrowException = throwOnInvalidCharacters;

    // Encoding's constructor already picked fallbacks, but that choice is wrong
    // when exceptions were requested, so redo it now that the flag is assigned.
    if (throwOnInvalidCharacters)
    {
        SetDefaultFallbacks();
    }
}
// Selects the encoder/decoder fallbacks for this instance:
//   - isThrowException == false: replacement fallbacks that substitute "\xFFFD".
//   - isThrowException == true : the shared exception fallbacks.
internal override void SetDefaultFallbacks()
{
    if (!isThrowException)
    {
        encoderFallback = new EncoderReplacementFallback("\xFFFD");
        decoderFallback = new DecoderReplacementFallback("\xFFFD");
        return;
    }

    encoderFallback = EncoderFallback.ExceptionFallback;
    decoderFallback = DecoderFallback.ExceptionFallback;
}
86 // The following methods are copied from EncodingNLS.cs.
87 // Unfortunately EncodingNLS.cs is internal and we're public, so we have to reimplement them here.
88 // These should be kept in sync for the following classes:
89 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
92 // Returns the number of bytes required to encode a range of characters in
95 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
96 // So if you fix this, fix the others. Currently those include:
97 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
98 // parent method is safe
// Array overload of GetByteCount: validates (chars, index, count), then pins the array and
// forwards (pChars + index, count) to the pointer-based overload with a null encoder.
// Visible throws:
//   ArgumentNullException("chars")
//   ArgumentOutOfRangeException("index" or "count") when either is negative (index is reported first)
//   ArgumentOutOfRangeException("chars") when chars.Length - index < count
100 [System
.Security
.SecuritySafeCritical
] // auto-generated
101 public override unsafe int GetByteCount(char[] chars
, int index
, int count
)
103 // Validate input parameters
// NOTE(review): the condition guarding this throw is not visible in this copy --
// presumably a null check on chars; confirm against the full file.
105 throw new ArgumentNullException("chars",
106 Environment
.GetResourceString("ArgumentNull_Array"));
108 if (index
< 0 || count
< 0)
109 throw new ArgumentOutOfRangeException((index
<0 ? "index" : "count"),
110 Environment
.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
// Written as Length - index < count rather than index + count > Length.
112 if (chars
.Length
- index
< count
)
113 throw new ArgumentOutOfRangeException("chars",
114 Environment
.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
115 Contract
.EndContractBlock();
117 // If no input, return 0, avoid fixed empty array problem
// NOTE(review): the statement guarded by this check is not visible here; per the comment
// above it returns 0 -- confirm.
118 if (chars
.Length
== 0)
121 // Just call the pointer version
122 fixed (char* pChars
= chars
)
123 return GetByteCount(pChars
+ index
, count
, null);
126 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
127 // So if you fix this, fix the others. Currently those include:
128 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
129 // parent method is safe
// String overload of GetByteCount: pins the string and forwards (pChars, s.Length) to the
// pointer-based overload with a null encoder.
// Visible throws: ArgumentNullException("s").
131 [System
.Security
.SecuritySafeCritical
] // auto-generated
132 public override unsafe int GetByteCount(String s
)
// NOTE(review): the condition guarding this throw is not visible in this copy --
// presumably a null check on s; confirm against the full file.
136 throw new ArgumentNullException("s");
137 Contract
.EndContractBlock();
139 fixed (char* pChars
= s
)
140 return GetByteCount(pChars
, s
.Length
, null);
143 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
144 // So if you fix this, fix the others. Currently those include:
145 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload of GetByteCount: validates the arguments and forwards to the
// three-argument overload with a null encoder.
// Visible throws: ArgumentNullException("chars"), ArgumentOutOfRangeException("count").
147 [System
.Security
.SecurityCritical
] // auto-generated
148 [CLSCompliant(false)]
149 public override unsafe int GetByteCount(char* chars
, int count
)
151 // Validate Parameters
// NOTE(review): the conditions guarding the two throws below are not visible in this copy --
// presumably chars == null and count < 0 respectively; confirm against the full file.
153 throw new ArgumentNullException("chars",
154 Environment
.GetResourceString("ArgumentNull_Array"));
157 throw new ArgumentOutOfRangeException("count",
158 Environment
.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
159 Contract
.EndContractBlock();
161 // Call it with empty encoder
162 return GetByteCount(chars
, count
, null);
165 // Parent method is safe.
166 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
167 // So if you fix this, fix the others. Currently those include:
168 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// String overload of GetBytes: validates the arguments, pins both the string and the byte
// array, and forwards to the pointer-based overload with a null encoder.
// The byte budget handed down is everything from byteIndex to the end of the array.
// Throws:
//   ArgumentNullException("s" or "bytes")              - s is reported first when both are null
//   ArgumentOutOfRangeException("charIndex"/"charCount") - either is negative (charIndex reported first)
//   ArgumentOutOfRangeException("s")                   - s.Length - charIndex < charCount
//   ArgumentOutOfRangeException("byteIndex")           - byteIndex < 0 or byteIndex > bytes.Length
170 [System
.Security
.SecuritySafeCritical
] // auto-generated
171 public override unsafe int GetBytes(String s
, int charIndex
, int charCount
,
172 byte[] bytes
, int byteIndex
)
174 if (s
== null || bytes
== null)
175 throw new ArgumentNullException((s
== null ? "s" : "bytes"),
176 Environment
.GetResourceString("ArgumentNull_Array"));
178 if (charIndex
< 0 || charCount
< 0)
179 throw new ArgumentOutOfRangeException((charIndex
<0 ? "charIndex" : "charCount"),
180 Environment
.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
182 if (s
.Length
- charIndex
< charCount
)
183 throw new ArgumentOutOfRangeException("s",
184 Environment
.GetResourceString("ArgumentOutOfRange_IndexCount"));
// byteIndex == bytes.Length is accepted (zero bytes of room).
186 if (byteIndex
< 0 || byteIndex
> bytes
.Length
)
187 throw new ArgumentOutOfRangeException("byteIndex",
188 Environment
.GetResourceString("ArgumentOutOfRange_Index"));
189 Contract
.EndContractBlock();
// Room available for output, computed before any fix-up of the array below.
191 int byteCount
= bytes
.Length
- byteIndex
;
193 // Fix our input array if 0 length because fixed doesn't like 0 length arrays
// NOTE(review): the statement guarded by this check is not visible in this copy --
// presumably it substitutes a non-empty dummy array; confirm against the full file.
194 if (bytes
.Length
== 0)
197 fixed (char* pChars
= s
)
198 fixed ( byte* pBytes
= bytes
)
199 return GetBytes(pChars
+ charIndex
, charCount
,
200 pBytes
+ byteIndex
, byteCount
, null);
203 // Encodes a range of characters in a character array into a range of bytes
204 // in a byte array. An exception occurs if the byte array is not large
205 // enough to hold the complete encoding of the characters. The
206 // GetByteCount method can be used to determine the exact number of
207 // bytes that will be produced for a given range of characters.
208 // Alternatively, the GetMaxByteCount method can be used to
209 // determine the maximum number of bytes that will be produced for a given
210 // number of characters, regardless of the actual character values.
212 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
213 // So if you fix this, fix the others. Currently those include:
214 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
215 // parent method is safe
// Array overload of GetBytes: validates the arguments, pins both arrays, and forwards to the
// pointer-based overload with a null encoder.
// The byte budget handed down is everything from byteIndex to the end of the array.
// Throws:
//   ArgumentNullException("chars" or "bytes")          - chars is reported first when both are null
//   ArgumentOutOfRangeException("charIndex"/"charCount") - either is negative (charIndex reported first)
//   ArgumentOutOfRangeException("chars")               - chars.Length - charIndex < charCount
//   ArgumentOutOfRangeException("byteIndex")           - byteIndex < 0 or byteIndex > bytes.Length
217 [System
.Security
.SecuritySafeCritical
] // auto-generated
218 public override unsafe int GetBytes(char[] chars
, int charIndex
, int charCount
,
219 byte[] bytes
, int byteIndex
)
221 // Validate parameters
222 if (chars
== null || bytes
== null)
223 throw new ArgumentNullException((chars
== null ? "chars" : "bytes"),
224 Environment
.GetResourceString("ArgumentNull_Array"));
226 if (charIndex
< 0 || charCount
< 0)
227 throw new ArgumentOutOfRangeException((charIndex
<0 ? "charIndex" : "charCount"),
228 Environment
.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
230 if (chars
.Length
- charIndex
< charCount
)
231 throw new ArgumentOutOfRangeException("chars",
232 Environment
.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
// byteIndex == bytes.Length is accepted (zero bytes of room).
234 if (byteIndex
< 0 || byteIndex
> bytes
.Length
)
235 throw new ArgumentOutOfRangeException("byteIndex",
236 Environment
.GetResourceString("ArgumentOutOfRange_Index"));
237 Contract
.EndContractBlock();
239 // If nothing to encode return 0, avoid fixed problem
// NOTE(review): the statement guarded by this check is not visible here; per the comment
// above it returns 0 -- confirm.
240 if (chars
.Length
== 0)
243 // Just call pointer version
// Room available for output, computed before any fix-up of the array below.
244 int byteCount
= bytes
.Length
- byteIndex
;
246 // Fix our input array if 0 length because fixed doesn't like 0 length arrays
// NOTE(review): the statement guarded by this check is not visible in this copy --
// presumably it substitutes a non-empty dummy array; confirm against the full file.
247 if (bytes
.Length
== 0)
250 fixed (char* pChars
= chars
)
251 fixed (byte* pBytes
= bytes
)
252 // Remember that byteCount is the room left after byteIndex, not the size of the array.
253 return GetBytes(pChars
+ charIndex
, charCount
,
254 pBytes
+ byteIndex
, byteCount
, null);
257 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
258 // So if you fix this, fix the others. Currently those include:
259 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload of GetBytes: validates the raw arguments and forwards to the internal
// overload with a null encoder.
// Throws:
//   ArgumentNullException       - naming "bytes" (checked first) or "chars" when that pointer is null
//   ArgumentOutOfRangeException - naming "charCount" (checked first) or "byteCount" when negative
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
    if (bytes == null)
        throw new ArgumentNullException("bytes", Environment.GetResourceString("ArgumentNull_Array"));

    if (chars == null)
        throw new ArgumentNullException("chars", Environment.GetResourceString("ArgumentNull_Array"));

    if (charCount < 0)
        throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

    if (byteCount < 0)
        throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

    Contract.EndContractBlock();

    // No encoder state: one-shot conversion.
    return GetBytes(chars, charCount, bytes, byteCount, null);
}
278 // Returns the number of characters produced by decoding a range of bytes
281 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
282 // So if you fix this, fix the others. Currently those include:
283 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
284 // parent method is safe
// Array overload of GetCharCount: validates (bytes, index, count), then pins the array and
// forwards (pBytes + index, count) to the pointer-based overload with a null decoder.
// Visible throws:
//   ArgumentNullException("bytes")
//   ArgumentOutOfRangeException("index" or "count") when either is negative (index is reported first)
//   ArgumentOutOfRangeException("bytes") when bytes.Length - index < count
286 [System
.Security
.SecuritySafeCritical
] // auto-generated
287 public override unsafe int GetCharCount(byte[] bytes
, int index
, int count
)
289 // Validate Parameters
// NOTE(review): the condition guarding this throw is not visible in this copy --
// presumably a null check on bytes; confirm against the full file.
291 throw new ArgumentNullException("bytes",
292 Environment
.GetResourceString("ArgumentNull_Array"));
294 if (index
< 0 || count
< 0)
295 throw new ArgumentOutOfRangeException((index
<0 ? "index" : "count"),
296 Environment
.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
298 if (bytes
.Length
- index
< count
)
299 throw new ArgumentOutOfRangeException("bytes",
300 Environment
.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
301 Contract
.EndContractBlock();
303 // If no input just return 0, fixed doesn't like 0 length arrays.
// NOTE(review): the statement guarded by this check is not visible here; per the comment
// above it returns 0 -- confirm.
304 if (bytes
.Length
== 0)
307 // Just call pointer version
308 fixed (byte* pBytes
= bytes
)
309 return GetCharCount(pBytes
+ index
, count
, null);
312 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
313 // So if you fix this, fix the others. Currently those include:
314 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload of GetCharCount: validates the arguments and forwards to the
// three-argument overload with a null decoder.
// Visible throws: ArgumentNullException("bytes"), ArgumentOutOfRangeException("count").
316 [System
.Security
.SecurityCritical
] // auto-generated
317 [CLSCompliant(false)]
318 public override unsafe int GetCharCount(byte* bytes
, int count
)
320 // Validate Parameters
// NOTE(review): the conditions guarding the two throws below are not visible in this copy --
// presumably bytes == null and count < 0 respectively; confirm against the full file.
322 throw new ArgumentNullException("bytes",
323 Environment
.GetResourceString("ArgumentNull_Array"));
326 throw new ArgumentOutOfRangeException("count",
327 Environment
.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
328 Contract
.EndContractBlock();
330 return GetCharCount(bytes
, count
, null);
333 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
334 // So if you fix this, fix the others. Currently those include:
335 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
336 // parent method is safe
// Array overload of GetChars: validates the arguments, pins both arrays, and forwards to the
// pointer-based overload with a null decoder.
// The char budget handed down is everything from charIndex to the end of the array.
// Throws:
//   ArgumentNullException("bytes" or "chars")          - bytes is reported first when both are null
//   ArgumentOutOfRangeException("byteIndex"/"byteCount") - either is negative (byteIndex reported first)
//   ArgumentOutOfRangeException("bytes")               - bytes.Length - byteIndex < byteCount
//   ArgumentOutOfRangeException("charIndex")           - charIndex < 0 or charIndex > chars.Length
338 [System
.Security
.SecuritySafeCritical
] // auto-generated
339 public override unsafe int GetChars(byte[] bytes
, int byteIndex
, int byteCount
,
340 char[] chars
, int charIndex
)
342 // Validate Parameters
343 if (bytes
== null || chars
== null)
344 throw new ArgumentNullException(bytes
== null ? "bytes" : "chars",
345 Environment
.GetResourceString("ArgumentNull_Array"));
347 if (byteIndex
< 0 || byteCount
< 0)
348 throw new ArgumentOutOfRangeException((byteIndex
<0 ? "byteIndex" : "byteCount"),
349 Environment
.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
351 if ( bytes
.Length
- byteIndex
< byteCount
)
352 throw new ArgumentOutOfRangeException("bytes",
353 Environment
.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
// charIndex == chars.Length is accepted (zero chars of room).
355 if (charIndex
< 0 || charIndex
> chars
.Length
)
356 throw new ArgumentOutOfRangeException("charIndex",
357 Environment
.GetResourceString("ArgumentOutOfRange_Index"));
358 Contract
.EndContractBlock();
360 // If no input, return 0 & avoid fixed problem
// NOTE(review): the statement guarded by this check is not visible here; per the comment
// above it returns 0 -- confirm.
361 if (bytes
.Length
== 0)
364 // Just call pointer version
// Room available for output, computed before any fix-up of the array below.
365 int charCount
= chars
.Length
- charIndex
;
367 // Fix our input array if 0 length because fixed doesn't like 0 length arrays
// NOTE(review): the statement guarded by this check is not visible in this copy --
// presumably it substitutes a non-empty dummy array; confirm against the full file.
368 if (chars
.Length
== 0)
371 fixed (byte* pBytes
= bytes
)
372 fixed (char* pChars
= chars
)
373 // Remember that charCount is the room left after charIndex, not the size of the array
374 return GetChars(pBytes
+ byteIndex
, byteCount
,
375 pChars
+ charIndex
, charCount
, null);
378 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
379 // So if you fix this, fix the others. Currently those include:
380 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer overload of GetChars: validates the raw arguments and forwards to the internal
// overload with a null decoder.
// Throws:
//   ArgumentNullException       - naming "bytes" (checked first) or "chars" when that pointer is null
//   ArgumentOutOfRangeException - naming "charCount" (checked first) or "byteCount" when negative
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
    if (bytes == null)
        throw new ArgumentNullException("bytes", Environment.GetResourceString("ArgumentNull_Array"));

    if (chars == null)
        throw new ArgumentNullException("chars", Environment.GetResourceString("ArgumentNull_Array"));

    if (charCount < 0)
        throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

    if (byteCount < 0)
        throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));

    Contract.EndContractBlock();

    // No decoder state: one-shot conversion.
    return GetChars(bytes, byteCount, chars, charCount, null);
}
399 // Returns a string containing the decoded representation of a range of
400 // bytes in a byte array.
402 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
403 // So if you fix this, fix the others. Currently those include:
404 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
405 // parent method is safe
// Decodes count bytes of bytes, starting at index, into a string. After validation the array
// is pinned and (pBytes + index, count, this) is handed to String.CreateStringFromEncoding.
// An empty input array yields String.Empty.
// Visible throws:
//   ArgumentNullException("bytes")
//   ArgumentOutOfRangeException("index" or "count") when either is negative (index is reported first)
//   ArgumentOutOfRangeException("bytes") when bytes.Length - index < count
407 [System
.Security
.SecuritySafeCritical
] // auto-generated
408 public override unsafe String
GetString(byte[] bytes
, int index
, int count
)
410 // Validate Parameters
// NOTE(review): the condition guarding this throw is not visible in this copy --
// presumably a null check on bytes; confirm against the full file.
412 throw new ArgumentNullException("bytes",
413 Environment
.GetResourceString("ArgumentNull_Array"));
415 if (index
< 0 || count
< 0)
416 throw new ArgumentOutOfRangeException((index
< 0 ? "index" : "count"),
417 Environment
.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
419 if (bytes
.Length
- index
< count
)
420 throw new ArgumentOutOfRangeException("bytes",
421 Environment
.GetResourceString("ArgumentOutOfRange_IndexCountBuffer"));
422 Contract
.EndContractBlock();
424 // Avoid problems with empty input buffer
425 if (bytes
.Length
== 0) return String
.Empty
;
427 fixed (byte* pBytes
= bytes
)
428 return String
.CreateStringFromEncoding(
429 pBytes
+ index
, count
, this);
433 // End of standard methods copied from EncodingNLS.cs
// Core byte-counting routine behind the public GetByteCount overloads.
//   chars   - start of the input characters (asserted non-null)
//   count   - number of input characters (asserted non-negative)
//   encoder - optional encoder state; the public overloads visible in this file pass null
// Walks the input, draining any pending fallback characters first, pairing high/low surrogates
// and sending unpaired surrogates through the encoder fallback buffer.
// Throws ArgumentException when an encoder is supplied whose fallback buffer still has data,
// and ArgumentOutOfRangeException("count") from the overflow check near the end.
// NOTE(review): several statements (the running byte total, loop-body assignments, branch
// keywords and the return) are not visible in this copy; comments below describe only what is.
436 [System
.Security
.SecurityCritical
] // auto-generated
437 internal override unsafe int GetByteCount(char *chars
, int count
, EncoderNLS encoder
)
439 Contract
.Assert(chars
!=null, "[UTF32Encoding.GetByteCount]chars!=null");
440 Contract
.Assert(count
>=0, "[UTF32Encoding.GetByteCount]count >=0");
// end is one past the last input char; charStart remembers the original start.
442 char* end
= chars
+ count
;
443 char* charStart
= chars
;
// Pending high surrogate awaiting its low half ('\0' means none).
446 char highSurrogate
= '\0';
448 // For fallback we may need a fallback buffer
449 EncoderFallbackBuffer fallbackBuffer
= null;
// Encoder-supplied state: resume with its left-over char and reuse its fallback buffer.
// NOTE(review): the guard selecting this path is not visible -- presumably encoder != null.
452 highSurrogate
= encoder
.charLeftOver
;
453 fallbackBuffer
= encoder
.FallbackBuffer
;
455 // We mustn't have left over fallback data when counting
456 if (fallbackBuffer
.Remaining
> 0)
457 throw new ArgumentException(Environment
.GetResourceString("Argument_EncoderFallbackNotEmpty",
458 this.EncodingName
, encoder
.Fallback
.GetType()));
// Otherwise a fresh buffer is created from this encoding's own encoder fallback.
462 fallbackBuffer
= this.encoderFallback
.CreateFallbackBuffer();
465 // Set our internal fallback interesting things.
466 fallbackBuffer
.InternalInitialize(charStart
, end
, encoder
, false);
// Loop while the fallback buffer still yields characters or input remains.
471 while (((ch
= fallbackBuffer
.InternalGetNextChar()) != 0) || chars
< end
)
473 // First unwind any fallback
476 // No fallback, just get next char
481 // Do we need a low surrogate?
482 if (highSurrogate
!= '\0')
485 // In previous char, we encounter a high surrogate, so we are expecting a low surrogate here.
487 if (Char
.IsLowSurrogate(ch
))
// Pair completed: forget the pending high surrogate.
490 highSurrogate
= '\0';
493 // One surrogate pair will be translated into 4 bytes UTF32.
500 // We are missing our low surrogate, decrement chars and fallback the high surrogate
501 // The high surrogate may have come from the encoder, but nothing else did.
502 Contract
.Assert(chars
> charStart
,
503 "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
507 fallbackBuffer
.InternalFallback(highSurrogate
, ref chars
);
509 // We're going to fallback the old high surrogate.
510 highSurrogate
= '\0';
515 // Do we have another high surrogate?
516 if (Char
.IsHighSurrogate(ch
))
519 // We'll have a high surrogate to check next time.
525 // Check for illegal characters
526 if (Char
.IsLowSurrogate(ch
))
528 // We have a leading low surrogate, do the fallback
529 fallbackBuffer
.InternalFallback(ch
, ref chars
);
531 // Try again with fallback buffer
535 // We get to add the character (4 bytes UTF32)
539 // May have to do our last surrogate
// A dangling high surrogate is only flushed when there is no encoder to carry it
// forward, or the encoder has been asked to flush.
540 if ((encoder
== null || encoder
.MustFlush
) && highSurrogate
> 0)
542 // We have to do the fallback for the lonely high surrogate
543 fallbackBuffer
.InternalFallback(highSurrogate
, ref chars
);
544 highSurrogate
= (char)0;
548 // Check for overflows.
// NOTE(review): the overflow condition itself is not visible in this copy.
550 throw new ArgumentOutOfRangeException("count", Environment
.GetResourceString(
551 "ArgumentOutOfRange_GetByteCountOverflow"));
553 // Shouldn't have anything in fallback buffer for GetByteCount
554 // (don't have to check m_throwOnOverflow for count)
555 Contract
.Assert(fallbackBuffer
.Remaining
== 0,
556 "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");
// Core encoding routine behind the public GetBytes overloads.
//   chars / charCount - input characters (asserted non-null / non-negative)
//   bytes / byteCount - output buffer and its capacity (asserted non-null / non-negative)
//   encoder           - optional encoder state; the public overloads visible in this file pass null
// Writes 4 bytes per code point (two byte orders are emitted below), pairs surrogates via
// GetSurrogate, and routes unpaired surrogates through the encoder fallback buffer. When fewer
// than 4 bytes of room remain it backs up the input and calls ThrowBytesOverflow.
// Returns the number of bytes written (bytes - byteStart). When state is written back to the
// encoder it receives the left-over high surrogate and the number of chars consumed.
// NOTE(review): several statements (loop-body assignments, branch keywords such as the
// big-/little-endian selection, null checks on encoder) are not visible in this copy;
// comments below describe only what is.
562 [System
.Security
.SecurityCritical
] // auto-generated
563 internal override unsafe int GetBytes(char *chars
, int charCount
,
564 byte* bytes
, int byteCount
, EncoderNLS encoder
)
566 Contract
.Assert(chars
!=null, "[UTF32Encoding.GetBytes]chars!=null");
567 Contract
.Assert(bytes
!=null, "[UTF32Encoding.GetBytes]bytes!=null");
568 Contract
.Assert(byteCount
>=0, "[UTF32Encoding.GetBytes]byteCount >=0");
569 Contract
.Assert(charCount
>=0, "[UTF32Encoding.GetBytes]charCount >=0");
// Remember both starts so consumed/produced counts can be computed at the end.
571 char* charStart
= chars
;
572 char* charEnd
= chars
+ charCount
;
573 byte* byteStart
= bytes
;
574 byte* byteEnd
= bytes
+ byteCount
;
// Pending high surrogate awaiting its low half ('\0' means none).
576 char highSurrogate
= '\0';
578 // For fallback we may need a fallback buffer
579 EncoderFallbackBuffer fallbackBuffer
= null;
// Encoder-supplied state: resume with its left-over char and reuse its fallback buffer.
// NOTE(review): the guard selecting this path is not visible -- presumably encoder != null.
582 highSurrogate
= encoder
.charLeftOver
;
583 fallbackBuffer
= encoder
.FallbackBuffer
;
585 // We mustn't have left over fallback data when not converting
586 if (encoder
.m_throwOnOverflow
&& fallbackBuffer
.Remaining
> 0)
587 throw new ArgumentException(Environment
.GetResourceString("Argument_EncoderFallbackNotEmpty",
588 this.EncodingName
, encoder
.Fallback
.GetType()));
// Otherwise a fresh buffer is created from this encoding's own encoder fallback.
592 fallbackBuffer
= this.encoderFallback
.CreateFallbackBuffer();
595 // Set our internal fallback interesting things.
596 fallbackBuffer
.InternalInitialize(charStart
, charEnd
, encoder
, true);
// Loop while the fallback buffer still yields characters or input remains.
601 while (((ch
= fallbackBuffer
.InternalGetNextChar()) != 0) || chars
< charEnd
)
603 // First unwind any fallback
606 // No fallback, just get next char
611 // Do we need a low surrogate?
612 if (highSurrogate
!= '\0')
615 // In previous char, we encountered a high surrogate, so we are expecting a low surrogate here.
617 if (Char
.IsLowSurrogate(ch
))
619 // Is it a legal one?
620 uint iTemp
= GetSurrogate(highSurrogate
, ch
);
621 highSurrogate
= '\0';
624 // One surrogate pair will be translated into 4 bytes UTF32.
// bytes + 3 >= byteEnd means fewer than 4 bytes of room remain.
626 if (bytes
+3 >= byteEnd
)
628 // Don't have 4 bytes
629 if (fallbackBuffer
.bFallingBack
)
631 fallbackBuffer
.MovePrevious(); // Aren't using these 2 fallback chars
632 fallbackBuffer
.MovePrevious();
636 // If we don't have enough room, then either we should've advanced a while
637 // or we should have bytes==byteStart and throw below
// NOTE(review): this assert message is prefixed "[UnicodeEncoding.GetBytes]" while every other
// message in this method uses "[UTF32Encoding.GetBytes]" -- looks like a copy/paste leftover.
638 Contract
.Assert(chars
> charStart
+ 1 || bytes
== byteStart
,
639 "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
640 chars
-=2; // Aren't using those 2 chars
642 ThrowBytesOverflow(encoder
, bytes
== byteStart
); // Throw maybe (if no bytes written)
643 highSurrogate
= (char)0; // Nothing left over (we backed up to start of pair if supplementary)
// Most-significant byte first.
649 *(bytes
++) = (byte)(0x00);
650 *(bytes
++) = (byte)(iTemp
>> 16); // Implies & 0xFF, which isn't needed cause high are all 0
651 *(bytes
++) = (byte)(iTemp
>> 8); // Implies & 0xFF
652 *(bytes
++) = (byte)(iTemp
); // Implies & 0xFF
// Least-significant byte first.
656 *(bytes
++) = (byte)(iTemp
); // Implies & 0xFF
657 *(bytes
++) = (byte)(iTemp
>> 8); // Implies & 0xFF
658 *(bytes
++) = (byte)(iTemp
>> 16); // Implies & 0xFF, which isn't needed cause high are all 0
659 *(bytes
++) = (byte)(0x00);
664 // We are missing our low surrogate, decrement chars and fallback the high surrogate
665 // The high surrogate may have come from the encoder, but nothing else did.
666 Contract
.Assert(chars
> charStart
,
667 "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
671 fallbackBuffer
.InternalFallback(highSurrogate
, ref chars
);
673 // We're going to fallback the old high surrogate.
674 highSurrogate
= '\0';
678 // Do we have another high surrogate?, if so remember it
679 if (Char
.IsHighSurrogate(ch
))
682 // We'll have a high surrogate to check next time.
688 // Check for illegal characters (low surrogate)
689 if (Char
.IsLowSurrogate(ch
))
691 // We have a leading low surrogate, do the fallback
692 fallbackBuffer
.InternalFallback(ch
, ref chars
);
694 // Try again with fallback buffer
698 // We get to add the character, yippee.
// bytes + 3 >= byteEnd means fewer than 4 bytes of room remain.
699 if (bytes
+3 >= byteEnd
)
701 // Don't have 4 bytes
702 if (fallbackBuffer
.bFallingBack
)
703 fallbackBuffer
.MovePrevious(); // Aren't using this fallback char
706 // Must've advanced already
707 Contract
.Assert(chars
> charStart
,
708 "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
709 chars
--; // Aren't using this char
711 ThrowBytesOverflow(encoder
, bytes
== byteStart
); // Throw maybe (if no bytes written)
712 break; // Didn't throw, stop
// Most-significant byte first.
717 *(bytes
++) = (byte)(0x00);
718 *(bytes
++) = (byte)(0x00);
719 *(bytes
++) = (byte)((uint)ch
>> 8); // Implies & 0xFF
720 *(bytes
++) = (byte)(ch
); // Implies & 0xFF
// Least-significant byte first.
724 *(bytes
++) = (byte)(ch
); // Implies & 0xFF
725 *(bytes
++) = (byte)((uint)ch
>> 8); // Implies & 0xFF
726 *(bytes
++) = (byte)(0x00);
727 *(bytes
++) = (byte)(0x00);
731 // May have to do our last surrogate
// A dangling high surrogate is only flushed when there is no encoder to carry it
// forward, or the encoder has been asked to flush.
732 if ((encoder
== null || encoder
.MustFlush
) && highSurrogate
> 0)
734 // We have to do the fallback for the lonely high surrogate
735 fallbackBuffer
.InternalFallback(highSurrogate
, ref chars
);
736 highSurrogate
= (char)0;
740 // Fix our encoder if we have one
741 Contract
.Assert(highSurrogate
== 0 || (encoder
!= null && !encoder
.MustFlush
),
742 "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");
746 // Remember our left over surrogate (or 0 if flushing)
// NOTE(review): the guard around these encoder writes is not visible -- presumably encoder != null.
747 encoder
.charLeftOver
= highSurrogate
;
// Number of input chars consumed by this call.
750 encoder
.m_charsUsed
= (int)(chars
-charStart
);
753 // return the new length
754 return (int)(bytes
- byteStart
);
// Core char-counting routine behind the public GetCharCount overloads.
//   bytes       - start of the input bytes (asserted non-null)
//   count       - number of input bytes (asserted non-negative)
//   baseDecoder - optional decoder state, cast to UTF32Decoder; the public overloads visible
//                 in this file pass null
// Accumulates input bytes into a 32-bit value; values above 0x10FFFF or inside 0xD800-0xDFFF
// are sent to the decoder fallback buffer, whose result is added to the running count.
// Left-over partial input is also sent to the fallback when there is no decoder or it must flush.
// Throws ArgumentOutOfRangeException("count") from the overflow check near the end.
// NOTE(review): several statements (declarations of readCount/iChar/charCount, branch keywords
// for the byte-order selection, count increments and the return) are not visible in this copy;
// comments below describe only what is.
757 [System
.Security
.SecurityCritical
] // auto-generated
758 internal override unsafe int GetCharCount(byte* bytes
, int count
, DecoderNLS baseDecoder
)
760 Contract
.Assert(bytes
!=null, "[UTF32Encoding.GetCharCount]bytes!=null");
761 Contract
.Assert(count
>=0, "[UTF32Encoding.GetCharCount]count >=0");
763 UTF32Decoder decoder
= (UTF32Decoder
)baseDecoder
;
// end is one past the last input byte; byteStart remembers the original start.
767 byte* end
= bytes
+ count
;
768 byte* byteStart
= bytes
;
774 // For fallback we may need a fallback buffer
775 DecoderFallbackBuffer fallbackBuffer
= null;
777 // See if there's anything in our decoder
// Resume from the decoder's partially-read code point and reuse its fallback buffer.
// NOTE(review): the guard selecting this path is not visible -- presumably decoder != null.
780 readCount
= decoder
.readByteCount
;
781 iChar
= (uint)decoder
.iChar
;
782 fallbackBuffer
= decoder
.FallbackBuffer
;
784 // Shouldn't have anything in fallback buffer for GetCharCount
785 // (don't have to check m_throwOnOverflow for chars or count)
786 Contract
.Assert(fallbackBuffer
.Remaining
== 0,
787 "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
// Otherwise a fresh buffer is created from this encoding's own decoder fallback.
791 fallbackBuffer
= this.decoderFallback
.CreateFallbackBuffer();
794 // Set our internal fallback interesting things.
795 fallbackBuffer
.InternalInitialize(byteStart
, null);
797 // Loop through our input, 4 bytes at a time!
// The charCount >= 0 test also ends the loop once the running count has gone negative.
798 while (bytes
< end
&& charCount
>= 0)
800 // Get our next character
803 // Scoot left and add it to the bottom
809 // Scoot right and add it to the top
811 iChar
+= (uint)(*(bytes
++)) << 24;
816 // See if we have all the bytes yet
823 // See if it's valid to decode
// Rejects values beyond U+10FFFF and the surrogate range U+D800..U+DFFF.
824 if ( iChar
> 0x10FFFF || (iChar
>= 0xD800 && iChar
<= 0xDFFF))
826 // Need to fall back these 4 bytes
827 byte[] fallbackBytes
;
// Most-significant byte first.
830 fallbackBytes
= new byte[] {
831 unchecked((byte)(iChar
>>24)), unchecked((byte)(iChar
>>16)),
832 unchecked((byte)(iChar
>>8)), unchecked((byte)(iChar
)) };
// Least-significant byte first.
836 fallbackBytes
= new byte[] {
837 unchecked((byte)(iChar
)), unchecked((byte)(iChar
>>8)),
838 unchecked((byte)(iChar
>>16)), unchecked((byte)(iChar
>>24)) };
// The fallback reports how many chars it would produce; add them to the total.
841 charCount
+= fallbackBuffer
.InternalFallback(fallbackBytes
, bytes
);
843 // Ignore the illegal character
848 // Ok, we have something we can add to our output
// Values at or above 0x10000 are the supplementary ones (they need a surrogate pair).
849 if (iChar
>= 0x10000)
855 // Add the rest of the surrogate or our normal character
858 // iChar is back to 0
862 // See if we have something left over that has to be decoded
863 if (readCount
> 0 && (decoder
== null || decoder
.MustFlush
))
865 // Oops, there's something left over with no place to go.
866 byte[] fallbackBytes
= new byte[readCount
];
// Rebuild the partially-read bytes from iChar, filling the array from the back.
871 fallbackBytes
[--readCount
] = unchecked((byte)iChar
);
877 while (readCount
> 0)
879 fallbackBytes
[--readCount
] = unchecked((byte)(iChar
>>24));
884 charCount
+= fallbackBuffer
.InternalFallback(fallbackBytes
, bytes
);
887 // Check for overflows.
// NOTE(review): the overflow condition itself is not visible in this copy. Also, this is
// GetCharCount yet the resource key below is "ArgumentOutOfRange_GetByteCountOverflow" --
// possibly a copy/paste leftover; confirm the intended message.
889 throw new ArgumentOutOfRangeException("count", Environment
.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
891 // Shouldn't have anything in fallback buffer for GetCharCount
892 // (don't have to check m_throwOnOverflow for chars or count)
893 Contract
.Assert(fallbackBuffer
.Remaining
== 0,
894 "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");
// Core decoding routine behind the public GetChars overloads.
//   bytes / byteCount - input bytes (asserted non-null / non-negative)
//   chars / charCount - output buffer and its capacity (asserted non-null / non-negative)
//   baseDecoder       - optional decoder state, cast to UTF32Decoder; the public overloads
//                       visible in this file pass null
// Accumulates input bytes into a 32-bit value. Values above 0x10FFFF or inside 0xD800-0xDFFF
// go to the decoder fallback; values at or above 0x10000 are written as a high/low surrogate
// pair; everything else is written as a single char. When the output has no room the input
// pointer is moved back 4 bytes and ThrowCharsOverflow is called.
// Returns the number of chars written (chars - charStart). When a decoder is supplied it
// receives the partial code point, the partial byte count and the number of bytes consumed.
// NOTE(review): several statements (declarations of readCount/iChar, branch keywords for the
// byte-order selection, readCount updates) are not visible in this copy; comments below
// describe only what is.
900 [System
.Security
.SecurityCritical
] // auto-generated
901 internal override unsafe int GetChars(byte* bytes
, int byteCount
,
902 char* chars
, int charCount
, DecoderNLS baseDecoder
)
904 Contract
.Assert(chars
!=null, "[UTF32Encoding.GetChars]chars!=null");
905 Contract
.Assert(bytes
!=null, "[UTF32Encoding.GetChars]bytes!=null");
906 Contract
.Assert(byteCount
>=0, "[UTF32Encoding.GetChars]byteCount >=0");
907 Contract
.Assert(charCount
>=0, "[UTF32Encoding.GetChars]charCount >=0");
909 UTF32Decoder decoder
= (UTF32Decoder
)baseDecoder
;
// Remember both starts so consumed/produced counts can be computed at the end.
912 char* charStart
= chars
;
913 char* charEnd
= chars
+ charCount
;
915 byte* byteStart
= bytes
;
916 byte* byteEnd
= bytes
+ byteCount
;
918 // See if there's anything in our decoder (but don't clear it yet)
922 // For fallback we may need a fallback buffer
923 DecoderFallbackBuffer fallbackBuffer
= null;
925 // See if there's anything in our decoder
// Resume from the decoder's partially-read code point and reuse its fallback buffer.
// NOTE(review): the guard selecting this path is not visible -- presumably decoder != null.
928 readCount
= decoder
.readByteCount
;
929 iChar
= (uint)decoder
.iChar
;
930 fallbackBuffer
= baseDecoder
.FallbackBuffer
;
932 // Shouldn't have anything in fallback buffer for GetChars
933 // (don't have to check m_throwOnOverflow for chars)
934 Contract
.Assert(fallbackBuffer
.Remaining
== 0,
935 "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
// Otherwise a fresh buffer is created from this encoding's own decoder fallback.
939 fallbackBuffer
= this.decoderFallback
.CreateFallbackBuffer();
942 // Set our internal fallback interesting things.
943 fallbackBuffer
.InternalInitialize(bytes
, chars
+ charCount
);
945 // Loop through our input, 4 bytes at a time!
946 while (bytes
< byteEnd
)
948 // Get our next character
951 // Scoot left and add it to the bottom
957 // Scoot right and add it to the top
959 iChar
+= (uint)(*(bytes
++)) << 24;
964 // See if we have all the bytes yet
971 // See if it's valid to decode
// Rejects values beyond U+10FFFF and the surrogate range U+D800..U+DFFF.
972 if ( iChar
> 0x10FFFF || (iChar
>= 0xD800 && iChar
<= 0xDFFF))
974 // Need to fall back these 4 bytes
975 byte[] fallbackBytes
;
// Most-significant byte first.
978 fallbackBytes
= new byte[] {
979 unchecked((byte)(iChar
>>24)), unchecked((byte)(iChar
>>16)),
980 unchecked((byte)(iChar
>>8)), unchecked((byte)(iChar
)) };
// Least-significant byte first.
984 fallbackBytes
= new byte[] {
985 unchecked((byte)(iChar
)), unchecked((byte)(iChar
>>8)),
986 unchecked((byte)(iChar
>>16)), unchecked((byte)(iChar
>>24)) };
989 // Chars won't be updated unless this works.
990 if (!fallbackBuffer
.InternalFallback(fallbackBytes
, bytes
, ref chars
))
992 // Couldn't fallback, throw or wait til next time
993 // We either read enough bytes for bytes-=4 to work, or we're
994 // going to throw in ThrowCharsOverflow because chars == charStart
995 Contract
.Assert(bytes
>= byteStart
+ 4 || chars
== charStart
,
996 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
997 bytes
-=4; // get back to where we were
998 iChar
=0; // Remembering nothing
999 fallbackBuffer
.InternalReset();
1000 ThrowCharsOverflow(decoder
, chars
== charStart
);// Might throw, if no chars output
1001 break; // Stop here, didn't throw
1004 // Ignore the illegal character
1010 // Ok, we have something we can add to our output
1011 if (iChar
>= 0x10000)
1013 // Surrogates take 2
// chars >= charEnd - 1 means fewer than 2 chars of room remain.
1014 if (chars
>= charEnd
- 1)
1016 // Throwing or stopping
1017 // We either read enough bytes for bytes-=4 to work, or we're
1018 // going to throw in ThrowCharsOverflow because chars == charStart
1019 Contract
.Assert(bytes
>= byteStart
+ 4 || chars
== charStart
,
1020 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
1021 bytes
-=4; // get back to where we were
1022 iChar
=0; // Remembering nothing
1023 ThrowCharsOverflow(decoder
, chars
== charStart
);// Might throw, if no chars output
1024 break; // Stop here, didn't throw
// Emit the high half now; iChar becomes the low half and is written by the shared
// "add the rest" store further down.
1027 *(chars
++) = GetHighSurrogate(iChar
);
1028 iChar
= GetLowSurrogate(iChar
);
1030 // Bounds check for normal character
1031 else if (chars
>= charEnd
)
1033 // Throwing or stopping
1034 // We either read enough bytes for bytes-=4 to work, or we're
1035 // going to throw in ThrowCharsOverflow because chars == charStart
1036 Contract
.Assert(bytes
>= byteStart
+ 4 || chars
== charStart
,
1037 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
1038 bytes
-=4; // get back to where we were
1039 iChar
=0; // Remembering nothing
1040 ThrowCharsOverflow(decoder
, chars
== charStart
);// Might throw, if no chars output
1041 break; // Stop here, didn't throw
1044 // Add the rest of the surrogate or our normal character
1045 *(chars
++) = (char)iChar
;
1047 // iChar is back to 0
1051 // See if we have something left over that has to be decoded
1052 if (readCount
> 0 && (decoder
== null || decoder
.MustFlush
))
1054 // Oops, there's something left over with no place to go.
1055 byte[] fallbackBytes
= new byte[readCount
];
// Work on a copy of the count so readCount survives if the fallback cannot complete.
1056 int tempCount
= readCount
;
// Rebuild the partially-read bytes from iChar, filling the array from the back.
1059 while(tempCount
> 0)
1061 fallbackBytes
[--tempCount
] = unchecked((byte)iChar
);
1067 while (tempCount
> 0)
1069 fallbackBytes
[--tempCount
] = unchecked((byte)(iChar
>>24));
1074 if (!fallbackBuffer
.InternalFallback(fallbackBytes
, bytes
, ref chars
))
1076 // Couldn't fallback.
1077 fallbackBuffer
.InternalReset();
1078 ThrowCharsOverflow(decoder
, chars
== charStart
);// Might throw, if no chars output
1079 // Stop here, didn't throw, backed up, so still nothing in buffer
1083 // Don't clear our decoder unless we could fall it back.
1084 // If we caught the if above, then we're a convert() and will catch this next time.
1090 // Remember any left over stuff, clearing buffer as well for MustFlush
1091 if (decoder
!= null)
1093 decoder
.iChar
= (int)iChar
;
1094 decoder
.readByteCount
= readCount
;
// Number of input bytes consumed by this call.
1095 decoder
.m_bytesUsed
= (int)(bytes
- byteStart
);
1098 // Shouldn't have anything in fallback buffer for GetChars
1099 // (don't have to check m_throwOnOverflow for chars)
1100 Contract
.Assert(fallbackBuffer
.Remaining
== 0,
1101 "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
1104 return (int)(chars
- charStart
);
// Combines a UTF-16 surrogate pair into the single code point it encodes:
//   (cHigh - 0xD800) * 0x400 + (cLow - 0xDC00) + 0x10000
// No validation is performed here; the arithmetic is applied to whatever
// two chars are passed in.
private uint GetSurrogate(char cHigh, char cLow)
{
    // Contribution of the high surrogate (upper 10 bits of the offset).
    uint highPart = ((uint)cHigh - 0xD800) * 0x400;

    // Contribution of the low surrogate (lower 10 bits of the offset).
    uint lowPart = (uint)cLow - 0xDC00;

    // Supplementary code points start at 0x10000.
    return highPart + lowPart + 0x10000;
}
// Returns the high (leading) surrogate for a supplementary code point:
// the upper 10 bits of (iChar - 0x10000), offset by 0xD800.
private char GetHighSurrogate(uint iChar)
{
    uint offset = iChar - 0x10000;

    // For an unsigned value, shifting right by 10 is the same as dividing by 0x400.
    uint upperTenBits = offset >> 10;

    return (char)(upperTenBits + 0xD800);
}
// Returns the low (trailing) surrogate for a supplementary code point:
// the lower 10 bits of (iChar - 0x10000), offset by 0xDC00.
private char GetLowSurrogate(uint iChar)
{
    uint offset = iChar - 0x10000;

    // For an unsigned value, masking with 0x3FF is the same as taking it modulo 0x400.
    uint lowerTenBits = offset & 0x3FF;

    return (char)(lowerTenBits + 0xDC00);
}
// Creates a new UTF32Decoder bound to this encoding instance.
public override Decoder GetDecoder()
{
    Decoder utf32Decoder = new UTF32Decoder(this);
    return utf32Decoder;
}
// Creates a new EncoderNLS bound to this encoding instance.
public override Encoder GetEncoder()
{
    Encoder utf32Encoder = new EncoderNLS(this);
    return utf32Encoder;
}
// Returns an upper bound on the number of bytes needed to encode charCount characters.
//
// charCount: number of characters to be encoded; must not be negative.
// Returns:   (charCount + 1) * fallback size * 4 bytes.
// Throws:    ArgumentOutOfRangeException if charCount is negative or if the
//            result does not fit in an Int32.
public override int GetMaxByteCount(int charCount)
{
    // Only a negative count is invalid; the throw must be conditional.
    if (charCount < 0)
        throw new ArgumentOutOfRangeException("charCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    Contract.EndContractBlock();

    // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
    long byteCount = (long)charCount + 1;

    if (EncoderFallback.MaxCharCount > 1)
        byteCount *= EncoderFallback.MaxCharCount;

    // UTF-32 uses one 32-bit storage unit, i.e. 4 bytes, per character.
    const int bytesPerChar = 4;

    // Check the range before scaling so the 64-bit intermediate can never wrap.
    // byteCount * 4 > 0x7fffffff is equivalent to byteCount > 0x7fffffff / 4.
    if (byteCount > 0x7fffffff / bytesPerChar)
        throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));

    return (int)(byteCount * bytesPerChar);
}
// Returns an upper bound on the number of chars produced by decoding byteCount bytes.
//
// byteCount: number of bytes to be decoded; must not be negative.
// Throws:    ArgumentOutOfRangeException if byteCount is negative or if the
//            result does not fit in an Int32.
public override int GetMaxCharCount(int byteCount)
{
    // Only a negative count is invalid; the throw must be conditional.
    if (byteCount < 0)
        throw new ArgumentOutOfRangeException("byteCount",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    Contract.EndContractBlock();

    // A supplementary character becomes 2 surrogate characters, so 4 input bytes becomes 2 chars,
    // plus we may have 1 surrogate char left over if the decoder has 3 bytes in it already for a non-bmp char.
    // Have to add another one because 1/2 == 0, but 3 bytes left over could be 2 char surrogate pair
    //
    // The accumulator is 64-bit: with a 32-bit int the fallback multiplication below
    // would wrap silently and the overflow check at the end could never fire.
    long charCount = ((long)byteCount / 2) + 2;

    // Also consider fallback because our input bytes could be out of range of unicode.
    // Since fallback would fallback 4 bytes at a time, we'll only fall back 1/2 of MaxCharCount.
    if (DecoderFallback.MaxCharCount > 2)
    {
        // Multiply time fallback size
        charCount *= DecoderFallback.MaxCharCount;

        // We were already figuring 2 chars per 4 bytes, but fallback will be different #
        charCount /= 2;
    }

    if (charCount > 0x7fffffff)
        throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));

    return (int)charCount;
}
// Returns the UTF-32 byte order mark for this encoding, or an empty array
// when this instance was configured not to emit one.
// The BOM is the Unicode byte order mark written in UTF-32: 0x0000FEFF for
// big-endian, 0xFFFE0000 for little-endian.
public override byte[] GetPreamble()
{
    if (!emitUTF32ByteOrderMark)
        return EmptyArray<Byte>.Value;

    // Allocate new array to prevent users from modifying it.
    if (bigEndian)
    {
        return new byte[4] { 0x00, 0x00, 0xFE, 0xFF };  // big-endian: 00 00 FE FF
    }

    return new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };      // little-endian: FF FE 00 00
}
// Two UTF32Encoding instances are equal when they agree on BOM emission,
// byte order, and both fallbacks.
//
// value:   object to compare with; may be null or of any type.
// Returns: true only if value is a UTF32Encoding with matching settings.
public override bool Equals(Object value)
{
    UTF32Encoding that = value as UTF32Encoding;

    // A null or non-UTF32Encoding argument is simply "not equal"; it must not
    // be dereferenced.
    if (that == null)
        return false;

    // isThrowException is deliberately not compared: it is the same as the
    // encoder/decoder fallbacks being exception fallbacks, which are compared below.
    return (emitUTF32ByteOrderMark == that.emitUTF32ByteOrderMark) &&
           (bigEndian == that.bigEndian) &&
           (EncoderFallback.Equals(that.EncoderFallback)) &&
           (DecoderFallback.Equals(that.DecoderFallback));
}
// Hash built from the fallbacks, the code page, and the BOM / byte-order flags.
// Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
public override int GetHashCode()
{
    int byteOrderMarkTerm = emitUTF32ByteOrderMark ? 4 : 0;
    int bigEndianTerm = bigEndian ? 8 : 0;

    return this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
           CodePage + byteOrderMarkTerm + bigEndianTerm;
}
// Decoder that keeps the bytes of a partially read UTF-32 code unit between
// calls. GetChars stores its leftover state into iChar / readByteCount.
internal class UTF32Decoder : DecoderNLS
{
    // Need a place to store any extra bytes we may have picked up
    internal int iChar = 0;           // bits accumulated so far for the pending character
    internal int readByteCount = 0;   // how many bytes of the pending character were read

    public UTF32Decoder(UTF32Encoding encoding) : base(encoding)
    {
    }

    // Discards any pending partial character and resets the fallback buffer, if one exists.
    public override void Reset()
    {
        // Clear both halves of the pending state so no stale bits survive a reset.
        this.iChar = 0;
        this.readByteCount = 0;
        if (m_fallbackBuffer != null)
            m_fallbackBuffer.Reset();
    }

    // Anything left in our decoder?
    internal override bool HasState
    {
        get
        {
            // ReadByteCount is our flag.  (iChar==0 doesn't mean much).
            return (this.readByteCount != 0);
        }
    }
}
1263 #endif // FEATURE_UTF32