1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
10 using System
.Diagnostics
;
11 using System
.Runtime
.InteropServices
;
15 public class UTF7Encoding
: Encoding
// Alphabet of the base64 portion of UTF-7. MakeTables copies it into _base64Bytes, so a
// character's position in this string is its 6-bit value.
17 private const string base64Chars
=
18 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
19 // 0123456789111111111122222222223333333333444444444455555555556666
20 // 012345678901234567890123456789012345678901234567890123
22 // These are the characters that can be directly encoded in UTF7.
23 private const string directChars
=
24 "\t\n\r '(),-./0123456789:?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
26 // These are the characters that can be optionally directly encoded in UTF7.
27 private const string optionalChars
=
28 "!\"#$%&*;<=>@[]^_`{|}";
30 // Used by Encoding.UTF7 for lazy initialization
31 // The initialization code will not be run until a static member of the class is referenced
32 internal static readonly UTF7Encoding s_default
= new UTF7Encoding();
34 // The set of base 64 characters (filled by MakeTables from base64Chars; 64 entries).
35 private byte[] _base64Bytes
= null!;
36 // The decoded bits for every base64 value. This array has a size of 128 elements.
37 // The index is the code point value of the base 64 characters. The value is -1 if
38 // the code point is not a valid base 64 character. Otherwise, the value is the
// character's index in base64Chars (0 to 63), as assigned by MakeTables.
40 private sbyte[] _base64Values
= null!;
41 // The array to decide if a Unicode code point below 0x80 can be directly encoded in UTF7.
42 // This array has a size of 128.
43 private bool[] _directEncode
= null!;
// Constructor argument, stored as given; Equals compares it between instances.
45 private readonly bool _allowOptionals
;
// Code page number handed to the Encoding base constructor.
47 private const int UTF7_CODEPAGE
= 65000;
// Creates a UTF-7 encoding instance registered under code page UTF7_CODEPAGE.
//   allowOptionals - stored in _allowOptionals. Presumably selects whether the characters listed
//                    in optionalChars may be written directly instead of base64-encoded
//                    (NOTE(review): confirm against MakeTables).
public UTF7Encoding(bool allowOptionals)
    : base(UTF7_CODEPAGE) // Set the data item.
{
    // Allowing optionals?
    _allowOptionals = allowOptionals;

    // Build the lookup tables. They are declared with "= null!" and MakeTables is the only
    // code that assigns them, so it must run before any encode/decode call touches them.
    MakeTables();
}
// Builds the three lookup tables used by the encoder and decoder:
//   _base64Bytes  - 6-bit value -> base64 character, stored as a byte (64 entries)
//   _base64Values - code point below 0x80 -> 6-bit value, or -1 if it is not a base64 character
//   _directEncode - code point below 0x80 -> true when the character may be written as-is
private void MakeTables()
{
    // Forward table: position in base64Chars -> character byte.
    _base64Bytes = new byte[64];
    for (int i = 0; i < 64; i++)
    {
        _base64Bytes[i] = (byte)base64Chars[i];
    }

    // Reverse table: start with "not a base64 character" everywhere, then fill in the 64 valid ones.
    _base64Values = new sbyte[128];
    for (int i = 0; i < 128; i++)
    {
        _base64Values[i] = -1;
    }
    for (int i = 0; i < 64; i++)
    {
        _base64Values[_base64Bytes[i]] = (sbyte)i;
    }

    // Characters that are always written directly.
    _directEncode = new bool[128];
    foreach (char direct in directChars)
    {
        _directEncode[direct] = true;
    }

    // The optional set is only written directly when the caller asked for it; without this
    // guard the allowOptionals constructor argument would have no effect on encoding.
    if (_allowOptionals)
    {
        foreach (char optional in optionalChars)
        {
            _directEncode[optional] = true;
        }
    }
}
90 // We go ahead and set this because Encoding expects it, however nothing can fall back in UTF7.
internal sealed override void SetDefaultFallbacks()
{
    // The encoder fallback is a formality: surrogates are encoded one by one and unmatched
    // ones are never checked for, so encoding never falls back. An empty replacement is enough.
    encoderFallback = new EncoderReplacementFallback(string.Empty);

    // Decoding keeps UTF7's own odd fallback behavior through the dedicated fallback type.
    decoderFallback = new DecoderUTF7Fallback();
}
// Two UTF7Encoding instances are equal when they agree on the allow-optionals flag and on both
// fallbacks. Any other object (including null) compares unequal; the pattern match makes that
// path return false explicitly instead of falling off the end of the method.
public override bool Equals(object? value)
{
    return value is UTF7Encoding that
        && _allowOptionals == that._allowOptionals
        && EncoderFallback.Equals(that.EncoderFallback)
        && DecoderFallback.Equals(that.DecoderFallback);
}
111 // Compared to all the other encodings, variations of UTF7 are unlikely
// Hash is the code page plus the hash codes of both fallbacks; the allow-optionals flag is not mixed in.
public override int GetHashCode()
{
    int hash = CodePage;
    hash += EncoderFallback.GetHashCode();
    hash += DecoderFallback.GetHashCode();
    return hash;
}
118 // The following methods are copied from EncodingNLS.cs.
119 // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
120 // These should be kept in sync for the following classes:
121 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
123 // Returns the number of bytes required to encode a range of characters in
124 // a character array.
126 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
127 // So if you fix this, fix the others. Currently those include:
128 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
129 // parent method is safe
// Returns the number of bytes needed to encode count characters of chars, starting at index.
//   chars - source array; null throws ArgumentNullException.
//   index - first character to encode; negative throws ArgumentOutOfRangeException.
//   count - number of characters; negative, or running past the end of chars from index,
//           throws ArgumentOutOfRangeException.
public override unsafe int GetByteCount(char[] chars, int index, int count)
{
    // Validate input parameters
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (index < 0 || count < 0)
    {
        throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (chars.Length - index < count)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // If no input, return 0, avoid fixed empty array problem
    // (fixed on an empty array yields a null pointer, which the pointer overload asserts against).
    if (count == 0)
    {
        return 0;
    }

    // Just call the pointer version
    fixed (char* pChars = chars)
    {
        return GetByteCount(pChars + index, count, null);
    }
}
152 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
153 // So if you fix this, fix the others. Currently those include:
154 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
155 // parent method is safe
// Returns the number of bytes needed to encode the whole string s.
//   s - string to measure; null throws ArgumentNullException.
public override unsafe int GetByteCount(string s)
{
    // Validate input: only a null string is an error, not every call.
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    fixed (char* pChars = s)
    {
        return GetByteCount(pChars, s.Length, null);
    }
}
167 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
168 // So if you fix this, fix the others. Currently those include:
169 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Returns the number of bytes needed to encode count characters starting at chars.
//   chars - pointer to the first character; null throws ArgumentNullException.
//   count - number of characters; negative throws ArgumentOutOfRangeException.
[CLSCompliant(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
    // Validate Parameters
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Call it with empty encoder
    return GetByteCount(chars, count, null);
}
185 // Parent method is safe.
186 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
187 // So if you fix this, fix the others. Currently those include:
188 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Encodes charCount characters of s, starting at charIndex, into bytes starting at byteIndex,
// and returns the value produced by the pointer-based overload.
public override unsafe int GetBytes(string s, int charIndex, int charCount,
    byte[] bytes, int byteIndex)
{
    // Null checks: s is reported ahead of bytes.
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s), SR.ArgumentNull_Array);
    }
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    // Negative values: charIndex is reported ahead of charCount.
    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // The requested range has to lie inside the string.
    if (s.Length - charIndex < charCount)
    {
        throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
    }

    // byteIndex may equal bytes.Length (zero bytes of room) but not exceed it.
    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    // Room left in the destination from byteIndex onwards.
    int bytesAvailable = bytes.Length - byteIndex;

    fixed (char* pSource = s)
    fixed (byte* pDest = &MemoryMarshal.GetReference((Span<byte>)bytes))
    {
        return GetBytes(pSource + charIndex, charCount, pDest + byteIndex, bytesAvailable, null);
    }
}
211 // Encodes a range of characters in a character array into a range of bytes
212 // in a byte array. An exception occurs if the byte array is not large
213 // enough to hold the complete encoding of the characters. The
214 // GetByteCount method can be used to determine the exact number of
215 // bytes that will be produced for a given range of characters.
216 // Alternatively, the GetMaxByteCount method can be used to
217 // determine the maximum number of bytes that will be produced for a given
218 // number of characters, regardless of the actual character values.
220 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
221 // So if you fix this, fix the others. Currently those include:
222 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
223 // parent method is safe
// Encodes charCount characters of chars, starting at charIndex, into bytes starting at byteIndex.
//   chars / bytes         - must not be null (ArgumentNullException; chars is reported first).
//   charIndex / charCount - must be non-negative and lie inside chars (ArgumentOutOfRangeException).
//   byteIndex             - must be within 0..bytes.Length (ArgumentOutOfRangeException).
// Returns 0 immediately when charCount is 0; otherwise returns the pointer overload's result.
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
    byte[] bytes, int byteIndex)
{
    // Validate parameters
    if (chars == null || bytes == null)
    {
        throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), SR.ArgumentNull_Array);
    }

    if (charIndex < 0 || charCount < 0)
    {
        throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (chars.Length - charIndex < charCount)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    // If nothing to encode return 0, avoid fixed problem
    // (fixed on an empty char[] yields a null pointer, which the pointer overload asserts against).
    if (charCount == 0)
    {
        return 0;
    }

    // Just call pointer version
    int byteCount = bytes.Length - byteIndex;

    fixed (char* pChars = chars)
    fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
    {
        // byteCount is the room left in the destination from byteIndex, not the size of the array.
        return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
    }
}
253 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
254 // So if you fix this, fix the others. Currently those include:
255 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer-based encode: checks the arguments, then forwards to the internal overload with no encoder.
[CLSCompliant(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
    // Null checks: bytes is reported ahead of chars.
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    // Negative counts: charCount is reported ahead of byteCount.
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetBytes(chars, charCount, bytes, byteCount, null);
}
270 // Returns the number of characters produced by decoding a range of bytes
273 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
274 // So if you fix this, fix the others. Currently those include:
275 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
276 // parent method is safe
// Returns the number of characters produced by decoding count bytes of bytes, starting at index.
//   bytes - source array; null throws ArgumentNullException.
//   index - first byte to decode; negative throws ArgumentOutOfRangeException.
//   count - number of bytes; negative, or running past the end of bytes from index,
//           throws ArgumentOutOfRangeException.
public override unsafe int GetCharCount(byte[] bytes, int index, int count)
{
    // Validate Parameters
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (index < 0 || count < 0)
    {
        throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (bytes.Length - index < count)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // If no input just return 0, fixed doesn't like 0 length arrays.
    if (count == 0)
    {
        return 0;
    }

    // Just call pointer version
    fixed (byte* pBytes = bytes)
    {
        return GetCharCount(pBytes + index, count, null);
    }
}
299 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
300 // So if you fix this, fix the others. Currently those include:
301 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Returns the number of characters produced by decoding count bytes starting at bytes.
//   bytes - pointer to the first byte; null throws ArgumentNullException.
//   count - number of bytes; negative throws ArgumentOutOfRangeException.
[CLSCompliant(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
    // Validate Parameters
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // No decoder: this is a one-shot count.
    return GetCharCount(bytes, count, null);
}
316 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
317 // So if you fix this, fix the others. Currently those include:
318 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
319 // parent method is safe
// Decodes byteCount bytes of bytes, starting at byteIndex, into chars starting at charIndex.
//   bytes / chars         - must not be null (ArgumentNullException; bytes is reported first).
//   byteIndex / byteCount - must be non-negative and lie inside bytes (ArgumentOutOfRangeException).
//   charIndex             - must be within 0..chars.Length (ArgumentOutOfRangeException).
// Returns 0 immediately when byteCount is 0; otherwise returns the pointer overload's result.
public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
    char[] chars, int charIndex)
{
    // Validate Parameters
    if (bytes == null || chars == null)
    {
        throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
    }

    if (byteIndex < 0 || byteCount < 0)
    {
        throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (bytes.Length - byteIndex < byteCount)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (charIndex < 0 || charIndex > chars.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
    }

    // If no input, return 0 & avoid fixed problem
    // (fixed on an empty byte[] yields a null pointer, which the pointer overload asserts against).
    if (byteCount == 0)
    {
        return 0;
    }

    // Just call pointer version
    int charCount = chars.Length - charIndex;

    fixed (byte* pBytes = bytes)
    fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
    {
        // charCount is the room left in the destination from charIndex, not the size of the array.
        return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
    }
}
349 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
350 // So if you fix this, fix the others. Currently those include:
351 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Pointer-based decode: checks the arguments, then forwards to the internal overload with no decoder.
[CLSCompliant(false)]
public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
    // Null checks: bytes is reported ahead of chars.
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    // Negative counts: charCount is reported ahead of byteCount.
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    return GetChars(bytes, byteCount, chars, charCount, null);
}
366 // Returns a string containing the decoded representation of a range of
367 // bytes in a byte array.
369 // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
370 // So if you fix this, fix the others. Currently those include:
371 // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
372 // parent method is safe
// Decodes count bytes of bytes, starting at index, into a new string.
//   bytes - source array; null throws ArgumentNullException.
//   index - first byte to decode; negative throws ArgumentOutOfRangeException.
//   count - number of bytes; negative, or running past the end of bytes from index,
//           throws ArgumentOutOfRangeException.
// Returns string.Empty when count is 0.
public override unsafe string GetString(byte[] bytes, int index, int count)
{
    // Validate Parameters
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (index < 0 || count < 0)
    {
        throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (bytes.Length - index < count)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Avoid problems with empty input buffer
    if (count == 0)
    {
        return string.Empty;
    }

    fixed (byte* pBytes = bytes)
    {
        return string.CreateStringFromEncoding(pBytes + index, count, this);
    }
}
395 // End of standard methods copied from EncodingNLS.cs
// Counting is encoding with no destination: pass a null output pointer and zero capacity to GetBytes.
internal sealed override unsafe int GetByteCount(char* chars, int count, EncoderNLS? baseEncoder)
{
    Debug.Assert(chars != null, "[UTF7Encoding.GetByteCount]chars!=null");
    Debug.Assert(count >= 0, "[UTF7Encoding.GetByteCount]count >=0");

    byte* noOutput = null;
    return GetBytes(chars, count, noOutput, 0, baseEncoder);
}
// Core encoder. Walks the input characters through an EncodingByteBuffer and writes UTF-7 bytes.
// The internal GetByteCount overload calls this with bytes == null to count without storing
// (see the "bytes == null if counting" note near the end).
// NOTE(review): several statements of this method are not visible in this excerpt (for example
// the declarations of bits/bitCount and the return statement); the comments below describe only
// what the visible lines do.
406 internal sealed override unsafe int GetBytes(
407 char* chars
, int charCount
, byte* bytes
, int byteCount
, EncoderNLS
? baseEncoder
)
409 Debug
.Assert(byteCount
>= 0, "[UTF7Encoding.GetBytes]byteCount >=0");
410 Debug
.Assert(chars
!= null, "[UTF7Encoding.GetBytes]chars!=null");
411 Debug
.Assert(charCount
>= 0, "[UTF7Encoding.GetBytes]charCount >=0");
// The caller's encoder, if any, is our own nested Encoder type.
414 UTF7Encoding
.Encoder
? encoder
= (UTF7Encoding
.Encoder
?)baseEncoder
;
416 // Default bits & count
420 // prepare our helpers
421 Encoding
.EncodingByteBuffer buffer
= new Encoding
.EncodingByteBuffer(
422 this, encoder
, bytes
, byteCount
, chars
, charCount
);
// Resume from the bit count the encoder remembered from an earlier call.
427 bitCount
= encoder
.bitCount
;
429 // May have had too many left over
430 while (bitCount
>= 6)
433 // If we fail we'll never really have enough room
434 if (!buffer
.AddByte(_base64Bytes
[(bits
>> bitCount
) & 0x3F]))
435 ThrowBytesOverflow(encoder
, buffer
.Count
== 0);
// Main loop: one input character per iteration.
439 while (buffer
.MoreData
)
441 char currentChar
= buffer
.GetNextChar();
// Characters below 0x80 that are flagged in _directEncode take the direct path.
443 if (currentChar
< 0x80 && _directEncode
[currentChar
])
449 // Try to add the next byte
450 if (!buffer
.AddByte(_base64Bytes
[bits
<< 6 - bitCount
& 0x3F]))
451 break; // Stop here, didn't throw
456 // Need to emit '-' and our char, 2 bytes total
457 if (!buffer
.AddByte((byte)'-'))
458 break; // Stop here, didn't throw
463 // Need to emit our char
464 if (!buffer
.AddByte((byte)currentChar
))
465 break; // Stop here, didn't throw
// A '+' seen while bitCount is negative is written as the two bytes '+' '-'.
467 else if (bitCount
< 0 && currentChar
== '+')
469 if (!buffer
.AddByte((byte)'+', (byte)'-'))
470 break; // Stop here, didn't throw
476 // Need to emit a + and 12 bits (3 bytes)
477 // Only 12 of the 16 bits will be emitted this time, the other 4 wait 'til next time
478 if (!buffer
.AddByte((byte)'+'))
479 break; // Stop here, didn't throw
481 // We're now in bit mode, but haven't stored data yet
// Append the 16 bits of the current char to the accumulator.
486 bits
= bits
<< 16 | currentChar
;
// Emit one base64 character for every complete group of 6 bits.
489 while (bitCount
>= 6)
492 if (!buffer
.AddByte(_base64Bytes
[(bits
>> bitCount
) & 0x3F]))
494 bitCount
+= 6; // We didn't use these bits
495 buffer
.GetNextChar(); // We're processing this char still, but AddByte
496 // --'d it when we ran out of space
497 break; // Stop here, not enough room for bytes
502 break; // Didn't have room to encode enough bits
506 // Now if we have bits left over we have to encode them.
507 // MustFlush may have been cleared by encoding.ThrowBytesOverflow earlier if converting
508 if (bitCount
>= 0 && (encoder
== null || encoder
.MustFlush
))
510 // Do we have bits we have to stick in?
513 if (buffer
.AddByte(_base64Bytes
[(bits
<< (6 - bitCount
)) & 0x3F]))
515 // Emitted spare bits, 0 bits left
520 // If converting and failed bitCount above, then we'll fail this too
521 if (buffer
.AddByte((byte)'-'))
523 // turned off bit mode';
528 // If not successful, convert will maintain state for next time, also
529 // AddByte will have decremented our char count, however we need it to remain the same
530 buffer
.GetNextChar();
533 // Do we have an encoder we're allowed to use?
534 // bytes == null if counting, so don't use encoder then
535 if (bytes
!= null && encoder
!= null)
537 // We already cleared bits & bitcount for mustflush case
// Save the state back into the encoder for the next call.
539 encoder
.bitCount
= bitCount
;
540 encoder
._charsUsed
= buffer
.CharsUsed
;
// Counting is decoding with no destination: pass a null output pointer and zero capacity to GetChars.
internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS? baseDecoder)
{
    Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
    Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");

    char* noOutput = null;
    return GetChars(bytes, count, noOutput, 0, baseDecoder);
}
// Core decoder. Walks the input bytes through an EncodingCharBuffer and produces characters.
// The internal GetCharCount overload calls this with chars == null to count without storing
// (see the "chars == null if counting" note near the end).
// NOTE(review): several statements of this method are not visible in this excerpt (for example
// the declarations of bits/bitCount/c/v and the return statement); the comments below describe
// only what the visible lines do.
555 internal sealed override unsafe int GetChars(
556 byte* bytes
, int byteCount
, char* chars
, int charCount
, DecoderNLS
? baseDecoder
)
558 Debug
.Assert(byteCount
>= 0, "[UTF7Encoding.GetChars]byteCount >=0");
559 Debug
.Assert(bytes
!= null, "[UTF7Encoding.GetChars]bytes!=null");
560 Debug
.Assert(charCount
>= 0, "[UTF7Encoding.GetChars]charCount >=0");
562 // Might use a decoder
563 UTF7Encoding
.Decoder
? decoder
= (UTF7Encoding
.Decoder
?)baseDecoder
;
565 // Get our output buffer info.
566 Encoding
.EncodingCharBuffer buffer
= new Encoding
.EncodingCharBuffer(
567 this, decoder
, chars
, charCount
, bytes
, byteCount
);
572 bool firstByte
= false;
// Resume from the state the decoder remembered from an earlier call.
576 bitCount
= decoder
.bitCount
;
577 firstByte
= decoder
.firstByte
;
579 Debug
.Assert(firstByte
== false || decoder
.bitCount
<= 0,
580 "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
583 // We may have had bits in the decoder that we couldn't output last time, so do so now
586 // Check our decoder buffer
587 if (!buffer
.AddChar((char)((bits
>> (bitCount
- 16)) & 0xFFFF)))
588 ThrowCharsOverflow(decoder
, true); // Always throw, they need at least 1 char even in Convert
590 // Used this one, clean up extra bits
594 // Loop through the input
595 while (buffer
.MoreData
)
597 byte currentByte
= buffer
.GetNextByte();
603 // Modified base 64 encoding.
// A byte below 0x80 whose _base64Values entry is non-negative is a base64 digit; v is its 6-bit value.
606 if (currentByte
< 0x80 && ((v
= _base64Values
[currentByte
]) >= 0))
// Shift the 6 new bits into the accumulator.
609 bits
= (bits
<< 6) | ((byte)v
);
// Extract a 16-bit character from the accumulated bits.
613 c
= (bits
>> (bitCount
- 16)) & 0xFFFF;
616 // If not enough bits just continue
621 // If it wasn't a base 64 byte, everything's going to turn off base 64 mode
624 if (currentByte
!= '-')
626 // >= 0x80 (because of 1st if statement)
627 // We need this check since the _base64Values[b] check below need b <= 0x7f.
628 // This is not a valid base 64 byte. Terminate the shifted-sequence and
631 // not in base 64 table
632 // According to the RFC 1642 and the example code of UTF-7
633 // in Unicode 2.0, we should just zero-extend the invalid UTF7 byte
635 // Chars won't be updated unless this works, try to fallback
636 if (!buffer
.Fallback(currentByte
))
637 break; // Stop here, didn't throw
639 // Used that byte, we're done with it
644 // The encoding for '+' is "+-".
646 if (firstByte
) c
= '+';
647 // We just turn it off if not emitting a +, so we're done.
651 // End of modified base 64 encoding block.
654 else if (currentByte
== '+')
657 // Found the start of a modified base 64 encoding block or a plus sign.
// Bytes of 0x80 and above go to the fallback.
666 if (currentByte
>= 0x80)
669 if (!buffer
.Fallback(currentByte
))
670 break; // Stop here, didn't throw
676 // Use the normal character
683 if (!buffer
.AddChar((char)c
))
685 // No room. If it was a plain char we'll try again later.
686 // Note, we'll consume this byte and stick it in decoder, even if we can't output it
687 if (bitCount
>= 0) // Can we remember this byte (char)
689 buffer
.AdjustBytes(+1); // Need to readd the byte that AddChar subtracted when it failed
690 bitCount
+= 16; // We'll still need that char we have in our bits
692 break; // didn't throw, stop
697 // Stick stuff in the decoder if we can (chars == null if counting, so don't store decoder)
698 if (chars
!= null && decoder
!= null)
700 // MustFlush? (Could've been cleared by ThrowCharsOverflow if Convert & didn't reach end of buffer)
701 if (decoder
.MustFlush
)
703 // RFC doesn't specify what would happen if we have non-0 leftover bits, we just drop them
705 decoder
.bitCount
= -1;
706 decoder
.firstByte
= false;
// Save the current state back into the decoder for the next call.
711 decoder
.bitCount
= bitCount
;
712 decoder
.firstByte
= firstByte
;
714 decoder
._bytesUsed
= buffer
.BytesUsed
;
716 // else ignore any hanging bits.
// Hands out a fresh decoder (the nested Decoder type) bound to this encoding.
public override System.Text.Decoder GetDecoder() => new UTF7Encoding.Decoder(this);
// Hands out a fresh encoder (the nested Encoder type) bound to this encoding.
public override System.Text.Encoder GetEncoder() => new UTF7Encoding.Encoder(this);
// Returns an upper bound on the number of bytes needed to encode charCount characters.
//   charCount - number of characters; negative throws ArgumentOutOfRangeException.
// Throws ArgumentOutOfRangeException as well when the bound does not fit in an int.
public override int GetMaxByteCount(int charCount)
{
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Suppose that every char can not be direct-encoded, we know that
    // a byte can encode 6 bits of the Unicode character. And we will
    // also need two extra bytes for the shift-in ('+') and shift-out ('-') mark.
    // Therefore, the max byte should be:
    // byteCount = 2 + Math.Ceiling((double)charCount * 16 / 6);
    // That is always <= 2 + 3 * charCount;
    // Longest case is alternating encoded, direct, encoded data for 5 + 1 + 5... bytes per char.
    // UTF7 doesn't have left over surrogates, but if no input we may need an output - to turn off
    // encoding if MustFlush is true.

    // It's easiest to think of this as 2 bytes to turn on/off the base64 mode, then 3 bytes per char.
    // 3 bytes is 18 bits of encoding, which is more than we need, but if it's direct encoded then 3
    // bytes allows us to turn off and then back on base64 mode if necessary.

    // Note that UTF7 encodes surrogates individually and isn't worried about mismatches, so all
    // code points are encodable in UTF7.
    long byteCount = (long)charCount * 3 + 2;

    // check for overflow (the arithmetic is done in long so it cannot wrap before this test)
    if (byteCount > 0x7fffffff)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
    }

    return (int)byteCount;
}
// Returns an upper bound on the number of characters produced by decoding byteCount bytes.
//   byteCount - number of bytes; negative throws ArgumentOutOfRangeException.
// The result is byteCount itself, except that 0 bytes still reports 1.
public override int GetMaxCharCount(int byteCount)
{
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Worst case is 1 char per byte. Minimum 1 for left over bits in case decoder is being flushed
    // Also note that we ignore extra bits (per spec), so UTF7 doesn't have unknown in this direction.
    return byteCount == 0 ? 1 : byteCount;
}
781 // Of all the amazing things... This MUST be Decoder so that our com name
782 // for System.Text.Decoder doesn't change
// Stateful decoder handed out by GetDecoder. It carries the decoding state that the internal
// GetChars overload reads at entry and writes back at exit.
// NOTE(review): parts of this class are not visible in this excerpt; only the visible members are described.
783 private sealed class Decoder
: DecoderNLS
// Bit count saved between calls; HasState below treats -1 as "nothing pending".
788 internal int bitCount
;
// firstByte flag saved between calls; GetChars reads and writes it alongside bitCount.
790 internal bool firstByte
;
792 public Decoder(UTF7Encoding encoding
) : base(encoding
)
// Clears the firstByte flag and resets the fallback buffer when one exists.
797 public override void Reset()
801 this.firstByte
= false;
802 if (_fallbackBuffer
!= null)
803 _fallbackBuffer
.Reset();
806 // Anything left in our decoder?
807 internal override bool HasState
=>
808 // NOTE: This forces the last -, which some encoder might not encode. If we
809 // don't see it we don't think we're done reading.
810 (this.bitCount
!= -1);
813 // Of all the amazing things... This MUST be Encoder so that our com name
814 // for System.Text.Encoder doesn't change
// Stateful encoder handed out by GetEncoder. It carries the encoding state that the internal
// GetBytes overload reads at entry and writes back at exit.
// NOTE(review): HasState reads this.bits, but no declaration of bits is visible in this excerpt.
815 private sealed class Encoder
: EncoderNLS
// Bit count saved between calls; HasState below treats -1 as "nothing pending".
820 internal int bitCount
;
822 public Encoder(UTF7Encoding encoding
) : base(encoding
)
// Resets the fallback buffer when one exists.
827 public override void Reset()
831 if (_fallbackBuffer
!= null)
832 _fallbackBuffer
.Reset();
835 // Anything left in our encoder?
// State is pending when bits is non-zero or bitCount is anything other than -1.
836 internal override bool HasState
=> this.bits
!= 0 || this.bitCount
!= -1;
839 // Preexisting UTF7 behavior for bad bytes was just to spit out the byte as the next char
840 // and turn off base64 mode if it was in that mode. We still exit the mode, but now we fallback.
private sealed class DecoderUTF7Fallback : DecoderFallback
{
    // Default replacement fallback uses no best fit and ? replacement string

    // Each call creates a brand-new DecoderUTF7FallbackBuffer.
    public override DecoderFallbackBuffer CreateFallbackBuffer()
    {
        return new DecoderUTF7FallbackBuffer();
    }

    // Maximum number of characters this fallback could return: 1 char per bad byte.
    public override int MaxCharCount
    {
        get { return 1; }
    }

    // Instances carry no state, so any other DecoderUTF7Fallback is equal.
    public override bool Equals(object? value)
    {
        return value is DecoderUTF7Fallback;
    }

    // Constant hash, consistent with the type-only equality above.
    public override int GetHashCode()
    {
        return 984;
    }
}
// Fallback buffer created by DecoderUTF7Fallback.CreateFallbackBuffer.
// NOTE(review): the bodies of Fallback, GetNextChar, MovePrevious and Reset are largely missing
// from this excerpt, and iSize is used without a visible declaration; only visible lines are described.
856 private sealed class DecoderUTF7FallbackBuffer
: DecoderFallbackBuffer
858 // Store our default string
859 private char cFallback
= (char)0;
// Starts at -1; the assertions below require it to be negative when a new fallback begins.
860 private int iCount
= -1;
// Records the single unknown byte, widened to a char, in cFallback.
864 public override bool Fallback(byte[] bytesUnknown
, int index
)
866 // We expect no previous fallback in our buffer
867 Debug
.Assert(iCount
< 0, "[DecoderUTF7FallbackBuffer.Fallback] Can't have recursive fallbacks");
868 Debug
.Assert(bytesUnknown
.Length
== 1, "[DecoderUTF7FallbackBuffer.Fallback] Only possible fallback case should be 1 unknown byte");
870 // Go ahead and get our fallback
871 cFallback
= (char)bytesUnknown
[0];
873 // Any of the fallback characters can be handled except for 0
884 public override char GetNextChar()
889 // Note: this means that 0 in UTF7 stream will never be emitted.
893 public override bool MovePrevious()
900 // return true if we were allowed to do this
901 return iCount
>= 0 && iCount
<= iSize
;
904 // Return # of chars left in this fallback
905 public override int Remaining
=> (iCount
> 0) ? iCount
: 0;
908 public override unsafe void Reset()
914 // This version just counts the fallback and doesn't actually copy anything.
// Throws ArgumentException unless exactly one byte is supplied; returns 0 for a zero byte, 1 otherwise.
915 internal override unsafe int InternalFallback(byte[] bytes
, byte* pBytes
)
916 // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
917 // array, and we might need the index, hence the byte*
919 // We expect no previous fallback in our buffer
920 Debug
.Assert(iCount
< 0, "[DecoderUTF7FallbackBuffer.InternalFallback] Can't have recursive fallbacks");
921 if (bytes
.Length
!= 1)
923 throw new ArgumentException(SR
.Argument_InvalidCharSequenceNoIndex
);
926 // Can't fallback a byte 0, so return for that case, 1 otherwise.
927 return bytes
[0] == 0 ? 0 : 1;