2 * UTF7Encoding.cs - Implementation of the
3 * "System.Text.UTF7Encoding" class.
5 * Copyright (c) 2002 Southern Storm Software, Pty Ltd
6 * Copyright (c) 2003, 2004, Novell, Inc.
8 * Permission is hereby granted, free of charge, to any person obtaining
9 * a copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
31 using System
.Runtime
.InteropServices
;
35 [MonoLimitation ("Serialization format not compatible with .NET")]
41 class UTF7Encoding
: Encoding
43 // Magic number used by Windows for UTF-7.
// This value is handed to the base Encoding constructor by UTF7Encoding(bool).
44 internal const int UTF7_CODE_PAGE
= 65000;
// Assigned by the UTF7Encoding(bool) constructor; forwarded as the
// allowOptionals argument to InternalGetByteCount, InternalGetBytes and to
// each UTF7Encoder created by GetEncoder.
47 private bool allowOptionals
;
49 // Encoding rule table for 0x00-0x7F.
50 // 0 - full encode, 1 - direct, 2 - optional, 3 - encode plus.
// One entry per character code: each row below holds 16 consecutive codes and
// the trailing comment gives the row's first code in hex. The only entry with
// rule 3 is 0x2B ('+'). The encoder-side routines read this table through a
// local alias named "rules".
51 private static readonly byte[] encodingRules
= {
52 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 00
53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
54 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 3, 1, 1, 1, 1, // 20
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, // 30
57 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 2, 2, // 50
59 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 0, // 70
63 // Characters to use to encode 6-bit values in base64.
// InternalGetBytes indexes this string (through its local alias "base64") to
// obtain each output byte of an encoded sequence.
64 private const String base64Chars
=
65 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
67 // Map bytes in base64 to 6-bit values.
// Inverse of base64Chars, indexed by the raw byte value (16 rows of 16
// entries; the trailing comment gives each row's first index in hex).
// -1 marks a byte that is not a base64 digit; the decoding routines test for
// that sentinel before using the value.
68 private static readonly sbyte[] base64Values
= {
69 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00
70 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10
71 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, // 20
72 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30
74 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40
75 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, // 50
76 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60
77 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, // 70
79 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 80
80 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 90
81 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // A0
82 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // B0
84 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // C0
85 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // D0
86 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // E0
87 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // F0
91 public UTF7Encoding ()
// NOTE(review): the initializer/body of the parameterless constructor above
// is not visible in this listing - confirm which allowOptionals value it
// selects.
//
// Creates the encoding with the given allowOptionals setting, passing
// UTF7_CODE_PAGE to the base constructor, and fills in the descriptive
// fields below.
96 public UTF7Encoding (bool allowOptionals
)
97 : base (UTF7_CODE_PAGE
)
99 this.allowOptionals
= allowOptionals
;
// Display/header names and mail/news flags for this encoding.
// NOTE(review): these fields are not declared in this file - presumably
// inherited from Encoding; confirm.
102 encoding_name
= "Unicode (UTF-7)";
103 header_name
= "utf-7";
104 is_mail_news_display
= true;
105 is_mail_news_save
= true;
// The Windows code page is set to the UnicodeEncoding constant rather than
// to UTF7_CODE_PAGE.
107 windows_code_page
= UnicodeEncoding
.UNICODE_CODE_PAGE
;
// Returns the base class hash code, negated when allowOptionals is true, so
// that the flag takes part in the hash.
111 public override int GetHashCode ()
113 int basis
= base.GetHashCode ();
114 return allowOptionals
? -basis
: basis
;
// Two UTF7Encoding instances compare equal when their allowOptionals flags
// match and both their encoder and decoder fallbacks are equal.
118 public override bool Equals (object value)
// "as" yields null when value is null or not a UTF7Encoding.
// NOTE(review): no null check on "e" is visible in this listing before it is
// dereferenced below - confirm one exists, otherwise this would throw
// NullReferenceException instead of returning false.
120 UTF7Encoding e
= value as UTF7Encoding
;
123 return allowOptionals
== e
.allowOptionals
&&
124 EncoderFallback
.Equals (e
.EncoderFallback
) &&
125 DecoderFallback
.Equals (e
.DecoderFallback
);
128 // Internal version of "GetByteCount" that can handle
129 // a rolling state between calls.
//
// Parameters:
//   chars, index, count - the slice of characters to measure.
//   flush               - combined with isInShifted in the final block to
//                         decide whether the trailing flush and "-"
//                         terminator are counted.
//   leftOver            - packed encoder state; this method only reads the
//                         pending-bit count stored in bits 8 and up.
//   isInShifted         - shifted-sequence flag carried in from the caller.
//   allowOptionals      - consulted for characters that "may need to be
//                         encoded".
// Returns the computed output length.
// Throws ArgumentNullException ("chars") and ArgumentOutOfRangeException
// ("index" / "count") for invalid arguments.
// NOTE(review): several statements of this method (the null guard, the loop
// header, the rule dispatch and the length bookkeeping) are not visible in
// this listing; the comments here describe only what is shown.
130 private static int InternalGetByteCount
131 (char[] chars
, int index
, int count
, bool flush
,
132 int leftOver
, bool isInShifted
, bool allowOptionals
)
134 // Validate the parameters.
136 throw new ArgumentNullException ("chars");
// index may equal chars.Length (an empty slice at the end is accepted).
138 if (index
< 0 || index
> chars
.Length
) {
139 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
// Written as a subtraction so that index + count is never computed.
141 if (count
< 0 || count
> (chars
.Length
- index
)) {
142 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
145 // Determine the length of the output.
// Number of pending bits carried over from the previous call.
147 int leftOverSize
= (leftOver
>> 8);
148 byte[] rules
= encodingRules
;
// Fetch the next character as an int and advance the read position.
151 ch
= (int)(chars
[index
++]);
160 // Handle characters that must be fully encoded.
161 if ( !isInShifted
) {
// Runs while at least six pending bits (one base64 digit's worth) remain.
167 while (leftOverSize
>= 6) {
173 // The character is encoded as itself.
175 if (leftOverSize
!= 0) {
176 // Flush the previous encoded sequence.
180 // Count the "-" (sequence terminator)
187 // The character may need to be encoded.
188 if (allowOptionals
) {
195 // Encode the plus sign as "+-".
197 if (leftOverSize
!= 0) {
198 // Flush the previous encoded sequence.
202 // Count the "-" (sequence terminator)
// End of input: only taken when a flush is requested and isInShifted is set.
210 if (isInShifted
&& flush
) {
211 if (leftOverSize
!= 0)
213 // Flush the previous encoded sequence.
216 // Count the "-" (sequence terminator)
220 // Return the length to the caller.
224 // Get the number of bytes needed to encode a character buffer.
// Stateless entry point: delegates to InternalGetByteCount with flush = true,
// an empty leftOver state (0) and isInShifted = false, using this instance's
// allowOptionals setting.
225 public override int GetByteCount (char[] chars
, int index
, int count
)
227 return InternalGetByteCount (chars
, index
, count
, true, 0, false, allowOptionals
);
230 // Internal version of "GetBytes" that can handle a
231 // rolling state between calls.
//
// Parameters:
//   chars, charIndex, charCount - the slice of characters to encode.
//   bytes, byteIndex            - destination buffer and first write position.
//   flush                       - combined with isInShifted in the final block
//                                 to decide whether pending bits and the "-"
//                                 terminator are written.
//   leftOver (ref)              - packed state: (leftOverSize << 8) | leftOverBits.
//                                 Read on entry and written back before return.
//   isInShifted (ref)           - shifted-sequence flag carried between calls.
//   allowOptionals              - consulted for characters that "may need to
//                                 be encoded".
// Returns the number of bytes written (posn - byteIndex).
// Throws ArgumentNullException / ArgumentOutOfRangeException for invalid
// arguments, and ArgumentException ("Arg_InsufficientSpace", "bytes") when the
// destination runs out of room.
// NOTE(review): several statements of this method (null guards, the rule
// dispatch, size bookkeeping) are not visible in this listing; the comments
// here describe only what is shown.
232 private static int InternalGetBytes
233 (char[] chars
, int charIndex
, int charCount
,
234 byte[] bytes
, int byteIndex
, bool flush
,
235 ref int leftOver
, ref bool isInShifted
, bool allowOptionals
)
237 // Validate the parameters.
239 throw new ArgumentNullException ("chars");
242 throw new ArgumentNullException ("bytes");
244 if (charIndex
< 0 || charIndex
> chars
.Length
) {
245 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
// Written as a subtraction so that charIndex + charCount is never computed.
247 if (charCount
< 0 || charCount
> (chars
.Length
- charIndex
)) {
248 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
// byteIndex may equal bytes.Length; running out of room is detected later,
// at each individual write.
250 if (byteIndex
< 0 || byteIndex
> bytes
.Length
) {
251 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
254 // Convert the characters.
// posn is the next write position; byteLength is the exclusive upper bound.
255 int posn
= byteIndex
;
256 int byteLength
= bytes
.Length
;
// Unpack the carried state: pending-bit count above bit 8, pending bits in
// the low byte.
257 int leftOverSize
= (leftOver
>> 8);
258 int leftOverBits
= (leftOver
& 0xFF);
259 byte[] rules
= encodingRules
;
260 String base64
= base64Chars
;
262 while (charCount
> 0) {
263 ch
= (int)(chars
[charIndex
++]);
272 // Handle characters that must be fully encoded.
274 if (posn
>= byteLength
) {
275 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
277 // Start the sequence
278 bytes
[posn
++] = (byte)'+';
// Append the 16-bit character below the bits that are still pending.
282 leftOverBits
= ((leftOverBits
<< 16) | ch
);
// Emit base64 digits while at least six pending bits remain.
284 while (leftOverSize
>= 6) {
285 if (posn
>= byteLength
) {
286 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
289 bytes
[posn
++] = (byte)(base64
[leftOverBits
>> leftOverSize
]);
// Keep only the low leftOverSize bits, i.e. those not yet emitted.
290 leftOverBits
&= ((1 << leftOverSize
) - 1);
294 // The character is encoded as itself.
296 if (leftOverSize
!= 0) {
297 // Flush the previous encoded sequence.
298 if ((posn
+ 1) > byteLength
) {
299 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
// Left-align the remaining bits into one final 6-bit digit.
301 bytes
[posn
++] = (byte)(base64
[leftOverBits
<< (6 - leftOverSize
)]);
303 if ((posn
+ 1) > byteLength
) {
304 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
306 // Terminate the sequence
307 bytes
[posn
++] = (byte)'-';
312 if (posn
>= byteLength
) {
313 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
315 bytes
[posn
++] = (byte)ch
;
318 // The character may need to be encoded.
319 if (allowOptionals
) {
326 // Encode the plus sign as "+-".
328 if (leftOverSize
!= 0) {
329 // Flush the previous encoded sequence.
330 if ((posn
+ 1) > byteLength
) {
331 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
333 bytes
[posn
++] = (byte)(base64
[leftOverBits
<< (6 - leftOverSize
)]);
335 if ((posn
+ 1) > byteLength
) {
336 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
338 // Terminate the sequence
339 bytes
[posn
++] = (byte)'-';
// Both bytes of the "+-" pair must fit.
344 if ((posn
+ 2) > byteLength
) {
345 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
347 bytes
[posn
++] = (byte)'+';
348 bytes
[posn
++] = (byte)'-';
// End of input: only taken when a flush is requested and isInShifted is set.
352 if (isInShifted
&& flush
) {
353 // Flush the previous encoded sequence.
354 if (leftOverSize
!= 0) {
355 if ((posn
+ 1) > byteLength
) {
356 throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
358 bytes
[posn
++] = (byte)(base64
[leftOverBits
<< (6 - leftOverSize
)]);
360 // Terminate the sequence
// NOTE(review): unlike the other "-" writes in this method, no space check is
// visible immediately before this one - confirm posn < byteLength is
// guaranteed here; otherwise a full buffer would surface as an index
// exception rather than ArgumentException ("Arg_InsufficientSpace").
361 bytes
[posn
++] = (byte)'-';
// Re-pack the state for the next call.
366 leftOver
= ((leftOverSize
<< 8) | leftOverBits
);
368 // Return the length to the caller.
369 return posn
- byteIndex
;
372 // Get the bytes that result from encoding a character buffer.
// Stateless entry point: delegates to InternalGetBytes with flush = true and
// isInShifted starting as false, using this instance's allowOptionals setting.
// Returns whatever InternalGetBytes returns (the number of bytes written).
// NOTE(review): the declaration/initial value of the local "leftOver" passed
// by ref below is not visible in this listing.
373 public override int GetBytes (char[] chars
, int charIndex
, int charCount
,
374 byte[] bytes
, int byteIndex
)
377 bool isInShifted
= false;
378 return InternalGetBytes (chars
, charIndex
, charCount
, bytes
, byteIndex
, true,
379 ref leftOver
, ref isInShifted
, allowOptionals
);
382 // Internal version of "GetCharCount" that can handle
383 // a rolling state between calls.
//
// Parameters:
//   bytes, index, count - the slice of bytes to examine.
//   leftOver            - packed decoder state (passed by value, so this
//                         method cannot change the caller's copy). It reads:
//                           bits 16-23  -> leftOverSize (pending-bit count)
//                           0x01000000  -> set when "normal" is false
//                           0x02000000  -> prevIsPlus
// Returns the resulting character count.
// Throws ArgumentNullException ("bytes") and ArgumentOutOfRangeException
// ("index" / "count") for invalid arguments.
// NOTE(review): several statements of this method (the null guard, the loop
// header and the counting itself) are not visible in this listing.
384 private static int InternalGetCharCount
385 (byte[] bytes
, int index
, int count
, int leftOver
)
387 // Validate the parameters.
389 throw new ArgumentNullException ("bytes");
391 if (index
< 0 || index
> bytes
.Length
) {
392 throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
// Written as a subtraction so that index + count is never computed.
394 if (count
< 0 || count
> (bytes
.Length
- index
)) {
395 throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
398 // Determine the length of the result.
401 bool normal
= ((leftOver
& 0x01000000) == 0);
402 bool prevIsPlus
= ((leftOver
& 0x02000000) != 0);
403 int leftOverSize
= ((leftOver
>> 16) & 0xFF);
404 sbyte[] base64
= base64Values
;
// Fetch the next byte as an int and advance the read position.
406 byteval
= (int)(bytes
[index
++]);
409 if (byteval
!= '+') {
410 // Directly-encoded character.
413 // Start of a base64-encoded character.
418 // Process the next byte in a base64 sequence.
419 if (byteval
== (int)'-') {
420 // End of a base64 sequence.
// -1 in the table means the byte is not a base64 digit.
426 } else if (base64
[byteval
] != -1) {
427 // Extra character in a base64 sequence.
// Sixteen or more pending bits: enough for one UTF-16 code unit.
429 if (leftOverSize
>= 16) {
442 // Return the final length to the caller.
446 // Get the number of characters needed to decode a byte buffer.
// Stateless entry point: delegates to InternalGetCharCount with an empty
// leftOver state (0).
447 public override int GetCharCount (byte[] bytes
, int index
, int count
)
449 return InternalGetCharCount (bytes
, index
, count
, 0);
452 // Internal version of "GetChars" that can handle a
453 // rolling state between calls.
//
// Parameters:
//   bytes, byteIndex, byteCount - the slice of bytes to decode.
//   chars, charIndex            - destination buffer and first write position.
//   leftOver (ref)              - packed decoder state, read on entry and
//                                 written back before return:
//                                   bits 0-15   -> leftOverBits (pending bits)
//                                   bits 16-23  -> leftOverSize (pending-bit count)
//                                   0x01000000  -> set when "normal" is false
//                                   0x02000000  -> prevIsPlus
//                                   0x04000000  -> afterHighSurrogate
// Returns the number of characters written (posn - charIndex).
// Throws ArgumentNullException / ArgumentOutOfRangeException for invalid
// arguments, ArgumentException ("Arg_InsufficientSpace", "chars") when the
// destination is full, and ArgumentException ("Arg_InvalidUTF7", "chars") for
// the surrogate-ordering violations checked below.
// NOTE(review): several statements of this method (null guards, the
// normal/prevIsPlus transitions, size bookkeeping) are not visible in this
// listing; the comments here describe only what is shown.
454 private static int InternalGetChars (byte[] bytes
, int byteIndex
, int byteCount
,
455 char[] chars
, int charIndex
, ref int leftOver
)
457 // Validate the parameters.
459 throw new ArgumentNullException ("bytes");
462 throw new ArgumentNullException ("chars");
464 if (byteIndex
< 0 || byteIndex
> bytes
.Length
) {
465 throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
// Written as a subtraction so that byteIndex + byteCount is never computed.
467 if (byteCount
< 0 || byteCount
> (bytes
.Length
- byteIndex
)) {
468 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
470 if (charIndex
< 0 || charIndex
> chars
.Length
) {
471 throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
474 // Convert the bytes into characters.
// posn is the next write position; charLength is the exclusive upper bound.
475 int posn
= charIndex
;
476 int charLength
= chars
.Length
;
477 int byteval
, b64value
;
// Unpack the carried state (layout described in the header above).
478 bool normal
= ((leftOver
& 0x01000000) == 0);
479 bool prevIsPlus
= ((leftOver
& 0x02000000) != 0);
480 bool afterHighSurrogate
= ((leftOver
& 0x04000000) != 0);
481 int leftOverSize
= ((leftOver
>> 16) & 0xFF);
482 int leftOverBits
= (leftOver
& 0xFFFF);
483 sbyte[] base64
= base64Values
;
484 while (byteCount
> 0) {
485 byteval
= (int)(bytes
[byteIndex
++]);
488 if (byteval
!= '+') {
489 // Directly-encoded character.
490 if (posn
>= charLength
) {
491 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
// A direct character may not follow a high surrogate.
493 if (afterHighSurrogate
) {
494 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
496 chars
[posn
++] = (char)byteval
;
498 // Start of a base64-encoded character.
503 // Process the next byte in a base64 sequence.
504 if (byteval
== (int)'-') {
505 // End of a base64 sequence.
507 if (posn
>= charLength
) {
508 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
510 if (afterHighSurrogate
) {
511 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
516 // When decoding, any bits at the end of the Modified Base64 sequence that
517 // do not constitute a complete 16-bit Unicode character are discarded.
518 // If such discarded bits are non-zero the sequence is ill-formed.
// Look the byte up once; -1 means it is not a base64 digit.
523 else if ((b64value
= base64
[byteval
]) != -1)
525 // Extra character in a base64 sequence.
// Append the new 6-bit value below the bits already pending.
526 leftOverBits
= (leftOverBits
<< 6) | b64value
;
528 if (leftOverSize
>= 16) {
529 if (posn
>= charLength
) {
530 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
// Take the next UTF-16 code unit from the top of the pending bits.
533 char nextChar
= (char)(leftOverBits
>> leftOverSize
);
// 0xD800-0xDBFF: remember that a matching low surrogate must come next.
534 if ((nextChar
& 0xFC00) == 0xD800) {
535 afterHighSurrogate
= true;
// 0xDC00-0xDFFF: only valid directly after a high surrogate.
537 else if ((nextChar
& 0xFC00) == 0xDC00) {
538 if (!afterHighSurrogate
) {
539 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
541 afterHighSurrogate
= false;
543 chars
[posn
++] = nextChar
;
// Keep only the low leftOverSize bits, i.e. those not yet consumed.
544 leftOverBits
&= ((1 << leftOverSize
) - 1);
547 if (posn
>= charLength
) {
548 throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
550 if (afterHighSurrogate
) {
551 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
553 chars
[posn
++] = (char)byteval
;
// Re-pack the state for the next call.
561 leftOver
= (leftOverBits
| (leftOverSize
<< 16) |
562 (normal
? 0 : 0x01000000) |
563 (prevIsPlus
? 0x02000000 : 0) |
564 (afterHighSurrogate
? 0x04000000 : 0));
566 // Return the final length to the caller.
567 return posn
- charIndex
;
570 // Get the characters that result from decoding a byte buffer.
// Stateless entry point: delegates to InternalGetChars, then rejects input
// that ends while the afterHighSurrogate state bit (0x04000000) is still set,
// i.e. a high surrogate with no following low surrogate.
// Throws ArgumentException ("Arg_InvalidUTF7", "chars") in that case.
// NOTE(review): the declaration of the local "leftOver" and the final return
// statement are not visible in this listing.
571 public override int GetChars (byte[] bytes
, int byteIndex
, int byteCount
,
572 char[] chars
, int charIndex
)
575 int amount
= InternalGetChars (bytes
, byteIndex
, byteCount
, chars
, charIndex
, ref leftOver
);
576 if ((leftOver
& 0x04000000) != 0) {
577 throw new ArgumentException (_("Arg_InvalidUTF7"), "chars");
582 // Get the maximum number of bytes needed to encode a
583 // specified number of characters.
// Throws ArgumentOutOfRangeException ("charCount", "ArgRange_NonNegative").
// NOTE(review): the condition guarding that throw is not visible in this
// listing.
584 public override int GetMaxByteCount (int charCount
)
587 throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
// 8 bytes for every full group of 3 characters, 3 bytes for each remaining
// character, plus 2.
// NOTE(review): plain int arithmetic - a very large charCount could overflow
// unless this code is compiled in a checked context; confirm.
591 return 8 * (int) (charCount
/ 3) + (charCount
% 3) * 3 + 2;
594 // Get the maximum number of characters needed to decode a
595 // specified number of bytes.
// Throws ArgumentOutOfRangeException ("byteCount", "ArgRange_NonNegative").
// NOTE(review): neither the condition guarding that throw nor the return
// expression is visible in this listing.
596 public override int GetMaxCharCount (int byteCount
)
599 throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
604 // Get a UTF7-specific decoder that is attached to this instance.
// Each call returns a new UTF7Decoder; the constructor takes no arguments, so
// no per-instance settings are passed to it.
605 public override Decoder
GetDecoder ()
607 return new UTF7Decoder ();
610 // Get a UTF7-specific encoder that is attached to this instance.
// Each call returns a new UTF7Encoder configured with this instance's
// allowOptionals setting.
611 public override Encoder
GetEncoder ()
613 return new UTF7Encoder (allowOptionals
);
616 // UTF-7 decoder implementation.
// Stateful wrapper around the static InternalGetCharCount/InternalGetChars
// helpers: the packed decoder state is kept in "leftOver" between calls.
617 private sealed class UTF7Decoder
: Decoder
// Packed state handed to the Internal* helpers.
620 private int leftOver
;
// NOTE(review): the constructor body is not visible in this listing.
623 public UTF7Decoder ()
628 // Override inherited methods.
// Counts characters using the current state. leftOver is passed by value
// here, so counting does not advance the decoder.
629 public override int GetCharCount (byte[] bytes
, int index
, int count
)
631 return InternalGetCharCount (bytes
, index
, count
, leftOver
);
// Decodes and updates the stored state (leftOver is passed by ref).
// NOTE(review): the trailing "charIndex" parameter declaration is not visible
// in this listing.
633 public override int GetChars (byte[] bytes
, int byteIndex
,
634 int byteCount
, char[] chars
,
637 return InternalGetChars (bytes
, byteIndex
, byteCount
, chars
, charIndex
, ref leftOver
);
640 } // class UTF7Decoder
642 // UTF-7 encoder implementation.
// Stateful wrapper around the static InternalGetByteCount/InternalGetBytes
// helpers: the pending bits ("leftOver") and the shifted-sequence flag
// ("isInShifted") are kept between calls.
643 private sealed class UTF7Encoder
: Encoder
// Copied from the constructor argument and forwarded to the helpers.
645 private bool allowOptionals
;
// Packed pending-bit state; starts empty.
646 private int leftOver
= 0;
// Shifted-sequence flag passed to the helpers; starts false.
647 private bool isInShifted
= false;
650 public UTF7Encoder (bool allowOptionals
)
652 this.allowOptionals
= allowOptionals
;
655 // Override inherited methods.
// Counts bytes using the current state. The state is passed by value here,
// so counting does not advance the encoder.
656 public override int GetByteCount (char[] chars
, int index
,
657 int count
, bool flush
)
659 return InternalGetByteCount
660 (chars
, index
, count
, flush
, leftOver
, isInShifted
, allowOptionals
);
// Encodes and updates the stored state (leftOver and isInShifted are passed
// by ref).
662 public override int GetBytes (char[] chars
, int charIndex
,
663 int charCount
, byte[] bytes
,
664 int byteIndex
, bool flush
)
666 return InternalGetBytes (chars
, charIndex
, charCount
,
667 bytes
, byteIndex
, flush
,
668 ref leftOver
, ref isInShifted
, allowOptionals
);
671 } // class UTF7Encoder
673 // The remaining overrides add no UTF-7-specific logic; each one simply forwards to the base class implementation (which should just work).
// Pointer-based overload; forwards unchanged to the base class implementation.
675 [CLSCompliantAttribute (false)]
677 public override unsafe int GetByteCount (char *chars
, int count
)
679 return base.GetByteCount (chars
, count
);
// String overload; forwards unchanged to the base class implementation.
683 public override int GetByteCount (string s
)
685 return base.GetByteCount (s
);
// Pointer-based overload; forwards unchanged to the base class implementation.
689 [CLSCompliantAttribute (false)]
690 public override unsafe int GetBytes (char *chars
, int charCount
, byte* bytes
, int byteCount
)
692 return base.GetBytes (chars
, charCount
, bytes
, byteCount
);
// String-source overload; forwards unchanged to the base class implementation.
696 public override int GetBytes (string s
, int charIndex
, int charCount
, byte [] bytes
, int byteIndex
)
698 return base.GetBytes (s
, charIndex
, charCount
, bytes
, byteIndex
);
// Pointer-based overload; forwards unchanged to the base class implementation.
702 [CLSCompliantAttribute (false)]
703 public override unsafe int GetCharCount (byte *bytes
, int count
)
705 return base.GetCharCount (bytes
, count
);
// Pointer-based overload; forwards unchanged to the base class implementation.
709 [CLSCompliantAttribute (false)]
710 public override unsafe int GetChars (byte* bytes
, int byteCount
, char* chars
, int charCount
)
712 return base.GetChars (bytes
, byteCount
, chars
, charCount
);
// Decodes a byte range to a string by forwarding unchanged to the base class
// implementation.
716 public override string GetString (byte [] bytes
, int index
, int count
)
718 return base.GetString (bytes
, index
, count
);
721 }; // class UTF7Encoding
723 }; // namespace System.Text