**** Merged from MCS ****
[mono-project.git] / mcs / class / corlib / System.Text / UTF7Encoding.cs
blob074bc469078e02d227a2355cfe65dfb398ace768
/*
 * UTF7Encoding.cs - Implementation of the
 *		"System.Text.UTF7Encoding" class.
 *
 * Copyright (c) 2002 Southern Storm Software, Pty Ltd
 * Copyright (c) 2003, 2004, Novell, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
namespace System.Text
{

using System;

// UTF-7 text encoding (RFC 2152, which obsoletes RFC 1642).
//
// Characters are either written directly as single ASCII bytes or packed,
// 16 bits per character, into a run of modified base64 characters that
// starts with '+' and is closed with '-'.  A literal plus sign is written
// as the two bytes "+-".
[Serializable]
[MonoTODO ("Fix serialization compatibility with MS.NET")]
#if ECMA_COMPAT
internal
#else
public
#endif
class UTF7Encoding : Encoding
{
    // Magic number used by Windows for UTF-7.
    internal const int UTF7_CODE_PAGE = 65000;

    // Layout of the encoder state word ("leftOver" on the encoding side):
    //   bits 0-7   pending bits that have not yet been written out
    //   bits 8-15  number of pending bits (0, 2 or 4 between characters)
    //   bit 16     set while a '+' ... '-' base64 run is open
    // The explicit "open run" bit is required because the pending bit count
    // drops back to zero after every third encoded character even though
    // the run has not been closed.
    private const int ENCODER_SHIFTED = 0x00010000;

    // Layout of the decoder state word ("leftOver" on the decoding side):
    //   bits 0-15  pending bits that do not yet form a full character
    //   bits 16-23 number of pending bits
    //   bit 24     set while inside a base64 run
    //   bit 25     set when the previous byte was the '+' that opened the run
    private const int DECODER_IN_BASE64 = 0x01000000;
    private const int DECODER_PREV_IS_PLUS = 0x02000000;

    // Whether the "optional direct" characters are written as themselves
    // (true) or base64 encoded (false).
    private bool allowOptionals;

    // Encoding rule table for 0x00-0x7F.
    // 0 - full encode, 1 - direct, 2 - optional, 3 - encode plus.
    private static readonly byte[] encodingRules = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 00
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
        1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 3, 1, 1, 1, 1, // 20
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, // 30

        2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 2, 2, 2, // 50
        2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 0, 0, // 70
    };

    // Characters to use to encode 6-bit values in base64.
    private const String base64Chars =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Map bytes in base64 to 6-bit values (-1 for bytes that are not
    // base64 characters).
    private static readonly sbyte[] base64Values = {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, 63, // 20
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 40
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, // 50
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, // 70

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 80
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 90
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // A0
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // B0

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // C0
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // D0
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // E0
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // F0
    };

    // Constructors.

    // Create an encoding that base64 encodes the optional characters.
    public UTF7Encoding ()
        : this (false)
    {
    }

    // Create an encoding; "allowOptionals" selects whether characters
    // marked as optional in the rule table are written directly.
    public UTF7Encoding (bool allowOptionals)
        : base (UTF7_CODE_PAGE)
    {
        this.allowOptionals = allowOptionals;

        body_name = "utf-7";
        encoding_name = "Unicode (UTF-7)";
        header_name = "utf-7";
        is_mail_news_display = true;
        is_mail_news_save = true;
        web_name = "utf-7";
        windows_code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
    }

    // Internal version of "GetByteCount" that can handle
    // a rolling state between calls.
    //
    // "leftOver" is an encoder state word (see ENCODER_SHIFTED); it is
    // only read here.  "flush" says whether an open base64 run is closed
    // at the end of the input.  Returns the number of bytes that
    // "InternalGetBytes" writes for the same arguments and state.
    private static int InternalGetByteCount
                (char[] chars, int index, int count, bool flush,
                 int leftOver, bool allowOptionals)
    {
        // Validate the parameters.
        if (chars == null) {
            throw new ArgumentNullException ("chars");
        }
        if (index < 0 || index > chars.Length) {
            throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
        }
        if (count < 0 || count > (chars.Length - index)) {
            throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
        }

        // Determine the length of the output.
        int length = 0;
        int leftOverSize = ((leftOver >> 8) & 0xFF);
        // Pending bits can only exist inside a run, so treat them as an
        // open run even if the flag bit is missing.
        bool inShifted = ((leftOver & ENCODER_SHIFTED) != 0 || leftOverSize != 0);
        byte[] rules = encodingRules;
        int ch, rule;
        while (count > 0) {
            ch = (int)(chars[index++]);
            --count;
            rule = (ch < 0x0080) ? rules[ch] : 0;
            if (rule == 2) {
                // The character may need to be encoded.
                rule = allowOptionals ? 1 : 0;
            }
            switch (rule) {
            case 0:
                // Handle characters that must be fully encoded.
                if (!inShifted) {
                    // The '+' that opens the run.
                    ++length;
                    leftOverSize = 0;
                    inShifted = true;
                }
                leftOverSize += 16;
                while (leftOverSize >= 6) {
                    ++length;
                    leftOverSize -= 6;
                }
                break;
            case 1:
                // The character is encoded as itself.
                if (inShifted) {
                    // Padded final base64 character, if any, then '-'.
                    if (leftOverSize != 0) {
                        ++length;
                        leftOverSize = 0;
                    }
                    ++length;
                    inShifted = false;
                }
                ++length;
                break;
            case 3:
                // Encode the plus sign as "+-".
                if (inShifted) {
                    if (leftOverSize != 0) {
                        ++length;
                        leftOverSize = 0;
                    }
                    ++length;
                    inShifted = false;
                }
                length += 2;
                break;
            }
        }
        if (inShifted && flush) {
            if (leftOverSize != 0) {
                ++length;
            }
            ++length;
        }

        // Return the length to the caller.
        return length;
    }

    // Get the number of bytes needed to encode a character buffer.
    public override int GetByteCount (char[] chars, int index, int count)
    {
        return InternalGetByteCount (chars, index, count, true, 0, allowOptionals);
    }

    // Close an open base64 run at "posn": write the pending bits, zero
    // padded to a full base64 character, when there are any, and then the
    // '-' terminator.  Returns the position after the bytes written.
    // Throws ArgumentException if "bytes" has no room for them.
    private static int CloseBase64Run
                (byte[] bytes, int posn, int leftOverBits, int leftOverSize)
    {
        int needed = (leftOverSize != 0) ? 2 : 1;
        if ((posn + needed) > bytes.Length) {
            throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
        }
        if (leftOverSize != 0) {
            bytes[posn++] = (byte)(base64Chars [leftOverBits << (6 - leftOverSize)]);
        }
        bytes[posn++] = (byte)'-';
        return posn;
    }

    // Internal version of "GetBytes" that can handle a
    // rolling state between calls.
    //
    // "leftOver" is an encoder state word (see ENCODER_SHIFTED); it is
    // read on entry and rewritten on normal return.  When "flush" is true
    // an open base64 run is closed at the end of the input.  Returns the
    // number of bytes written starting at "byteIndex"; throws
    // ArgumentException when "bytes" is too small.
    private static int InternalGetBytes
                (char[] chars, int charIndex, int charCount,
                 byte[] bytes, int byteIndex, bool flush,
                 ref int leftOver, bool allowOptionals)
    {
        // Validate the parameters.
        if (chars == null) {
            throw new ArgumentNullException ("chars");
        }
        if (bytes == null) {
            throw new ArgumentNullException ("bytes");
        }
        if (charIndex < 0 || charIndex > chars.Length) {
            throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
        }
        if (charCount < 0 || charCount > (chars.Length - charIndex)) {
            throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
        }
        if (byteIndex < 0 || byteIndex > bytes.Length) {
            throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
        }

        // Convert the characters.
        int posn = byteIndex;
        int byteLength = bytes.Length;
        int leftOverSize = ((leftOver >> 8) & 0xFF);
        int leftOverBits = (leftOver & 0xFF);
        // Pending bits can only exist inside a run, so treat them as an
        // open run even if the flag bit is missing.
        bool inShifted = ((leftOver & ENCODER_SHIFTED) != 0 || leftOverSize != 0);
        byte[] rules = encodingRules;
        String base64 = base64Chars;
        int ch, rule;
        while (charCount > 0) {
            ch = (int)(chars[charIndex++]);
            --charCount;
            rule = (ch < 0x0080) ? rules[ch] : 0;
            if (rule == 2) {
                // The character may need to be encoded.
                rule = allowOptionals ? 1 : 0;
            }
            switch (rule) {
            case 0:
                // Handle characters that must be fully encoded.
                if (!inShifted) {
                    if (posn >= byteLength) {
                        throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                    }
                    bytes[posn++] = (byte)'+';
                    inShifted = true;
                    leftOverSize = 0;
                    leftOverBits = 0;
                }
                leftOverBits = ((leftOverBits << 16) | ch);
                leftOverSize += 16;
                while (leftOverSize >= 6) {
                    if (posn >= byteLength) {
                        throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                    }
                    leftOverSize -= 6;
                    bytes[posn++] = (byte)(base64 [leftOverBits >> leftOverSize]);
                    leftOverBits &= ((1 << leftOverSize) - 1);
                }
                break;
            case 1:
                // The character is encoded as itself.
                if (inShifted) {
                    // Close the previous encoded sequence, even when no
                    // bits are pending: without the '-' a following
                    // letter or digit would be read as base64 data.
                    posn = CloseBase64Run (bytes, posn, leftOverBits, leftOverSize);
                    inShifted = false;
                    leftOverSize = 0;
                    leftOverBits = 0;
                }
                if (posn >= byteLength) {
                    throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                }
                bytes[posn++] = (byte)ch;
                break;
            case 3:
                // Encode the plus sign as "+-".
                if (inShifted) {
                    posn = CloseBase64Run (bytes, posn, leftOverBits, leftOverSize);
                    inShifted = false;
                    leftOverSize = 0;
                    leftOverBits = 0;
                }
                if ((posn + 2) > byteLength) {
                    throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
                }
                bytes[posn++] = (byte)'+';
                bytes[posn++] = (byte)'-';
                break;
            }
        }
        if (inShifted && flush) {
            posn = CloseBase64Run (bytes, posn, leftOverBits, leftOverSize);
            inShifted = false;
            leftOverSize = 0;
            leftOverBits = 0;
        }
        leftOver = ((inShifted ? ENCODER_SHIFTED : 0) |
                    (leftOverSize << 8) | leftOverBits);

        // Return the length to the caller.
        return posn - byteIndex;
    }

    // Get the bytes that result from encoding a character buffer.
    public override int GetBytes (char[] chars, int charIndex, int charCount,
                                  byte[] bytes, int byteIndex)
    {
        int leftOver = 0;
        return InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, true,
                                 ref leftOver, allowOptionals);
    }

    // Internal version of "GetCharCount" that can handle
    // a rolling state between calls.
    //
    // "leftOver" is a decoder state word (see DECODER_IN_BASE64); it is
    // only read here.  Returns the number of characters that
    // "InternalGetChars" produces for the same arguments and state.
    private static int InternalGetCharCount
                (byte[] bytes, int index, int count, int leftOver)
    {
        // Validate the parameters.
        if (bytes == null) {
            throw new ArgumentNullException ("bytes");
        }
        if (index < 0 || index > bytes.Length) {
            throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
        }
        if (count < 0 || count > (bytes.Length - index)) {
            throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
        }

        // Determine the length of the result.
        int length = 0;
        int byteval;
        bool normal = ((leftOver & DECODER_IN_BASE64) == 0);
        bool prevIsPlus = ((leftOver & DECODER_PREV_IS_PLUS) != 0);
        int leftOverSize = ((leftOver >> 16) & 0xFF);
        sbyte[] base64 = base64Values;
        while (count > 0) {
            byteval = (int)(bytes[index++]);
            --count;
            if (normal) {
                if (byteval != '+') {
                    // Directly-encoded character.
                    ++length;
                } else {
                    // Start of a base64-encoded character.
                    normal = false;
                    prevIsPlus = true;
                    leftOverSize = 0;
                }
            } else {
                // Process the next byte in a base64 sequence.
                if (byteval == (int)'-') {
                    // End of a base64 sequence; "+-" is a literal plus.
                    if (prevIsPlus) {
                        ++length;
                    }
                    // Padding bits are dropped, so the next run starts clean.
                    leftOverSize = 0;
                    normal = true;
                } else if (base64[byteval] != -1) {
                    // Extra character in a base64 sequence.
                    leftOverSize += 6;
                    if (leftOverSize >= 16) {
                        ++length;
                        leftOverSize -= 16;
                    }
                } else {
                    // Normal character terminating a base64 sequence.
                    // Only the terminating character itself is counted;
                    // incomplete trailing bits do not form a character.
                    leftOverSize = 0;
                    ++length;
                    normal = true;
                }
                prevIsPlus = false;
            }
        }

        // Return the final length to the caller.
        return length;
    }

    // Get the number of characters needed to decode a byte buffer.
    public override int GetCharCount (byte[] bytes, int index, int count)
    {
        return InternalGetCharCount (bytes, index, count, 0);
    }

    // Internal version of "GetChars" that can handle a
    // rolling state between calls.
    //
    // "leftOver" is a decoder state word (see DECODER_IN_BASE64); it is
    // read on entry and rewritten on normal return.  Returns the number
    // of characters written starting at "charIndex".  Throws
    // ArgumentException when "chars" is too small and FormatException
    // when a run closed by '-' ends in non-zero padding bits.
    private static int InternalGetChars (byte[] bytes, int byteIndex, int byteCount,
                                         char[] chars, int charIndex, ref int leftOver)
    {
        // Validate the parameters.
        if (bytes == null) {
            throw new ArgumentNullException ("bytes");
        }
        if (chars == null) {
            throw new ArgumentNullException ("chars");
        }
        if (byteIndex < 0 || byteIndex > bytes.Length) {
            throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
        }
        if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
            throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
        }
        if (charIndex < 0 || charIndex > chars.Length) {
            throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
        }

        // Convert the bytes into characters.
        int posn = charIndex;
        int charLength = chars.Length;
        int byteval, b64value;
        bool normal = ((leftOver & DECODER_IN_BASE64) == 0);
        bool prevIsPlus = ((leftOver & DECODER_PREV_IS_PLUS) != 0);
        int leftOverSize = ((leftOver >> 16) & 0xFF);
        int leftOverBits = (leftOver & 0xFFFF);
        sbyte[] base64 = base64Values;
        while (byteCount > 0) {
            byteval = (int)(bytes[byteIndex++]);
            --byteCount;
            if (normal) {
                if (byteval != '+') {
                    // Directly-encoded character.
                    if (posn >= charLength) {
                        throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
                    }
                    chars[posn++] = (char)byteval;
                } else {
                    // Start of a base64-encoded character.
                    normal = false;
                    prevIsPlus = true;
                    leftOverSize = 0;
                    leftOverBits = 0;
                }
            } else {
                // Process the next byte in a base64 sequence.
                if (byteval == (int)'-') {
                    // End of a base64 sequence; "+-" is a literal plus.
                    if (prevIsPlus) {
                        if (posn >= charLength) {
                            throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
                        }
                        chars[posn++] = '+';
                    }
                    // RFC1642 Rule #2
                    // When decoding, any bits at the end of the Modified Base64 sequence that
                    // do not constitute a complete 16-bit Unicode character are discarded.
                    // If such discarded bits are non-zero the sequence is ill-formed.
                    if (leftOverBits != 0)
                        throw new FormatException ("unused bits not zero");
                    // Forget the padding so that the bit count of this run
                    // cannot misalign the characters of the next one.
                    leftOverSize = 0;
                    normal = true;
                } else if ((b64value = base64[byteval]) != -1) {
                    // Extra character in a base64 sequence.
                    leftOverBits = (leftOverBits << 6) | b64value;
                    leftOverSize += 6;
                    if (leftOverSize >= 16) {
                        if (posn >= charLength) {
                            throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
                        }
                        leftOverSize -= 16;
                        chars[posn++] = (char)(leftOverBits >> leftOverSize);
                        leftOverBits &= ((1 << leftOverSize) - 1);
                    }
                } else {
                    // Normal character terminating a base64 sequence.
                    // Incomplete trailing bits are discarded rather than
                    // turned into a character of their own.
                    leftOverSize = 0;
                    leftOverBits = 0;
                    if (posn >= charLength) {
                        throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
                    }
                    chars[posn++] = (char)byteval;
                    normal = true;
                }
                prevIsPlus = false;
            }
        }
        leftOver = (leftOverBits | (leftOverSize << 16) |
                    (normal ? 0 : DECODER_IN_BASE64) |
                    (prevIsPlus ? DECODER_PREV_IS_PLUS : 0));

        // Return the final length to the caller.
        return posn - charIndex;
    }

    // Get the characters that result from decoding a byte buffer.
    public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                  char[] chars, int charIndex)
    {
        int leftOver = 0;
        return InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOver);
    }

    // Get the maximum number of bytes needed to encode a
    // specified number of characters.
    //
    // The most expensive input for this encoder alternates a fully
    // encoded character (five bytes: '+', three base64 characters, '-')
    // with a plus sign (two bytes: "+-"), which costs (7 * n + 3) / 2
    // bytes for n characters.  Zero characters still report zero bytes.
    public override int GetMaxByteCount (int charCount)
    {
        if (charCount < 0) {
            throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
        }
        if (charCount == 0)
            return 0;
        // Computed as a long so that huge counts are rejected instead of
        // silently wrapping around.
        long worstCase = (((long)charCount * 7) + 3) / 2;
        if (worstCase > Int32.MaxValue) {
            throw new ArgumentOutOfRangeException ("charCount");
        }
        return (int)worstCase;
    }

    // Get the maximum number of characters needed to decode a
    // specified number of bytes.  No byte decodes to more than one
    // character, so the byte count itself is the bound.
    public override int GetMaxCharCount (int byteCount)
    {
        if (byteCount < 0) {
            throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
        }
        return byteCount;
    }

    // Get a new UTF7-specific decoder that starts with empty state.
    public override Decoder GetDecoder ()
    {
        return new UTF7Decoder ();
    }

    // Get a new UTF7-specific encoder that uses this instance's
    // "allowOptionals" setting.
    public override Encoder GetEncoder ()
    {
        return new UTF7Encoder (allowOptionals);
    }

    // UTF-7 decoder implementation.
    private sealed class UTF7Decoder : Decoder
    {
        // Decoder state word carried from one call to the next.
        private int leftOver;

        // Constructor.
        public UTF7Decoder ()
        {
            leftOver = 0;
        }

        // Override inherited methods.
        public override int GetCharCount (byte[] bytes, int index, int count)
        {
            return InternalGetCharCount (bytes, index, count, leftOver);
        }
        public override int GetChars (byte[] bytes, int byteIndex,
                                      int byteCount, char[] chars,
                                      int charIndex)
        {
            return InternalGetChars (bytes, byteIndex, byteCount, chars, charIndex, ref leftOver);
        }

    } // class UTF7Decoder

    // UTF-7 encoder implementation.
    private sealed class UTF7Encoder : Encoder
    {
        private bool allowOptionals;

        // Encoder state word carried from one call to the next.
        private int leftOver;

        // Constructor.
        public UTF7Encoder (bool allowOptionals)
        {
            this.allowOptionals = allowOptionals;
            this.leftOver = 0;
        }

        // Override inherited methods.
        public override int GetByteCount (char[] chars, int index,
                                          int count, bool flush)
        {
            return InternalGetByteCount
                (chars, index, count, flush, leftOver, allowOptionals);
        }
        public override int GetBytes (char[] chars, int charIndex,
                                      int charCount, byte[] bytes,
                                      int byteIndex, bool flush)
        {
            return InternalGetBytes (chars, charIndex, charCount,
                                     bytes, byteIndex, flush,
                                     ref leftOver, allowOptionals);
        }

    } // class UTF7Encoder

}; // class UTF7Encoding

}; // namespace System.Text