**** Merged from MCS ****
[mono-project.git] / mcs / class / corlib / System.Text / UTF8Encoding.cs
blobaf8062ed660349512fbe77c7242cd6a8309bb611
1 /*
2 * UTF8Encoding.cs - Implementation of the "System.Text.UTF8Encoding" class.
4 * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
5 * Copyright (C) 2004 Novell, Inc (http://www.novell.com)
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
26 namespace System.Text
29 using System;
31 [Serializable]
32 [MonoTODO ("Fix serialization compatibility with MS.NET")]
33 public class UTF8Encoding : Encoding
// Code page number that Windows uses to identify UTF-8.
internal const int UTF8_CODE_PAGE = 65001;

// Options fixed at construction time.
//   emitIdentifier - when set, GetPreamble returns the three-byte UTF-8 identifier.
//   throwOnInvalid - handed to the decoding routines, which raise
//                    ArgumentException on malformed input when it is set.
private bool emitIdentifier;
private bool throwOnInvalid;

// Default construction: no identifier, no exception on invalid bytes.
public UTF8Encoding ()
	: this (false, false)
{
}

// Construction with an explicit identifier choice; invalid bytes are tolerated.
public UTF8Encoding (bool encoderShouldEmitUTF8Identifier)
	: this (encoderShouldEmitUTF8Identifier, false)
{
}

// Full construction: records both options and fills in the descriptive
// members (names, browser/mail flags, Windows code page), which are not
// declared in this class and are presumably inherited from Encoding.
public UTF8Encoding (bool encoderShouldEmitUTF8Identifier, bool throwOnInvalidBytes)
	: base (UTF8_CODE_PAGE)
{
	throwOnInvalid = throwOnInvalidBytes;
	emitIdentifier = encoderShouldEmitUTF8Identifier;

	const string charsetName = "utf-8";
	header_name = charsetName;
	body_name = charsetName;
	web_name = charsetName;
	encoding_name = "Unicode (UTF-8)";
	is_browser_save = true;
	is_browser_display = true;
	is_mail_news_display = true;
	windows_code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
}
// Counts the UTF-8 bytes needed to encode a run of UTF-16 characters.
//
// chars/index/count select the characters to measure.  A non-zero
// leftOver means a high surrogate is already pending from an earlier
// call.  When flush is true, a high surrogate still pending after the
// last character is counted as a three-byte sequence of its own.
//
// Throws ArgumentNullException when chars is null and
// ArgumentOutOfRangeException when index or count fall outside the array.
private static int InternalGetByteCount (char[] chars, int index, int count, uint leftOver, bool flush)
{
	if (chars == null) {
		throw new ArgumentNullException ("chars");
	}
	if (index < 0 || index > chars.Length) {
		throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
	}
	if (count < 0 || count > (chars.Length - index)) {
		throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
	}

	int total = 0;
	bool pendingHigh = (leftOver != 0);
	int end = index + count;
	for (int i = index; i < end; ++i) {
		char c = chars[i];

		if (pendingHigh) {
			pendingHigh = false;
			if (c >= '\uDC00' && c <= '\uDFFF') {
				// High + low surrogate: one four-byte sequence.
				total += 4;
				continue;
			}
			// The pending high surrogate has no partner.  Technically
			// invalid, but it is counted as three bytes and the current
			// character is then measured on its own below.
			total += 3;
		}

		if (c < '\u0080') {
			total += 1;
		} else if (c < '\u0800') {
			total += 2;
		} else if (c >= '\uD800' && c <= '\uDBFF') {
			// Start of a surrogate pair; decide once the next char is seen.
			pendingHigh = true;
		} else {
			total += 3;
		}
	}

	if (flush && pendingHigh) {
		// Trailing unpaired high surrogate.
		total += 3;
	}
	return total;
}
// Returns the number of bytes needed to encode count characters of chars
// starting at index.  Runs the shared counter with no pending surrogate
// and with flushing enabled.
public override int GetByteCount (char[] chars, int index, int count)
{
	const uint noPendingSurrogate = 0;
	int needed = InternalGetByteCount (chars, index, count, noPendingSurrogate, true);
	return needed;
}
// Returns the number of bytes needed to encode the whole string s.
// A high surrogate immediately followed by a low surrogate counts as one
// four-byte sequence; any other surrogate counts as three bytes.
// Throws ArgumentNullException when s is null.
public override int GetByteCount (String s)
{
	if (s == null) {
		throw new ArgumentNullException ("s");
	}

	int total = 0;
	int n = s.Length;
	for (int i = 0; i < n; ++i) {
		char c = s[i];
		if (c < '\u0080') {
			total += 1;
		} else if (c < '\u0800') {
			total += 2;
		} else if (c >= '\uD800' && c <= '\uDBFF' && (i + 1) < n) {
			// Possible surrogate pair: peek at the following character.
			char next = s[i + 1];
			if (next >= '\uDC00' && next <= '\uDFFF') {
				total += 4;
				++i;	// the low surrogate is consumed with its partner
			} else {
				total += 3;
			}
		} else {
			total += 3;
		}
	}
	return total;
}
// Encodes a run of UTF-16 characters into UTF-8.
//
// chars/charIndex/charCount select the input; output is written to bytes
// starting at byteIndex.  leftOver carries a pending high surrogate in
// and out of the call (zero means none).  When flush is true, a high
// surrogate still pending after the last character is written as a
// three-byte sequence and the state is cleared.
//
// Returns the number of bytes written.  leftOver is only updated when
// the method returns normally.
//
// Throws ArgumentNullException for a null array,
// ArgumentOutOfRangeException for indices outside the arrays, and
// ArgumentException when bytes is too small.
private static int InternalGetBytes (char[] chars, int charIndex,
				     int charCount, byte[] bytes,
				     int byteIndex, ref uint leftOver,
				     bool flush)
{
	if (chars == null) {
		throw new ArgumentNullException ("chars");
	}
	if (bytes == null) {
		throw new ArgumentNullException ("bytes");
	}
	if (charIndex < 0 || charIndex > chars.Length) {
		throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
	}
	if (charCount < 0 || charCount > (chars.Length - charIndex)) {
		throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
	}
	if (byteIndex < 0 || byteIndex > bytes.Length) {
		throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
	}

	int limit = bytes.Length;
	int outPos = byteIndex;
	uint pending = leftOver;
	int inPos = charIndex;
	int inEnd = charIndex + charCount;

	while (inPos < inEnd) {
		char c = chars[inPos];
		uint value;

		if (pending == 0) {
			++inPos;
			if (c >= '\uD800' && c <= '\uDBFF') {
				// Start of a surrogate pair: wait for its partner.
				pending = (uint)c;
				continue;
			}
			value = (uint)c;
		} else {
			if (c >= '\uDC00' && c <= '\uDFFF') {
				// Complete surrogate pair.
				value = ((pending - (uint)0xD800) << 10) +
					(((uint)c) - (uint)0xDC00) +
					(uint)0x10000;
				++inPos;
			} else {
				// High surrogate followed by something else.  Technically
				// invalid; the surrogate is written on its own and the
				// current character is left to be processed next.
				value = pending;
			}
			pending = 0;
		}

		// Work out the sequence length, check capacity once, then emit.
		int needed;
		if (value < (uint)0x0080) {
			needed = 1;
		} else if (value < (uint)0x0800) {
			needed = 2;
		} else if (value < (uint)0x10000) {
			needed = 3;
		} else {
			needed = 4;
		}
		if (outPos > limit - needed) {
			throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
		}
		switch (needed) {
		case 1:
			bytes[outPos++] = (byte)value;
			break;
		case 2:
			bytes[outPos++] = (byte)(0xC0 | (value >> 6));
			bytes[outPos++] = (byte)(0x80 | (value & 0x3F));
			break;
		case 3:
			bytes[outPos++] = (byte)(0xE0 | (value >> 12));
			bytes[outPos++] = (byte)(0x80 | ((value >> 6) & 0x3F));
			bytes[outPos++] = (byte)(0x80 | (value & 0x3F));
			break;
		default:
			bytes[outPos++] = (byte)(0xF0 | (value >> 18));
			bytes[outPos++] = (byte)(0x80 | ((value >> 12) & 0x3F));
			bytes[outPos++] = (byte)(0x80 | ((value >> 6) & 0x3F));
			bytes[outPos++] = (byte)(0x80 | (value & 0x3F));
			break;
		}
	}

	if (flush && pending != 0) {
		// Write out the unpaired high surrogate that is still pending.
		if (outPos > limit - 3) {
			throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
		}
		bytes[outPos++] = (byte)(0xE0 | (pending >> 12));
		bytes[outPos++] = (byte)(0x80 | ((pending >> 6) & 0x3F));
		bytes[outPos++] = (byte)(0x80 | (pending & 0x3F));
		pending = 0;
	}
	leftOver = pending;

	return outPos - byteIndex;
}
// Encodes charCount characters of chars, starting at charIndex, into
// bytes starting at byteIndex, and returns the number of bytes written.
// Each call starts with no pending surrogate and flushes at the end.
public override int GetBytes (char[] chars, int charIndex, int charCount,
			      byte[] bytes, int byteIndex)
{
	uint pendingSurrogate = 0;
	int written = InternalGetBytes (chars, charIndex, charCount, bytes, byteIndex, ref pendingSurrogate, true);
	return written;
}
// Encodes charCount characters of the string s, starting at charIndex,
// into bytes starting at byteIndex.  Returns the number of bytes written.
//
// A high surrogate directly followed by a low surrogate inside the
// selected range becomes one four-byte sequence; any other surrogate is
// written as a three-byte sequence.
//
// Throws ArgumentNullException for a null argument,
// ArgumentOutOfRangeException for indices outside s or bytes, and
// ArgumentException when bytes is too small.
public override int GetBytes (String s, int charIndex, int charCount,
			      byte[] bytes, int byteIndex)
{
	if (s == null) {
		throw new ArgumentNullException ("s");
	}
	if (bytes == null) {
		throw new ArgumentNullException ("bytes");
	}
	if (charIndex < 0 || charIndex > s.Length) {
		throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
	}
	if (charCount < 0 || charCount > (s.Length - charIndex)) {
		throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
	}
	if (byteIndex < 0 || byteIndex > bytes.Length) {
		throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
	}

	int limit = bytes.Length;
	int outPos = byteIndex;
	int inPos = charIndex;
	int inEnd = charIndex + charCount;

	while (inPos < inEnd) {
		char c = s[inPos++];
		uint value = (uint)c;

		if (c >= '\uD800' && c <= '\uDBFF' && inPos < inEnd) {
			// Possible surrogate pair: peek at the following character.
			uint next = (uint)(s[inPos]);
			if (next >= (uint)0xDC00 && next <= (uint)0xDFFF) {
				value = (next - (uint)0xDC00) +
					((((uint)c) - (uint)0xD800) << 10) +
					(uint)0x10000;
				++inPos;
			}
		}

		// Work out the sequence length, check capacity once, then emit.
		int needed;
		if (value < (uint)0x0080) {
			needed = 1;
		} else if (value < (uint)0x0800) {
			needed = 2;
		} else if (value < (uint)0x10000) {
			needed = 3;
		} else {
			needed = 4;
		}
		if (outPos > limit - needed) {
			throw new ArgumentException (_("Arg_InsufficientSpace"), "bytes");
		}
		switch (needed) {
		case 1:
			bytes[outPos++] = (byte)value;
			break;
		case 2:
			bytes[outPos++] = (byte)(0xC0 | (value >> 6));
			bytes[outPos++] = (byte)(0x80 | (value & 0x3F));
			break;
		case 3:
			bytes[outPos++] = (byte)(0xE0 | (value >> 12));
			bytes[outPos++] = (byte)(0x80 | ((value >> 6) & 0x3F));
			bytes[outPos++] = (byte)(0x80 | (value & 0x3F));
			break;
		default:
			bytes[outPos++] = (byte)(0xF0 | (value >> 18));
			bytes[outPos++] = (byte)(0x80 | ((value >> 12) & 0x3F));
			bytes[outPos++] = (byte)(0x80 | ((value >> 6) & 0x3F));
			bytes[outPos++] = (byte)(0x80 | (value & 0x3F));
			break;
		}
	}

	return outPos - byteIndex;
}
// Counts the UTF-16 characters produced by decoding a run of UTF-8 bytes.
//
// bytes/index/count select the input.  leftOverBits holds the payload
// bits of a partially read sequence; leftOverCount packs the number of
// bytes seen so far (low four bits) and the expected sequence length
// (next four bits).  Both are read only; the caller's state is not changed.
//
// Counting rules, as implemented below:
//   - values below 0x10000 count as one char, except U+FEFF which counts
//     as none, and overlong forms which count as none (or throw);
//   - values below 0x110000 count as two chars (a surrogate pair);
//   - anything larger counts as none (or throws).
// "Throws" applies only when throwOnInvalid is true.  With flush set, an
// incomplete trailing sequence also throws when throwOnInvalid is true.
//
// Throws ArgumentNullException when bytes is null,
// ArgumentOutOfRangeException when index or count fall outside the
// array, and ArgumentException for invalid input as described above.
private static int InternalGetCharCount (byte[] bytes, int index, int count,
					 uint leftOverBits,
					 uint leftOverCount,
					 bool throwOnInvalid, bool flush)
{
	if (bytes == null) {
		throw new ArgumentNullException ("bytes");
	}
	if (index < 0 || index > bytes.Length) {
		throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
	}
	if (count < 0 || count > (bytes.Length - index)) {
		throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
	}

	int total = 0;
	uint leftBits = leftOverBits;
	uint leftSoFar = (leftOverCount & (uint)0x0F);
	uint leftSize = ((leftOverCount >> 4) & (uint)0x0F);
	int end = index + count;

	for (int pos = index; pos < end; ++pos) {
		uint b = (uint)(bytes[pos]);

		if (leftSize != 0) {
			if ((b & (uint)0xC0) == (uint)0x80) {
				// Continuation byte: add six more payload bits.
				leftBits = ((leftBits << 6) | (b & (uint)0x3F));
				if (++leftSoFar >= leftSize) {
					// The sequence is complete.
					if (leftBits < (uint)0x10000) {
						if (leftBits != (uint)0xFEFF) {
							// Overlong: the value would have fit in a
							// shorter sequence than the one used.
							bool overlong =
								(leftSize == 2 && leftBits <= 0x7F) ||
								(leftSize == 3 && leftBits <= 0x07FF) ||
								(leftSize == 4 && leftBits <= 0xFFFF) ||
								(leftSize == 5 && leftBits <= 0x1FFFFF) ||
								(leftSize == 6 && leftBits <= 0x03FFFFFF);
							if (!overlong) {
								++total;
							} else if (throwOnInvalid) {
								throw new ArgumentException (_("Overlong"), leftBits.ToString ());
							}
						}
					} else if (leftBits < (uint)0x110000) {
						total += 2;
					} else if (throwOnInvalid) {
						throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
					}
					leftSize = 0;
				}
				continue;
			}

			// Not a continuation byte: abandon the partial sequence and
			// treat this same byte as a fresh start byte below.
			if (throwOnInvalid) {
				throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
			}
			leftSize = 0;
		}

		// Start byte, classified by value range.
		if (b < (uint)0x80) {
			++total;
		} else if (b < (uint)0xC0) {
			// A continuation byte with no sequence in progress.
			if (throwOnInvalid) {
				throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
			}
		} else {
			uint size;
			uint mask;
			if (b < (uint)0xE0) {
				size = 2;
				mask = 0x1F;
			} else if (b < (uint)0xF0) {
				size = 3;
				mask = 0x0F;
			} else if (b < (uint)0xF8) {
				size = 4;
				mask = 0x07;
			} else if (b < (uint)0xFC) {
				size = 5;
				mask = 0x03;
			} else {
				// 0xFC through 0xFF are all taken as six-byte starts.
				size = 6;
				mask = 0x03;
			}
			leftBits = (b & mask);
			leftSoFar = 1;
			leftSize = size;
		}
	}

	if (flush && leftSize != 0 && throwOnInvalid) {
		// The input ended in the middle of a multi-byte sequence.
		throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
	}

	return total;
}
// Returns the number of characters produced by decoding count bytes of
// bytes starting at index.  Starts from an empty decoding state, uses
// this instance's throwOnInvalid setting, and flushes at the end.
public override int GetCharCount (byte[] bytes, int index, int count)
{
	const uint emptyState = 0;
	int produced = InternalGetCharCount (bytes, index, count, emptyState, emptyState, throwOnInvalid, true);
	return produced;
}
// Decodes a run of UTF-8 bytes into UTF-16 characters.
//
// bytes/byteIndex/byteCount select the input; output is written to chars
// starting at charIndex.  leftOverBits and leftOverCount carry a
// partially read sequence in and out of the call: leftOverBits holds the
// payload bits, leftOverCount packs the bytes seen so far (low four bits)
// and the expected sequence length (next four bits).  The state is only
// written back when the loop runs to completion without throwing.
//
// Decoding rules, as implemented below:
//   - values below 0x10000 yield one char, except U+FEFF which yields
//     nothing, and overlong forms which yield nothing (or throw);
//   - values below 0x110000 yield a surrogate pair;
//   - anything larger yields nothing (or throws).
// "Throws" applies only when throwOnInvalid is true.  With flush set, an
// incomplete trailing sequence also throws when throwOnInvalid is true.
//
// Returns the number of chars written.
//
// Throws ArgumentNullException for a null array,
// ArgumentOutOfRangeException for indices outside the arrays, and
// ArgumentException for invalid input or when chars is too small.
private static int InternalGetChars (byte[] bytes, int byteIndex,
				     int byteCount, char[] chars,
				     int charIndex, ref uint leftOverBits,
				     ref uint leftOverCount,
				     bool throwOnInvalid, bool flush)
{
	if (bytes == null) {
		throw new ArgumentNullException ("bytes");
	}
	if (chars == null) {
		throw new ArgumentNullException ("chars");
	}
	if (byteIndex < 0 || byteIndex > bytes.Length) {
		throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
	}
	if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
		throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
	}
	if (charIndex < 0 || charIndex > chars.Length) {
		throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
	}

	// NOTE(review): with no room at all in chars, nothing is decoded and
	// the carried state is left untouched, even if bytes holds input.
	// Confirm that callers rely on this rather than on an exception.
	if (charIndex == chars.Length) {
		return 0;
	}

	int capacity = chars.Length;
	int outPos = charIndex;
	uint leftBits = leftOverBits;
	uint leftSoFar = (leftOverCount & (uint)0x0F);
	uint leftSize = ((leftOverCount >> 4) & (uint)0x0F);
	int inEnd = byteIndex + byteCount;

	for (int inPos = byteIndex; inPos < inEnd; ++inPos) {
		uint b = (uint)(bytes[inPos]);

		if (leftSize != 0) {
			if ((b & (uint)0xC0) == (uint)0x80) {
				// Continuation byte: add six more payload bits.
				leftBits = ((leftBits << 6) | (b & (uint)0x3F));
				if (++leftSoFar >= leftSize) {
					// The sequence is complete.
					if (leftBits < (uint)0x10000) {
						if (leftBits != (uint)0xFEFF) {
							// Overlong: the value would have fit in a
							// shorter sequence than the one used.
							bool overlong =
								(leftSize == 2 && leftBits <= 0x7F) ||
								(leftSize == 3 && leftBits <= 0x07FF) ||
								(leftSize == 4 && leftBits <= 0xFFFF) ||
								(leftSize == 5 && leftBits <= 0x1FFFFF) ||
								(leftSize == 6 && leftBits <= 0x03FFFFFF);
							if (overlong) {
								if (throwOnInvalid) {
									throw new ArgumentException (_("Overlong"), leftBits.ToString ());
								}
							} else {
								if (outPos >= capacity) {
									throw new ArgumentException
										(_("Arg_InsufficientSpace"), "chars");
								}
								chars[outPos++] = (char)leftBits;
							}
						}
					} else if (leftBits < (uint)0x110000) {
						// Supplementary code point: emit a surrogate pair.
						if ((outPos + 2) > capacity) {
							throw new ArgumentException
								(_("Arg_InsufficientSpace"), "chars");
						}
						leftBits -= (uint)0x10000;
						chars[outPos++] = (char)((leftBits >> 10) +
									 (uint)0xD800);
						chars[outPos++] =
							(char)((leftBits & (uint)0x3FF) + (uint)0xDC00);
					} else if (throwOnInvalid) {
						throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
					}
					leftSize = 0;
				}
				continue;
			}

			// Not a continuation byte: abandon the partial sequence and
			// treat this same byte as a fresh start byte below.
			if (throwOnInvalid) {
				throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
			}
			leftSize = 0;
		}

		// Start byte, classified by value range.
		if (b < (uint)0x80) {
			if (outPos >= capacity) {
				throw new ArgumentException (_("Arg_InsufficientSpace"), "chars");
			}
			chars[outPos++] = (char)b;
		} else if (b < (uint)0xC0) {
			// A continuation byte with no sequence in progress.
			if (throwOnInvalid) {
				throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
			}
		} else {
			uint size;
			uint mask;
			if (b < (uint)0xE0) {
				size = 2;
				mask = 0x1F;
			} else if (b < (uint)0xF0) {
				size = 3;
				mask = 0x0F;
			} else if (b < (uint)0xF8) {
				size = 4;
				mask = 0x07;
			} else if (b < (uint)0xFC) {
				size = 5;
				mask = 0x03;
			} else {
				// 0xFC through 0xFF are all taken as six-byte starts.
				size = 6;
				mask = 0x03;
			}
			leftBits = (b & mask);
			leftSoFar = 1;
			leftSize = size;
		}
	}

	if (flush && leftSize != 0 && throwOnInvalid) {
		// The input ended in the middle of a multi-byte sequence.
		throw new ArgumentException (_("Arg_InvalidUTF8"), "bytes");
	}
	leftOverBits = leftBits;
	leftOverCount = (leftSoFar | (leftSize << 4));

	return outPos - charIndex;
}
// Decodes byteCount bytes of bytes, starting at byteIndex, into chars
// starting at charIndex, and returns the number of chars written.
// Each call starts from an empty decoding state, uses this instance's
// throwOnInvalid setting, and flushes at the end.
public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
			      char[] chars, int charIndex)
{
	uint pendingBits = 0;
	uint pendingCount = 0;
	int written = InternalGetChars (bytes, byteIndex, byteCount, chars,
					charIndex, ref pendingBits, ref pendingCount, throwOnInvalid, true);
	return written;
}
// Returns an upper bound on the number of bytes needed to encode
// charCount characters, computed as four bytes per character.
//
// Throws ArgumentOutOfRangeException when charCount is negative, or when
// it is so large that the bound does not fit in an Int32.  Without the
// second check the multiplication would wrap silently and hand the
// caller a bogus (possibly negative) buffer size.
public override int GetMaxByteCount (int charCount)
{
	if (charCount < 0) {
		throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
	}
	if (charCount > Int32.MaxValue / 4) {
		throw new ArgumentOutOfRangeException ("charCount");
	}
	return charCount * 4;
}
// Returns an upper bound on the number of characters produced by
// decoding byteCount bytes; this implementation returns byteCount itself.
// Throws ArgumentOutOfRangeException when byteCount is negative.
public override int GetMaxCharCount (int byteCount)
{
	if (byteCount >= 0) {
		return byteCount;
	}
	throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
}
// Creates a new UTF-8 decoder that uses this instance's throwOnInvalid
// setting.
public override Decoder GetDecoder ()
{
	Decoder decoder = new UTF8Decoder (throwOnInvalid);
	return decoder;
}
// Creates a new UTF-8 encoder that is given this instance's
// emitIdentifier setting.
public override Encoder GetEncoder ()
{
	Encoder encoder = new UTF8Encoder (emitIdentifier);
	return encoder;
}
// Returns the UTF-8 preamble: the bytes EF BB BF when this instance was
// created with the identifier enabled, otherwise an empty array.
// A fresh array is returned on every call.
public override byte[] GetPreamble ()
{
	if (!emitIdentifier) {
		return new byte [0];
	}
	return new byte [] { 0xEF, 0xBB, 0xBF };
}
// Two UTF8Encoding objects are equal when they have the same code page,
// the same identifier setting and the same invalid-byte setting.
// Anything that is not a UTF8Encoding (including null) is never equal.
public override bool Equals (Object value)
{
	UTF8Encoding other = value as UTF8Encoding;
	if (other == null) {
		return false;
	}
	if (codePage != other.codePage) {
		return false;
	}
	if (emitIdentifier != other.emitIdentifier) {
		return false;
	}
	return throwOnInvalid == other.throwOnInvalid;
}
// Returns a hash code built from exactly the fields that Equals
// compares (codePage, emitIdentifier, throwOnInvalid), so that equal
// instances always hash alike regardless of how the base class hashes,
// and instances that differ only in their options get different codes.
public override int GetHashCode ()
{
	int hash = codePage;
	if (emitIdentifier) {
		hash ^= 0x10000;
	}
	if (throwOnInvalid) {
		hash ^= 0x20000;
	}
	return hash;
}
// Encodes the whole string s into a newly allocated array sized with
// GetByteCount.  Throws ArgumentNullException when s is null.
public override byte [] GetBytes (String s)
{
	if (s == null) {
		throw new ArgumentNullException ("s");
	}

	byte [] result = new byte [GetByteCount (s)];
	GetBytes (s, 0, s.Length, result, 0);
	return result;
}
// Decoder that keeps a partially read UTF-8 sequence between calls.
[Serializable]
private class UTF8Decoder : Decoder
{
	// Whether malformed input raises ArgumentException.
	private bool throwOnInvalid;

	// Carried state, in the form InternalGetChars reads and writes:
	// payload bits gathered so far, and the packed seen/expected counts.
	private uint leftOverBits;
	private uint leftOverCount;

	// Starts with no partial sequence pending.
	public UTF8Decoder (bool throwOnInvalid)
	{
		leftOverCount = 0;
		leftOverBits = 0;
		this.throwOnInvalid = throwOnInvalid;
	}

	// Counts the chars the given bytes would decode to, taking the
	// carried state into account.  The state is passed by value, so it is
	// not changed, and no flush is requested.
	public override int GetCharCount (byte[] bytes, int index, int count)
	{
		int produced = InternalGetCharCount (bytes, index, count,
			leftOverBits, leftOverCount, throwOnInvalid, false);
		return produced;
	}

	// Decodes the given bytes into chars, continuing from and updating
	// the carried state.  No flush is requested.
	public override int GetChars (byte[] bytes, int byteIndex,
				      int byteCount, char[] chars, int charIndex)
	{
		int written = InternalGetChars (bytes, byteIndex, byteCount,
			chars, charIndex, ref leftOverBits, ref leftOverCount, throwOnInvalid, false);
		return written;
	}

} // class UTF8Decoder
// Encoder that keeps a pending high surrogate between calls.
[Serializable]
private class UTF8Encoder : Encoder
{
	// Stored from the constructor and cleared after every GetBytes call;
	// nothing in this class reads it.
	private bool emitIdentifier;

	// Pending high surrogate from the previous call, or zero for none.
	private uint leftOver;

	// Starts with no surrogate pending.
	public UTF8Encoder (bool emitIdentifier)
	{
		leftOver = 0;
		this.emitIdentifier = emitIdentifier;
	}

	// Counts the bytes the given chars would encode to, taking a pending
	// surrogate into account.  The pending state is not changed.
	public override int GetByteCount (char[] chars, int index,
					  int count, bool flush)
	{
		int needed = InternalGetByteCount (chars, index, count, leftOver, flush);
		return needed;
	}

	// Encodes the given chars into bytes, continuing from and updating
	// the pending surrogate.  Despite its name, byteCount is forwarded
	// to InternalGetBytes as the starting offset into bytes.
	public override int GetBytes (char[] chars, int charIndex,
				      int charCount, byte[] bytes, int byteCount, bool flush)
	{
		int written = InternalGetBytes (chars, charIndex, charCount, bytes, byteCount, ref leftOver, flush);
		emitIdentifier = false;
		return written;
	}

} // class UTF8Encoder
792 }; // class UTF8Encoding
794 }; // namespace System.Text