3 // Copyright (c) Microsoft Corporation. All rights reserved.
9 using System
.Collections
;
10 using System
.Collections
.Generic
;
12 using System
.Runtime
.Remoting
;
13 using System
.Runtime
.Serialization
;
14 using System
.Globalization
;
15 using System
.Security
;
16 using System
.Security
.Permissions
;
17 using System
.Threading
;
19 using System
.Diagnostics
.CodeAnalysis
;
20 using System
.Diagnostics
.Contracts
;
21 #if FEATURE_CODEPAGES_FILE
22 using Win32Native
= Microsoft
.Win32
.Win32Native
;
25 // This abstract base class represents a character encoding. The class provides
26 // methods to convert arrays and strings of Unicode characters to and from
27 // arrays of bytes. A number of Encoding implementations are provided in
28 // the System.Text package, including:
30 // ASCIIEncoding, which encodes Unicode characters as single 7-bit
31 // ASCII characters. This encoding only supports character values between 0x00
33 // BaseCodePageEncoding, which encapsulates a Windows code page. Any
34 // installed code page can be accessed through this encoding, and conversions
35 // are performed using the WideCharToMultiByte and
36 // MultiByteToWideChar Windows API functions.
37 // UnicodeEncoding, which encodes each Unicode character as two
38 // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
39 // page 1201) encodings are recognized.
40 // UTF7Encoding, which encodes Unicode characters using the UTF-7
41 // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
42 // encoding supports all Unicode character values, and can also be accessed
43 // as code page 65000.
44 // UTF8Encoding, which encodes Unicode characters using the UTF-8
45 // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
46 // encoding supports all Unicode character values, and can also be accessed
47 // as code page 65001.
48 // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
50 // In addition to directly instantiating Encoding objects, an
51 // application can use the GetEncoding method and the ASCII,
52 // Default, Unicode, UTF7, and UTF8
53 // properties in this class to obtain encodings.
55 // Through an encoding, the GetBytes method is used to convert arrays
56 // of characters to arrays of bytes, and the GetChars method is used to
57 // convert arrays of bytes to arrays of characters. The GetBytes and
58 // GetChars methods maintain no state between conversions, and are
59 // generally intended for conversions of complete blocks of bytes and
60 // characters in one operation. When the data to be converted is only available
61 // in sequential blocks (such as data read from a stream) or when the amount of
62 // data is so large that it needs to be divided into smaller blocks, an
63 // application may choose to use a Decoder or an Encoder to
64 // perform the conversion. Decoders and encoders allow sequential blocks of
65 // data to be converted and they maintain the state required to support
66 // conversions of data that spans adjacent blocks. Decoders and encoders are
67 // obtained using the GetDecoder and GetEncoder methods.
69 // The core GetBytes and GetChars methods require the caller
70 // to provide the destination buffer and ensure that the buffer is large enough
71 // to hold the entire result of the conversion. When using these methods,
72 // either directly on an Encoding object or on an associated
73 // Decoder or Encoder, an application can use one of two methods
74 // to allocate destination buffers.
76 // The GetByteCount and GetCharCount methods can be used to
77 // compute the exact size of the result of a particular conversion, and an
78 // appropriately sized buffer for that conversion can then be allocated.
79 // The GetMaxByteCount and GetMaxCharCount methods can be
80 // used to compute the maximum possible size of a conversion of a given
81 // number of bytes or characters, and a buffer of that size can then be reused
82 // for multiple conversions.
84 // The first method generally uses less memory, whereas the second method
85 // generally executes faster.
88 [System
.Runtime
.InteropServices
.ComVisible(true)]
90 public abstract class Encoding
: ICloneable
92 private static volatile Encoding defaultEncoding
;
93 private static volatile Encoding unicodeEncoding
;
94 private static volatile Encoding bigEndianUnicode
;
96 private static volatile Encoding utf7Encoding
;
98 private static volatile Encoding utf8Encoding
;
100 private static volatile Encoding utf32Encoding
;
103 private static volatile Encoding asciiEncoding
;
106 private static volatile Encoding latin1Encoding
;
108 static volatile Hashtable encodings
;
111 // The following values are from mlang.idl. These values
112 // should be in sync with those in mlang.idl.
114 private const int MIMECONTF_MAILNEWS
= 0x00000001;
115 private const int MIMECONTF_BROWSER
= 0x00000002;
116 private const int MIMECONTF_SAVABLE_MAILNEWS
= 0x00000100;
117 private const int MIMECONTF_SAVABLE_BROWSER
= 0x00000200;
119 // Special Case Code Pages
120 private const int CodePageDefault
= 0;
121 private const int CodePageNoOEM
= 1; // OEM Code page not supported
122 private const int CodePageNoMac
= 2; // MAC code page not supported
123 private const int CodePageNoThread
= 3; // Thread code page not supported
124 private const int CodePageNoSymbol
= 42; // Symbol code page not supported
125 private const int CodePageUnicode
= 1200; // Unicode
126 private const int CodePageBigEndian
= 1201; // Big Endian Unicode
127 private const int CodePageWindows1252
= 1252; // Windows 1252 code page
129 // 20936 has same code page as 10008, so we'll special case it
130 private const int CodePageMacGB2312
= 10008;
131 private const int CodePageGB2312
= 20936;
132 private const int CodePageMacKorean
= 10003;
133 private const int CodePageDLLKorean
= 20949;
135 // ISO 2022 Code Pages
136 private const int ISO2022JP
= 50220;
137 private const int ISO2022JPESC
= 50221;
138 private const int ISO2022JPSISO
= 50222;
139 private const int ISOKorean
= 50225;
140 private const int ISOSimplifiedCN
= 50227;
141 private const int EUCJP
= 51932;
142 private const int ChineseHZ
= 52936; // HZ has ~}~{~~ sequences
144 // 51936 is the same as 936
145 private const int DuplicateEUCCN
= 51936;
146 private const int EUCCN
= 936;
148 private const int EUCKR
= 51949;
150 // Latin 1 & ASCII Code Pages
151 internal const int CodePageASCII
= 20127; // ASCII
152 internal const int ISO_8859_1
= 28591; // Latin1
155 private const int ISCIIAssemese
= 57006;
156 private const int ISCIIBengali
= 57003;
157 private const int ISCIIDevanagari
= 57002;
158 private const int ISCIIGujarathi
= 57010;
159 private const int ISCIIKannada
= 57008;
160 private const int ISCIIMalayalam
= 57009;
161 private const int ISCIIOriya
= 57007;
162 private const int ISCIIPanjabi
= 57011;
163 private const int ISCIITamil
= 57004;
164 private const int ISCIITelugu
= 57005;
167 private const int GB18030
= 54936;
170 private const int ISO_8859_8I
= 38598;
171 private const int ISO_8859_8_Visual
= 28598;
173 // 50229 is currently unsupported // "Chinese Traditional (ISO-2022)"
174 private const int ENC50229
= 50229;
176 // Special code pages
177 private const int CodePageUTF7
= 65000;
178 private const int CodePageUTF8
= 65001;
179 private const int CodePageUTF32
= 12000;
180 private const int CodePageUTF32BE
= 12001;
182 internal int m_codePage
= 0;
184 // dataItem should be internal (not private); otherwise it will break during the deserialization
185 // of data that came from Everett.
186 internal CodePageDataItem dataItem
= null;
189 internal bool m_deserializedFromEverett
= false;
191 // Because of encoders we may be read only
192 [OptionalField(VersionAdded
= 2)]
193 private bool m_isReadOnly
= true;
195 // Encoding (encoder) fallback
196 [OptionalField(VersionAdded
= 2)]
197 internal EncoderFallback encoderFallback
= null;
198 [OptionalField(VersionAdded
= 2)]
199 internal DecoderFallback decoderFallback
= null;
// Parameterless constructor: chains to Encoding(int) with code page 0.
201 protected Encoding() : this(0)
// Creates an encoding bound to the given code page and installs the default
// encoder/decoder fallbacks by calling SetDefaultFallbacks().
206 protected Encoding(int codePage
)
208 // Validate code page
// NOTE(review): no guard condition is visible in front of this throw; presumably it
// should only fire for an out-of-range (e.g. negative) code page - confirm.
211 throw new ArgumentOutOfRangeException("codePage");
213 Contract
.EndContractBlock();
215 // Remember code page
216 m_codePage
= codePage
;
218 // Use default encoder/decoder fallbacks
// SetDefaultFallbacks is virtual, so an override in a derived class runs from here.
219 this.SetDefaultFallbacks();
222 // This constructor is needed to allow any subclass implementation to provide encoder/decoder fallback objects,
223 // because the encoding object is always created as a read-only object and doesn't allow setting the encoder/decoder fallback
224 // after the creation is done.
// Creates an encoding bound to the given code page with caller-supplied fallbacks.
// A null encoderFallback / decoderFallback is replaced by the internal best-fit
// fallback constructed for this instance.
225 protected Encoding(int codePage
, EncoderFallback encoderFallback
, DecoderFallback decoderFallback
)
227 // Validate code page
// NOTE(review): no guard condition is visible in front of this throw; presumably it
// should only fire for an out-of-range (e.g. negative) code page - confirm.
230 throw new ArgumentOutOfRangeException("codePage");
232 Contract
.EndContractBlock();
234 // Remember code page
235 m_codePage
= codePage
;
// Unlike Encoding(int), this path assigns the fallback fields directly and does
// not call SetDefaultFallbacks().
237 this.encoderFallback
= encoderFallback
?? new InternalEncoderBestFitFallback(this);
238 this.decoderFallback
= decoderFallback
?? new InternalDecoderBestFitFallback(this);
241 // Default fallback that we'll use.
internal virtual void SetDefaultFallbacks()
{
    // The base class defaults both directions to best-fit fallbacks bound to this
    // instance. The method is virtual so other encodings can install their own
    // defaults (e.g. an "\xFFFD" replacement for UTF-X encodings, "?" for ASCII).
    EncoderFallback bestFitEncoder = new InternalEncoderBestFitFallback(this);
    encoderFallback = bestFitEncoder;

    DecoderFallback bestFitDecoder = new InternalDecoderBestFitFallback(this);
    decoderFallback = bestFitDecoder;
}
251 #region Serialization
internal void OnDeserializing()
{
    // Initialize the optional Whidbey fields to null so that OnDeserialized()
    // can tell whether the payload actually supplied them.
    decoderFallback = null;
    encoderFallback = null;
}
// Post-deserialization fix-up: if either fallback is still null, marks the instance
// as deserialized from Everett and installs the default fallbacks.
260 internal void OnDeserialized()
// Both fallback fields are cleared by OnDeserializing(), so null here means the
// payload did not carry them.
262 if (encoderFallback
== null || decoderFallback
== null)
264 m_deserializedFromEverett
= true;
265 SetDefaultFallbacks();
// NOTE(review): no statement follows the comment below; presumably dataItem is
// reset to null at this point - confirm.
268 // dataItem is always recalculated from the code page #
273 private void OnDeserializing(StreamingContext ctx
)
280 private void OnDeserialized(StreamingContext ctx
)
286 private void OnSerializing(StreamingContext ctx
)
288 // to be consistent with SerializeEncoding
292 // the following two methods are used for the inherited classes which implemented ISerializable
293 // Deserialization Helper
internal void DeserializeEncoding(SerializationInfo info, StreamingContext context)
{
    // Deserialization helper for derived classes that implement ISerializable.
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }
    Contract.EndContractBlock();

    // The code page is present in every serialized version.
    m_codePage = (int)info.GetValue("m_codePage", typeof(int));

    // dataItem can be fetched on the fly and its index differs between versions,
    // so whatever an Everett payload carried is ignored.
    dataItem = null;

    try
    {
        // Whidbey (V2.0) fields; a payload without them lands in the catch below.
        m_isReadOnly = (bool)info.GetValue("m_isReadOnly", typeof(bool));
        encoderFallback = (EncoderFallback)info.GetValue("encoderFallback", typeof(EncoderFallback));
        decoderFallback = (DecoderFallback)info.GetValue("decoderFallback", typeof(DecoderFallback));
    }
    catch (SerializationException)
    {
        // No Whidbey fields, so this must be an Everett payload: treat the
        // instance as read only and fall back to the default fallbacks.
        m_deserializedFromEverett = true;
        m_isReadOnly = true;
        SetDefaultFallbacks();
    }
}
332 // Serialization Helper
internal void SerializeEncoding(SerializationInfo info, StreamingContext context)
{
    // Serialization helper for derived classes that implement ISerializable.
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }
    Contract.EndContractBlock();

    int codePage = this.m_codePage;

    // Fields introduced with V2.0 (Whidbey)
    info.AddValue("m_isReadOnly", this.m_isReadOnly);
    info.AddValue("encoderFallback", this.EncoderFallback);
    info.AddValue("decoderFallback", this.DecoderFallback);

    // Field shared with Everett V1.1
    info.AddValue("m_codePage", codePage);

    // Everett-only field; always written as null
    info.AddValue("dataItem", null);

    // Everett duplicated these entries, so they are written again for portability
    info.AddValue("Encoding+m_codePage", codePage);
    info.AddValue("Encoding+dataItem", null);
}
355 #endregion Serialization
357 // Converts a byte array from one encoding to another. The bytes in the
358 // bytes array are converted from srcEncoding to
359 // dstEncoding, and the returned value is a new byte array
360 // containing the result of the conversion.
public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
    byte[] bytes)
{
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes");
    }
    Contract.Ensures(Contract.Result<byte[]>() != null);

    // Whole-array convenience form: delegate to the ranged overload, which also
    // validates the two encodings.
    int wholeLength = bytes.Length;
    return Convert(srcEncoding, dstEncoding, bytes, 0, wholeLength);
}
372 // Converts a range of bytes in a byte array from one encoding to another.
373 // This method converts count bytes from bytes starting at
374 // index index from srcEncoding to dstEncoding, and
375 // returns a new byte array containing the result of the conversion.
// Converts count bytes of bytes, starting at index, from srcEncoding to dstEncoding.
//
// Parameters:
//   srcEncoding - encoding the input bytes are currently in; must not be null.
//   dstEncoding - encoding to produce; must not be null.
//   bytes       - input buffer; must not be null.
//   index/count - range within bytes; passed unchanged to srcEncoding.GetChars.
// Returns: the array produced by dstEncoding.GetBytes for the decoded characters.
// Throws:  ArgumentNullException when srcEncoding, dstEncoding or bytes is null.
public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
    byte[] bytes, int index, int count)
{
    // The encodings are not arrays, so they are reported with the parameter name
    // only rather than with the "ArgumentNull_Array" resource text.
    if (srcEncoding == null)
    {
        throw new ArgumentNullException("srcEncoding");
    }
    if (dstEncoding == null)
    {
        throw new ArgumentNullException("dstEncoding");
    }
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }
    Contract.Ensures(Contract.Result<byte[]>() != null);

    // Decode to characters with the source encoding, then re-encode.
    return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
}
393 // Private object for locking instead of locking on a public type for SQL reliability work.
394 private static Object s_InternalSyncObject
;
private static Object InternalSyncObject
{
    get
    {
        if (s_InternalSyncObject != null)
        {
            return s_InternalSyncObject;
        }

        // Publish a new lock object only if the field is still null; if another
        // thread got there first, its instance is kept and returned instead.
        Interlocked.CompareExchange<Object>(ref s_InternalSyncObject, new Object(), null);
        return s_InternalSyncObject;
    }
}
406 [System
.Security
.SecurityCritical
]
public static void RegisterProvider(EncodingProvider provider)
{
    // Pure pass-through: argument validation is left to EncodingProvider.AddProvider.
    EncodingProvider.AddProvider(provider);
}
416 [System
.Security
.SecuritySafeCritical
] // auto-generated
418 public static Encoding
GetEncoding(int codepage
)
420 Encoding result
= EncodingProvider
.GetEncodingFromProvider(codepage
);
425 // NOTE: If you add a new encoding that can be get by codepage, be sure to
426 // add the corresponding item in EncodingTable.
427 // Otherwise, the code below will throw exception when trying to call
428 // EncodingTable.GetDataItem().
430 if (codepage
< 0 || codepage
> 65535) {
431 throw new ArgumentOutOfRangeException(
432 "codepage", Environment
.GetResourceString("ArgumentOutOfRange_Range",
436 Contract
.EndContractBlock();
440 // See if we have a hash table with our encoding in it already.
441 if (encodings
!= null) {
442 result
= (Encoding
)encodings
[codepage
];
447 // Don't conflict with ourselves
448 lock (InternalSyncObject
)
450 // Need a new hash table
451 // in case another thread beat us to creating the Dictionary
452 if (encodings
== null) {
453 encodings
= new Hashtable();
456 // Double check that we don't have one in the table (in case another thread beat us here)
457 if ((result
= (Encoding
)encodings
[codepage
]) != null)
460 // Special case the commonly used Encoding classes here, then call
461 // GetEncodingRare to avoid loading classes like MLangCodePageEncoding
462 // and ASCIIEncoding. ASP.NET uses UTF-8 & ISO-8859-1.
465 case CodePageDefault
: // 0, default code page
466 result
= Encoding
.Default
;
468 case CodePageUnicode
: // 1200, Unicode
471 case CodePageBigEndian
: // 1201, big endian unicode
472 result
= BigEndianUnicode
;
474 #if FEATURE_CODEPAGES_FILE
475 case CodePageWindows1252
: // 1252, Windows
476 result
= new SBCSCodePageEncoding(codepage
);
481 // on desktop, UTF7 is handled by GetEncodingRare.
482 // On Coreclr, we handle this directly without bringing GetEncodingRare, so that we get real UTF-7 encoding.
483 case CodePageUTF7
: // 65000, UTF7
489 case CodePageUTF32
: // 12000
492 case CodePageUTF32BE
: // 12001
493 result
= new UTF32Encoding(true, true);
498 case CodePageUTF8
: // 65001, UTF8
502 // These are (hopefully) not very common, but also shouldn't slow us down much and make default
503 // case able to handle more code pages by calling GetEncodingCodePage
504 case CodePageNoOEM
: // 1
505 case CodePageNoMac
: // 2
506 case CodePageNoThread
: // 3
507 case CodePageNoSymbol
: // 42
508 // Win32 also allows the following special code page values. We won't allow them except in the
510 // #define CP_ACP 0 // default to ANSI code page
511 // #define CP_OEMCP 1 // default to OEM code page
512 // #define CP_MACCP 2 // default to MAC code page
513 // #define CP_THREAD_ACP 3 // current thread's ANSI code page
514 // #define CP_SYMBOL 42 // SYMBOL translations
515 throw new ArgumentException(Environment
.GetResourceString(
516 "Argument_CodepageNotSupported", codepage
), "codepage");
518 // Have to do ASCII and Latin 1 first so they don't get loaded as code pages
519 case CodePageASCII
: // 20127
524 case ISO_8859_1
: // 28591
530 #if FEATURE_CODEPAGES_FILE
531 // 1st assume its a code page.
532 result
= GetEncodingCodePage(codepage
);
534 result
= GetEncodingRare(codepage
);
537 // Is it a valid code page?
538 if (EncodingTable
.GetCodePageDataItem(codepage
) == null)
540 throw new NotSupportedException(
541 Environment
.GetResourceString("NotSupported_NoCodepageData", codepage
));
543 #if MONO_HYBRID_ENCODING_SUPPORT
545 case CodePageUTF32
: // 12000
548 case CodePageUTF32BE
: // 12001
549 result
= new UTF32Encoding(true, true);
552 result
= (Encoding
)(EncodingHelper
.InvokeI18N ("GetEncoding", codepage
));
554 throw new NotSupportedException(string.Format("Encoding {0} data could not be found. Make sure you have correct international codeset assembly installed and enabled.", codepage
));
561 #endif // FEATURE_CODEPAGES_FILE
564 encodings
.Add(codepage
, result
);
public static Encoding GetEncoding(int codepage,
    EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
    // Registered providers are consulted first.
    Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
    if (fromProvider != null)
    {
        return fromProvider;
    }

    // GetEncoding(int) hands back a cached, read-only instance, so the requested
    // fallbacks are applied to a clone rather than to the shared object.
    Encoding writableCopy = (Encoding)GetEncoding(codepage).Clone();
    writableCopy.EncoderFallback = encoderFallback;
    writableCopy.DecoderFallback = decoderFallback;

    return writableCopy;
}
590 #if FEATURE_CODEPAGES_FILE
591 [System
.Security
.SecurityCritical
] // auto-generated
592 private static Encoding
GetEncodingRare(int codepage
)
594 Contract
.Assert(codepage
!= 0 && codepage
!= 1200 && codepage
!= 1201 && codepage
!= 65001,
595 "[Encoding.GetEncodingRare]This code page (" + codepage
+ ") isn't supported by GetEncodingRare!");
599 case CodePageUTF7
: // 65000
602 case CodePageUTF32
: // 12000
605 case CodePageUTF32BE
: // 12001
606 result
= new UTF32Encoding(true, true);
610 case ISCIIDevanagari
:
618 result
= new ISCIIEncoding(codepage
);
620 // GB2312-80 uses same code page for 20936 and mac 10008
621 case CodePageMacGB2312
:
622 // case CodePageGB2312:
623 // result = new DBCSCodePageEncoding(codepage, EUCCN);
624 result
= new DBCSCodePageEncoding(CodePageMacGB2312
, CodePageGB2312
);
627 // Mac Korean 10003 and 20949 are the same
628 case CodePageMacKorean
:
629 result
= new DBCSCodePageEncoding(CodePageMacKorean
, CodePageDLLKorean
);
631 // GB18030 Code Pages
633 result
= new GB18030Encoding();
635 // ISO2022 Code Pages
637 // case ISOSimplifiedCN
639 case ISO2022JP
: // JIS JP, full-width Katakana mode (no half-width Katakana)
640 case ISO2022JPESC
: // JIS JP, esc sequence to do Katakana.
641 case ISO2022JPSISO
: // JIS JP with Shift In/ Shift Out Katakana support
642 result
= new ISO2022Encoding(codepage
);
644 // Duplicate EUC-CN (51936) just calls a base code page 936,
645 // so does ISOSimplifiedCN (50227), which's gotta be broken
647 case ISOSimplifiedCN
:
648 result
= new DBCSCodePageEncoding(codepage
, EUCCN
); // Just maps to 936
651 result
= new EUCJPEncoding();
654 result
= new DBCSCodePageEncoding(codepage
, CodePageDLLKorean
); // Maps to 20949
657 throw new NotSupportedException(Environment
.GetResourceString("NotSupported_CodePage50229"));
659 result
= new SBCSCodePageEncoding(codepage
, ISO_8859_8_Visual
); // Hebrew maps to a different code page
662 // Not found, already tried codepage table code pages in GetEncoding()
663 throw new NotSupportedException(
664 Environment
.GetResourceString("NotSupported_NoCodepageData", codepage
));
669 [System
.Security
.SecurityCritical
] // auto-generated
private static Encoding GetEncodingCodePage(int CodePage)
{
    // GetCodePageByteSize tells single-byte from double-byte code pages
    // (0 when the code page was not found).
    switch (BaseCodePageEncoding.GetCodePageByteSize(CodePage))
    {
        case 1:
            return new SBCSCodePageEncoding(CodePage);

        case 2:
            return new DBCSCodePageEncoding(CodePage);

        default:
            // Not found: the caller gets null.
            return null;
    }
}
680 #endif // FEATURE_CODEPAGES_FILE
681 // Returns an Encoding object for a given name or a given code page value.
public static Encoding GetEncoding(String name)
{
    // Providers get the first chance. Otherwise the name is mapped to a code page
    // and resolved through GetEncoding(int).
    //
    // NOTE: a new encoding that can be requested by name also needs an entry in
    // EncodingTable, or EncodingTable.GetCodePageFromName() below will throw.
    return EncodingProvider.GetEncodingFromProvider(name)
        ?? GetEncoding(EncodingTable.GetCodePageFromName(name));
}
699 // Returns an Encoding object for a given name or a given code page value.
public static Encoding GetEncoding(String name,
    EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
    // Providers get the first chance. Otherwise the name is mapped to a code page
    // and resolved through the code page overload that takes fallbacks.
    //
    // NOTE: a new encoding that can be requested by name also needs an entry in
    // EncodingTable, or EncodingTable.GetCodePageFromName() below will throw.
    return EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback)
        ?? GetEncoding(EncodingTable.GetCodePageFromName(name), encoderFallback, decoderFallback);
}
718 // Return a list of all EncodingInfo objects describing all of our encodings
public static EncodingInfo[] GetEncodings()
{
    // The list is produced entirely by EncodingTable.
    EncodingInfo[] all = EncodingTable.GetEncodings();
    return all;
}
public virtual byte[] GetPreamble()
{
    // The base implementation returns the shared empty byte array.
    byte[] noPreamble = EmptyArray<Byte>.Value;
    return noPreamble;
}
private void GetDataItem()
{
    // Already loaded: nothing to do.
    if (dataItem != null)
    {
        return;
    }

    dataItem = EncodingTable.GetCodePageDataItem(m_codePage);
    if (dataItem != null)
    {
        return;
    }

    // EncodingTable has no entry for this code page.
    throw new NotSupportedException(
        Environment.GetResourceString("NotSupported_NoCodepageData", m_codePage));
}
741 // Returns the name for this encoding that can be used with mail agent body tags.
742 // If the encoding may not be used, the string is empty.
public virtual String BodyName
{
    get
    {
        // Code page metadata is loaded on first use.
        if (dataItem == null)
        {
            GetDataItem();
        }

        String bodyName = dataItem.BodyName;
        return bodyName;
    }
}
755 // Returns the human-readable description of the encoding ( e.g. Hebrew (DOS)).
757 public virtual String EncodingName
762 return (Environment
.GetResourceStringEncodingName(m_codePage
));
764 return (Environment
.GetResourceString("Globalization.cp_" + m_codePage
));
769 // Returns the name for this encoding that can be used with mail agent header
770 // tags. If the encoding may not be used, the string is empty.
public virtual String HeaderName
{
    get
    {
        // Code page metadata is loaded on first use.
        if (dataItem == null)
        {
            GetDataItem();
        }

        String headerName = dataItem.HeaderName;
        return headerName;
    }
}
783 // Returns the IANA-registered name for this encoding as a single string
784 // (the property type is String, not an array of names).
public virtual String WebName
{
    get
    {
        // Code page metadata is loaded on first use.
        if (dataItem == null)
        {
            GetDataItem();
        }

        String webName = dataItem.WebName;
        return webName;
    }
}
797 // Returns the windows code page that most closely corresponds to this encoding.
public virtual int WindowsCodePage
{
    get
    {
        // Code page metadata is loaded on first use.
        if (dataItem == null)
        {
            GetDataItem();
        }

        // The value comes from the data item's UIFamilyCodePage entry.
        return dataItem.UIFamilyCodePage;
    }
}
811 // True if and only if the encoding is used for display by browsers clients.
public virtual bool IsBrowserDisplay
{
    get
    {
        // Code page metadata is loaded on first use.
        if (dataItem == null)
        {
            GetDataItem();
        }

        // Test the MIMECONTF_BROWSER bit of the code page flags.
        bool browserBitSet = (dataItem.Flags & MIMECONTF_BROWSER) != 0;
        return browserBitSet;
    }
}
822 // True if and only if the encoding is used for saving by browsers clients.
public virtual bool IsBrowserSave
{
    get
    {
        // Code page metadata is loaded on first use.
        if (dataItem == null)
        {
            GetDataItem();
        }

        // Test the MIMECONTF_SAVABLE_BROWSER bit of the code page flags.
        bool savableBitSet = (dataItem.Flags & MIMECONTF_SAVABLE_BROWSER) != 0;
        return savableBitSet;
    }
}
833 // True if and only if the encoding is used for display by mail and news clients.
public virtual bool IsMailNewsDisplay
{
    get
    {
        // Code page metadata is loaded on first use.
        if (dataItem == null)
        {
            GetDataItem();
        }

        // Test the MIMECONTF_MAILNEWS bit of the code page flags.
        bool mailNewsBitSet = (dataItem.Flags & MIMECONTF_MAILNEWS) != 0;
        return mailNewsBitSet;
    }
}
845 // True if and only if the encoding is used for saving documents by mail and
public virtual bool IsMailNewsSave
{
    get
    {
        // Code page metadata is loaded on first use.
        if (dataItem == null)
        {
            GetDataItem();
        }

        // Test the MIMECONTF_SAVABLE_MAILNEWS bit of the code page flags.
        bool savableBitSet = (dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS) != 0;
        return savableBitSet;
    }
}
857 // True if and only if the encoding only uses single byte code points. (Ie, ASCII, 1252, etc)
859 [System
.Runtime
.InteropServices
.ComVisible(false)]
860 public virtual bool IsSingleByte
869 [System
.Runtime
.InteropServices
.ComVisible(false)]
public EncoderFallback EncoderFallback
{
    get
    {
        return encoderFallback;
    }

    set
    {
        // Read-only instances reject any change to the fallback.
        if (IsReadOnly)
        {
            throw new InvalidOperationException(Environment.GetResourceString("InvalidOperation_ReadOnly"));
        }

        if (value == null)
        {
            throw new ArgumentNullException("value");
        }
        Contract.EndContractBlock();

        encoderFallback = value;
    }
}
891 [System
.Runtime
.InteropServices
.ComVisible(false)]
public DecoderFallback DecoderFallback
{
    get
    {
        return decoderFallback;
    }

    set
    {
        // Read-only instances reject any change to the fallback.
        if (IsReadOnly)
        {
            throw new InvalidOperationException(Environment.GetResourceString("InvalidOperation_ReadOnly"));
        }

        if (value == null)
        {
            throw new ArgumentNullException("value");
        }
        Contract.EndContractBlock();

        decoderFallback = value;
    }
}
913 [System
.Runtime
.InteropServices
.ComVisible(false)]
public virtual Object Clone()
{
    // Shallow copy of this instance.
    Encoding copy = (Encoding)this.MemberwiseClone();

    // The copy is writable (m_isReadOnly cleared) even when this instance is
    // read only, so its fallbacks can be replaced.
    copy.m_isReadOnly = false;
    return copy;
}
924 [System
.Runtime
.InteropServices
.ComVisible(false)]
public bool IsReadOnly
{
    get
    {
        // Reports the m_isReadOnly flag as-is.
        return m_isReadOnly;
    }
}
935 // Returns an encoding for the ASCII character set. The returned encoding
936 // will be an instance of the ASCIIEncoding class.
public static Encoding ASCII
{
    get
    {
        // Created on first access and kept in the static asciiEncoding field.
        if (asciiEncoding == null)
        {
            asciiEncoding = new ASCIIEncoding();
        }

        return asciiEncoding;
    }
}
950 // Returns an encoding for the Latin1 character set. The returned encoding
951 // will be an instance of the Latin1Encoding class.
953 // This is for our optimizations
private static Encoding Latin1
{
    get
    {
        // Created on first access and kept in the static latin1Encoding field.
        if (latin1Encoding == null)
        {
            latin1Encoding = new Latin1Encoding();
        }

        return latin1Encoding;
    }
}
964 // Returns the number of bytes required to encode the given character
public virtual int GetByteCount(char[] chars)
{
    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }
    Contract.EndContractBlock();

    // Whole-array form of the ranged overload.
    int wholeLength = chars.Length;
    return GetByteCount(chars, 0, wholeLength);
}
public virtual int GetByteCount(String s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }
    Contract.EndContractBlock();

    // Counting goes through the char[] overload, so the string is copied into
    // an array first.
    char[] copy = s.ToCharArray();
    return GetByteCount(copy, 0, copy.Length);
}
992 // Returns the number of bytes required to encode a range of characters in
993 // a character array.
// Abstract: no body here, every concrete encoding supplies the implementation.
// The other GetByteCount overloads in this class end up calling this one.
996 public abstract int GetByteCount(char[] chars
, int index
, int count
);
998 // We expect this to be the workhorse for NLS encodings
999 // unfortunately for existing overrides, it has to call the [] version,
1000 // which is really slow, so this method should be avoided if you're calling
1001 // a 3rd party encoding.
1003 [System
.Security
.SecurityCritical
] // auto-generated
1004 [CLSCompliant(false)]
1005 [System
.Runtime
.InteropServices
.ComVisible(false)]
public virtual unsafe int GetByteCount(char* chars, int count)
{
    // Validate input parameters
    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    // Copy the pointed-to characters into a managed array so the array-based
    // overload can do the counting.
    char[] managedCopy = new char[count];
    int position = 0;
    while (position < count)
    {
        managedCopy[position] = chars[position];
        position++;
    }

    return GetByteCount(managedCopy, 0, count);
}
1027 // For NLS Encodings, workhorse takes an encoder (may be null)
1028 // Always validate parameters before calling internal version, which will only assert.
1029 [System
.Security
.SecurityCritical
] // auto-generated
internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    // Callers validate the arguments; this version only states them as contracts.
    Contract.Requires(chars != null);
    Contract.Requires(count >= 0);

    // The base implementation does not use the encoder and forwards to the
    // public pointer overload.
    int byteCount = GetByteCount(chars, count);
    return byteCount;
}
1038 // Returns a byte array containing the encoded representation of the given
public virtual byte[] GetBytes(char[] chars)
{
    if (chars == null)
    {
        throw new ArgumentNullException("chars",
            Environment.GetResourceString("ArgumentNull_Array"));
    }
    Contract.EndContractBlock();

    // Whole-array form of the ranged overload.
    int wholeLength = chars.Length;
    return GetBytes(chars, 0, wholeLength);
}
1053 // Returns a byte array containing the encoded representation of a range
1054 // of characters in a character array.
public virtual byte[] GetBytes(char[] chars, int index, int count)
{
    // Size the output with GetByteCount for the range, then encode into it
    // starting at offset 0.
    int needed = GetByteCount(chars, index, count);
    byte[] encoded = new byte[needed];

    GetBytes(chars, index, count, encoded, 0);
    return encoded;
}
1064 // Encodes a range of characters in a character array into a range of bytes
1065 // in a byte array. An exception occurs if the byte array is not large
1066 // enough to hold the complete encoding of the characters. The
1067 // GetByteCount method can be used to determine the exact number of
1068 // bytes that will be produced for a given range of characters.
1069 // Alternatively, the GetMaxByteCount method can be used to
1070 // determine the maximum number of bytes that will be produced for a given
1071 // number of characters, regardless of the actual character values.
// Abstract: no body here, every concrete encoding supplies the implementation.
// The array- and string-based GetBytes overloads in this class end up calling this one.
1073 public abstract int GetBytes(char[] chars
, int charIndex
, int charCount
,
1074 byte[] bytes
, int byteIndex
);
1076 // Returns a byte array containing the encoded representation of the given
public virtual byte[] GetBytes(String s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s",
            Environment.GetResourceString("ArgumentNull_String"));
    }
    Contract.EndContractBlock();

    // Size the output with GetByteCount, then encode the whole string into it.
    int expected = GetByteCount(s);
    byte[] encoded = new byte[expected];
    int written = GetBytes(s, 0, s.Length, encoded, 0);

    // The two passes are expected to agree on the byte count.
    Contract.Assert(expected == written);
    return encoded;
}
public virtual int GetBytes(String s, int charIndex, int charCount,
    byte[] bytes, int byteIndex)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }
    Contract.EndContractBlock();

    // The whole string is copied to a char[]; charIndex and charCount are then
    // interpreted by the array overload.
    char[] copy = s.ToCharArray();
    return GetBytes(copy, charIndex, charCount, bytes, byteIndex);
}
1103 // This is our internal workhorse
1104 // Always validate parameters before calling internal version, which will only assert.
1105 [System
.Security
.SecurityCritical
] // auto-generated
internal virtual unsafe int GetBytes(char* chars, int charCount,
    byte* bytes, int byteCount, EncoderNLS encoder)
{
    // The base implementation does not use the encoder and forwards to the
    // public pointer overload, which validates the arguments.
    int written = GetBytes(chars, charCount, bytes, byteCount);
    return written;
}
1112 // We expect this to be the workhorse for NLS Encodings, but for existing
1113 // ones we need a working (if slow) default implementation.
1115 // WARNING WARNING WARNING
1117 // WARNING: If this breaks it could be a security threat. Obviously we
1118 // call this internally, so you need to make sure that your pointers, counts
1119 // and indexes are correct when you call this method.
1121 // In addition, we have internal code, which will be marked as "safe" calling
1122 // this code. However this code is dependent upon the implementation of an
1123 // external GetBytes() method, which could be overridden by a third party and
1124 // the results of which cannot be guaranteed. We use that result to copy
1125 // the byte[] to our byte* output buffer. If the result count was wrong, we
1126 // could easily overflow our output buffer. Therefore we do an extra test
1127 // when we copy the buffer so that we don't overflow byteCount either.
1129 [System
.Security
.SecurityCritical
] // auto-generated
1130 [CLSCompliant(false)]
1131 [System
.Runtime
.InteropServices
.ComVisible(false)]
1132 public virtual unsafe int GetBytes(char* chars
, int charCount
,
1133 byte* bytes
, int byteCount
)
1135 // Validate input parameters
1136 if (bytes
== null || chars
== null)
1137 throw new ArgumentNullException(bytes
== null ? "bytes" : "chars",
1138 Environment
.GetResourceString("ArgumentNull_Array"));
1140 if (charCount
< 0 || byteCount
< 0)
1141 throw new ArgumentOutOfRangeException((charCount
<0 ? "charCount" : "byteCount"),
1142 Environment
.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
1143 Contract
.EndContractBlock();
1145 // Get the char array to convert
1146 char[] arrChar
= new char[charCount
];
1149 for (index
= 0; index
< charCount
; index
++)
1150 arrChar
[index
] = chars
[index
];
1152 // Get the byte array to fill
1153 byte[] arrByte
= new byte[byteCount
];
1156 int result
= GetBytes(arrChar
, 0, charCount
, arrByte
, 0);
1158 // The only way this could fail is a bug in GetBytes
1159 Contract
.Assert(result
<= byteCount
, "[Encoding.GetBytes]Returned more bytes than we have space for");
1161 // Copy the byte array
1162 // WARNING: We MUST make sure that we don't copy too many bytes. We can't
1163 // rely on result because it could be a 3rd party implimentation. We need
1164 // to make sure we never copy more than byteCount bytes no matter the value
1166 if (result
< byteCount
)
1169 // Copy the data, don't overrun our array!
1170 for (index
= 0; index
< byteCount
; index
++)
1171 bytes
[index
] = arrByte
[index
];
// Returns the number of characters produced by decoding the whole of the
// given byte array. Throws ArgumentNullException when bytes is null.
//
public virtual int GetCharCount(byte[] bytes)
{
    if (bytes != null)
    {
        Contract.EndContractBlock();
        return GetCharCount(bytes, 0, bytes.Length);
    }

    throw new ArgumentNullException("bytes",
        Environment.GetResourceString("ArgumentNull_Array"));
}
// Returns the number of characters that decoding 'count' bytes of 'bytes',
// beginning at 'index', would produce. Every concrete encoding must supply
// this; the other GetCharCount overloads in this class funnel into it.
//
public abstract int GetCharCount(byte[] bytes, int index, int count);
// Pointer-based character count. NLS encodings are expected to override this
// as their workhorse; for existing encodings this is a working (if slow)
// default implementation that copies the input to a managed array and asks
// the array-based GetCharCount.
//
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
[System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetCharCount(byte* bytes, int count)
{
    // Validate input parameters
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes",
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException("count",
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    // Copy the unmanaged bytes into a managed array
    byte[] managedBytes = new byte[count];
    for (int i = 0; i < count; i++)
    {
        managedBytes[i] = bytes[i];
    }

    return GetCharCount(managedBytes, 0, count);
}
// Internal workhorse overload that additionally receives the decoder driving
// the conversion. This base implementation does not use the decoder; it just
// forwards to the public pointer-based GetCharCount.
// Always validate parameters before calling the internal version, which will only assert.
[System.Security.SecurityCritical]  // auto-generated
internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    int charTotal = GetCharCount(bytes, count);
    return charTotal;
}
// Decodes the whole of the given byte array and returns the characters in a
// new array. Throws ArgumentNullException when bytes is null.
//
public virtual char[] GetChars(byte[] bytes)
{
    if (bytes != null)
    {
        Contract.EndContractBlock();
        return GetChars(bytes, 0, bytes.Length);
    }

    throw new ArgumentNullException("bytes",
        Environment.GetResourceString("ArgumentNull_Array"));
}
// Decodes 'count' bytes of 'bytes', beginning at 'index', and returns the
// characters in a new array sized by GetCharCount for the same range.
//
public virtual char[] GetChars(byte[] bytes, int index, int count)
{
    int charTotal = GetCharCount(bytes, index, count);
    char[] decoded = new char[charTotal];

    // Fill the array from its start; the returned count is not needed here.
    GetChars(bytes, index, count, decoded, 0);
    return decoded;
}
// Decodes byteCount bytes of 'bytes', beginning at byteIndex, into 'chars'
// beginning at charIndex. An exception occurs if the character array is not
// large enough to hold the complete decoding of the bytes.
//
// Use GetCharCount to obtain the exact number of characters a given range of
// bytes will produce, or GetMaxCharCount to obtain the maximum number of
// characters that a given number of bytes can produce regardless of the
// actual byte values.
//
public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
                             char[] chars, int charIndex);
// Pointer-based decode. NLS encodings are expected to override this as their
// workhorse; for existing encodings this is a working (if slow) default
// implementation built on top of the array-based GetChars.
//
// WARNING WARNING WARNING
//
// WARNING: If this breaks it could be a security threat. It is called
// internally, so pointers, counts and indexes must be correct when this
// method is invoked.
//
// Internal callers marked as "safe" rely on this code, yet it depends on the
// array-based GetChars(), which a third party may override and whose result
// cannot be guaranteed. That result decides how much of the temporary char[]
// is copied to the char* output buffer, so a wrong count could overflow the
// output. The copy below is therefore always capped at charCount.
//
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
[System.Runtime.InteropServices.ComVisible(false)]
public virtual unsafe int GetChars(byte* bytes, int byteCount,
                                   char* chars, int charCount)
{
    // Validate input parameters ("chars" is reported first when both are null)
    if (chars == null || bytes == null)
    {
        string nullArgument = (chars == null) ? "chars" : "bytes";
        throw new ArgumentNullException(nullArgument,
            Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (byteCount < 0 || charCount < 0)
    {
        string negativeArgument = (byteCount < 0) ? "byteCount" : "charCount";
        throw new ArgumentOutOfRangeException(negativeArgument,
            Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    // Copy the input bytes into a managed array
    byte[] managedBytes = new byte[byteCount];
    for (int i = 0; i < byteCount; i++)
    {
        managedBytes[i] = bytes[i];
    }

    // Scratch output with the same capacity as the caller's buffer
    char[] managedChars = new char[charCount];

    // Do the work through the array-based overload
    int produced = GetChars(managedBytes, 0, byteCount, managedChars, 0);

    // The only way this could fail is a bug in GetChars
    Contract.Assert(produced <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");

    // WARNING: never copy more than charCount chars, whatever 'produced'
    // claims -- it may come from a third-party implementation.
    int copyCount = (produced < charCount) ? produced : charCount;

    // Copy the data, don't overrun the caller's buffer!
    for (int i = 0; i < copyCount; i++)
    {
        chars[i] = managedChars[i];
    }

    return copyCount;
}
// Internal workhorse overload that additionally receives the decoder driving
// the conversion. This base implementation does not use the decoder; it just
// forwards to the public pointer-based GetChars.
// Always validate parameters before calling the internal version, which will only assert.
[System.Security.SecurityCritical]  // auto-generated
internal virtual unsafe int GetChars(byte* bytes, int byteCount,
                                     char* chars, int charCount, DecoderNLS decoder)
{
    int produced = GetChars(bytes, byteCount, chars, charCount);
    return produced;
}
// Decodes byteCount bytes starting at the given pointer into a new string.
// The construction itself is delegated to String.CreateStringFromEncoding,
// which is handed this encoding.
[System.Security.SecurityCritical]  // auto-generated
[CLSCompliant(false)]
[System.Runtime.InteropServices.ComVisible(false)]
public unsafe string GetString(byte* bytes, int byteCount)
{
    if (bytes == null)
    {
        throw new ArgumentNullException("bytes", Environment.GetResourceString("ArgumentNull_Array"));
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
    }
    Contract.EndContractBlock();

    string decoded = String.CreateStringFromEncoding(bytes, byteCount, this);
    return decoded;
}
1362 // Returns the code page identifier of this encoding. The returned value is
1363 // an integer between 0 and 65535 if the encoding has a code page
1364 // identifier, or -1 if the encoding does not represent a code page.
1367 public virtual int CodePage
// IsAlwaysNormalized
// Convenience overload: asks the virtual IsAlwaysNormalized(NormalizationForm)
// about a fixed form -- FormC normally, or FormIdna when the build defines
// FEATURE_NORM_IDNA_ONLY.
[System.Runtime.InteropServices.ComVisible(false)]
public bool IsAlwaysNormalized()
{
    NormalizationForm defaultForm;
#if !FEATURE_NORM_IDNA_ONLY
    defaultForm = NormalizationForm.FormC;
#else
    defaultForm = (NormalizationForm)ExtendedNormalizationForms.FormIdna;
#endif
    return this.IsAlwaysNormalized(defaultForm);
}
// Reports whether this encoding's output is always normalized for the given
// form. The base class cannot know, so it answers false; encodings that know
// otherwise override this.
[System.Runtime.InteropServices.ComVisible(false)]
public virtual bool IsAlwaysNormalized(NormalizationForm form)
{
    // Assume false unless the encoding knows otherwise
    const bool alwaysNormalized = false;
    return alwaysNormalized;
}
// Returns a Decoder for this encoding, usable to turn a sequence of bytes
// into a sequence of characters. Unlike the GetChars family, a Decoder can
// convert partial byte sequences into partial character sequences by keeping
// state between conversions.
//
// This default implementation hands back a DefaultDecoder, which simply
// forwards to this encoding's GetCharCount and GetChars. Encodings that need
// state carried across successive conversions should override this method
// and return an appropriate Decoder implementation.
//
public virtual Decoder GetDecoder()
{
    Decoder forwardingDecoder = new DefaultDecoder(this);
    return forwardingDecoder;
}
1415 [System
.Security
.SecurityCritical
] // auto-generated
1416 private static Encoding
CreateDefaultEncoding()
1420 #if FEATURE_CODEPAGES_FILE
1421 int codePage
= Win32Native
.GetACP();
1423 // For US English, we can save some startup working set by not calling
1424 // GetEncoding(int codePage) since JITting GetEncoding will force us to load
1425 // all the Encoding classes for ASCII, UTF7 & UTF8, & UnicodeEncoding.
1427 if (codePage
== 1252)
1428 enc
= new SBCSCodePageEncoding(codePage
);
1430 enc
= GetEncoding(codePage
);
1431 #else // FEATURE_CODEPAGES_FILE
1433 #if MONO_HYBRID_ENCODING_SUPPORT
1434 enc
= EncodingHelper
.GetDefaultEncoding ();
1435 enc
.m_isReadOnly
= true;
1437 // For silverlight we use UTF8 since ANSI isn't available
1441 #endif //FEATURE_CODEPAGES_FILE
#if MONO_HYBRID_ENCODING_SUPPORT
// Sets the m_isReadOnly flag of this encoding; calling without an argument
// marks the instance read-only.
internal void setReadOnly (bool value = true)
{
    this.m_isReadOnly = value;
}
#endif
// Returns an encoding for the system's current ANSI code page. The instance
// is created by CreateDefaultEncoding() on first use and cached in
// defaultEncoding afterwards.
//
public static Encoding Default
{
    [System.Security.SecuritySafeCritical]  // auto-generated
    get
    {
        if (defaultEncoding != null)
        {
            return defaultEncoding;
        }

        defaultEncoding = CreateDefaultEncoding();
        return defaultEncoding;
    }
}
// Returns an Encoder for this encoding, usable to turn a sequence of
// characters into a sequence of bytes. Unlike the GetBytes family, an Encoder
// can convert partial character sequences into partial byte sequences by
// keeping state between conversions.
//
// This default implementation hands back a DefaultEncoder, which simply
// forwards to this encoding's GetByteCount and GetBytes. Encodings that need
// state carried across successive conversions should override this method
// and return an appropriate Encoder implementation.
//
public virtual Encoder GetEncoder()
{
    Encoder forwardingEncoder = new DefaultEncoder(this);
    return forwardingEncoder;
}
// Returns the maximum number of bytes required to encode charCount
// characters. Use it to size byte arrays passed to this encoding's GetBytes
// or to the GetBytes of an Encoder for this encoding. All encodings must
// guarantee that no buffer overflow exceptions occur when buffers are sized
// according to the result of this method.
//
// WARNING: With anything other than the default replacement encoder
// fallback, an actual call to GetBytes() could produce more bytes than the
// value returned here.
//
public abstract int GetMaxByteCount(int charCount);
// Returns the maximum number of characters produced by decoding byteCount
// bytes. Use it to size character arrays passed to this encoding's GetChars
// or to the GetChars of a Decoder for this encoding. All encodings must
// guarantee that no buffer overflow exceptions occur when buffers are sized
// according to the result of this method.
//
public abstract int GetMaxCharCount(int byteCount);
// Decodes the whole of the given byte array into a string.
// Throws ArgumentNullException when bytes is null.
//
public virtual String GetString(byte[] bytes)
{
    if (bytes != null)
    {
        Contract.EndContractBlock();
        return GetString(bytes, 0, bytes.Length);
    }

    throw new ArgumentNullException("bytes",
        Environment.GetResourceString("ArgumentNull_Array"));
}
// Decodes 'count' bytes of 'bytes', beginning at 'index', into a string by
// wrapping the char[] returned from GetChars for the same range.
//
// Internally we override this for performance
//
public virtual String GetString(byte[] bytes, int index, int count)
{
    char[] decoded = GetChars(bytes, index, count);
    return new String(decoded);
}
// Returns an encoding for the Unicode (UTF-16) format; the instance is a
// UnicodeEncoding created on first use and cached in unicodeEncoding.
//
// It uses little endian byte order, but will detect input in big endian if
// it finds a byte order mark per Unicode 2.0.
//
public static Encoding Unicode
{
    get
    {
        if (unicodeEncoding != null)
        {
            return unicodeEncoding;
        }

        unicodeEncoding = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);
        return unicodeEncoding;
    }
}
// Returns an encoding for the Unicode (UTF-16) format; the instance is a
// UnicodeEncoding created on first use and cached in bigEndianUnicode.
//
// It uses big endian byte order, but will detect input in little endian if
// it finds a byte order mark per Unicode 2.0.
//
public static Encoding BigEndianUnicode
{
    get
    {
        if (bigEndianUnicode != null)
        {
            return bigEndianUnicode;
        }

        bigEndianUnicode = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
        return bigEndianUnicode;
    }
}
// Returns an encoding for the UTF-7 format; the instance is a UTF7Encoding
// created with its default constructor on first use and cached in
// utf7Encoding.
//
public static Encoding UTF7
{
    get
    {
        if (utf7Encoding != null)
        {
            return utf7Encoding;
        }

        utf7Encoding = new UTF7Encoding();
        return utf7Encoding;
    }
}
// Returns an encoding for the UTF-8 format; the instance is a UTF8Encoding
// created on first use (constructed with the UTF-8 identifier enabled) and
// cached in utf8Encoding.
//
public static Encoding UTF8
{
    get
    {
        if (utf8Encoding != null)
        {
            return utf8Encoding;
        }

        utf8Encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: true);
        return utf8Encoding;
    }
}
// Returns an encoding for the UTF-32 format; the instance is a little endian
// UTF32Encoding with a byte order mark, created on first use and cached in
// utf32Encoding.
//
public static Encoding UTF32
{
    get
    {
        if (utf32Encoding != null)
        {
            return utf32Encoding;
        }

        utf32Encoding = new UTF32Encoding(bigEndian: false, byteOrderMark: true);
        return utf32Encoding;
    }
}
// Two encodings are equal when they have the same code page and their encoder
// and decoder fallbacks are equal. Returns false (instead of dereferencing a
// null reference) when value is null or is not an Encoding.
public override bool Equals(Object value)
{
    Encoding that = value as Encoding;
    if (that == null)
    {
        // 'as' yields null for null input and for objects of any other type.
        return false;
    }

    return (m_codePage == that.m_codePage) &&
           (EncoderFallback.Equals(that.EncoderFallback)) &&
           (DecoderFallback.Equals(that.DecoderFallback));
}
// Hash code built from the same three values Equals compares: the code page
// plus the hash codes of the encoder and decoder fallbacks.
public override int GetHashCode()
{
    int hash = m_codePage;
    hash += this.EncoderFallback.GetHashCode();
    hash += this.DecoderFallback.GetHashCode();
    return hash;
}
// Best fit data for the Unicode-to-bytes direction. The base encoding has
// none, so the shared empty char array is returned.
internal virtual char[] GetBestFitUnicodeToBytesData()
{
    // Normally we don't have any best fit data.
    char[] noBestFitData = EmptyArray<Char>.Value;
    return noBestFitData;
}
// Best fit data for the bytes-to-Unicode direction. The base encoding has
// none, so the shared empty char array is returned.
internal virtual char[] GetBestFitBytesToUnicodeData()
{
    // Normally we don't have any best fit data.
    char[] noBestFitData = EmptyArray<Char>.Value;
    return noBestFitData;
}
// Always throws an ArgumentException for the "bytes" argument. The message
// names this encoding and the encoder fallback type, because the overflow can
// be caused by a user-implemented encoder fallback with a broken
// GetMaxCharCount.
internal void ThrowBytesOverflow()
{
    string message = Environment.GetResourceString("Argument_EncodingConversionOverflowBytes",
        EncodingName, EncoderFallback.GetType());
    throw new ArgumentException(message, "bytes");
}
// Handles an output-buffer overflow during encoding. Throws (after resetting
// the encoder's fallback buffer, if it has one) when there is no encoder,
// when the encoder is flagged to throw on overflow, or when nothing was
// encoded. Otherwise returns after clearing the encoder's must-flush state.
[System.Security.SecurityCritical]  // auto-generated
internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
{
    bool mustThrow = encoder == null || encoder.m_throwOnOverflow || nothingEncoded;
    if (mustThrow)
    {
        if (encoder != null && encoder.InternalHasFallbackBuffer)
        {
            encoder.FallbackBuffer.InternalReset();
        }

        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
        // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
        ThrowBytesOverflow();
    }

    // If we didn't throw, we are in convert and have to remember our flushing
    encoder.ClearMustFlush();
}
// Always throws an ArgumentException for the "chars" argument. The message
// names this encoding and the decoder fallback type, because the overflow can
// be caused by a user-implemented decoder fallback with a broken
// GetMaxCharCount.
internal void ThrowCharsOverflow()
{
    string message = Environment.GetResourceString("Argument_EncodingConversionOverflowChars",
        EncodingName, DecoderFallback.GetType());
    throw new ArgumentException(message, "chars");
}
// Handles an output-buffer overflow during decoding. Throws (after resetting
// the decoder's fallback buffer, if it has one) when there is no decoder,
// when the decoder is flagged to throw on overflow, or when nothing was
// decoded. Otherwise returns after clearing the decoder's must-flush state.
[System.Security.SecurityCritical]  // auto-generated
internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
{
    bool mustThrow = decoder == null || decoder.m_throwOnOverflow || nothingDecoded;
    if (mustThrow)
    {
        if (decoder != null && decoder.InternalHasFallbackBuffer)
        {
            decoder.FallbackBuffer.InternalReset();
        }

        // Special message to include fallback type in case fallback's GetMaxCharCount is broken
        // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
        ThrowCharsOverflow();
    }

    // If we didn't throw, we are in convert and have to remember our flushing
    decoder.ClearMustFlush();
}
// Encoder returned by Encoding.GetEncoder() by default. It keeps no conversion
// state of its own: every count/convert call is forwarded to the owning
// encoding and the 'flush' argument is ignored.
internal class DefaultEncoder : Encoder, ISerializable, IObjectReference
{
    // Encoding that all calls are forwarded to.
    private Encoding m_encoding;

    // Set only by the public constructor; stays false on instances created by
    // the serialization constructor, which GetRealObject then replaces.
    [NonSerialized] private bool m_hasInitializedEncoding;

    // Left-over char read from serialized data, passed on in GetRealObject.
    [NonSerialized] internal char charLeftOver;

    public DefaultEncoder(Encoding encoding)
    {
        m_hasInitializedEncoding = true;
        m_encoding = encoding;
    }

    // Constructor called by serialization, have to handle deserializing from Everett
    internal DefaultEncoder(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }
        Contract.EndContractBlock();

        // All we have is our encoding
        this.m_encoding = (Encoding)info.GetValue("encoding", typeof(Encoding));

        // The remaining values are optional: when reading them fails with a
        // SerializationException the fields keep their defaults.
        try
        {
            this.m_fallback = (EncoderFallback)info.GetValue("m_fallback", typeof(EncoderFallback));
            this.charLeftOver = (Char)info.GetValue("charLeftOver", typeof(Char));
        }
        catch (SerializationException)
        {
        }
    }

    // Just get it from GetEncoding
    [System.Security.SecurityCritical]  // auto-generated
    public Object GetRealObject(StreamingContext context)
    {
        // Because DefaultEncoder implements IObjectReference, the
        // deserialization fixup would keep asking the returned object (another
        // DefaultEncoder) for its real object until the maximum reference
        // depth is exceeded. Returning 'this' for an instance created through
        // the public constructor short-circuits that chain.
        if (m_hasInitializedEncoding)
        {
            return this;
        }

        // Deserialized placeholder: build the encoding's own encoder and
        // carry over whatever state was restored.
        Encoder realEncoder = m_encoding.GetEncoder();
        if (m_fallback != null)
        {
            realEncoder.m_fallback = m_fallback;
        }

        if (charLeftOver != (char) 0)
        {
            EncoderNLS nlsEncoder = realEncoder as EncoderNLS;
            if (nlsEncoder != null)
            {
                nlsEncoder.charLeftOver = charLeftOver;
            }
        }

        return realEncoder;
    }

#if FEATURE_SERIALIZATION
    // ISerializable implementation, get data for this object
    [System.Security.SecurityCritical]  // auto-generated_required
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }
        Contract.EndContractBlock();

        // All we have is our encoding
        info.AddValue("encoding", this.m_encoding);
    }
#endif

    // Returns the number of bytes the next call to GetBytes would produce for
    // the given range of characters. Forwarded to the encoding; 'flush' is
    // not used.
    public override int GetByteCount(char[] chars, int index, int count, bool flush)
    {
        int byteTotal = m_encoding.GetByteCount(chars, index, count);
        return byteTotal;
    }

    [System.Security.SecurityCritical]  // auto-generated
    [SuppressMessage("Microsoft.Contracts", "CC1055")]  // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetByteCount(char* chars, int count, bool flush)
    {
        int byteTotal = m_encoding.GetByteCount(chars, count);
        return byteTotal;
    }

    // Encodes charCount characters from chars, starting at charIndex, into
    // bytes starting at byteIndex. Forwarded to the encoding's GetBytes;
    // 'flush' is not used.
    //
    // To size the output, use GetByteCount for the exact number of bytes, or
    // the GetMaxByteCount method of the Encoding that produced this encoder
    // for the maximum number of bytes regardless of the character values.
    public override int GetBytes(char[] chars, int charIndex, int charCount,
                                 byte[] bytes, int byteIndex, bool flush)
    {
        int written = m_encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);
        return written;
    }

    [System.Security.SecurityCritical]  // auto-generated
    [SuppressMessage("Microsoft.Contracts", "CC1055")]  // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetBytes(char* chars, int charCount,
                                        byte* bytes, int byteCount, bool flush)
    {
        int written = m_encoding.GetBytes(chars, charCount, bytes, byteCount);
        return written;
    }
}
// Decoder returned by Encoding.GetDecoder() by default. It keeps no conversion
// state of its own: every count/convert call is forwarded to the owning
// encoding and the 'flush' argument is ignored.
internal class DefaultDecoder : Decoder, ISerializable, IObjectReference
{
    // Encoding that all calls are forwarded to.
    private Encoding m_encoding;

    // Set only by the public constructor; stays false on instances created by
    // the serialization constructor, which GetRealObject then replaces.
    private bool m_hasInitializedEncoding;

    public DefaultDecoder(Encoding encoding)
    {
        m_hasInitializedEncoding = true;
        m_encoding = encoding;
    }

    // Constructor called by serialization, have to handle deserializing from Everett
    internal DefaultDecoder(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }
        Contract.EndContractBlock();

        // All we have is our encoding
        this.m_encoding = (Encoding)info.GetValue("encoding", typeof(Encoding));

        // The fallback is optional: when reading it fails with a
        // SerializationException the field keeps its default.
        try
        {
            this.m_fallback = (DecoderFallback)info.GetValue("m_fallback", typeof(DecoderFallback));
        }
        catch (SerializationException)
        {
        }
    }

    // Just get it from GetEncoding
    [System.Security.SecurityCritical]  // auto-generated
    public Object GetRealObject(StreamingContext context)
    {
        // Because DefaultDecoder implements IObjectReference, the
        // deserialization fixup would keep asking the returned object (another
        // DefaultDecoder) for its real object until the maximum reference
        // depth is exceeded. Returning 'this' for an instance created through
        // the public constructor short-circuits that chain.
        if (m_hasInitializedEncoding)
        {
            return this;
        }

        // Deserialized placeholder: build the encoding's own decoder and
        // carry over the restored fallback, if any.
        Decoder realDecoder = m_encoding.GetDecoder();
        if (m_fallback != null)
        {
            realDecoder.m_fallback = m_fallback;
        }

        return realDecoder;
    }

#if FEATURE_SERIALIZATION
    // ISerializable implementation, get data for this object
    [System.Security.SecurityCritical]  // auto-generated_required
    void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
    {
        if (info == null)
        {
            throw new ArgumentNullException("info");
        }
        Contract.EndContractBlock();

        // All we have is our encoding
        info.AddValue("encoding", this.m_encoding);
    }
#endif

    // Returns the number of characters the next call to GetChars would
    // produce for the given range of bytes. Same as the flush overload with
    // flush = false.
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        const bool flush = false;
        return GetCharCount(bytes, index, count, flush);
    }

    // Forwarded to the encoding's GetCharCount; 'flush' is not used.
    public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
    {
        int charTotal = m_encoding.GetCharCount(bytes, index, count);
        return charTotal;
    }

    [System.Security.SecurityCritical]  // auto-generated
    [SuppressMessage("Microsoft.Contracts", "CC1055")]  // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
    {
        // By default just call the encoding version, no flush by default
        int charTotal = m_encoding.GetCharCount(bytes, count);
        return charTotal;
    }

    // Decodes byteCount bytes from bytes, starting at byteIndex, into chars
    // starting at charIndex. Same as the flush overload with flush = false.
    //
    // To size the output, use GetCharCount for the exact number of
    // characters, or the GetMaxCharCount method of the Encoding that produced
    // this decoder for the maximum number of characters regardless of the
    // byte values.
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                 char[] chars, int charIndex)
    {
        const bool flush = false;
        return GetChars(bytes, byteIndex, byteCount, chars, charIndex, flush);
    }

    // Forwarded to the encoding's GetChars; 'flush' is not used.
    public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                 char[] chars, int charIndex, bool flush)
    {
        int written = m_encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
        return written;
    }

    [System.Security.SecurityCritical]  // auto-generated
    [SuppressMessage("Microsoft.Contracts", "CC1055")]  // Skip extra error checking to avoid *potential* AppCompat problems.
    public unsafe override int GetChars(byte* bytes, int byteCount,
                                        char* chars, int charCount, bool flush)
    {
        // By default just call the encoding's version
        int written = m_encoding.GetChars(bytes, byteCount, chars, charCount);
        return written;
    }
}
1935 internal class EncodingCharBuffer
1940 unsafe char* charStart
;
1942 unsafe char* charEnd
;
1943 int charCountResult
= 0;
1947 unsafe byte* byteStart
;
1949 unsafe byte* byteEnd
;
1952 DecoderFallbackBuffer fallbackBuffer
;
1954 [System
.Security
.SecurityCritical
] // auto-generated
1955 internal unsafe EncodingCharBuffer(Encoding enc
, DecoderNLS decoder
, char* charStart
, int charCount
,
1956 byte* byteStart
, int byteCount
)
1959 this.decoder
= decoder
;
1961 this.chars
= charStart
;
1962 this.charStart
= charStart
;
1963 this.charEnd
= charStart
+ charCount
;
1965 this.byteStart
= byteStart
;
1966 this.bytes
= byteStart
;
1967 this.byteEnd
= byteStart
+ byteCount
;
1969 if (this.decoder
== null)
1970 this.fallbackBuffer
= enc
.DecoderFallback
.CreateFallbackBuffer();
1972 this.fallbackBuffer
= this.decoder
.FallbackBuffer
;
1974 // If we're getting chars or getting char count we don't expect to have
1975 // to remember fallbacks between calls (so it should be empty)
1976 Contract
.Assert(fallbackBuffer
.Remaining
== 0,
1977 "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
1978 fallbackBuffer
.InternalInitialize(bytes
, charEnd
);
1981 [System
.Security
.SecurityCritical
] // auto-generated
1982 internal unsafe bool AddChar(char ch
, int numBytes
)
1986 if (chars
>= charEnd
)
1989 bytes
-=numBytes
; // Didn't encode these bytes
1990 enc
.ThrowCharsOverflow(decoder
, bytes
<= byteStart
); // Throw?
1991 return false; // No throw, but no store either
// Adds one character that was decoded from a single byte; shorthand for
// AddChar(ch, 1).
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool AddChar(char ch)
{
    const int singleByte = 1;
    return AddChar(ch, singleByte);
}
// Adds a pair of characters decoded from numBytes bytes. When the output has
// no room for both, the byte position is moved back by numBytes and the
// overflow is reported through Encoding.ThrowCharsOverflow; if that call
// returns instead of throwing, false is returned and nothing is stored.
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
{
    // Need room for 2 chars
    bool roomForPair = chars < charEnd - 1;
    if (!roomForPair)
    {
        // Throw maybe
        bytes -= numBytes;                                      // Didn't encode these bytes
        enc.ThrowCharsOverflow(decoder, bytes <= byteStart);    // Throw?
        return false;                                           // No throw, but no store either
    }

    // Store the second char only when the first one was accepted.
    if (!AddChar(ch1, numBytes))
    {
        return false;
    }
    return AddChar(ch2, numBytes);
}
2021 [System
.Security
.SecurityCritical
] // auto-generated
2022 internal unsafe void AdjustBytes(int count
)
// True while the current byte position is still before the end of the input.
internal unsafe bool MoreData
{
    [System.Security.SecurityCritical]  // auto-generated
    get
    {
        bool beforeEnd = bytes < byteEnd;
        return beforeEnd;
    }
}
// Do we have 'count' more bytes? True when the current byte position is at
// or before (end of input - count).
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool EvenMoreData(int count)
{
    byte* lastValidStart = byteEnd - count;
    return bytes <= lastValidStart;
}
2043 // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
2044 // but we'll double check just to make sure.
2045 [System
.Security
.SecurityCritical
] // auto-generated
2046 internal unsafe byte GetNextByte()
2048 Contract
.Assert(bytes
< byteEnd
, "[EncodingCharBuffer.GetNextByte]Expected more date");
2049 if (bytes
>= byteEnd
)
// Distance, in bytes, from the start of the input to the current byte
// position.
internal unsafe int BytesUsed
{
    [System.Security.SecurityCritical]  // auto-generated
    get
    {
        long consumed = bytes - byteStart;
        return (int)consumed;
    }
}
// Runs the decoder fallback for a single byte by wrapping it in a one-element
// array and delegating to Fallback(byte[]).
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool Fallback(byte fallbackByte)
{
    // Build our buffer
    byte[] unknownBytes = new byte[1];
    unknownBytes[0] = fallbackByte;

    // Do the fallback and add the data.
    return Fallback(unknownBytes);
}
// Runs the decoder fallback for a two-byte sequence by wrapping the bytes, in
// order, in an array and delegating to Fallback(byte[]).
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool Fallback(byte byte1, byte byte2)
{
    // Build our buffer
    byte[] unknownBytes = new byte[2];
    unknownBytes[0] = byte1;
    unknownBytes[1] = byte2;

    // Do the fallback and add the data.
    return Fallback(unknownBytes);
}
// Runs the decoder fallback for a four-byte sequence by wrapping the bytes,
// in order, in an array and delegating to Fallback(byte[]).
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
{
    // Build our buffer
    byte[] unknownBytes = new byte[4];
    unknownBytes[0] = byte1;
    unknownBytes[1] = byte2;
    unknownBytes[2] = byte3;
    unknownBytes[3] = byte4;

    // Do the fallback and add the data.
    return Fallback(unknownBytes);
}
// Runs the fallback buffer over the given byte sequence.
//   byteBuffer - the bytes passed to fallbackBuffer.InternalFallback.
// Two InternalFallback calls are visible:
//   * one that receives `ref chars`; when it returns false the code rewinds bytes by
//     byteBuffer.Length, resets the fallback buffer, calls enc.ThrowCharsOverflow and
//     returns false; otherwise (chars - pTemp) is added to charCountResult;
//   * one whose int result is added directly to charCountResult.
// NOTE(review): the condition selecting between the two calls and the success return are
// not visible in this excerpt -- presumably "storing" vs. "counting only"; confirm.
2093 [System
.Security
.SecurityCritical
] // auto-generated
2094 internal unsafe bool Fallback(byte[] byteBuffer
)
2096 // Do the fallback and add the data.
// Remember the value of chars before the call so the difference can be measured afterwards.
2099 char* pTemp
= chars
;
2100 if (fallbackBuffer
.InternalFallback(byteBuffer
, bytes
, ref chars
) == false)
2103 bytes
-= byteBuffer
.Length
; // Didn't use how many ever bytes we're falling back
2104 fallbackBuffer
.InternalReset(); // We didn't use this fallback.
// The second argument is true when chars still equals charStart.
2105 enc
.ThrowCharsOverflow(decoder
, chars
== charStart
); // Throw?
2106 return false; // No throw, but no store either
// Add the difference between chars after the call and its value before the call.
2108 charCountResult
+= unchecked((int)(chars
- pTemp
));
// This overload of InternalFallback takes no `ref chars`; its int result is accumulated as-is.
2112 charCountResult
+= fallbackBuffer
.InternalFallback(byteBuffer
, bytes
);
// Exposes the running total kept in charCountResult.
internal unsafe int Count
{
    get
    {
        int total = charCountResult;
        return total;
    }
}
// Helper that tracks an output byte range and an input char range.
// NOTE(review): some fields used by the members of this class (bytes, chars, enc, encoder)
// are not declared in the visible part of this excerpt -- confirm against the full file.
2127 internal class EncodingByteBuffer
// First position of the output range; the constructor sets it to inByteStart.
2132 unsafe byte* byteStart
;
// End of the output range; the constructor sets it to inByteStart + inByteCount.
2134 unsafe byte* byteEnd
;
// First position of the input range; the constructor sets it to inCharStart.
2138 unsafe char* charStart
;
// End of the input range; the constructor sets it to inCharStart + inCharCount.
2140 unsafe char* charEnd
;
// Running total returned by the Count property.
2141 int byteCountResult
= 0;
// Assigned in the constructor, either from enc.EncoderFallback.CreateFallbackBuffer()
// or from encoder.FallbackBuffer.
2144 internal EncoderFallbackBuffer fallbackBuffer
;
// Sets up the buffer over the supplied output bytes and input chars.
//   inEncoding                - stored in enc; its EncoderFallback is used when no encoder is given.
//   inEncoder                 - stored in encoder; tested against null below.
//   inByteStart / inByteCount - output start pointer and length (byteEnd = inByteStart + inByteCount).
//   inCharStart / inCharCount - input start pointer and length (charEnd = inCharStart + inCharCount).
// Throws ArgumentException (resource "Argument_EncoderFallbackNotEmpty") when
// encoder.m_throwOnOverflow and encoder.InternalHasFallbackBuffer are both set and the
// fallback buffer reports Remaining > 0.
2146 [System
.Security
.SecurityCritical
] // auto-generated
2147 internal unsafe EncodingByteBuffer(Encoding inEncoding
, EncoderNLS inEncoder
,
2148 byte* inByteStart
, int inByteCount
, char* inCharStart
, int inCharCount
)
2150 this.enc
= inEncoding
;
2151 this.encoder
= inEncoder
;
// Input range: chars starts at the same position as charStart.
2153 this.charStart
= inCharStart
;
2154 this.chars
= inCharStart
;
2155 this.charEnd
= inCharStart
+ inCharCount
;
// Output range: bytes starts at the same position as byteStart.
2157 this.bytes
= inByteStart
;
2158 this.byteStart
= inByteStart
;
2159 this.byteEnd
= inByteStart
+ inByteCount
;
// With no encoder, create a new fallback buffer from the encoding's EncoderFallback.
2161 if (this.encoder
== null)
2162 this.fallbackBuffer
= enc
.EncoderFallback
.CreateFallbackBuffer();
// NOTE(review): the assignment below dereferences encoder, so it presumably belongs to the
// non-null branch of the test above; the branch keyword is not visible in this excerpt -- confirm.
2165 this.fallbackBuffer
= this.encoder
.FallbackBuffer
;
2166 // If we're not converting we must not have data in our fallback buffer
2167 if (encoder
.m_throwOnOverflow
&& encoder
.InternalHasFallbackBuffer
&&
2168 this.fallbackBuffer
.Remaining
> 0)
2169 throw new ArgumentException(Environment
.GetResourceString("Argument_EncoderFallbackNotEmpty",
2170 encoder
.Encoding
.EncodingName
, encoder
.Fallback
.GetType()));
// Give the fallback buffer the input range and the encoder; the last argument is true
// when an output pointer was supplied (bytes != null).
2172 fallbackBuffer
.InternalInitialize(chars
, charEnd
, encoder
, bytes
!= null);
// Checks whether the output range has room before a byte is added.
//   b                 - NOTE(review): presumably the byte to store; the store itself is not
//                       visible in this excerpt.
//   moreBytesExpected - number of further bytes the caller still intends to add; debug-asserted
//                       to be non-negative.
// Returns false when bytes >= byteEnd - moreBytesExpected, after MovePrevious(true) has run.
// NOTE(review): the success path (store / count / return value) is not visible here -- confirm.
2175 [System
.Security
.SecurityCritical
] // auto-generated
2176 internal unsafe bool AddByte(byte b
, int moreBytesExpected
)
2178 Contract
.Assert(moreBytesExpected
>= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");
// Not enough room left for this byte plus the moreBytesExpected that would follow it.
2181 if (bytes
>= byteEnd
- moreBytesExpected
)
2183 // Throw maybe. Check which buffer to back up (only matters if Converting)
2184 this.MovePrevious(true); // Throw if necessary
2185 return false; // No throw, but no store either
// Single-byte overload: forwards to AddByte(byte, int) with no further bytes expected.
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool AddByte(byte b1)
{
    bool stored = AddByte(b1, 0);
    return stored;
}
// Two-byte overload: forwards to AddByte(byte, byte, int) with no further bytes expected.
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool AddByte(byte b1, byte b2)
{
    bool stored = AddByte(b1, b2, 0);
    return stored;
}
// Adds b1 then b2. b1 is added with (1 + moreBytesExpected) bytes still expected,
// b2 with moreBytesExpected. Stops at, and returns false on, the first failed add.
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
{
    if (!AddByte(b1, 1 + moreBytesExpected))
    {
        return false;
    }

    return AddByte(b2, moreBytesExpected);
}
// Three-byte overload: forwards to AddByte(byte, byte, byte, int) with no further bytes expected.
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool AddByte(byte b1, byte b2, byte b3)
{
    // An int-typed local keeps overload resolution on the (byte, byte, byte, int) form.
    int noMoreBytes = 0;
    return AddByte(b1, b2, b3, noMoreBytes);
}
// Adds b1, b2 and b3 in order. Each add is told how many bytes are still to come:
// (2 + moreBytesExpected), (1 + moreBytesExpected), then moreBytesExpected.
// Stops at, and returns false on, the first failed add.
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
{
    if (!AddByte(b1, 2 + moreBytesExpected))
    {
        return false;
    }

    if (!AddByte(b2, 1 + moreBytesExpected))
    {
        return false;
    }

    return AddByte(b3, moreBytesExpected);
}
// Four-byte overload; the visible code starts by adding b1 with 3 more bytes expected.
// NOTE(review): the remainder of the && chain is cut off in this excerpt. Presumably b2, b3
// and b4 follow with 2, 1 and 0 bytes expected, mirroring the three-byte overload -- confirm.
2226 [System
.Security
.SecurityCritical
] // auto-generated
2227 internal unsafe bool AddByte(byte b1
, byte b2
, byte b3
, byte b4
)
2229 return (AddByte(b1
, 3) &&
// Backs up over the most recently consumed input and reports output overflow to the encoding.
//   bThrow - referenced in the debug assert below.
// Visible steps: if the fallback buffer is falling back, step it back one position; step
// chars back one position if it is past charStart; call enc.ThrowBytesOverflow.
// NOTE(review): the branch keywords joining these steps, and any test of bThrow guarding the
// ThrowBytesOverflow call, are not visible in this excerpt -- confirm against the full file.
2235 [System
.Security
.SecurityCritical
] // auto-generated
2236 internal unsafe void MovePrevious(bool bThrow
)
2238 if (fallbackBuffer
.bFallingBack
)
2239 fallbackBuffer
.MovePrevious(); // don't use last fallback
// Debug-only: either some input has been consumed, or we are about to throw with no output written.
2242 Contract
.Assert(chars
> charStart
||
2243 ((bThrow
== true) && (bytes
== byteStart
)),
2244 "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
2245 if (chars
> charStart
)
2246 chars
--; // don't use last char
// The second argument is true when bytes still equals byteStart.
2250 enc
.ThrowBytesOverflow(encoder
, bytes
== byteStart
); // Throw? (and reset fallback if not converting)
// Passes charFallback to the fallback buffer together with the current input
// position (by reference) and returns the fallback buffer's result.
[System.Security.SecurityCritical]  // auto-generated
internal unsafe bool Fallback(char charFallback)
{
    bool handled = fallbackBuffer.InternalFallback(charFallback, ref chars);
    return handled;
}
// True while the fallback buffer still reports remaining data, or while the
// input position has not yet reached charEnd.
internal unsafe bool MoreData
{
    [System.Security.SecurityCritical]  // auto-generated
    get
    {
        // Pending fallback data counts as more data, whatever the chars position is.
        if (fallbackBuffer.Remaining > 0)
        {
            return true;
        }

        return chars < charEnd;
    }
}
// Produces the next input char, asking the fallback buffer first.
// Visible steps: take fallbackBuffer.InternalGetNextChar(); read *chars and advance chars
// when chars is before charEnd.
// NOTE(review): the test that decides when the fallback result is discarded in favour of the
// chars buffer, and the return statement, are not visible in this excerpt -- confirm.
2270 [System
.Security
.SecurityCritical
] // auto-generated
2271 internal unsafe char GetNextChar()
2273 // See if there's something in our fallback buffer
2274 char cReturn
= fallbackBuffer
.InternalGetNextChar();
2276 // Nothing in the fallback buffer, return our normal data.
// Only read from the input while it has not reached charEnd.
2279 if (chars
< charEnd
)
2280 cReturn
= *(chars
++);
// Distance, in chars, between the current input position and charStart.
internal unsafe int CharsUsed
{
    [System.Security.SecurityCritical]  // auto-generated
    get
    {
        int consumed = (int)(chars - charStart);
        return consumed;
    }
}
2295 internal unsafe int Count
2299 return byteCountResult
;