/*
 * UnicodeEncoding.cs - Implementation of the
 *		"System.Text.UnicodeEncoding" class.
 *
 * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
 * Copyright (C) 2003, 2004 Novell, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
namespace System.Text
{

// Encoding that converts between characters and their two-byte UTF-16
// form, in either little-endian or big-endian byte order.  Every char
// is written as exactly two bytes; surrogates are not interpreted.
[MonoTODO ("Fix serialization compatibility with MS.NET")]
public class UnicodeEncoding : Encoding
{
	// Magic numbers used by Windows for Unicode.
	internal const int UNICODE_CODE_PAGE     = 1200;
	internal const int BIG_UNICODE_CODE_PAGE = 1201;

	// Size of characters in this encoding.
	public const int CharSize = 2;

	// Internal state.
	// bigEndian: write/read the high-order byte of each char first.
	// byteOrderMark: whether GetPreamble reports a byte order mark.
	private bool bigEndian;
	private bool byteOrderMark;

	// Construct a little-endian encoding that emits a byte order mark.
	public UnicodeEncoding () : this (false, true)
	{
		// All state is set up by the chained constructor.
	}

	// Construct an encoding with an explicit byte order and an explicit
	// choice of whether GetPreamble returns a byte order mark.
	public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
		: base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
	{
		this.bigEndian = bigEndian;
		this.byteOrderMark = byteOrderMark;

		// Descriptive name fields inherited from Encoding (declared
		// outside this file).
		if (bigEndian) {
			body_name = "unicodeFFFE";
			encoding_name = "Unicode (Big-Endian)";
			header_name = "unicodeFFFE";
			is_browser_save = false;
			web_name = "utf-16be";
		} else {
			body_name = "utf-16";
			encoding_name = "Unicode";
			header_name = "utf-16";
			is_browser_save = true;
			web_name = "utf-16";
		}

		// Windows reports the same code page number for
		// both the little-endian and big-endian forms.
		windows_code_page = UNICODE_CODE_PAGE;
	}

	// Get the number of bytes needed to encode a character buffer.
	// Returns two bytes for each of the "count" characters.
	// Throws ArgumentNullException if "chars" is null and
	// ArgumentOutOfRangeException if "index"/"count" do not describe
	// a range inside "chars".
	public override int GetByteCount (char[] chars, int index, int count)
	{
		if (chars == null) {
			throw new ArgumentNullException ("chars");
		}
		if (index < 0 || index > chars.Length) {
			throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
		}
		if (count < 0 || count > (chars.Length - index)) {
			throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
		}
		return count * 2;
	}

	// Convenience wrappers for "GetByteCount".
	// Returns two bytes for every character of "s"; throws
	// ArgumentNullException if "s" is null.
	public override int GetByteCount (String s)
	{
		if (s == null) {
			throw new ArgumentNullException ("s");
		}
		return s.Length * 2;
	}

	// Get the bytes that result from encoding a character buffer.
	// Encodes "charCount" characters starting at chars[charIndex] into
	// "bytes" starting at bytes[byteIndex], and returns the number of
	// bytes written.  No byte order mark is written.
	// Throws ArgumentNullException for a null array,
	// ArgumentOutOfRangeException for an invalid index or count, and
	// ArgumentException if "bytes" is too small for the output.
	public override int GetBytes (char[] chars, int charIndex, int charCount,
								 byte[] bytes, int byteIndex)
	{
		if (chars == null) {
			throw new ArgumentNullException ("chars");
		}
		if (bytes == null) {
			throw new ArgumentNullException ("bytes");
		}
		if (charIndex < 0 || charIndex > chars.Length) {
			throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
		}
		if (charCount < 0 || charCount > (chars.Length - charIndex)) {
			throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
		}
		if (byteIndex < 0 || byteIndex > bytes.Length) {
			throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
		}
		// Compare in units of chars rather than computing charCount * 2,
		// so that a very large charCount cannot overflow the check.
		if (((bytes.Length - byteIndex) / 2) < charCount) {
			throw new ArgumentException (_("Arg_InsufficientSpace"));
		}

		int posn = byteIndex;
		char ch;
		if (bigEndian) {
			// High-order byte first.
			while (charCount-- > 0) {
				ch = chars[charIndex++];
				bytes[posn++] = (byte)(ch >> 8);
				bytes[posn++] = (byte)ch;
			}
		} else {
			// Low-order byte first.
			while (charCount-- > 0) {
				ch = chars[charIndex++];
				bytes[posn++] = (byte)ch;
				bytes[posn++] = (byte)(ch >> 8);
			}
		}
		return posn - byteIndex;
	}

	// Convenience wrappers for "GetBytes".
	// Same contract as the char[] overload, reading the characters
	// from the string "s" instead of an array.
	public override int GetBytes (String s, int charIndex, int charCount,
								 byte[] bytes, int byteIndex)
	{
		if (s == null) {
			throw new ArgumentNullException ("s");
		}
		if (bytes == null) {
			throw new ArgumentNullException ("bytes");
		}
		if (charIndex < 0 || charIndex > s.Length) {
			throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
		}
		if (charCount < 0 || charCount > (s.Length - charIndex)) {
			throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
		}
		if (byteIndex < 0 || byteIndex > bytes.Length) {
			throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
		}
		// Compare in units of chars rather than computing charCount * 2,
		// so that a very large charCount cannot overflow the check.
		if (((bytes.Length - byteIndex) / 2) < charCount) {
			throw new ArgumentException (_("Arg_InsufficientSpace"));
		}

		int posn = byteIndex;
		char ch;
		if (bigEndian) {
			// High-order byte first.
			while (charCount-- > 0) {
				ch = s[charIndex++];
				bytes[posn++] = (byte)(ch >> 8);
				bytes[posn++] = (byte)ch;
			}
		} else {
			// Low-order byte first.
			while (charCount-- > 0) {
				ch = s[charIndex++];
				bytes[posn++] = (byte)ch;
				bytes[posn++] = (byte)(ch >> 8);
			}
		}
		return posn - byteIndex;
	}

	// Encode a whole string; simply defers to the base implementation.
	public override byte [] GetBytes (String s)
	{
		return base.GetBytes (s);
	}

	// Get the number of characters needed to decode a byte buffer.
	// The result matches what GetChars produces for the same range:
	// a leading byte order mark (FE FF or FF FE) is consumed without
	// producing a character, and a trailing odd byte is ignored.
	// Throws ArgumentNullException if "bytes" is null and
	// ArgumentOutOfRangeException for an invalid index or count.
	public override int GetCharCount (byte[] bytes, int index, int count)
	{
		if (bytes == null) {
			throw new ArgumentNullException ("bytes");
		}
		if (index < 0 || index > bytes.Length) {
			throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
		}
		if (count < 0 || count > (bytes.Length - index)) {
			throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
		}
		if (count >= 2) {
			if ((bytes[index] == (byte)0xFE && bytes[index + 1] == (byte)0xFF) ||
			    (bytes[index] == (byte)0xFF && bytes[index + 1] == (byte)0xFE)) {
				// GetChars skips both bytes of the mark, so both must be
				// discounted here; subtracting only one would over-count
				// by one character whenever "count" is odd.
				return ((count - 2) / 2);
			}
		}
		return count / 2;
	}

	// Get the characters that result from decoding a byte buffer.
	// Decodes "byteCount" bytes starting at bytes[byteIndex] into
	// "chars" starting at chars[charIndex] and returns the number of
	// characters written.  If the range starts with a byte order mark,
	// the mark selects the byte order for this call and is not copied
	// to the output; otherwise the configured byte order is used.  A
	// trailing odd byte is ignored.
	// Throws ArgumentNullException for a null array,
	// ArgumentOutOfRangeException for an invalid index or count, and
	// ArgumentException if "chars" is too small for the output.
	public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
								 char[] chars, int charIndex)
	{
		if (bytes == null) {
			throw new ArgumentNullException ("bytes");
		}
		if (chars == null) {
			throw new ArgumentNullException ("chars");
		}
		if (byteIndex < 0 || byteIndex > bytes.Length) {
			throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
		}
		if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
			throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
		}
		if (charIndex < 0 || charIndex > chars.Length) {
			throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
		}

		// Determine the byte order in the incoming buffer.
		bool isBigEndian;
		if (byteCount >= 2) {
			if (bytes[byteIndex] == (byte)0xFE && bytes[byteIndex + 1] == (byte)0xFF) {
				// Big-endian mark: consume it.
				isBigEndian = true;
				byteCount -= 2;
				byteIndex += 2;
			} else if (bytes[byteIndex] == (byte)0xFF && bytes[byteIndex + 1] == (byte)0xFE) {
				// Little-endian mark: consume it.
				isBigEndian = false;
				byteCount -= 2;
				byteIndex += 2;
			} else {
				isBigEndian = bigEndian;
			}
		} else {
			isBigEndian = bigEndian;
		}

		// Validate that we have sufficient space in "chars".
		if ((chars.Length - charIndex) < (byteCount / 2)) {
			throw new ArgumentException (_("Arg_InsufficientSpace"));
		}

		// Convert the characters.
		int posn = charIndex;
		if (isBigEndian) {
			while (byteCount >= 2) {
				chars[posn++] =
					((char)((((int)(bytes[byteIndex])) << 8) |
							 ((int)(bytes[byteIndex + 1]))));
				byteIndex += 2;
				byteCount -= 2;
			}
		} else {
			while (byteCount >= 2) {
				chars[posn++] =
					((char)((((int)(bytes[byteIndex + 1])) << 8) |
							 ((int)(bytes[byteIndex]))));
				byteIndex += 2;
				byteCount -= 2;
			}
		}
		return posn - charIndex;
	}

	// Get the maximum number of bytes needed to encode a
	// specified number of characters.
	// Throws ArgumentOutOfRangeException if "charCount" is negative.
	public override int GetMaxByteCount (int charCount)
	{
		if (charCount < 0) {
			throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
		}
		return charCount * 2;
	}

	// Get the maximum number of characters needed to decode a
	// specified number of bytes.
	// Throws ArgumentOutOfRangeException if "byteCount" is negative.
	public override int GetMaxCharCount (int byteCount)
	{
		if (byteCount < 0) {
			throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
		}
		return byteCount / 2;
	}

	// Get a Unicode-specific decoder that is attached to this instance.
	// The decoder starts out in this encoding's byte order and carries
	// an odd trailing byte over from one call to the next.
	public override Decoder GetDecoder ()
	{
		return new UnicodeDecoder (bigEndian);
	}

	// Get the Unicode preamble.
	// Returns the two-byte byte order mark for this encoding's byte
	// order, or an empty array if the encoding was constructed with
	// byteOrderMark == false.
	public override byte[] GetPreamble ()
	{
		if (byteOrderMark) {
			byte[] preamble = new byte[2];
			if (bigEndian) {
				preamble[0] = (byte)0xFE;
				preamble[1] = (byte)0xFF;
			} else {
				preamble[0] = (byte)0xFF;
				preamble[1] = (byte)0xFE;
			}
			return preamble;
		} else {
			return new byte [0];
		}
	}

	// Determine if this object is equal to another.
	// Two instances are equal when "value" is a UnicodeEncoding with
	// the same code page, byte order and byte order mark setting.
	public override bool Equals (Object value)
	{
		UnicodeEncoding enc = (value as UnicodeEncoding);
		if (enc != null) {
			return (codePage == enc.codePage &&
					bigEndian == enc.bigEndian &&
					byteOrderMark == enc.byteOrderMark);
		} else {
			// null, or not a UnicodeEncoding at all.
			return false;
		}
	}

	// Get the hash code for this object.
	public override int GetHashCode ()
	{
		return base.GetHashCode ();
	}

	// Unicode decoder implementation.
	// Stateful: remembers the current byte order and any single byte
	// left over when a call ended in the middle of a character.
	private sealed class UnicodeDecoder : Decoder
	{
		private bool bigEndian;

		// The pending first byte of a split character, or -1 if none.
		private int leftOverByte;

		// Constructor.
		public UnicodeDecoder (bool bigEndian)
		{
			this.bigEndian = bigEndian;
			leftOverByte = -1;
		}

		// Override inherited methods.

		// Get the number of characters the next GetChars call can
		// produce for this byte range, counting a pending left-over
		// byte as one extra input byte.
		public override int GetCharCount (byte[] bytes, int index, int count)
		{
			if (bytes == null) {
				throw new ArgumentNullException ("bytes");
			}
			if (index < 0 || index > bytes.Length) {
				throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
			}
			if (count < 0 || count > (bytes.Length - index)) {
				throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
			}
			if (leftOverByte != -1) {
				return (count + 1) / 2;
			} else {
				return count / 2;
			}
		}

		// Decode "byteCount" bytes starting at bytes[byteIndex] into
		// "chars" starting at chars[charIndex]; returns the number of
		// characters written.  Byte order marks are not copied to the
		// output: U+FEFF is dropped, and U+FFFE (a byte-swapped mark)
		// is dropped and reverses the byte order for the data that
		// follows.  A trailing odd byte is kept for the next call.
		// Throws ArgumentNullException for a null array,
		// ArgumentOutOfRangeException for an invalid index or count,
		// and ArgumentException if "chars" runs out of space.
		public override int GetChars (byte[] bytes, int byteIndex,
									 int byteCount, char[] chars,
									 int charIndex)
		{
			if (bytes == null) {
				throw new ArgumentNullException ("bytes");
			}
			if (chars == null) {
				throw new ArgumentNullException ("chars");
			}
			if (byteIndex < 0 || byteIndex > bytes.Length) {
				throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
			}
			if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
				throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
			}
			if (charIndex < 0 || charIndex > chars.Length) {
				throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
			}

			// Convert the characters.  The decoder state is copied into
			// locals and written back only once the loop completes.
			int posn = charIndex;
			bool isBigEndian = bigEndian;
			int leftOver = leftOverByte;
			int length = chars.Length;
			char ch;
			while (byteCount > 0) {
				if (leftOver != -1) {
					// Finish the character begun by the pending byte.
					if (isBigEndian) {
						ch = ((char)((leftOver << 8) | ((int)(bytes[byteIndex]))));
					} else {
						ch = ((char)(leftOver |
									 (((int)(bytes[byteIndex])) << 8)));
					}
					leftOver = -1;
					++byteIndex;
					--byteCount;
				} else if (byteCount > 1) {
					// A complete two-byte character is available.
					if (isBigEndian) {
						ch = ((char)((((int)(bytes[byteIndex])) << 8) |
									 ((int)(bytes[byteIndex + 1]))));
					} else {
						ch = ((char)((((int)(bytes[byteIndex + 1])) << 8) |
									 ((int)(bytes[byteIndex]))));
					}
					byteIndex += 2;
					byteCount -= 2;
				} else {
					// Only half a character remains: save it for later.
					leftOver = (int)(bytes[byteIndex]);
					break;
				}
				if (ch == '\uFFFE') {
					// Switch byte orders.  The working copy must be the
					// one flipped: it drives the rest of this loop and is
					// what gets stored back into the decoder below.
					isBigEndian = !isBigEndian;
				} else if (ch != '\uFEFF') {
					// Ordinary character.
					if (posn < length) {
						chars[posn++] = ch;
					} else {
						throw new ArgumentException (_("Arg_InsufficientSpace"));
					}
				}
			}
			leftOverByte = leftOver;
			bigEndian = isBigEndian;

			// Finished - return the converted length.
			return posn - charIndex;
		}

	} // class UnicodeDecoder

}; // class UnicodeEncoding

}; // namespace System.Text