5 // Alan Tam Siu Lung (Tam@SiuLung.com)
6 // Dick Porter (dick@ximian.com)
9 // This is actually EUC-CN, which is why 0x80 gets added and subtracted when
19 internal class CP936
: Encoding
21 // Magic number used by Windows for the GB2312 code page.
22 private const int GB2312_CODE_PAGE
= 936;
24 private Gb2312Convert convert
;
27 public CP936() : base(GB2312_CODE_PAGE
) {
28 convert
= Gb2312Convert
.Convert
;
31 // Get the number of bytes needed to encode a character buffer.
32 public override int GetByteCount(char[] chars
, int index
,
36 throw new ArgumentNullException("chars");
38 if (index
< 0 || index
> chars
.Length
) {
39 throw new ArgumentOutOfRangeException("index", Strings
.GetString("ArgRange_Array"));
41 if (count
< 0 || index
+ count
> chars
.Length
) {
42 throw new ArgumentOutOfRangeException("count", Strings
.GetString("ArgRange_Array"));
45 byte[] buffer
= new byte[count
* 2];
46 return GetBytes(chars
, index
, count
, buffer
, 0);
49 // Get the bytes that result from encoding a character buffer.
50 public override int GetBytes(char[] chars
, int charIndex
,
51 int charCount
, byte[] bytes
,
55 throw new ArgumentNullException("chars");
58 throw new ArgumentNullException("bytes");
60 if (charIndex
< 0 || charIndex
> chars
.Length
) {
61 throw new ArgumentOutOfRangeException("charIndex", Strings
.GetString("ArgRange_Array"));
63 if (charCount
< 0 || charIndex
+ charCount
> chars
.Length
) {
64 throw new ArgumentOutOfRangeException("charCount", Strings
.GetString("ArgRange_Array"));
66 if (byteIndex
< 0 || byteIndex
> bytes
.Length
) {
67 throw new ArgumentOutOfRangeException("byteIndex", Strings
.GetString("ArgRange_Array"));
71 int byteLength
= bytes
.Length
;
74 while(charCount
-- > 0) {
75 ch
= chars
[charIndex
++];
77 if (posn
>= byteLength
) {
// ArgumentException takes (message, paramName): the localized
// text is the message and "bytes" names the offending argument.
throw new ArgumentException (Strings.GetString ("Arg_InsufficientSpace"), "bytes");
81 if (ch
<= 0x80 || ch
== 0xFF) {
82 // Character maps to itself
83 bytes
[posn
++] = (byte)ch
;
87 byte byte1
=0, byte2
=0;
90 if (ch
>= 0xa4 && ch
<= 0x101) {
91 tablepos
= (ch
- 0xa4) * 2;
92 byte1
= convert
.gb2312_from_unicode1
[tablepos
];
93 byte2
= convert
.gb2312_from_unicode1
[tablepos
+ 1];
94 } else if (ch
>= 0x113 && ch
<= 0x2c9) {
157 } else if (ch
>= 0x391 && ch
<= 0x3c9) {
158 tablepos
= (ch
- 0x391) * 2;
159 byte1
= convert
.gb2312_from_unicode2
[tablepos
];
160 byte2
= convert
.gb2312_from_unicode2
[tablepos
+ 1];
161 } else if (ch
>= 0x401 && ch
<= 0x451) {
162 tablepos
= (ch
- 0x401) * 2;
163 byte1
= convert
.gb2312_from_unicode3
[tablepos
];
164 byte2
= convert
.gb2312_from_unicode3
[tablepos
+ 1];
165 } else if (ch
>= 0x2015 && ch
<= 0x203b) {
166 tablepos
= (ch
- 0x2015) * 2;
167 byte1
= convert
.gb2312_from_unicode4
[tablepos
];
168 byte2
= convert
.gb2312_from_unicode4
[tablepos
+ 1];
169 } else if (ch
>= 0x2103 && ch
<= 0x22a5) {
170 tablepos
= (ch
- 0x2103) * 2;
171 byte1
= convert
.gb2312_from_unicode5
[tablepos
];
172 byte2
= convert
.gb2312_from_unicode5
[tablepos
+ 1];
173 } else if (ch
== 0x2312) {
176 } else if (ch
>= 0x2460 && ch
<= 0x249b) {
177 tablepos
= (ch
- 0x2460) * 2;
178 byte1
= convert
.gb2312_from_unicode6
[tablepos
];
179 byte2
= convert
.gb2312_from_unicode6
[tablepos
+ 1];
180 } else if (ch
>= 0x2500 && ch
<= 0x254b) {
182 byte2
= (byte)(0x24 + (ch
% 0x100));
183 } else if (ch
>= 0x25a0 && ch
<= 0x2642) {
238 } else if (ch
>= 0x3000 && ch
<= 0x3129) {
239 tablepos
= (ch
- 0x3000) * 2;
240 byte1
= convert
.gb2312_from_unicode7
[tablepos
];
241 byte2
= convert
.gb2312_from_unicode7
[tablepos
+ 1];
242 } else if (ch
>= 0x3220 && ch
<= 0x3229) {
244 byte2
= (byte)(0x65 + (ch
- 0x3220));
245 } else if (ch
>= 0x4e00 && ch
<= 0x9fa0) {
246 tablepos
= (ch
- 0x4e00) * 2;
247 byte1
= convert
.gb2312_from_unicode8
[tablepos
];
248 byte2
= convert
.gb2312_from_unicode8
[tablepos
+ 1];
249 } else if (ch
>= 0xff01 && ch
<= 0xff5e) {
250 tablepos
= (ch
- 0xff01) * 2;
251 byte1
= convert
.gb2312_from_unicode9
[tablepos
];
252 byte2
= convert
.gb2312_from_unicode9
[tablepos
+ 1];
253 } else if (ch
== 0xffe0) {
256 } else if (ch
== 0xffe1) {
259 } else if (ch
== 0xffe3) {
262 } else if (ch
== 0xffe5) {
267 if (byte1
== 0 || byte2
== 0) {
268 bytes
[posn
++] = (byte)'?';
269 } else if ((posn
+ 1) >= byteLength
) {
// A double-byte character needs two free slots in the output.
// ArgumentException takes (message, paramName), so the localized
// text goes first and "bytes" names the offending argument.
throw new ArgumentException (Strings.GetString ("Arg_InsufficientSpace"), "bytes");
272 bytes
[posn
++] = (byte)(byte1
+ 0x80);
273 bytes
[posn
++] = (byte)(byte2
+ 0x80);
277 return(posn
- byteIndex
);
280 // Get the number of characters needed to decode a byte buffer.
281 public override int GetCharCount(byte[] bytes
, int index
,
285 throw new ArgumentNullException("bytes");
287 if (index
< 0 || index
> bytes
.Length
) {
288 throw new ArgumentOutOfRangeException("index", Strings
.GetString("ArgRange_Array"));
290 if (count
< 0 || index
+ count
> bytes
.Length
) {
291 throw new ArgumentOutOfRangeException("count", Strings
.GetString("ArgRange_Array"));
294 char[] buffer
= new char[count
];
295 return GetChars(bytes
, index
, count
, buffer
, 0);
298 // Get the characters that result from decoding a byte buffer.
299 public override int GetChars(byte[] bytes
, int byteIndex
,
300 int byteCount
, char[] chars
,
304 throw new ArgumentNullException("bytes");
307 throw new ArgumentNullException("chars");
309 if (byteIndex
< 0 || byteIndex
> bytes
.Length
) {
310 throw new ArgumentOutOfRangeException("byteIndex", Strings
.GetString("ArgRange_Array"));
312 if (byteCount
< 0 || byteIndex
+ byteCount
> bytes
.Length
) {
313 throw new ArgumentOutOfRangeException("byteCount", Strings
.GetString("ArgRange_Array"));
315 if (charIndex
< 0 || charIndex
> chars
.Length
) {
316 throw new ArgumentOutOfRangeException("charIndex", Strings
.GetString("ArgRange_Array"));
319 int charLength
= chars
.Length
;
320 int posn
= charIndex
;
322 int byte1
, byte2
, value;
323 byte[] table
= convert
.gb2312_to_unicode
;
325 while(byteCount
> 0) {
326 byte1
= bytes
[byteIndex
++];
330 if (posn
>= charLength
) {
// ArgumentException takes (message, paramName): the localized
// text is the message and "chars" names the offending argument.
throw new ArgumentException (Strings.GetString ("Arg_InsufficientSpace"), "chars");
335 chars
[posn
++] = (char)byte1
;
339 if ((byte1
<= 0xa0 &&
344 } else if (byteCount
== 0) {
345 // Missing second byte
348 byte2
= bytes
[byteIndex
++];
352 (byte1
- 0x80) <= 0x20 ||
353 (byte1
- 0x80) > 0x77 ||
354 (byte2
- 0x80) <= 0x20 ||
355 (byte2
- 0x80) >= 0x7f) {
358 int idx
= ((byte1
- 0xa1) * 94 + (byte2
- 0xa1)) * 2;
362 value = (int)(table
[idx
] | (table
[idx
+ 1] << 8));
368 chars
[posn
++] = (char)value;
374 return(posn
- charIndex
);
377 // Get the maximum number of bytes needed to encode a
378 // specified number of characters.
379 public override int GetMaxByteCount(int charCount
)
382 throw new ArgumentOutOfRangeException("charCount", Strings
.GetString("ArgRange_NonNegative"));
385 return(charCount
* 2);
388 // Get the maximum number of characters needed to decode a
389 // specified number of bytes.
390 public override int GetMaxCharCount(int byteCount
)
393 throw new ArgumentOutOfRangeException("byteCount", Strings
.GetString("ArgRange_NonNegative"));
398 // Get a decoder that handles a rolling GB2312 state.
399 public override Decoder
GetDecoder()
401 return(new CP936Decoder(convert
));
405 // Get the mail body name for this encoding.
406 public override String BodyName
408 get { return("gb2312"); }
411 // Get the human-readable name for this encoding.
412 public override String EncodingName
414 get { return("Chinese Simplified (GB2312)"); }
417 // Get the mail agent header name for this encoding.
418 public override String HeaderName
420 get { return("gb2312"); }
423 // Determine if this encoding can be displayed in a Web browser.
424 public override bool IsBrowserDisplay
426 get { return(true); }
429 // Determine if this encoding can be saved from a Web browser.
430 public override bool IsBrowserSave
432 get { return(true); }
435 // Determine if this encoding can be displayed in a mail/news agent.
436 public override bool IsMailNewsDisplay
438 get { return(true); }
441 // Determine if this encoding can be saved from a mail/news agent.
442 public override bool IsMailNewsSave
444 get { return(true); }
447 // Get the IANA-preferred Web name for this encoding.
448 public override String WebName
450 get { return("gb2312"); }
453 // Get the Windows code page represented by this object.
454 public override int WindowsCodePage
456 get { return GB2312_CODE_PAGE; }
458 #endif // !ECMA_COMPAT
460 // Decoder that handles a rolling GB2312 state.
461 private sealed class CP936Decoder
: Decoder
463 private Gb2312Convert convert
;
464 private int lastByte
;
467 public CP936Decoder(Gb2312Convert convert
) {
468 this.convert
= convert
;
472 // Override inherited methods.
473 public override int GetCharCount(byte[] bytes
, int index
, int count
)
476 throw new ArgumentNullException("bytes");
478 if (index
< 0 || index
> bytes
.Length
) {
479 throw new ArgumentOutOfRangeException("index", Strings
.GetString("ArgRange_Array"));
481 if (count
< 0 || count
> (bytes
.Length
- index
)) {
482 throw new ArgumentOutOfRangeException("count", Strings
.GetString("ArgRange_Array"));
485 char[] buffer
= new char[count
* 2];
486 return(GetChars(bytes
, index
, count
, buffer
, 0));
490 public override int GetChars(byte[] bytes
,
497 throw new ArgumentNullException("bytes");
500 throw new ArgumentNullException("chars");
502 if (byteIndex
< 0 || byteIndex
> bytes
.Length
) {
503 throw new ArgumentOutOfRangeException("byteIndex", Strings
.GetString("ArgRange_Array"));
505 if (byteCount
< 0 || byteIndex
+ byteCount
> bytes
.Length
) {
506 throw new ArgumentOutOfRangeException("byteCount", Strings
.GetString("ArgRange_Array"));
508 if (charIndex
< 0 || charIndex
> chars
.Length
) {
509 throw new ArgumentOutOfRangeException("charIndex", Strings
.GetString("ArgRange_Array"));
512 int charLength
= chars
.Length
;
513 int posn
= charIndex
;
515 byte[] table
= convert
.gb2312_to_unicode
;
517 while(byteCount
> 0) {
518 b
= bytes
[byteIndex
++];
522 if (posn
>= charLength
) {
// ArgumentException takes (message, paramName): the localized
// text is the message and "chars" names the offending argument.
throw new ArgumentException (Strings.GetString ("Arg_InsufficientSpace"), "chars");
528 chars
[posn
++] = (char)b
;
529 } else if ((b
<= 0xa0 &&
533 // Invalid first byte
537 // double-byte sequence
542 // double-byte sequence
543 if (lastByte
< 0x80 ||
544 (lastByte
- 0x80) <= 0x20 ||
545 (lastByte
- 0x80) > 0x77 ||
546 (b
- 0x80) <= 0x20 ||
547 (b
- 0x80) >= 0x7f) {
548 // Invalid second byte
// Each table entry occupies two bytes (read below as
// table[idx] | table[idx + 1] << 8), so the whole
// (row * 94 + column) offset must be doubled, not just
// the column term.
int idx = ((lastByte - 0xa1) * 94 + (b - 0xa1)) * 2;
556 value = (int)(table
[idx
] | (table
[idx
+ 1] << 8));
560 chars
[posn
++] = (char)value;
570 return (posn
- charIndex
);
575 internal class ENCgb2312
: CP936
577 public ENCgb2312(): base () {}