**** Merged from MCS ****
[mono-project.git] / mcs / class / I18N / CJK / CP936.cs
blob74861bd22d46f6def52073281bb298fd24e4fc88
1 //
2 // I18N.CJK.CP936
3 //
4 // Author:
5 // Alan Tam Siu Lung (Tam@SiuLung.com)
6 // Dick Porter (dick@ximian.com)
7 //
9 // This is actually EUC-CN, which is why 0x80 gets added and subtracted when
10 // converting.
13 using System;
14 using System.Text;
15 using I18N.Common;
17 namespace I18N.CJK
19 internal class CP936 : Encoding
// Magic number used by Windows for the GB2312 code page.
private const int GB2312_CODE_PAGE = 936;

// Conversion tables obtained from Gb2312Convert.Convert. The field is
// assigned exactly once, in the constructor, so it is read-only.
private readonly Gb2312Convert convert;

// Constructor: passes code page 936 to the Encoding base class and
// fetches the GB2312 conversion tables used by GetBytes/GetChars.
public CP936() : base(GB2312_CODE_PAGE)
{
    convert = Gb2312Convert.Convert;
}
// Get the number of bytes needed to encode a character buffer.
//
// chars  - source character array.
// index  - position of the first character to measure.
// count  - number of characters to measure.
//
// Returns the number of bytes GetBytes writes for that slice.
// Throws ArgumentNullException when chars is null, and
// ArgumentOutOfRangeException when index/count fall outside chars.
public override int GetByteCount(char[] chars, int index,
                                 int count)
{
    if (chars == null) {
        throw new ArgumentNullException("chars");
    }
    if (index < 0 || index > chars.Length) {
        throw new ArgumentOutOfRangeException("index", Strings.GetString("ArgRange_Array"));
    }
    // Compare against the remaining length instead of computing
    // index + count, which can wrap around for a very large count
    // and let an invalid range slip through.
    if (count < 0 || count > chars.Length - index) {
        throw new ArgumentOutOfRangeException("count", Strings.GetString("ArgRange_Array"));
    }

    // GetBytes emits at most two bytes per character, so a scratch
    // buffer of count * 2 is always big enough for a trial encode.
    byte[] buffer = new byte[count * 2];
    return GetBytes(chars, index, count, buffer, 0);
}
// Get the bytes that result from encoding a character buffer.
//
// chars     - source character array.
// charIndex - position of the first character to encode.
// charCount - number of characters to encode.
// bytes     - destination byte array.
// byteIndex - position in bytes at which writing starts.
//
// Characters <= 0x80 and 0xFF are written as a single byte equal to
// the character value.  Every other character is looked up in the
// GB2312 tables; a hit is written as two bytes, each with 0x80 added
// (EUC-CN form), and a miss is written as a single '?'.
//
// Returns the number of bytes written.
// Throws ArgumentNullException for null arrays,
// ArgumentOutOfRangeException for ranges outside the arrays, and
// ArgumentException when bytes is too small for the output.
public override int GetBytes(char[] chars, int charIndex,
                             int charCount, byte[] bytes,
                             int byteIndex)
{
    if (chars == null) {
        throw new ArgumentNullException("chars");
    }
    if (bytes == null) {
        throw new ArgumentNullException("bytes");
    }
    if (charIndex < 0 || charIndex > chars.Length) {
        throw new ArgumentOutOfRangeException("charIndex", Strings.GetString("ArgRange_Array"));
    }
    // Overflow-safe form of "charIndex + charCount > chars.Length".
    if (charCount < 0 || charCount > chars.Length - charIndex) {
        throw new ArgumentOutOfRangeException("charCount", Strings.GetString("ArgRange_Array"));
    }
    if (byteIndex < 0 || byteIndex > bytes.Length) {
        throw new ArgumentOutOfRangeException("byteIndex", Strings.GetString("ArgRange_Array"));
    }

    int posn = byteIndex;
    int byteLength = bytes.Length;
    int ch;

    while (charCount-- > 0) {
        ch = chars[charIndex++];

        // At least one output byte is always produced per character.
        if (posn >= byteLength) {
            // ArgumentException takes (message, paramName) in that order.
            throw new ArgumentException(Strings.GetString("Arg_InsufficientSpace"), "bytes");
        }

        if (ch <= 0x80 || ch == 0xFF) {
            // Character maps to itself
            bytes[posn++] = (byte)ch;
            continue;
        }

        // (0, 0) means "no mapping found"; see the check after the chain.
        byte byte1 = 0, byte2 = 0;
        int tablepos;

        if (ch >= 0xa4 && ch <= 0x101) {
            tablepos = (ch - 0xa4) * 2;
            byte1 = convert.gb2312_from_unicode1[tablepos];
            byte2 = convert.gb2312_from_unicode1[tablepos + 1];
        } else if (ch >= 0x113 && ch <= 0x2c9) {
            // Sparse range: only the listed code points are mapped.
            switch (ch) {
            case 0x113: byte1 = 0x28; byte2 = 0x25; break;
            case 0x11b: byte1 = 0x28; byte2 = 0x27; break;
            case 0x12b: byte1 = 0x28; byte2 = 0x29; break;
            case 0x14d: byte1 = 0x28; byte2 = 0x2d; break;
            case 0x16b: byte1 = 0x28; byte2 = 0x31; break;
            case 0x1ce: byte1 = 0x28; byte2 = 0x23; break;
            case 0x1d0: byte1 = 0x28; byte2 = 0x2b; break;
            case 0x1d2: byte1 = 0x28; byte2 = 0x2f; break;
            case 0x1d4: byte1 = 0x28; byte2 = 0x33; break;
            case 0x1d6: byte1 = 0x28; byte2 = 0x35; break;
            case 0x1d8: byte1 = 0x28; byte2 = 0x36; break;
            case 0x1da: byte1 = 0x28; byte2 = 0x37; break;
            case 0x1dc: byte1 = 0x28; byte2 = 0x38; break;
            case 0x2c7: byte1 = 0x21; byte2 = 0x26; break;
            case 0x2c9: byte1 = 0x21; byte2 = 0x25; break;
            }
        } else if (ch >= 0x391 && ch <= 0x3c9) {
            tablepos = (ch - 0x391) * 2;
            byte1 = convert.gb2312_from_unicode2[tablepos];
            byte2 = convert.gb2312_from_unicode2[tablepos + 1];
        } else if (ch >= 0x401 && ch <= 0x451) {
            tablepos = (ch - 0x401) * 2;
            byte1 = convert.gb2312_from_unicode3[tablepos];
            byte2 = convert.gb2312_from_unicode3[tablepos + 1];
        } else if (ch >= 0x2015 && ch <= 0x203b) {
            tablepos = (ch - 0x2015) * 2;
            byte1 = convert.gb2312_from_unicode4[tablepos];
            byte2 = convert.gb2312_from_unicode4[tablepos + 1];
        } else if (ch >= 0x2103 && ch <= 0x22a5) {
            tablepos = (ch - 0x2103) * 2;
            byte1 = convert.gb2312_from_unicode5[tablepos];
            byte2 = convert.gb2312_from_unicode5[tablepos + 1];
        } else if (ch == 0x2312) {
            byte1 = 0x21;
            byte2 = 0x50;
        } else if (ch >= 0x2460 && ch <= 0x249b) {
            tablepos = (ch - 0x2460) * 2;
            byte1 = convert.gb2312_from_unicode6[tablepos];
            byte2 = convert.gb2312_from_unicode6[tablepos + 1];
        } else if (ch >= 0x2500 && ch <= 0x254b) {
            // Contiguous run: second byte is 0x24 plus the low byte of ch.
            byte1 = 0x29;
            byte2 = (byte)(0x24 + (ch % 0x100));
        } else if (ch >= 0x25a0 && ch <= 0x2642) {
            // Sparse range: only the listed code points are mapped.
            switch (ch) {
            case 0x25a0: byte1 = 0x21; byte2 = 0x76; break;
            case 0x25a1: byte1 = 0x21; byte2 = 0x75; break;
            case 0x25b2: byte1 = 0x21; byte2 = 0x78; break;
            case 0x25b3: byte1 = 0x21; byte2 = 0x77; break;
            case 0x25c6: byte1 = 0x21; byte2 = 0x74; break;
            case 0x25c7: byte1 = 0x21; byte2 = 0x73; break;
            case 0x25cb: byte1 = 0x21; byte2 = 0x70; break;
            case 0x25ce: byte1 = 0x21; byte2 = 0x72; break;
            case 0x25cf: byte1 = 0x21; byte2 = 0x71; break;
            case 0x2605: byte1 = 0x21; byte2 = 0x6f; break;
            case 0x2606: byte1 = 0x21; byte2 = 0x6e; break;
            case 0x2640: byte1 = 0x21; byte2 = 0x62; break;
            case 0x2642: byte1 = 0x21; byte2 = 0x61; break;
            }
        } else if (ch >= 0x3000 && ch <= 0x3129) {
            tablepos = (ch - 0x3000) * 2;
            byte1 = convert.gb2312_from_unicode7[tablepos];
            byte2 = convert.gb2312_from_unicode7[tablepos + 1];
        } else if (ch >= 0x3220 && ch <= 0x3229) {
            byte1 = 0x22;
            byte2 = (byte)(0x65 + (ch - 0x3220));
        } else if (ch >= 0x4e00 && ch <= 0x9fa0) {
            tablepos = (ch - 0x4e00) * 2;
            byte1 = convert.gb2312_from_unicode8[tablepos];
            byte2 = convert.gb2312_from_unicode8[tablepos + 1];
        } else if (ch >= 0xff01 && ch <= 0xff5e) {
            tablepos = (ch - 0xff01) * 2;
            byte1 = convert.gb2312_from_unicode9[tablepos];
            byte2 = convert.gb2312_from_unicode9[tablepos + 1];
        } else if (ch == 0xffe0) {
            byte1 = 0x21;
            byte2 = 0x69;
        } else if (ch == 0xffe1) {
            byte1 = 0x21;
            byte2 = 0x6a;
        } else if (ch == 0xffe3) {
            byte1 = 0x21;
            byte2 = 0x7e;
        } else if (ch == 0xffe5) {
            byte1 = 0x21;
            byte2 = 0x24;
        }

        if (byte1 == 0 || byte2 == 0) {
            // No mapping: substitute a question mark.
            bytes[posn++] = (byte)'?';
        } else if ((posn + 1) >= byteLength) {
            // A double-byte sequence needs room for both bytes.
            throw new ArgumentException(Strings.GetString("Arg_InsufficientSpace"), "bytes");
        } else {
            // Adding 0x80 to each byte turns the table value into its
            // EUC-CN form.
            bytes[posn++] = (byte)(byte1 + 0x80);
            bytes[posn++] = (byte)(byte2 + 0x80);
        }
    }

    return posn - byteIndex;
}
// Get the number of characters needed to decode a byte buffer.
//
// bytes - source byte array.
// index - position of the first byte to measure.
// count - number of bytes to measure.
//
// Returns the number of characters GetChars produces for that slice.
// Throws ArgumentNullException when bytes is null, and
// ArgumentOutOfRangeException when index/count fall outside bytes.
public override int GetCharCount(byte[] bytes, int index,
                                 int count)
{
    if (bytes == null) {
        throw new ArgumentNullException("bytes");
    }
    if (index < 0 || index > bytes.Length) {
        throw new ArgumentOutOfRangeException("index", Strings.GetString("ArgRange_Array"));
    }
    // Compare against the remaining length instead of computing
    // index + count, which can wrap around for a very large count.
    if (count < 0 || count > bytes.Length - index) {
        throw new ArgumentOutOfRangeException("count", Strings.GetString("ArgRange_Array"));
    }

    // GetChars produces at most one character per input byte, so a
    // scratch buffer of count characters suffices for a trial decode.
    char[] buffer = new char[count];
    return GetChars(bytes, index, count, buffer, 0);
}
// Get the characters that result from decoding a byte buffer.
//
// bytes     - source byte array.
// byteIndex - position of the first byte to decode.
// byteCount - number of bytes to decode.
// chars     - destination character array.
// charIndex - position in chars at which writing starts.
//
// Bytes below 0x80 are copied through as single characters.  Other
// bytes are treated as the lead byte of a two-byte sequence and
// looked up in the gb2312_to_unicode table; invalid lead bytes,
// invalid or missing trail bytes and unmapped pairs all yield '?'.
//
// Returns the number of characters written.
// Throws ArgumentNullException for null arrays,
// ArgumentOutOfRangeException for ranges outside the arrays, and
// ArgumentException when chars is too small for the output.
public override int GetChars(byte[] bytes, int byteIndex,
                             int byteCount, char[] chars,
                             int charIndex)
{
    if (bytes == null) {
        throw new ArgumentNullException("bytes");
    }
    if (chars == null) {
        throw new ArgumentNullException("chars");
    }
    if (byteIndex < 0 || byteIndex > bytes.Length) {
        throw new ArgumentOutOfRangeException("byteIndex", Strings.GetString("ArgRange_Array"));
    }
    // Overflow-safe form of "byteIndex + byteCount > bytes.Length".
    if (byteCount < 0 || byteCount > bytes.Length - byteIndex) {
        throw new ArgumentOutOfRangeException("byteCount", Strings.GetString("ArgRange_Array"));
    }
    if (charIndex < 0 || charIndex > chars.Length) {
        throw new ArgumentOutOfRangeException("charIndex", Strings.GetString("ArgRange_Array"));
    }

    int charLength = chars.Length;
    int posn = charIndex;
    int byte1, byte2, value;
    byte[] table = convert.gb2312_to_unicode;

    while (byteCount > 0) {
        byte1 = bytes[byteIndex++];
        byteCount--;

        // Every iteration writes exactly one character.
        if (posn >= charLength) {
            // ArgumentException takes (message, paramName) in that order.
            throw new ArgumentException(Strings.GetString("Arg_InsufficientSpace"), "chars");
        }

        if (byte1 < 0x80) {
            // Single-byte character: copy through unchanged.
            chars[posn++] = (char)byte1;
            continue;
        }

        if ((byte1 <= 0xa0 &&
             byte1 != 0x8e &&
             byte1 != 0x8f) ||
            byte1 > 0xfe) {
            // Not a lead byte: consume only this byte.
            value = 0;
        } else if (byteCount == 0) {
            // Missing second byte
            value = 0;
        } else {
            byte2 = bytes[byteIndex++];
            byteCount--;

            // byte1 is known to be >= 0x80 here.  0x8e and 0x8f pass
            // the lead-byte test above but are rejected by the first
            // comparison below, after their trail byte was consumed.
            if ((byte1 - 0x80) <= 0x20 ||
                (byte1 - 0x80) > 0x77 ||
                (byte2 - 0x80) <= 0x20 ||
                (byte2 - 0x80) >= 0x7f) {
                value = 0;
            } else {
                // 94 cells per row, two table bytes per cell.
                int idx = ((byte1 - 0xa1) * 94 + (byte2 - 0xa1)) * 2;
                if (idx > 0x3fe2) {
                    value = 0;
                } else {
                    // Table entries are stored low byte first.
                    value = (int)(table[idx] | (table[idx + 1] << 8));
                }
            }
        }

        // A value of 0 marks "no mapping"; substitute a question mark.
        if (value != 0) {
            chars[posn++] = (char)value;
        } else {
            chars[posn++] = '?';
        }
    }

    return posn - charIndex;
}
// Upper bound on the number of bytes produced when encoding
// charCount characters: two bytes per character.
// Throws ArgumentOutOfRangeException when charCount is negative.
public override int GetMaxByteCount(int charCount)
{
    if (charCount >= 0) {
        return charCount * 2;
    }

    throw new ArgumentOutOfRangeException("charCount", Strings.GetString("ArgRange_NonNegative"));
}
// Upper bound on the number of characters produced when decoding
// byteCount bytes: one character per byte.
// Throws ArgumentOutOfRangeException when byteCount is negative.
public override int GetMaxCharCount(int byteCount)
{
    if (byteCount >= 0) {
        return byteCount;
    }

    throw new ArgumentOutOfRangeException("byteCount", Strings.GetString("ArgRange_NonNegative"));
}
// Create a stateful decoder that carries a pending lead byte from
// one GetChars call to the next, using this encoding's tables.
public override Decoder GetDecoder()
{
    Decoder decoder = new CP936Decoder(convert);
    return decoder;
}
404 #if !ECMA_COMPAT
// Name of this encoding for use in mail bodies.
public override string BodyName
{
    get {
        return "gb2312";
    }
}
// Human-readable description of this encoding.
public override string EncodingName
{
    get {
        return "Chinese Simplified (GB2312)";
    }
}
// Name of this encoding for use in mail agent headers.
public override string HeaderName
{
    get {
        return "gb2312";
    }
}
// Reports that a Web browser can display this encoding.
public override bool IsBrowserDisplay
{
    get {
        return true;
    }
}
// Reports that a Web browser can save content in this encoding.
public override bool IsBrowserSave
{
    get {
        return true;
    }
}
// Reports that a mail/news agent can display this encoding.
public override bool IsMailNewsDisplay
{
    get {
        return true;
    }
}
// Reports that a mail/news agent can save content in this encoding.
public override bool IsMailNewsSave
{
    get {
        return true;
    }
}
// IANA-preferred Web name for this encoding.
public override string WebName
{
    get {
        return "gb2312";
    }
}
// Windows code page number represented by this object (936).
public override int WindowsCodePage
{
    get {
        return GB2312_CODE_PAGE;
    }
}
458 #endif // !ECMA_COMPAT
// Decoder that handles a rolling GB2312 state: when a call to GetChars
// ends on the lead byte of a double-byte sequence, that byte is kept in
// lastByte and paired with the first byte of the next call.
private sealed class CP936Decoder : Decoder
{
    // Conversion tables supplied by the owning encoding.
    private readonly Gb2312Convert convert;

    // Pending lead byte of a double-byte sequence, or 0 when none.
    private int lastByte;

    // Constructor.
    public CP936Decoder(Gb2312Convert convert)
    {
        this.convert = convert;
        this.lastByte = 0;
    }

    // Count the characters that GetChars would produce for the given
    // byte range, taking any pending lead byte into account.  The
    // decoder state is left untouched.
    //
    // Throws ArgumentNullException when bytes is null, and
    // ArgumentOutOfRangeException when index/count fall outside bytes.
    public override int GetCharCount(byte[] bytes, int index, int count)
    {
        if (bytes == null) {
            throw new ArgumentNullException("bytes");
        }
        if (index < 0 || index > bytes.Length) {
            throw new ArgumentOutOfRangeException("index", Strings.GetString("ArgRange_Array"));
        }
        if (count < 0 || count > (bytes.Length - index)) {
            throw new ArgumentOutOfRangeException("count", Strings.GetString("ArgRange_Array"));
        }

        char[] buffer = new char[count * 2];

        // The trial decode below updates lastByte.  Counting must not
        // disturb the rolling state, otherwise a following GetChars
        // call on the same bytes would mis-pair lead and trail bytes.
        int savedLastByte = lastByte;
        try {
            return GetChars(bytes, index, count, buffer, 0);
        } finally {
            lastByte = savedLastByte;
        }
    }

    // Decode a byte range into chars starting at charIndex, carrying
    // an unfinished double-byte sequence over to the next call.
    // Invalid lead bytes, invalid trail bytes and unmapped pairs are
    // written as '?'.
    //
    // Returns the number of characters written.
    // Throws ArgumentNullException for null arrays,
    // ArgumentOutOfRangeException for ranges outside the arrays, and
    // ArgumentException when chars is too small for the output.
    public override int GetChars(byte[] bytes,
                                 int byteIndex,
                                 int byteCount,
                                 char[] chars,
                                 int charIndex)
    {
        if (bytes == null) {
            throw new ArgumentNullException("bytes");
        }
        if (chars == null) {
            throw new ArgumentNullException("chars");
        }
        if (byteIndex < 0 || byteIndex > bytes.Length) {
            throw new ArgumentOutOfRangeException("byteIndex", Strings.GetString("ArgRange_Array"));
        }
        // Overflow-safe form of "byteIndex + byteCount > bytes.Length".
        if (byteCount < 0 || byteCount > bytes.Length - byteIndex) {
            throw new ArgumentOutOfRangeException("byteCount", Strings.GetString("ArgRange_Array"));
        }
        if (charIndex < 0 || charIndex > chars.Length) {
            throw new ArgumentOutOfRangeException("charIndex", Strings.GetString("ArgRange_Array"));
        }

        int charLength = chars.Length;
        int posn = charIndex;
        int b, value;
        byte[] table = convert.gb2312_to_unicode;

        while (byteCount > 0) {
            b = bytes[byteIndex++];
            byteCount--;

            if (lastByte == 0) {
                if (posn >= charLength) {
                    // ArgumentException takes (message, paramName).
                    throw new ArgumentException(Strings.GetString("Arg_InsufficientSpace"), "chars");
                }

                if (b < 0x80) {
                    // ASCII
                    chars[posn++] = (char)b;
                } else if ((b <= 0xa0 &&
                            b != 0x8e &&
                            b != 0x8f) ||
                           b > 0xfe) {
                    // Invalid first byte
                    chars[posn++] = '?';
                } else {
                    // First byte in a
                    // double-byte sequence
                    lastByte = b;
                }
            } else {
                // Second byte in a double-byte sequence.  The lead
                // byte may have been left over from an earlier call,
                // so the output space has to be checked here as well.
                // lastByte is kept so the caller can retry with a
                // larger buffer.
                if (posn >= charLength) {
                    throw new ArgumentException(Strings.GetString("Arg_InsufficientSpace"), "chars");
                }

                // lastByte is only ever set from a byte >= 0x80.
                if ((lastByte - 0x80) <= 0x20 ||
                    (lastByte - 0x80) > 0x77 ||
                    (b - 0x80) <= 0x20 ||
                    (b - 0x80) >= 0x7f) {
                    // Invalid second byte
                    chars[posn++] = '?';
                } else {
                    // 94 cells per row, two table bytes per cell: the
                    // whole cell number is doubled, not just the
                    // column part.
                    int idx = ((lastByte - 0xa1) * 94 + (b - 0xa1)) * 2;

                    if (idx > 0x3fe2) {
                        value = 0;
                    } else {
                        // Table entries are stored low byte first.
                        value = (int)(table[idx] | (table[idx + 1] << 8));
                    }

                    if (value != 0) {
                        chars[posn++] = (char)value;
                    } else {
                        chars[posn++] = '?';
                    }
                }

                lastByte = 0;
            }
        }

        return posn - charIndex;
    }
}
// CP936 under the type name ENCgb2312; adds no behavior of its own.
// NOTE(review): presumably looked up by this name elsewhere in I18N -
// confirm against the loader.
internal class ENCgb2312 : CP936
{
    public ENCgb2312()
    {
    }
}