2009-11-02 Jb Evain <jbevain@novell.com>
[mcs.git] / class / I18N / CJK / CP932.cs
blobe5f81aa7ca87ae0c1983a976cc58f6da74db347c
1 /*
2 * CP932.cs - Japanese (Shift-JIS) code page.
4 * Copyright (c) 2002 Southern Storm Software, Pty Ltd
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
26 // Copyright (C) 2005-2006 Novell, Inc.
29 namespace I18N.CJK
32 using System;
33 using System.Text;
34 using I18N.Common;
36 [Serializable]
37 public unsafe class CP932 : MonoEncoding
// Code page number that identifies Shift-JIS; it is handed to the
// base class and reported by WindowsCodePage.
private const int SHIFTJIS_CODE_PAGE = 932;

// Create the encoding object, passing the Shift-JIS code page
// number to the MonoEncoding constructor.
public CP932()
	: base(SHIFTJIS_CODE_PAGE)
{
}
// Get the number of bytes needed to encode a character buffer.
//
// chars - pointer to the first character to measure.
// count - number of characters to measure.
//
// Returns the number of bytes required.  Each character is classified
// with the same range tests and table lookups that GetBytesImpl uses,
// so the result agrees with the number of bytes GetBytesImpl writes:
// two bytes when the lookup yields a value of 0x0100 or more, one byte
// otherwise.  An unmappable character is counted as one byte (the '?'
// replacement).
// NOTE(review): when NET_2_0 is defined GetBytesImpl routes unmappable
// characters through HandleFallback, which may produce a different
// number of bytes - confirm against MonoEncoding.
public unsafe override int GetByteCountImpl (char* chars, int count)
{
	int index = 0;
	int length = 0;
	int ch, value;
#if __PNET__
	byte *cjkToJis = JISConvert.Convert.cjkToJis;
	byte *greekToJis = JISConvert.Convert.greekToJis;
	byte *extraToJis = JISConvert.Convert.extraToJis;
#else
	byte[] cjkToJis = JISConvert.Convert.cjkToJis;
	byte[] greekToJis = JISConvert.Convert.greekToJis;
	byte[] extraToJis = JISConvert.Convert.extraToJis;
#endif
	while(count > 0)
	{
		ch = chars[index++];
		--count;

		// Every character produces at least one byte.
		++length;

		if(ch < 0x0080)
		{
			// Character maps to itself.
			continue;
		}
		if(ch < 0x0100)
		{
			// Only these Latin 1 characters are written as
			// double-byte code points; the rest take one byte.
			if(ch == 0x00A2 || ch == 0x00A3 || ch == 0x00A7 ||
			   ch == 0x00A8 || ch == 0x00AC || ch == 0x00B0 ||
			   ch == 0x00B1 || ch == 0x00B4 || ch == 0x00B6 ||
			   ch == 0x00D7 || ch == 0x00F7)
			{
				++length;
			}
			continue;
		}
		if(ch >= 0xE000 && ch <= 0xE757)
		{
			// PrivateUse characters are always written as two bytes.
			++length;
			continue;
		}

		if(ch >= 0x0391 && ch <= 0x0451)
		{
			// Greek subset characters.  Consult the table instead of
			// assuming two bytes: entries below 0x0100 are written
			// by GetBytesImpl as a single byte.
			value = (ch - 0x0391) * 2;
			value = ((int)(greekToJis[value])) |
					(((int)(greekToJis[value + 1])) << 8);
		}
		else if(ch >= 0x2010 && ch <= 0x9FA5)
		{
			// This range contains the bulk of the CJK set.
			value = (ch - 0x2010) * 2;
			value = ((int)(cjkToJis[value])) |
					(((int)(cjkToJis[value + 1])) << 8);
		}
		else if(ch >= 0xFF01 && ch <= 0xFF60)
		{
			// Extra characters that GetBytesImpl looks up in
			// extraToJis.
			value = (ch - 0xFF01) * 2;
			value = ((int)(extraToJis[value])) |
					(((int)(extraToJis[value + 1])) << 8);
		}
		else
		{
			// Everything else, including 0xFF61 and above, is
			// written by GetBytesImpl as a single byte.
			value = 0;
		}

		if(value >= 0x0100)
		{
			++length;
		}
	}

	// Return the length to the caller.
	return length;
}
// Get the bytes that result from encoding a character buffer.
//
// chars     - pointer to the first character to encode.
// charCount - number of characters to encode.
// bytes     - pointer to the output buffer.
// byteCount - capacity of the output buffer in bytes.
//
// Returns the number of bytes written.
// Throws ArgumentException ("bytes") when the output buffer is too
// small for the next character.
//
// Characters that cannot be mapped are passed to HandleFallback when
// NET_2_0 is defined and are written as '?' otherwise.
public unsafe override int GetBytesImpl (
	char* chars, int charCount, byte* bytes, int byteCount)
{
	int charIndex = 0;
	int byteIndex = 0;
#if NET_2_0
	EncoderFallbackBuffer buffer = null;
#endif

	// Convert the characters into their byte form.
	int posn = byteIndex;
	int byteLength = byteCount;
	int ch, value;
#if __PNET__
	byte *cjkToJis = JISConvert.Convert.cjkToJis;
	byte *greekToJis = JISConvert.Convert.greekToJis;
	byte *extraToJis = JISConvert.Convert.extraToJis;
#else
	byte[] cjkToJis = JISConvert.Convert.cjkToJis;
	byte[] greekToJis = JISConvert.Convert.greekToJis;
	byte[] extraToJis = JISConvert.Convert.extraToJis;
#endif
	while(charCount > 0)
	{
		ch = chars[charIndex++];
		--charCount;
		if(posn >= byteLength)
		{
			throw new ArgumentException
				(Strings.GetString("Arg_InsufficientSpace"),
				 "bytes");
		}
		if(ch < 0x0080)
		{
			// Character maps to itself.
			bytes[posn++] = (byte)ch;
			continue;
		}
		else if(ch < 0x0100)
		{
			// Check for special Latin 1 characters that can be
			// mapped to double-byte code points.  All of them use
			// the lead byte 0x81; "trail" is the second byte, or
			// zero when the character is not one of them.
			int trail;
			switch(ch)
			{
				case 0x00A2: trail = 0x91; break;
				case 0x00A3: trail = 0x92; break;
				case 0x00A7: trail = 0x98; break;
				case 0x00A8: trail = 0x4E; break;
				case 0x00AC: trail = 0xCA; break;
				case 0x00B0: trail = 0x8B; break;
				case 0x00B1: trail = 0x7D; break;
				case 0x00B4: trail = 0x4C; break;
				case 0x00B6: trail = 0xF7; break;
				case 0x00D7: trail = 0x7E; break;
				case 0x00F7: trail = 0x80; break;
				default:     trail = 0;    break;
			}
			if(trail != 0)
			{
				if((posn + 1) >= byteLength)
				{
					throw new ArgumentException
						(Strings.GetString
							("Arg_InsufficientSpace"), "bytes");
				}
				bytes[posn++] = (byte)0x81;
				bytes[posn++] = (byte)trail;
			}
			else if(ch == 0x00A5)
			{
				// Yen sign.
				bytes[posn++] = (byte)0x5C;
			}
			else
			{
#if NET_2_0
				HandleFallback (ref buffer,
					chars, ref charIndex, ref charCount,
					bytes, ref posn, ref byteCount);
#else
				// Invalid character.
				bytes[posn++] = (byte)'?';
#endif
			}
			continue;
		}
		else if(ch >= 0x0391 && ch <= 0x0451)
		{
			// Greek subset characters.
			value = (ch - 0x0391) * 2;
			value = ((int)(greekToJis[value])) |
					(((int)(greekToJis[value + 1])) << 8);
		}
		else if(ch >= 0x2010 && ch <= 0x9FA5)
		{
			// This range contains the bulk of the CJK set.
			value = (ch - 0x2010) * 2;
			value = ((int)(cjkToJis[value])) |
					(((int)(cjkToJis[value + 1])) << 8);
		}
		else if(ch >= 0xE000 && ch <= 0xE757)
		{
			// PrivateUse: 0xBC characters per lead byte starting
			// at 0xF040, skipping the trail byte 0x7F.
			int diff = ch - 0xE000;
			value = ((int) (diff / 0xBC) << 8)
				+ (diff % 0xBC)
				+ 0xF040;
			if (value % 0x100 >= 0x7F)
				value++;
		}
		else if(ch >= 0xFF01 && ch <= 0xFF60)
		{
			// Extra characters looked up in extraToJis.
			value = (ch - 0xFF01) * 2;
			value = ((int)(extraToJis[value])) |
					(((int)(extraToJis[value + 1])) << 8);
		}
		else if(ch >= 0xFF61 && ch <= 0xFF9F)
		{
			// Half-width katakana: single bytes 0xA1..0xDF, the
			// inverse of what CP932Decoder.GetChars produces.  The
			// range stops at 0xFF9F because 0xFFA0 would become
			// 0xE0, which the decoder treats as a lead byte.
			value = ch - 0xFF60 + 0xA0;
		}
		else
		{
			// Invalid character.
			value = 0;
		}
		if(value == 0)
		{
#if NET_2_0
			HandleFallback (ref buffer,
				chars, ref charIndex, ref charCount,
				bytes, ref posn, ref byteCount);
#else
			bytes[posn++] = (byte)'?';
#endif
		}
		else if(value < 0x0100)
		{
			bytes[posn++] = (byte)value;
		}
		else if((posn + 1) >= byteLength)
		{
			throw new ArgumentException
				(Strings.GetString("Arg_InsufficientSpace"),
				 "bytes");
		}
		else if(value < 0x8000)
		{
			// JIS X 0208 character: split the table value into a
			// row (ch) and a trail byte that skips 0x7F.
			value -= 0x0100;
			ch = (value / 0xBC);
			value = (value % 0xBC) + 0x40;
			if(value >= 0x7F)
			{
				++value;
			}
			if(ch < (0x9F - 0x80))
			{
				bytes[posn++] = (byte)(ch + 0x81);
			}
			else
			{
				bytes[posn++] = (byte)(ch - (0x9F - 0x80) + 0xE0);
			}
			bytes[posn++] = (byte)value;
		}
		else if (value >= 0xF040 && value <= 0xF9FC)
		{
			// PrivateUse: value already holds lead and trail bytes.
			bytes[posn++] = (byte) (value / 0x100);
			bytes[posn++] = (byte) (value % 0x100);
		}
		else
		{
			// JIS X 0212 character, which Shift-JIS doesn't
			// support, but we've already allocated two slots.
			bytes[posn++] = (byte)'?';
			bytes[posn++] = (byte)'?';
		}
	}

	// Return the final length to the caller.
	return posn - byteIndex;
}
// Get the number of characters needed to decode a byte buffer.
// A fresh decoder is used for every call and is asked to flush
// (refresh = true), so no state carries over between calls.
public override int GetCharCount (byte [] bytes, int index, int count)
{
	CP932Decoder decoder = new CP932Decoder (JISConvert.Convert);
	return decoder.GetCharCount (bytes, index, count, true);
}
// Decode a byte buffer into a character buffer and return the
// number of characters written.  A fresh decoder is used for every
// call and is asked to flush (refresh = true), so no state carries
// over between calls.
public override int GetChars (
	byte [] bytes, int byteIndex, int byteCount,
	char [] chars, int charIndex)
{
	CP932Decoder decoder = new CP932Decoder (JISConvert.Convert);
	return decoder.GetChars (
		bytes, byteIndex, byteCount, chars, charIndex, true);
}
// Upper bound on the number of bytes produced by encoding
// "charCount" characters: two bytes for every character.
// Throws ArgumentOutOfRangeException when charCount is negative.
public override int GetMaxByteCount(int charCount)
{
	if(charCount >= 0)
	{
		return charCount * 2;
	}
	throw new ArgumentOutOfRangeException
		("charCount",
		 Strings.GetString("ArgRange_NonNegative"));
}
// Upper bound on the number of characters produced by decoding
// "byteCount" bytes: one character for every byte.
// Throws ArgumentOutOfRangeException when byteCount is negative.
public override int GetMaxCharCount(int byteCount)
{
	if(byteCount >= 0)
	{
		return byteCount;
	}
	throw new ArgumentOutOfRangeException
		("byteCount",
		 Strings.GetString("ArgRange_NonNegative"));
}
// Create a new decoder that keeps Shift-JIS state (a pending lead
// byte) between calls.
public override Decoder GetDecoder()
{
	Decoder decoder = new CP932Decoder(JISConvert.Convert);
	return decoder;
}
#if !ECMA_COMPAT

// Get the mail body name for this encoding.
public override String BodyName {
	get { return "iso-2022-jp"; }
}

// Get the human-readable name for this encoding.
public override String EncodingName {
	get { return "Japanese (Shift-JIS)"; }
}

// Get the mail agent header name for this encoding.
public override String HeaderName {
	get { return "iso-2022-jp"; }
}

// Determine if this encoding can be displayed in a Web browser.
public override bool IsBrowserDisplay {
	get { return true; }
}

// Determine if this encoding can be saved from a Web browser.
public override bool IsBrowserSave {
	get { return true; }
}

// Determine if this encoding can be displayed in a mail/news agent.
public override bool IsMailNewsDisplay {
	get { return true; }
}

// Determine if this encoding can be saved from a mail/news agent.
public override bool IsMailNewsSave {
	get { return true; }
}

// Get the IANA-preferred Web name for this encoding.
public override String WebName {
	get { return "shift_jis"; }
}

// Get the Windows code page represented by this object.
public override int WindowsCodePage {
	get { return SHIFTJIS_CODE_PAGE; }
}

// The conditional section must end before the class is closed;
// otherwise defining ECMA_COMPAT would also remove the closing brace.
#endif // !ECMA_COMPAT

}; // class CP932
// Decoder that handles a rolling Shift-JIS state.
//
// A lead byte that arrives at the end of one buffer is remembered and
// combined with the first byte of the next buffer.  GetCharCount and
// GetChars each keep their own pending lead byte so that counting
// does not disturb decoding.
sealed class CP932Decoder : DbcsEncoding.DbcsDecoder
{
	private JISConvert convert;

	// Pending lead byte seen by GetCharCount (0 when none).
	private int last_byte_count;

	// Pending lead byte seen by GetChars (0 when none).
	private int last_byte_chars;

	// Constructor.
	public CP932Decoder(JISConvert convert)
		: base (null)
	{
		this.convert = convert;
	}

	// Determine if a byte starts a double-byte sequence.
	// 0x81..0x9F and 0xE0..0xEF select rows of the JIS X 0208 table;
	// 0xF0..0xF9 are the lead bytes CP932.GetBytesImpl writes for
	// the PrivateUse characters U+E000..U+E757.
	private static bool IsLeadByte (int byteval)
	{
		return (byteval >= 0x81 && byteval <= 0x9F) ||
		       (byteval >= 0xE0 && byteval <= 0xF9);
	}

	// Override inherited methods.

	// Count characters without flushing a pending lead byte.
	public override int GetCharCount (
		byte [] bytes, int index, int count)
	{
		return GetCharCount (bytes, index, count, false);
	}

	// Get the number of characters GetChars would produce for the
	// same bytes and the same "refresh" value.
	//
	// A character is counted when it is completed (single byte or
	// second byte of a pair), not when its lead byte is seen, so a
	// lead byte left pending at the end is counted exactly once:
	// here when "refresh" is true, otherwise by the next call.
	public
#if NET_2_0
	override
#endif
	int GetCharCount (byte [] bytes, int index, int count, bool refresh)
	{
		CheckRange (bytes, index, count);

		// Determine the total length of the converted string.
		int length = 0;
		int byteval;
		int last = last_byte_count;
		while(count > 0)
		{
			byteval = bytes[index++];
			--count;
			if(last == 0 && IsLeadByte (byteval))
			{
				// First byte in a double-byte sequence.
				last = byteval;
			}
			else
			{
				// Single byte, or second byte of a sequence.
				last = 0;
				++length;
			}
		}
		if (refresh) {
			// A dangling lead byte becomes one replacement char.
			if (last != 0)
				length++;
			last_byte_count = 0;
		}
		else
			last_byte_count = last;

		// Return the total length.
		return length;
	}

	// Decode bytes without flushing a pending lead byte.
	public override int GetChars (
		byte [] bytes, int byteIndex, int byteCount,
		char [] chars, int charIndex)
	{
		return GetChars (bytes, byteIndex, byteCount,
				 chars, charIndex, false);
	}

	// Decode bytes into chars starting at charIndex and return the
	// number of characters written.
	//
	// When "refresh" is true a lead byte left pending at the end is
	// written as U+30FB and the state is cleared; otherwise it is
	// kept for the next call.
	// Throws ArgumentException ("chars") when the output array has no
	// room for the next character.
	public
#if NET_2_0
	override
#endif
	int GetChars (
		byte [] bytes, int byteIndex, int byteCount,
		char [] chars, int charIndex, bool refresh)
	{
		CheckRange (bytes, byteIndex, byteCount,
			    chars, charIndex);

		// Decode the bytes in the buffer.
		int posn = charIndex;
		int charLength = chars.Length;
		int byteval, value, lead, offset;
		int last = last_byte_chars;
#if __PNET__
		byte *table = convert.jisx0208ToUnicode;
#else
		byte[] table = convert.jisx0208ToUnicode;
#endif
		while(byteCount > 0)
		{
			byteval = bytes[byteIndex++];
			--byteCount;
			if(last == 0 && IsLeadByte (byteval))
			{
				// First byte in a double-byte sequence;
				// nothing is written until the second arrives.
				last = byteval;
				continue;
			}

			// Every remaining path writes exactly one char.
			if(posn >= charLength)
			{
				throw new ArgumentException
					(Strings.GetString
						("Arg_InsufficientSpace"), "chars");
			}

			if(last == 0)
			{
				if(byteval < 0x80)
				{
					// Ordinary ASCII/Latin1 character.
					chars[posn++] = (char)byteval;
				}
				else if(byteval >= 0xA1 && byteval <= 0xDF)
				{
					// Half-width katakana character.
					chars[posn++] = (char)(byteval - 0xA1 + 0xFF61);
				}
				else
				{
					// Invalid first byte.
					chars[posn++] = '?';
				}
				continue;
			}

			// Second byte in a double-byte sequence.
			lead = last;
			last = 0;
			if(byteval >= 0x40 && byteval <= 0x7E)
			{
				offset = byteval - 0x40;
			}
			else if(byteval >= 0x80 && byteval <= 0xFC)
			{
				// 0x7F is skipped in the trail byte range.
				offset = byteval - 0x80 + 0x3F;
			}
			else
			{
				// Invalid second byte.
				chars[posn++] = '?';
				continue;
			}

			if(lead >= 0xF0)
			{
				// PrivateUse: 0xBC characters per lead byte,
				// the inverse of CP932.GetBytesImpl.  These are
				// not in the JIS X 0208 table.
				chars[posn++] = (char)
					(0xE000 + (lead - 0xF0) * 0xBC + offset);
				continue;
			}

			if(lead <= 0x9F)
			{
				value = (lead - 0x81) * 0xBC;
			}
			else
			{
				value = (lead - 0xE0 + (0xA0 - 0x81)) * 0xBC;
			}
			value = (value + offset) * 2;
			value = ((int)(table[value])) |
					(((int)(table[value + 1])) << 8);
			if(value != 0)
			{
				chars[posn++] = (char)value;
			}
			else
			{
				chars[posn++] = '?';
			}
		}
		if (refresh) {
			if (last != 0) {
				// Dangling lead byte at the end of the input.
				if(posn >= charLength)
				{
					throw new ArgumentException
						(Strings.GetString
							("Arg_InsufficientSpace"), "chars");
				}
				chars[posn++] = '\u30FB';
			}
			last_byte_chars = 0;
		}
		else
			last_byte_chars = last;

		// Return the final length to the caller.
		return posn - charIndex;
	}

} // class CP932Decoder
// CP932 under the class name ENCshift_jis; adds no behaviour.
// NOTE(review): presumably exists so the encoding can be located by
// its "shift_jis" name - confirm against the I18N lookup code.
[Serializable]
public class ENCshift_jis : CP932
{
	public ENCshift_jis()
	{
	}

}; // class ENCshift_jis
643 }; // namespace I18N.CJK