2 * CP932.cs - Japanese (Shift-JIS) code page.
4 * Copyright (c) 2002 Southern Storm Software, Pty Ltd
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
26 // Copyright (C) 2005-2006 Novell, Inc.
37 public unsafe class CP932
: MonoEncoding
// Code page number that Windows assigns to Shift-JIS; handed to the
// base-class constructor and reported through WindowsCodePage.
private const int SHIFTJIS_CODE_PAGE = 932;
// Constructor: forwards the Shift-JIS code page number
// (SHIFTJIS_CODE_PAGE) to the MonoEncoding base-class constructor.
43 public CP932() : base(SHIFTJIS_CODE_PAGE
)
47 // Get the number of bytes needed to encode a character buffer.
//
// Parameters:
//   chars - pointer to the characters to be measured.
//   count - number of characters to measure (presumably counted from
//           "chars" onwards - TODO confirm against the loop).
// Returns: the byte length computed for the input (int).
48 public unsafe override int GetByteCountImpl (char* chars
, int count
)
52 // Determine the length of the final output.
// NOTE(review): cjkToJis and extraToJis are each declared twice below,
// once as "byte *" and once as "byte[]". Presumably only one form is
// compiled, selected by a conditional-compilation directive - confirm.
56 byte *cjkToJis
= JISConvert
.Convert
.cjkToJis
;
57 byte *extraToJis
= JISConvert
.Convert
.extraToJis
;
59 byte[] cjkToJis
= JISConvert
.Convert
.cjkToJis
;
60 byte[] extraToJis
= JISConvert
.Convert
.extraToJis
;
69 // Character maps to itself.
74 // Check for special Latin 1 characters that
75 // can be mapped to double-byte code points.
76 if(ch
== 0x00A2 || ch
== 0x00A3 || ch
== 0x00A7 ||
77 ch
== 0x00A8 || ch
== 0x00AC || ch
== 0x00B0 ||
78 ch
== 0x00B1 || ch
== 0x00B4 || ch
== 0x00B6 ||
79 ch
== 0x00D7 || ch
== 0x00F7)
84 else if(ch
>= 0x0391 && ch
<= 0x0451)
86 // Greek subset characters.
89 else if(ch
>= 0x2010 && ch
<= 0x9FA5)
91 // This range contains the bulk of the CJK set.
// Each character owns two consecutive bytes in cjkToJis, starting at
// (ch - 0x2010) * 2; they are combined low byte first into a 16-bit value.
92 value = (ch
- 0x2010) * 2;
93 value = ((int)(cjkToJis
[value])) |
94 (((int)(cjkToJis
[value + 1])) << 8);
100 else if(ch
>= 0xE000 && ch
<= 0xE757)
// NOTE(review): this method looks up the whole 0xFF01..0xFFEF range in
// extraToJis, whereas GetBytesImpl in this file tests 0xFF01..0xFF60 and
// 0xFF60..0xFFA0 only. Confirm that both methods agree on the byte count
// for 0xFFA1..0xFFEF.
103 else if(ch
>= 0xFF01 && ch
<= 0xFFEF)
105 // This range contains extra characters,
106 // including half-width katakana.
// Same two-bytes-per-entry layout as above, indexed from 0xFF01.
107 value = (ch
- 0xFF01) * 2;
108 value = ((int)(extraToJis
[value])) |
109 (((int)(extraToJis
[value + 1])) << 8);
117 // Return the length to the caller.
121 // Get the bytes that result from encoding a character buffer.
//
// Parameters:
//   chars     - pointer to the input characters.
//   charCount - number of input characters (passed by ref to
//               HandleFallback further down).
//   bytes     - pointer to the output buffer.
//   byteCount - capacity of the output buffer; copied into byteLength and
//               used for the "insufficient space" checks.
// Returns: posn - byteIndex, i.e. how far the output position advanced.
// Throws: ArgumentException when the output buffer is too small.
122 public unsafe override int GetBytesImpl (
123 char* chars
, int charCount
, byte* bytes
, int byteCount
)
128 EncoderFallbackBuffer buffer
= null;
131 // Convert the characters into their byte form.
// NOTE(review): byteIndex (here) and charIndex (below) are not parameters
// of this method, so they must be locals - confirm their initial values.
132 int posn
= byteIndex
;
133 int byteLength
= byteCount
;
// NOTE(review): each table below is declared twice, once as "byte *" and
// once as "byte[]". Presumably only one form is compiled, selected by a
// conditional-compilation directive - confirm.
136 byte *cjkToJis
= JISConvert
.Convert
.cjkToJis
;
137 byte *greekToJis
= JISConvert
.Convert
.greekToJis
;
138 byte *extraToJis
= JISConvert
.Convert
.extraToJis
;
140 byte[] cjkToJis
= JISConvert
.Convert
.cjkToJis
;
141 byte[] greekToJis
= JISConvert
.Convert
.greekToJis
;
142 byte[] extraToJis
= JISConvert
.Convert
.extraToJis
;
// Fetch the next input character and advance the read position.
146 ch
= chars
[charIndex
++];
// Every character needs at least one output byte.
148 if(posn
>= byteLength
)
150 throw new ArgumentException
151 (Strings
.GetString("Arg_InsufficientSpace"),
156 // Character maps to itself.
157 bytes
[posn
++] = (byte)ch
;
162 // Check for special Latin 1 characters that
163 // can be mapped to double-byte code points.
164 if(ch
== 0x00A2 || ch
== 0x00A3 || ch
== 0x00A7 ||
165 ch
== 0x00A8 || ch
== 0x00AC || ch
== 0x00B0 ||
166 ch
== 0x00B1 || ch
== 0x00B4 || ch
== 0x00B6 ||
167 ch
== 0x00D7 || ch
== 0x00F7)
// These characters take two output bytes, so one more slot must be free.
169 if((posn
+ 1) >= byteLength
)
171 throw new ArgumentException
173 ("Arg_InsufficientSpace"), "bytes");
// Eleven two-byte output sequences follow, each with lead byte 0x81;
// presumably one per character in the test above, in the same order
// - TODO confirm.
178 bytes
[posn
++] = (byte)0x81;
179 bytes
[posn
++] = (byte)0x91;
183 bytes
[posn
++] = (byte)0x81;
184 bytes
[posn
++] = (byte)0x92;
188 bytes
[posn
++] = (byte)0x81;
189 bytes
[posn
++] = (byte)0x98;
193 bytes
[posn
++] = (byte)0x81;
194 bytes
[posn
++] = (byte)0x4E;
198 bytes
[posn
++] = (byte)0x81;
199 bytes
[posn
++] = (byte)0xCA;
203 bytes
[posn
++] = (byte)0x81;
204 bytes
[posn
++] = (byte)0x8B;
208 bytes
[posn
++] = (byte)0x81;
209 bytes
[posn
++] = (byte)0x7D;
213 bytes
[posn
++] = (byte)0x81;
214 bytes
[posn
++] = (byte)0x4C;
218 bytes
[posn
++] = (byte)0x81;
219 bytes
[posn
++] = (byte)0xF7;
223 bytes
[posn
++] = (byte)0x81;
224 bytes
[posn
++] = (byte)0x7E;
228 bytes
[posn
++] = (byte)0x81;
229 bytes
[posn
++] = (byte)0x80;
// 0x00A5 is emitted as the single byte 0x5C.
233 else if(ch
== 0x00A5)
236 bytes
[posn
++] = (byte)0x5C;
// NOTE(review): byteCount is passed by ref here, while the space checks in
// this method use byteLength captured at the top. Confirm that byteLength
// stays in sync if HandleFallback changes byteCount.
// The HandleFallback call and the '?' store that follows look like two
// alternative treatments of an unmappable character - confirm which one is
// compiled.
241 HandleFallback (ref buffer
,
242 chars
, ref charIndex
, ref charCount
,
243 bytes
, ref posn
, ref byteCount
);
245 // Invalid character.
246 bytes
[posn
++] = (byte)'?';
251 else if(ch
>= 0x0391 && ch
<= 0x0451)
253 // Greek subset characters.
// Two bytes per entry in greekToJis, indexed from 0x0391 and combined
// low byte first.
254 value = (ch
- 0x0391) * 2;
255 value = ((int)(greekToJis
[value])) |
256 (((int)(greekToJis
[value + 1])) << 8);
258 else if(ch
>= 0x2010 && ch
<= 0x9FA5)
260 // This range contains the bulk of the CJK set.
// Two bytes per entry in cjkToJis, indexed from 0x2010 and combined
// low byte first.
261 value = (ch
- 0x2010) * 2;
262 value = ((int)(cjkToJis
[value])) |
263 (((int)(cjkToJis
[value + 1])) << 8);
265 else if(ch
>= 0xE000 && ch
<= 0xE757)
// Characters in 0xE000..0xE757 are computed arithmetically rather than
// looked up: the offset from 0xE000 is split with divisor 0xBC.
268 int diff
= ch
- 0xE000;
269 value = ((int) (diff
/ 0xBC) << 8)
272 if (value % 0x100 >= 0x7F)
275 else if(ch
>= 0xFF01 && ch
<= 0xFF60)
// Two bytes per entry in extraToJis, indexed from 0xFF01 and combined
// low byte first.
277 value = (ch
- 0xFF01) * 2;
278 value = ((int)(extraToJis
[value])) |
279 (((int)(extraToJis
[value + 1])) << 8);
// NOTE(review): 0xFF60 already satisfies the previous test, so this branch
// effectively handles 0xFF61..0xFFA0. For ch == 0xFFA0 the result is 0xE0,
// which the decoder in this file treats as the first byte of a double-byte
// sequence - confirm whether the upper bound should be 0xFF9F.
281 else if(ch
>= 0xFF60 && ch
<= 0xFFA0)
283 value = ch
- 0xFF60 + 0xA0;
287 // Invalid character.
293 HandleFallback (ref buffer
,
294 chars
, ref charIndex
, ref charCount
,
295 bytes
, ref posn
, ref byteCount
);
297 bytes
[posn
++] = (byte)'?';
// Values below 0x0100 are written as a single byte.
300 else if(value < 0x0100)
302 bytes
[posn
++] = (byte)value;
// Everything beyond this point writes two bytes.
304 else if((posn
+ 1) >= byteLength
)
306 throw new ArgumentException
307 (Strings
.GetString("Arg_InsufficientSpace"),
310 else if(value < 0x8000)
312 // JIS X 0208 character.
// Trail byte: position within a 0xBC-wide row, offset by 0x40.
315 value = (value % 0xBC) + 0x40;
// Lead byte: indices below 0x1F (0x9F - 0x80) start at 0x81, the rest
// continue from 0xE0. "ch" is presumably the row index (value / 0xBC)
// at this point rather than the input character - TODO confirm.
320 if(ch
< (0x9F - 0x80))
322 bytes
[posn
++] = (byte)(ch
+ 0x81);
326 bytes
[posn
++] = (byte)(ch
- (0x9F - 0x80) + 0xE0);
328 bytes
[posn
++] = (byte)value;
// Values in 0xF040..0xF9FC are emitted as-is, high byte then low byte.
330 else if (value >= 0xF040 && value <= 0xF9FC)
333 bytes
[posn
++] = (byte) (value / 0x100);
334 bytes
[posn
++] = (byte) (value % 0x100);
338 // JIS X 0212 character, which Shift-JIS doesn't
339 // support, but we've already allocated two slots.
340 bytes
[posn
++] = (byte)'?';
341 bytes
[posn
++] = (byte)'?';
345 // Return the final length to the caller.
346 return posn
- byteIndex
;
// Count the characters that decoding bytes[index .. index + count) yields.
// A fresh CP932Decoder is created for every call and invoked with its
// "refresh" argument set to true.
public override int GetCharCount (byte [] bytes, int index, int count)
{
	CP932Decoder decoder = new CP932Decoder (JISConvert.Convert);
	return decoder.GetCharCount (bytes, index, count, true);
}
// Decode bytes[byteIndex .. byteIndex + byteCount) into chars, starting at
// charIndex. The work is delegated to a freshly created CP932Decoder built
// from JISConvert.Convert; the int returned by that call is returned here.
355 public override int GetChars (
356 byte [] bytes
, int byteIndex
, int byteCount
,
357 char [] chars
, int charIndex
)
359 return new CP932Decoder (JISConvert
.Convert
).GetChars (bytes
,
360 byteIndex
, byteCount
, chars
, charIndex
,
364 // Get the maximum number of bytes needed to encode a
365 // specified number of characters.
//
// Returns charCount * 2.
// Throws ArgumentOutOfRangeException with the "ArgRange_NonNegative"
// message (presumably when charCount is negative - confirm the guard).
// NOTE(review): charCount * 2 is plain int arithmetic; a very large
// charCount could overflow unless checked arithmetic is enabled.
366 public override int GetMaxByteCount(int charCount
)
370 throw new ArgumentOutOfRangeException
372 Strings
.GetString("ArgRange_NonNegative"));
374 return charCount
* 2;
377 // Get the maximum number of characters needed to decode a
378 // specified number of bytes.
//
// Throws ArgumentOutOfRangeException with the "ArgRange_NonNegative"
// message (presumably when byteCount is negative - confirm the guard).
379 public override int GetMaxCharCount(int byteCount
)
383 throw new ArgumentOutOfRangeException
385 Strings
.GetString("ArgRange_NonNegative"));
// Create a decoder that keeps rolling Shift-JIS state between calls.
// The decoder is built on the conversion tables from JISConvert.Convert.
public override Decoder GetDecoder()
{
	JISConvert tables = JISConvert.Convert;
	return new CP932Decoder (tables);
}
// Name reported for this encoding in mail message bodies.
public override String BodyName
{
	get
	{
		return "iso-2022-jp";
	}
}
// Human-readable description of this encoding.
public override String EncodingName
{
	get
	{
		return "Japanese (Shift-JIS)";
	}
}
// Name reported for this encoding in mail agent headers.
public override String HeaderName
{
	get
	{
		return "iso-2022-jp";
	}
}
// The four bool properties below each override a base-class member and
// describe where this encoding may be used (Web browser / mail-news agent,
// display / save).
413 // Determine if this encoding can be displayed in a Web browser.
414 public override bool IsBrowserDisplay
{
418 // Determine if this encoding can be saved from a Web browser.
419 public override bool IsBrowserSave
{
423 // Determine if this encoding can be displayed in a mail/news agent.
424 public override bool IsMailNewsDisplay
{
428 // Determine if this encoding can be saved from a mail/news agent.
429 public override bool IsMailNewsSave
{
// IANA-preferred name of this encoding for use on the Web.
public override String WebName
{
	get
	{
		return "shift_jis";
	}
}
// Windows code page number that this encoding object represents.
public override int WindowsCodePage
{
	get
	{
		return SHIFTJIS_CODE_PAGE;
	}
}
445 #endif // !ECMA_COMPAT
447 // Decoder that handles a rolling Shift-JIS state.
448 sealed class CP932Decoder
: DbcsEncoding
.DbcsDecoder
// Conversion tables used by GetChars (its jisx0208ToUnicode member).
450 private JISConvert convert
;
// Value carried between calls of the four-argument GetCharCount: read into
// "last" on entry, then either reset to '\0' or set back from "last".
451 private int last_byte_count
;
// Same role for the six-argument GetChars; kept separate from
// last_byte_count.
452 private int last_byte_chars
;
// Constructor: stores the JISConvert instance whose tables this decoder
// reads when converting bytes to characters.
455 public CP932Decoder(JISConvert convert
)
458 this.convert
= convert
;
461 // Override inherited methods.
// Three-argument form: forwards to the overload that also takes a
// "refresh" flag, supplying false for it.
public override int GetCharCount (byte [] bytes, int index, int count)
{
	const bool refresh = false;
	return GetCharCount (bytes, index, count, refresh);
}
// Count the characters produced by decoding bytes[index .. index + count).
// The value saved in last_byte_count is read on entry; near the end it is
// either reset to '\0' or stored back from "last" (presumably depending on
// "refresh" - TODO confirm).
473 int GetCharCount (byte [] bytes
, int index
, int count
, bool refresh
)
// Validate the byte range before reading from it.
475 CheckRange (bytes
, index
, count
);
477 // Determine the total length of the converted string.
480 int last
= last_byte_count
;
// Fetch the next input byte and advance the read position.
483 byteval
= bytes
[index
++];
// Bytes in 0x81..0x9F or 0xE0..0xEF start a double-byte sequence.
487 if((byteval
>= 0x81 && byteval
<= 0x9F) ||
488 (byteval
>= 0xE0 && byteval
<= 0xEF))
490 // First byte in a double-byte sequence.
497 // Second byte in a double-byte sequence.
504 last_byte_count
= '\0';
507 last_byte_count
= last
;
509 // Return the total length.
// Five-argument form: forwards to the overload that also takes a
// "refresh" flag, supplying false for it.
public override int GetChars (byte [] bytes, int byteIndex, int byteCount,
	char [] chars, int charIndex)
{
	const bool refresh = false;
	return GetChars (bytes, byteIndex, byteCount, chars, charIndex, refresh);
}
526 byte [] bytes
, int byteIndex
, int byteCount
,
527 char [] chars
, int charIndex
, bool refresh
)
// Decodes bytes[byteIndex .. byteIndex + byteCount) into chars starting at
// charIndex and returns posn - charIndex, i.e. how far the output position
// advanced. Throws ArgumentException when chars has no room left.
529 CheckRange (bytes
, byteIndex
, byteCount
,
532 // Decode the bytes in the buffer.
533 int posn
= charIndex
;
534 int charLength
= chars
.Length
;
// Value carried over from the previous call.
536 int last
= last_byte_chars
;
// NOTE(review): "table" is declared twice, once as "byte *" and once as
// "byte[]". Presumably only one form is compiled, selected by a
// conditional-compilation directive - confirm.
538 byte *table
= convert
.jisx0208ToUnicode
;
540 byte[] table
= convert
.jisx0208ToUnicode
;
// Fetch the next input byte and advance the read position.
544 byteval
= bytes
[byteIndex
++];
548 if(posn
>= charLength
)
550 throw new ArgumentException
552 ("Arg_InsufficientSpace"), "chars");
// Bytes in 0x81..0x9F or 0xE0..0xEF start a double-byte sequence.
554 if((byteval
>= 0x81 && byteval
<= 0x9F) ||
555 (byteval
>= 0xE0 && byteval
<= 0xEF))
557 // First byte in a double-byte sequence.
560 else if(byteval
< 0x80)
562 // Ordinary ASCII/Latin1 character.
563 chars
[posn
++] = (char)byteval
;
565 else if(byteval
>= 0xA1 && byteval
<= 0xDF)
567 // Half-width katakana character.
// Bytes 0xA1..0xDF map linearly onto U+FF61 onwards.
568 chars
[posn
++] = (char)(byteval
- 0xA1 + 0xFF61);
572 // Invalid first byte.
578 // Second byte in a double-byte sequence.
// Lead bytes 0x81..0x9F select rows 0..0x1E; each row is 0xBC entries wide.
579 if(last
>= 0x81 && last
<= 0x9F)
581 value = (last
- 0x81) * 0xBC;
// NOTE(review): the first-byte test above accepts only 0x81..0x9F and
// 0xE0..0xEF, so "last" in 0xF0..0xFC looks unreachable unless it is set
// elsewhere - confirm.
583 else if (last
>= 0xF0 && last
<= 0xFC && byteval
<= 0xFC)
586 value = 0xE000 + (last
- 0xF0) * 0xBC + byteval
;
// Remaining lead bytes (0xE0 and up) continue from row 0x1F (0xA0 - 0x81).
592 value = (last
- 0xE0 + (0xA0 - 0x81)) * 0xBC;
// Trail bytes 0x40..0x7E give columns 0..0x3E; 0x80..0xFC give columns
// from 0x3F upwards (0x7F itself is not accepted).
595 if(byteval
>= 0x40 && byteval
<= 0x7E)
597 value += (byteval
- 0x40);
599 else if(byteval
>= 0x80 && byteval
<= 0xFC)
601 value += (byteval
- 0x80 + 0x3F);
605 // Invalid second byte.
// Read two consecutive table bytes and combine them low byte first into
// the resulting character value.
610 value = ((int)(table
[value])) |
611 (((int)(table
[value + 1])) << 8);
614 chars
[posn
++] = (char)value;
// U+30FB is stored as the output character here.
624 chars
[posn
++] = '\u30FB';
// The carried value is either cleared or saved for the next call
// (presumably depending on "refresh" - TODO confirm).
625 last_byte_chars
= '\0';
628 last_byte_chars
= last
;
630 // Return the final length to the caller.
631 return posn
- charIndex
;
634 } // class CP932Decoder
// Encoding class named after the "shift_jis" label. It adds nothing to
// CP932 beyond a public parameterless constructor.
public class ENCshift_jis : CP932
{
	public ENCshift_jis()
		: base()
	{
	}

}; // class ENCshift_jis
643 }; // namespace I18N.CJK