2 * CP51932.cs - Japanese EUC-JP code page.
4 * It is based on CP932.cs from Portable.NET
7 * Atsushi Enomoto <atsushi@ximian.com>
9 * Below are original (CP932.cs) copyright lines
13 * Copyright (c) 2002 Southern Storm Software, Pty Ltd
15 * Permission is hereby granted, free of charge, to any person obtaining
16 * a copy of this software and associated documentation files (the "Software"),
17 * to deal in the Software without restriction, including without limitation
18 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19 * and/or sell copies of the Software, and to permit persons to whom the
20 * Software is furnished to do so, subject to the following conditions:
22 * The above copyright notice and this permission notice shall be included
23 * in all copies or substantial portions of the Software.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
26 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
28 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
29 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
30 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
31 * OTHER DEALINGS IN THE SOFTWARE.
36 There appears to be no source for jis.table. It seems to have been
37 generated from text files on the Unicode Home Page such as
38 ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT
39 However, that mapping is non-normative and, in Japan, it contains many problems.
41 FIXME: Some characters such as 0xFF0B (wide "plus") are missing in
46 0x00-0x1F, 0x7F : control characters
48 0xA1A1-0xFEFE : Kanji (precisely, both bytes contain only A1-FE)
49 0x8EA1-0x8EDF : half-width Katakana
50 0x8FA1A1-0x8FFEFE : Supplementary Kanji (JIS X 0212)
// EUC-JP text encoding for code page 51932; derives from Encoding.
// NOTE(review): each line of this listing still carries the original file's
// line number as a prefix and some original lines (braces among them) are
// absent, so the text is not compilable as shown.
61 public class CP51932
: Encoding
63 // Magic number used by Windows for the EUC-JP code page.
64 private const int EUC_JP_CODE_PAGE
= 51932;
// Holder of the conversion tables; assigned in the constructor and read by
// GetByteCount, GetBytes and GetChars.
67 private JISConvert convert
;
69 // Conversion cache (note that encoding is not thread safe)
// Constructor: passes EUC_JP_CODE_PAGE to the base constructor and then
// stores the value of JISConvert.Convert in the convert field.
// presumably JISConvert.Convert is a shared table set - TODO confirm.
73 public CP51932 () : base (EUC_JP_CODE_PAGE
)
75 // Load the JIS conversion tables.
76 convert
= JISConvert
.Convert
;
79 // Get the number of bytes needed to encode a character buffer.
// Parameters:
//   chars - source character buffer.
//   index - position of the first character to examine.
//   count - number of characters to examine.
// Throws ArgumentNullException ("chars") and ArgumentOutOfRangeException
// ("index" / "count") when the arguments do not describe a valid slice.
// NOTE(review): the loop header, the length accumulator and the return
// statement are not visible in this listing; the comments below describe
// only the statements that are.
80 public override int GetByteCount (char [] chars
, int index
, int count
)
82 // Validate the parameters.
84 throw new ArgumentNullException("chars");
// index may equal chars.Length (an empty slice at the end of the buffer).
86 if (index
< 0 || index
> chars
.Length
)
87 throw new ArgumentOutOfRangeException
88 ("index", Strings
.GetString ("ArgRange_Array"));
// count is checked against what remains after index.
90 if (count
< 0 || count
> (chars
.Length
- index
))
91 throw new ArgumentOutOfRangeException
92 ("count", Strings
.GetString ("ArgRange_Array"));
94 // Determine the length of the final output.
// Local aliases for the lookup tables held by the convert field.
97 byte [] cjkToJis
= convert
.cjkToJis
;
98 byte [] extraToJis
= convert
.extraToJis
;
// Fetch the next character and advance the read position.
101 ch
= chars
[index
++];
105 // Character maps to itself.
107 } else if (ch
< 0x0100) {
108 // Check for special Latin 1 characters that
109 // can be mapped to double-byte code points.
110 if(ch
== 0x00A2 || ch
== 0x00A3 || ch
== 0x00A7 ||
111 ch
== 0x00A8 || ch
== 0x00AC || ch
== 0x00B0 ||
112 ch
== 0x00B1 || ch
== 0x00B4 || ch
== 0x00B6 ||
113 ch
== 0x00D7 || ch
== 0x00F7)
117 } else if (ch
>= 0x0391 && ch
<= 0x0451) {
118 // Greek subset characters.
120 } else if (ch
>= 0x2010 && ch
<= 0x9FA5) {
121 // This range contains the bulk of the CJK set.
// Each table entry occupies two bytes, low byte first: the index is the
// character's offset from 0x2010 doubled, and the second byte is shifted
// left by 8 before being combined with the first.
122 value = (ch
- 0x2010) * 2;
123 value = ((int) (cjkToJis
[value])) | (((int)(cjkToJis
[value + 1])) << 8);
128 } else if(ch
>= 0xFF01 && ch
<= 0xFFEF) {
129 // This range contains extra characters,
130 // including half-width katakana.
// Same two-bytes-per-entry layout as above, offset from 0xFF01.
131 value = (ch
- 0xFF01) * 2;
132 value = ((int)(extraToJis
[value])) |
133 (((int)(extraToJis
[value + 1])) << 8);
137 // Return the length to the caller.
141 // Get the bytes that result from encoding a character buffer.
// Parameters:
//   chars     - source character buffer.
//   charIndex - position of the first character to encode.
//   charCount - number of characters to encode.
//   bytes     - destination byte buffer.
//   byteIndex - position in bytes at which writing starts.
// Returns the number of bytes written (posn - byteIndex).
// Throws ArgumentNullException ("chars" / "bytes"),
// ArgumentOutOfRangeException ("charIndex" / "charCount" / "byteIndex"), and
// ArgumentException ("bytes") when the destination runs out of room.
// NOTE(review): several original lines (null checks, some branch conditions,
// closing braces) are not visible in this listing.
142 public override int GetBytes (char[] chars
, int charIndex
, int charCount
,
143 byte[] bytes
, int byteIndex
)
145 // Validate the parameters.
148 throw new ArgumentNullException("chars");
152 throw new ArgumentNullException("bytes");
154 if(charIndex
< 0 || charIndex
> chars
.Length
)
156 throw new ArgumentOutOfRangeException
157 ("charIndex", Strings
.GetString("ArgRange_Array"));
159 if(charCount
< 0 || charCount
> (chars
.Length
- charIndex
))
161 throw new ArgumentOutOfRangeException
162 ("charCount", Strings
.GetString("ArgRange_Array"));
164 if(byteIndex
< 0 || byteIndex
> bytes
.Length
)
166 throw new ArgumentOutOfRangeException
167 ("byteIndex", Strings
.GetString("ArgRange_Array"));
170 // Convert the characters into their byte form.
// posn is the write cursor into bytes; byteLength is the hard upper bound.
171 int posn
= byteIndex
;
172 int byteLength
= bytes
.Length
;
// Local aliases for the lookup tables held by the convert field.
175 byte[] cjkToJis
= convert
.cjkToJis
;
176 byte[] greekToJis
= convert
.greekToJis
;
177 byte[] extraToJis
= convert
.extraToJis
;
179 while (charCount
> 0) {
180 ch
= chars
[charIndex
++];
// Every character produces at least one output byte, so a full buffer is
// an error before any classification is done.
182 if (posn
>= byteLength
) {
183 throw new ArgumentException (Strings
.GetString ("Arg_InsufficientSpace"), "bytes");
187 // Character maps to itself.
188 bytes
[posn
++] = (byte)ch
;
190 } else if (ch
>= 0x0391 && ch
<= 0x0451) {
191 // Greek subset characters.
// Table entries are two bytes each, low byte first; the same layout is
// used for the cjkToJis and extraToJis lookups below.
192 value = (ch
- 0x0391) * 2;
193 value = ((int)(greekToJis
[value])) |
194 (((int)(greekToJis
[value + 1])) << 8);
195 } else if (ch
>= 0x2010 && ch
<= 0x9FA5) {
196 // This range contains the bulk of the CJK set.
197 value = (ch
- 0x2010) * 2;
198 value = ((int) (cjkToJis
[value])) |
199 (((int)(cjkToJis
[value + 1])) << 8);
200 } else if (ch
>= 0xFF01 && ch
<= 0xFFEF) {
201 // This range contains extra characters,
202 // including half-width katakana.
203 value = (ch
- 0xFF01) * 2;
204 value = ((int) (extraToJis
[value])) |
205 (((int) (extraToJis
[value + 1])) << 8);
207 // Invalid character.
// NOTE(review): the condition guarding this '?' substitution is not
// visible in this listing.
212 bytes
[posn
++] = (byte) '?';
213 } else if (value < 0x0100) {
// Values below 0x0100 are written as a single byte.
214 bytes
[posn
++] = (byte) value;
// Everything from here on writes two bytes, so both slots must exist.
215 } else if ((posn
+ 1) >= byteLength
) {
216 throw new ArgumentException (Strings
.GetString ("Arg_InsufficientSpace"), "bytes");
217 } else if (value < 0x8000) {
218 // general 2byte glyph/kanji
// Split value into quotient and remainder by 0x5E (94) and offset each by
// 0xA1 to form the first and second output bytes.
220 bytes
[posn
++] = (byte) (value / 0x5E + 0xA1);
221 bytes
[posn
++] = (byte) (value % 0x5E + 0xA1);
222 //Console.WriteLine ("{0:X04}", ch);
227 // FIXME: JIS X 0212 support is not implemented.
// Two '?' bytes are emitted as a placeholder in this branch.
228 bytes
[posn
++] = (byte)'?';
229 bytes
[posn
++] = (byte)'?';
233 // Return the final length to the caller.
234 return posn
- byteIndex
;
237 // Get the number of characters needed to decode a byte buffer.
// Parameters:
//   bytes - source byte buffer.
//   index - position of the first byte to examine.
//   count - number of bytes to examine.
// Throws ArgumentNullException ("bytes") and ArgumentOutOfRangeException
// ("index" / "count") when the arguments do not describe a valid slice.
// NOTE(review): the loop header, most branch bodies and the return statement
// are not visible in this listing.
239 public override int GetCharCount (byte [] bytes
, int index
, int count
)
241 // Validate the parameters.
243 throw new ArgumentNullException ("bytes");
245 if (index
< 0 || index
> bytes
.Length
)
246 throw new ArgumentOutOfRangeException
247 ("index", Strings
.GetString("ArgRange_Array"));
249 if (count
< 0 || count
> (bytes
.Length
- index
))
250 throw new ArgumentOutOfRangeException
251 ("count", Strings
.GetString("ArgRange_Array"));
253 // Determine the total length of the converted string.
// Fetch the next byte and advance the read position.
258 byteval
= bytes
[index
++];
262 if (byteval
< 0x80) {
263 // Ordinary ASCII/Latin1 character, or the
264 // single-byte Yen or overline signs.
// NOTE(review): GetChars tests for 0x8F as the first byte of a three-byte
// sequence, and the file header lists 0x8FA1A1-0x8FFEFE for that form;
// confirm whether comparing against 0xFF here is intended.
267 else if (byteval
== 0xFF) {
273 ++length
; // "??" for invalid 3-byte character
278 // Missing second byte.
285 // Return the total length.
// Decode a range of EUC-JP bytes into characters.
// Parameters visible here: bytes (source), byteIndex (first byte to decode),
// byteCount (number of bytes to decode), chars (destination).
// NOTE(review): the line declaring the final parameter is not visible in this
// listing; the body uses charIndex as the start position in chars.
// Returns the number of characters written (posn - charIndex).
// Throws ArgumentNullException ("bytes" / "chars"),
// ArgumentOutOfRangeException ("byteIndex" / "byteCount" / "charIndex"), and
// ArgumentException ("chars") when the destination runs out of room.
289 public override int GetChars (byte[] bytes
, int byteIndex
,
290 int byteCount
, char[] chars
,
293 // Validate the parameters.
296 throw new ArgumentNullException("bytes");
300 throw new ArgumentNullException("chars");
302 if(byteIndex
< 0 || byteIndex
> bytes
.Length
)
304 throw new ArgumentOutOfRangeException
305 ("byteIndex", Strings
.GetString("ArgRange_Array"));
307 if(byteCount
< 0 || byteCount
> (bytes
.Length
- byteIndex
))
309 throw new ArgumentOutOfRangeException
310 ("byteCount", Strings
.GetString("ArgRange_Array"));
312 if(charIndex
< 0 || charIndex
> chars
.Length
)
314 throw new ArgumentOutOfRangeException
315 ("charIndex", Strings
.GetString("ArgRange_Array"));
318 // Decode the bytes in the buffer.
// posn is the write cursor into chars; charLength is the hard upper bound.
319 int posn
= charIndex
;
320 int charLength
= chars
.Length
;
// Local aliases for the two decoding tables held by the convert field.
323 byte[] table0208
= convert
.jisx0208ToUnicode
;
324 byte[] table0212
= convert
.jisx0212ToUnicode
;
326 while (byteCount
> 0) {
327 byteval
= bytes
[byteIndex
++];
// NOTE(review): the line between the throw and its argument list is not
// visible in this listing.
330 if (posn
>= charLength
)
331 throw new ArgumentException
333 ("Arg_InsufficientSpace"), "chars");
// 0x8F is handled as the first byte of a three-byte sequence.
335 if (byteval
== 0x8F) {
337 // Invalid second byte of a 3-byte character
338 // FIXME: What should we do?
341 // First byte in a triple-byte sequence
344 } else if (byteval
<= 0x7F) {
345 // Ordinary ASCII/Latin1/Control character.
346 chars
[posn
++] = (char) byteval
;
347 } else if (byteval
>= 0xA1 && byteval
<= 0xFE) {
348 // First byte in a double-byte sequence.
351 // Invalid first byte.
352 chars
[posn
++] = '?';
// last holds the previously seen lead byte (its declaration and
// assignment are not visible in this listing).
355 else if (last
== 0x8F) {
357 // FIXME: currently not supported yet
362 // Second byte in a double-byte sequence.
// Index = (lead - 0xA1) * 0x5E + (trail - 0xA1); this is the inverse of
// the "/ 0x5E" and "% 0x5E" split performed in GetBytes.
363 value = (last
- 0xA1) * 0x5E;
365 if (byteval
>= 0xA1 && byteval
<= 0xFE)
367 value += (byteval
- 0xA1);
371 // Invalid second byte.
373 chars
[posn
++] = '?';
// Table entries are read as two bytes, low byte first.
// NOTE(review): the statement that scales value to a byte offset and the
// condition selecting between the two tables are not visible here.
378 value = ((int) (table0208
[value]))
379 | (((int) (table0208
[value + 1])) << 8);
381 value = ((int) (table0212
[value]))
382 | (((int) (table0212
[value + 1])) << 8);
384 chars
[posn
++] = (char)value;
386 chars
[posn
++] = '?';
391 // Return the final length to the caller.
392 return posn
- charIndex
;
395 // Get the maximum number of bytes needed to encode a
396 // specified number of characters.
// Parameter: charCount - number of characters to be encoded.
// Returns charCount * 3.
// Throws ArgumentOutOfRangeException with the "ArgRange_NonNegative" message
// (the guarding condition itself is not visible in this listing).
// presumably the factor 3 covers the three-byte 0x8F-prefixed form listed in
// the file header - TODO confirm.
397 public override int GetMaxByteCount(int charCount
)
401 throw new ArgumentOutOfRangeException
403 Strings
.GetString("ArgRange_NonNegative"));
// NOTE(review): plain int multiplication; a very large charCount could
// overflow unless this is compiled in a checked context - confirm.
405 return charCount
* 3;
408 // Get the maximum number of characters needed to decode a
409 // specified number of bytes.
// Parameter: byteCount - number of bytes to be decoded.
// Throws ArgumentOutOfRangeException with the "ArgRange_NonNegative" message
// (the guarding condition itself is not visible in this listing).
// NOTE(review): the return statement is not visible in this listing, so the
// actual bound returned cannot be stated from here.
410 public override int GetMaxCharCount(int byteCount
)
414 throw new ArgumentOutOfRangeException
416 Strings
.GetString ("ArgRange_NonNegative"));
421 /* Use default implementation
422 public override Decoder GetDecoder()
424 return new CP51932Decoder(convert);
// Descriptive properties of the encoding. The three name properties that are
// visible (BodyName, HeaderName, WebName) all return "euc-jp".
// NOTE(review): the getters of the four Is* properties are not visible in
// this listing, so their values are not documented here.
430 // Get the mail body name for this encoding.
431 public override String BodyName
{
432 get { return "euc-jp"; }
435 // Get the human-readable name for this encoding.
436 public override String EncodingName
{
437 get { return "Japanese (EUC)"; }
440 // Get the mail agent header name for this encoding.
441 public override String HeaderName
{
442 get { return "euc-jp"; }
445 // Determine if this encoding can be displayed in a Web browser.
446 public override bool IsBrowserDisplay
{
450 // Determine if this encoding can be saved from a Web browser.
451 public override bool IsBrowserSave
{
455 // Determine if this encoding can be displayed in a mail/news agent.
456 public override bool IsMailNewsDisplay
{
460 // Determine if this encoding can be saved from a mail/news agent.
461 public override bool IsMailNewsSave
{
465 // Get the IANA-preferred Web name for this encoding.
466 public override String WebName
{
467 get { return "euc-jp"; }
470 // Get the Windows code page represented by this object.
// Returns the same constant that the constructor passes to the base class.
471 public override int WindowsCodePage
{
472 get { return EUC_JP_CODE_PAGE; }
475 #endif // !ECMA_COMPAT
// Subclass of CP51932 whose only visible member is a parameterless
// constructor that chains to the base constructor.
// presumably exists so the encoding can be located by a class name derived
// from "euc-jp" - TODO confirm against the encoding lookup code.
478 public class ENCeuc_jp
: CP51932
480 public ENCeuc_jp () : base() {}
484 }; // namespace I18N.CJK