(DISTFILES): Comment out a few missing files.
[mono-project.git] / mcs / class / I18N / CJK / CP51932.cs
blob5c32d7a69843001339a5e18430c799361a34d74d
1 /*
2 * CP51932.cs - Japanese EUC-JP code page.
4 * It is based on CP932.cs from Portable.NET
6 * Author:
7 * Atsushi Enomoto <atsushi@ximian.com>
9 * Below are original (CP932.cs) copyright lines
11 * (C)2004 Novell Inc.
13 * Copyright (c) 2002 Southern Storm Software, Pty Ltd
15 * Permission is hereby granted, free of charge, to any person obtaining
16 * a copy of this software and associated documentation files (the "Software"),
17 * to deal in the Software without restriction, including without limitation
18 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19 * and/or sell copies of the Software, and to permit persons to whom the
20 * Software is furnished to do so, subject to the following conditions:
22 * The above copyright notice and this permission notice shall be included
23 * in all copies or substantial portions of the Software.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
26 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
28 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
29 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
30 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
31 * OTHER DEALINGS IN THE SOFTWARE.
36 There appears to be no source for jis.table. It seems to have been
37 generated from text files on the Unicode home page, such as
38 ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT
39 However, that mapping is non-normative, and in Japan it is known to contain many problems.
41 FIXME: Some characters such as 0xFF0B (wide "plus") are missing in
42 that table.
46 0x00-0x1F, 0x7F : control characters
47 0x20-0x7E : ASCII
48 0xA1A1-0xFEFE : Kanji (precisely, both bytes contain only A1-FE)
49 0x8EA1-0x8EDF : half-width Katakana
50 0x8FA1A1-0x8FFEFE : Supplementary Kanji (JIS X 0212)
54 namespace I18N.CJK
57 using System;
58 using System.Text;
59 using I18N.Common;
// Japanese EUC-JP encoding (Windows code page 51932) built on the shared
// JIS conversion tables.
//
// Byte layout handled by this class:
//   0x00-0x7F              ASCII and control characters (one byte)
//   0xA1-0xFE 0xA1-0xFE    JIS X 0208 double-byte character
//   0x8E 0xA1-0xDF         half-width katakana (U+FF61..U+FF9F)
//   0x8F 0xA1-0xFE 0xA1-0xFE
//                          JIS X 0212 three-byte character; not supported,
//                          the whole sequence decodes to a single '?'
//
// GetByteCount/GetBytes share EncodeCore and GetCharCount/GetChars share
// DecodeCore, so a count always equals the length the matching conversion
// produces for the same input and state.
public class CP51932 : Encoding
{
	// Magic number used by Windows for the EUC-JP code page.
	private const int EUC_JP_CODE_PAGE = 51932;

	// Single-shift lead bytes: SS2 introduces half-width katakana,
	// SS3 introduces a three-byte JIS X 0212 character.
	private const int SS2 = 0x8E;
	private const int SS3 = 0x8F;

	// Decoder state meaning "SS3 and a valid second byte were consumed,
	// the third byte is still pending".  Deliberately outside 0x00-0xFF
	// so it can never be confused with a stored lead byte.
	private const int SS3_TAIL = 0x0100;

	// Internal state.
	private JISConvert convert;

	// Pending decoder state left by the previous GetChars call: 0 when the
	// input ended on a character boundary, otherwise the lead byte (or
	// SS3_TAIL) of the unfinished sequence.  Because this lives on the
	// encoding object, decoding is not thread safe.
	int lastByte;

	// Constructor.
	public CP51932 () : base (EUC_JP_CODE_PAGE)
	{
		// Load the JIS conversion tables.
		convert = JISConvert.Convert;
	}

	// Get the number of bytes needed to encode a character buffer.
	// Throws ArgumentNullException / ArgumentOutOfRangeException for
	// invalid arguments.  The result equals what GetBytes writes for the
	// same range.
	public override int GetByteCount (char [] chars, int index, int count)
	{
		// Validate the parameters.
		if (chars == null)
			throw new ArgumentNullException ("chars");
		if (index < 0 || index > chars.Length)
			throw new ArgumentOutOfRangeException
				("index", Strings.GetString ("ArgRange_Array"));
		if (count < 0 || count > (chars.Length - index))
			throw new ArgumentOutOfRangeException
				("count", Strings.GetString ("ArgRange_Array"));

		// A null output buffer selects count-only mode.
		return EncodeCore (chars, index, count, null, 0);
	}

	// Get the bytes that result from encoding a character buffer.
	// Returns the number of bytes written starting at byteIndex.
	// Throws ArgumentException ("bytes") when the output buffer is too
	// small; bytes for the preceding characters have been written by then.
	public override int GetBytes (char[] chars, int charIndex, int charCount,
				      byte[] bytes, int byteIndex)
	{
		// Validate the parameters.
		if (chars == null)
			throw new ArgumentNullException ("chars");
		if (bytes == null)
			throw new ArgumentNullException ("bytes");
		if (charIndex < 0 || charIndex > chars.Length)
			throw new ArgumentOutOfRangeException
				("charIndex", Strings.GetString ("ArgRange_Array"));
		if (charCount < 0 || charCount > (chars.Length - charIndex))
			throw new ArgumentOutOfRangeException
				("charCount", Strings.GetString ("ArgRange_Array"));
		if (byteIndex < 0 || byteIndex > bytes.Length)
			throw new ArgumentOutOfRangeException
				("byteIndex", Strings.GetString ("ArgRange_Array"));

		return EncodeCore (chars, charIndex, charCount, bytes, byteIndex);
	}

	// Fetch the 16-bit little-endian table entry for one UTF-16 code unit.
	// Returns 0 when the character lies outside every table range (the
	// callers also treat a zero table entry as "unmapped").
	private int LookupJis (int ch)
	{
		byte [] table;
		int first;

		if (ch >= 0x0391 && ch <= 0x0451) {
			// Greek subset characters.
			table = convert.greekToJis;
			first = 0x0391;
		} else if (ch >= 0x2010 && ch <= 0x9FA5) {
			// This range contains the bulk of the CJK set.
			table = convert.cjkToJis;
			first = 0x2010;
		} else if (ch >= 0xFF01 && ch <= 0xFFEF) {
			// This range contains the extra (full-width) characters.
			table = convert.extraToJis;
			first = 0xFF01;
		} else {
			return 0;
		}

		int offset = (ch - first) * 2;
		return table [offset] | (table [offset + 1] << 8);
	}

	// Shared encoder.  Walks charCount characters and either writes their
	// encoded form to bytes (starting at byteIndex) or, when bytes is
	// null, only measures it.  Returns the encoded length in bytes.
	private int EncodeCore (char [] chars, int charIndex, int charCount,
				byte [] bytes, int byteIndex)
	{
		int posn = byteIndex;
		int end = charIndex + charCount;

		for (int i = charIndex; i < end; i++) {
			int ch = chars [i];
			int first;
			int second = -1;	// -1: single-byte output

			if (ch < 0x0080) {
				// Character maps to itself.
				first = ch;
			} else if (ch >= 0xFF61 && ch <= 0xFF9F) {
				// Half-width katakana: SS2 followed by the
				// JIS X 0201 katakana byte (0xA1-0xDF).
				first = SS2;
				second = ch - 0xFF61 + 0xA1;
			} else {
				int value = LookupJis (ch);
				if (value == 0) {
					// Unmapped character (this includes all of
					// Latin-1 0x80-0xFF, which has no table).
					first = '?';
				} else if (value < 0x0100) {
					first = value;
				} else if (value < 0x8000) {
					// General 2-byte glyph/kanji: the table
					// stores 0x0100 + row * 94 + cell.
					value -= 0x0100;
					first = value / 0x5E + 0xA1;
					second = value % 0x5E + 0xA1;
				} else {
					// FIXME: JIS X 0212 support is not implemented.
					first = '?';
					second = '?';
				}
			}

			int needed = (second < 0) ? 1 : 2;
			if (bytes != null) {
				if (posn + needed > bytes.Length)
					throw new ArgumentException
						(Strings.GetString ("Arg_InsufficientSpace"), "bytes");
				bytes [posn] = (byte) first;
				if (needed == 2)
					bytes [posn + 1] = (byte) second;
			}
			posn += needed;
		}

		// Return the final length to the caller.
		return posn - byteIndex;
	}

	// Get the number of characters needed to decode a byte buffer.
	// Starts from the pending state left by the last GetChars call (but
	// does not modify it), so the result equals what a following GetChars
	// call on the same range produces.
	public override int GetCharCount (byte [] bytes, int index, int count)
	{
		// Validate the parameters.
		if (bytes == null)
			throw new ArgumentNullException ("bytes");
		if (index < 0 || index > bytes.Length)
			throw new ArgumentOutOfRangeException
				("index", Strings.GetString ("ArgRange_Array"));
		if (count < 0 || count > (bytes.Length - index))
			throw new ArgumentOutOfRangeException
				("count", Strings.GetString ("ArgRange_Array"));

		// A null output buffer selects count-only mode; the resulting
		// state is discarded.
		int ignoredState;
		return DecodeCore (bytes, index, count, null, 0, out ignoredState);
	}

	// Decode a byte buffer into characters.  Returns the number of
	// characters written starting at charIndex.  A multi-byte sequence cut
	// off at the end of the range produces no output; it is remembered in
	// lastByte and completed by the first byte(s) of the next call.
	// Throws ArgumentException ("chars") when the output buffer is too
	// small; in that case the pending state is left unchanged.
	public override int GetChars (byte[] bytes, int byteIndex,
				      int byteCount, char[] chars,
				      int charIndex)
	{
		// Validate the parameters.
		if (bytes == null)
			throw new ArgumentNullException ("bytes");
		if (chars == null)
			throw new ArgumentNullException ("chars");
		if (byteIndex < 0 || byteIndex > bytes.Length)
			throw new ArgumentOutOfRangeException
				("byteIndex", Strings.GetString ("ArgRange_Array"));
		if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
			throw new ArgumentOutOfRangeException
				("byteCount", Strings.GetString ("ArgRange_Array"));
		if (charIndex < 0 || charIndex > chars.Length)
			throw new ArgumentOutOfRangeException
				("charIndex", Strings.GetString ("ArgRange_Array"));

		int state;
		int written = DecodeCore (bytes, byteIndex, byteCount,
					  chars, charIndex, out state);
		lastByte = state;
		return written;
	}

	// Shared decoder.  Walks byteCount bytes starting from the pending
	// state in lastByte and either stores the decoded characters in chars
	// (starting at charIndex) or, when chars is null, only counts them.
	// The state reached at the end of the range is returned through
	// 'state'; lastByte itself is not modified here.
	private int DecodeCore (byte [] bytes, int byteIndex, int byteCount,
				char [] chars, int charIndex, out int state)
	{
		int posn = charIndex;
		int last = lastByte;
		int end = byteIndex + byteCount;
		byte [] table0208 = convert.jisx0208ToUnicode;

		for (int i = byteIndex; i < end; i++) {
			int byteval = bytes [i];
			int ch = -1;	// -1: this byte completes no character

			if (last == 0) {
				if (byteval <= 0x7F) {
					// Ordinary ASCII/Control character.
					ch = byteval;
				} else if (byteval == SS2 || byteval == SS3 ||
					   (byteval >= 0xA1 && byteval <= 0xFE)) {
					// First byte of a multi-byte sequence.
					last = byteval;
				} else {
					// Invalid first byte.
					ch = '?';
				}
			} else {
				bool validTrail = (byteval >= 0xA1 && byteval <= 0xFE);
				if (last == SS3 && validTrail) {
					// Second byte of a 3-byte character;
					// one more byte has to be consumed.
					last = SS3_TAIL;
				} else {
					int lead = last;
					last = 0;
					if (!validTrail) {
						// Invalid continuation byte: it is
						// consumed and the sequence yields '?'.
						ch = '?';
					} else if (lead == SS2) {
						// Half-width katakana occupy 0xA1-0xDF only.
						if (byteval <= 0xDF)
							ch = 0xFF61 + (byteval - 0xA1);
						else
							ch = '?';
					} else if (lead == SS3_TAIL) {
						// FIXME: JIS X 0212 is not supported yet.
						ch = '?';
					} else {
						// Second byte in a double-byte sequence.
						int value = ((lead - 0xA1) * 0x5E
							     + (byteval - 0xA1)) * 2;
						value = table0208 [value]
							| (table0208 [value + 1] << 8);
						if (value != 0)
							ch = value;
						else
							ch = '?';
					}
				}
			}

			if (ch >= 0) {
				if (chars != null) {
					// Check room before every store, including
					// the one completing a carried-over sequence.
					if (posn >= chars.Length)
						throw new ArgumentException
							(Strings.GetString
							 ("Arg_InsufficientSpace"), "chars");
					chars [posn] = (char) ch;
				}
				posn++;
			}
		}

		state = last;

		// Return the final length to the caller.
		return posn - charIndex;
	}

	// Get the maximum number of bytes needed to encode a
	// specified number of characters.
	public override int GetMaxByteCount (int charCount)
	{
		if (charCount < 0)
			throw new ArgumentOutOfRangeException
				("charCount",
				 Strings.GetString ("ArgRange_NonNegative"));
		return charCount * 3;
	}

	// Get the maximum number of characters needed to decode a
	// specified number of bytes.
	public override int GetMaxCharCount (int byteCount)
	{
		if (byteCount < 0)
			throw new ArgumentOutOfRangeException
				("byteCount",
				 Strings.GetString ("ArgRange_NonNegative"));
		return byteCount;
	}

	// GetDecoder is intentionally not overridden: the default
	// implementation from the base class is used.

#if !ECMA_COMPAT

	// Get the mail body name for this encoding.
	public override String BodyName {
		get { return "euc-jp"; }
	}

	// Get the human-readable name for this encoding.
	public override String EncodingName {
		get { return "Japanese (EUC)"; }
	}

	// Get the mail agent header name for this encoding.
	public override String HeaderName {
		get { return "euc-jp"; }
	}

	// Determine if this encoding can be displayed in a Web browser.
	public override bool IsBrowserDisplay {
		get { return true; }
	}

	// Determine if this encoding can be saved from a Web browser.
	public override bool IsBrowserSave {
		get { return true; }
	}

	// Determine if this encoding can be displayed in a mail/news agent.
	public override bool IsMailNewsDisplay {
		get { return true; }
	}

	// Determine if this encoding can be saved from a mail/news agent.
	public override bool IsMailNewsSave {
		get { return true; }
	}

	// Get the IANA-preferred Web name for this encoding.
	public override String WebName {
		get { return "euc-jp"; }
	}

	// Get the Windows code page represented by this object.
	public override int WindowsCodePage {
		get { return EUC_JP_CODE_PAGE; }
	}

#endif // !ECMA_COMPAT
}; // class CP51932
// Subclass of CP51932 that adds no behaviour of its own; it only exposes
// the same encoding under the type name ENCeuc_jp.
// NOTE(review): presumably this name is what a lookup by the "euc-jp"
// encoding name resolves to - confirm against the I18N loader.
public class ENCeuc_jp : CP51932
{
	// Construct the encoding exactly as the base class does.
	public ENCeuc_jp ()
		: base ()
	{
	}
}; // class ENCeuc_jp
484 }; // namespace I18N.CJK