**** Merged from MCS ****
[mono-project.git] / mcs / class / I18N / CJK / CP949.cs
blobbff64a41557a9817fba3b9b15bb4cb411aa7623c
1 //
2 // I18N.CJK.CP949
3 //
4 // Author:
5 // Hye-Shik Chang (perky@FreeBSD.org)
6 //
8 using System;
9 using System.Text;
10 using I18N.Common;
12 namespace I18N.CJK
14 internal class CP949 : DbcsEncoding
// Code page number that Windows uses for UHC.
private const int UHC_CODE_PAGE = 949;

// Constructor.  Passes the UHC code page number to the base class and
// points the convert member at KSConvert.Convert, whose u2n / n2u
// tables the conversion methods of this class index into.
public CP949()
    : base(UHC_CODE_PAGE)
{
    convert = KSConvert.Convert;
}
// Encode charCount characters of chars, starting at charIndex, into
// bytes starting at byteIndex.
//
// Characters <= 0x80 and the character 0xFF are written as one byte.
// Every other character is looked up in convert.u2n, which holds two
// bytes per UTF-16 code unit (index = code unit * 2).  An entry of two
// zero bytes means "no mapping" and is written as a single '?'.
//
// Returns the number of bytes written.
public override int GetBytes(char[] chars, int charIndex, int charCount,
                             byte[] bytes, int byteIndex)
{
    // Return value is not used here; presumably the base implementation
    // validates the arguments -- confirm against DbcsEncoding.
    base.GetBytes(chars, charIndex, charCount, bytes, byteIndex);

    int firstByte = byteIndex;
    for (int remaining = charCount; remaining > 0; remaining--)
    {
        int code = chars[charIndex++];

        bool singleByte = code <= 0x80 || code == 0xFF;
        if (singleByte)
        {
            bytes[byteIndex++] = (byte)code;
        }
        else
        {
            int slot = code * 2;
            byte lead = convert.u2n[slot];
            byte trail = convert.u2n[slot + 1];

            if (lead != 0 || trail != 0)
            {
                bytes[byteIndex++] = lead;
                bytes[byteIndex++] = trail;
            }
            else
            {
                bytes[byteIndex++] = (byte)'?';
            }
        }
    }

    return byteIndex - firstByte;
}
// Get the characters that result from decoding a byte buffer.
//
// Bytes <= 0x80 and the byte 0xFF are copied through as single
// characters.  Any other byte is held as a lead byte and combined with
// the byte that follows it:
//   lead < 0xA1                    UHC Level 1, 178 trail slots per lead
//   lead <= 0xC6 and trail < 0xA1  UHC Level 2, 84 trail slots per lead
//   trail 0xA1-0xFE                KS X 1001, 94 trail slots per lead
// Pairs that fall outside these ranges, or whose table entry is zero,
// decode to '?'.  A lead byte with no following byte in this call is
// dropped, because the pending-lead state is local to the call.
//
// Returns the number of characters stored into chars.
public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                             char[] chars, int charIndex)
{
    // Return value is not used here; presumably the base implementation
    // validates the arguments -- confirm against DbcsEncoding.
    base.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
    int origIndex = charIndex;
    int lastByte = 0;

    while (byteCount-- > 0) {
        int b = bytes[byteIndex++];
        if (lastByte == 0) {
            if (b <= 0x80 || b == 0xFF) { // ASCII
                chars[charIndex++] = (char)b;
                continue;
            } else {
                // Lead byte: remember it and wait for the trail byte.
                lastByte = b;
                continue;
            }
        }

        // convert.n2u stores each character as two bytes, low byte
        // first, so entry "ord" lives at n2u[ord*2] and n2u[ord*2 + 1].
        char c1;
        if (lastByte < 0xA1) { // UHC Level 1
            // Level 1 entries follow the 94 * 94 = 8836 KS X 1001 entries.
            int ord = 8836 + (lastByte - 0x81) * 178;

            if (b >= 0x41 && b <= 0x5A)
                ord += b - 0x41;
            else if (b >= 0x61 && b <= 0x7A)
                ord += b - 0x61 + 26;
            else if (b >= 0x81 && b <= 0xFE)
                ord += b - 0x81 + 52;
            else
                ord = -1;

            if (ord >= 0)
                c1 = (char)(convert.n2u[ord*2] +
                            convert.n2u[ord*2 + 1] * 256);
            else
                c1 = (char)0;
        } else if (lastByte <= 0xC6 && b < 0xA1) { // UHC Level 2
            // Level 2 rows begin at lead byte 0xA1: the base 14532 is
            // 8836 + 32 * 178, i.e. it already accounts for every
            // Level 1 lead byte (0x81-0xA0).  The row index must
            // therefore be relative to 0xA1, as in CP949Decoder;
            // subtracting 0x81 here skipped 32 rows.
            int ord = 14532 + (lastByte - 0xA1) * 84;

            if (b >= 0x41 && b <= 0x5A)
                ord += b - 0x41;
            else if (b >= 0x61 && b <= 0x7A)
                ord += b - 0x61 + 26;
            else if (b >= 0x81 && b <= 0xA0)
                ord += b - 0x81 + 52;
            else
                ord = -1;

            if (ord >= 0)
                c1 = (char)(convert.n2u[ord*2] +
                            convert.n2u[ord*2 + 1] * 256);
            else
                c1 = (char)0;
        } else if (b >= 0xA1 && b <= 0xFE) { // KS X 1001
            // Here ord is already the byte offset (entry index * 2).
            int ord = ((lastByte - 0xA1) * 94 + b - 0xA1) * 2;

            c1 = (char)(convert.n2u[ord] +
                        convert.n2u[ord + 1] * 256);
        } else
            c1 = (char)0;

        // A zero result means "no mapping"; substitute '?'.
        if (c1 == 0)
            chars[charIndex++] = '?';
        else
            chars[charIndex++] = c1;
        lastByte = 0;
    }
    return charIndex - origIndex;
}
// Create a CP949Decoder over this encoding's conversion tables.  The
// decoder holds a pending lead byte in its lastByte member, so a
// two-byte character may be split across successive GetChars calls.
public override Decoder GetDecoder()
{
    Decoder decoder = new CP949Decoder(convert);
    return decoder;
}
// Name of this encoding for use in mail message bodies.
public override String BodyName
{
    get
    {
        return "ks_c_5601-1987";
    }
}
// Human-readable display name of this encoding.
public override String EncodingName
{
    get
    {
        return "Korean (UHC)";
    }
}
// Name of this encoding for use in mail agent headers.
public override String HeaderName
{
    get
    {
        return "ks_c_5601-1987";
    }
}
// IANA-preferred Web name of this encoding.
public override String WebName
{
    get
    {
        return "euc-kr";
    }
}
// Get the Windows code page represented by this object.
// Returns UHC_CODE_PAGE, the same constant the constructor passes to
// the base class.  (The previous identifier, UHC_PAGE, is not declared
// in this class.)
public override int WindowsCodePage
{
    get { return UHC_CODE_PAGE; }
}
// Decoder that handles a rolling UHC state: a lead byte seen at the end
// of one GetChars call stays in lastByte and is paired with the first
// byte of the next call.
private sealed class CP949Decoder : DbcsDecoder
{
    // Constructor.  Hands the conversion tables to the base decoder.
    public CP949Decoder(DbcsConvert convert)
        : base(convert)
    {
    }

    // Position of a UHC trail byte within its lead-byte row, or -1 when
    // the byte is not a valid trail byte.  Valid trail bytes are
    // 0x41-0x5A (slots 0-25), 0x61-0x7A (slots 26-51) and
    // 0x81-lastHighTrail (slots 52 and up).
    private static int UhcTrailOffset(int trail, int lastHighTrail)
    {
        if (trail >= 0x41 && trail <= 0x5A)
            return trail - 0x41;
        if (trail >= 0x61 && trail <= 0x7A)
            return trail - 0x61 + 26;
        if (trail >= 0x81 && trail <= lastHighTrail)
            return trail - 0x81 + 52;
        return -1;
    }

    // Read the character stored at byte offset slot of convert.n2u
    // (two bytes per character, low byte first).
    private char TableChar(int slot)
    {
        return (char)(convert.n2u[slot] + convert.n2u[slot + 1] * 256);
    }

    // Decode byteCount bytes starting at byteIndex into chars starting
    // at charIndex and return the number of characters written.
    // Bytes <= 0x80 and 0xFF become single characters; any other byte
    // is a lead byte that is combined with the byte after it.  Pairs
    // with no table mapping decode to '?'.
    public override int GetChars(byte[] bytes, int byteIndex,
                                 int byteCount, char[] chars, int charIndex)
    {
        // Return value is not used here; presumably the base
        // implementation validates the arguments -- confirm against
        // DbcsDecoder.
        base.GetChars(bytes, byteIndex, byteCount, chars, charIndex);

        int firstChar = charIndex;
        for (int remaining = byteCount; remaining > 0; remaining--)
        {
            int current = bytes[byteIndex++];

            if (lastByte == 0)
            {
                // No lead byte pending: emit a single-byte character or
                // remember this byte as the lead of a pair.
                if (current <= 0x80 || current == 0xFF)
                    chars[charIndex++] = (char)current;
                else
                    lastByte = current;
                continue;
            }

            char decoded;
            if (lastByte < 0xA1)
            {
                // UHC Level 1: 178 slots per lead byte, placed after the
                // 94 * 94 = 8836 KS X 1001 entries.
                int offset = UhcTrailOffset(current, 0xFE);
                int ord = offset >= 0
                    ? 8836 + (lastByte - 0x81) * 178 + offset
                    : -1;
                decoded = ord >= 0 ? TableChar(ord * 2) : '\0';
            }
            else if (lastByte <= 0xC6 && current < 0xA1)
            {
                // UHC Level 2: 84 slots per lead byte, rows counted
                // from lead byte 0xA1.
                int offset = UhcTrailOffset(current, 0xA0);
                int ord = offset >= 0
                    ? 14532 + (lastByte - 0xA1) * 84 + offset
                    : -1;
                decoded = ord >= 0 ? TableChar(ord * 2) : '\0';
            }
            else if (current >= 0xA1 && current <= 0xFE)
            {
                // KS X 1001: 94 slots per lead byte.
                int slot = ((lastByte - 0xA1) * 94 + current - 0xA1) * 2;
                decoded = TableChar(slot);
            }
            else
            {
                decoded = '\0';
            }

            // A zero result means "no mapping"; substitute '?'.
            chars[charIndex++] = decoded == 0 ? '?' : decoded;
            lastByte = 0;
        }

        return charIndex - firstChar;
    }
}
// CP949 exposed under the class name ENCuhc.  Adds no members of its
// own; presumably it exists so the encoding can be located by the name
// "uhc" -- confirm against the I18N lookup code.
internal class ENCuhc : CP949
{
    public ENCuhc()
        : base()
    {
    }
}
242 // ex: ts=8 sts=4 et