5 // Alan Tam Siu Lung (Tam@SiuLung.com)
6 // Atsushi Enomoto <atsushi@ximian.com>
// Big5 encoding (code page 950) implemented on top of DbcsEncoding.
// Encoding indexes the u2n table of the DbcsConvert returned by GetConvert();
// decoding is delegated to the nested CP950Decoder obtained from GetDecoder().
16 internal class CP950
: DbcsEncoding
18 // Code page number (950) used by Windows for the Big5 code page.
// Passed to the DbcsEncoding base constructor by CP950().
19 private const int BIG5_CODE_PAGE
= 950;
// Constructor: hands the Big5 code page number to the DbcsEncoding base class.
22 public CP950() : base(BIG5_CODE_PAGE
) {
// Returns DbcsConvert.Big5, the conversion object whose u2n / n2u tables
// the encode and decode methods of this class index into.
25 internal override DbcsConvert
GetConvert ()
27 return DbcsConvert
.Big5
;
30 // Get the number of bytes that result from encoding a character buffer.
// Takes only an input pointer and a count (no output buffer) and returns int.
// NOTE(review): the loop header and the length accumulation are not visible
// in this listing; the comments below describe only the visible statements.
31 public unsafe override int GetByteCountImpl (char* chars
, int count
)
33 DbcsConvert convert
= GetConvert ();
// Read the next input character and advance the read index.
38 char c
= chars
[index
++];
// Characters 0x0000-0x0080 and 0x00FF take this branch; the range is wider
// than 7-bit ASCII despite the label.
39 if (c
<= 0x80 || c
== 0xFF) { // ASCII
// u2n stores two bytes per character: index c*2+1 supplies b1 and index
// c*2 supplies b2.
43 byte b1
= convert
.u2n
[((int)c
) * 2 + 1];
44 byte b2
= convert
.u2n
[((int)c
) * 2];
// Both table bytes zero: GetBytesImpl treats the same condition as an
// unmappable character and calls HandleFallback.
45 if (b1
== 0 && b2
== 0) {
47 // FIXME: handle fallback for GetByteCountImpl().
59 // Get the bytes that result from encoding a character buffer.
// Reads charCount characters from chars, writes the encoded bytes to bytes,
// and returns the number of bytes written (byteIndex - origIndex).
// NOTE(review): no comparison of byteIndex against byteCount is visible
// before the writes below; byteCount is only passed by ref to HandleFallback.
// Confirm that the caller guarantees enough output space.
60 public unsafe override int GetBytesImpl (char* chars
, int charCount
,
61 byte* bytes
, int byteCount
)
63 DbcsConvert convert
= GetConvert ();
// Starts out null and is handed to HandleFallback by ref.
67 EncoderFallbackBuffer buffer
= null;
// Remember the starting output position so the return value can be computed
// as a difference.
70 int origIndex
= byteIndex
;
// One input character is consumed per iteration.
71 while (charCount
-- > 0) {
72 char c
= chars
[charIndex
++];
// Characters 0x0000-0x0080 and 0x00FF are written through unchanged as a
// single byte; the range is wider than 7-bit ASCII despite the label.
73 if (c
<= 0x80 || c
== 0xFF) { // ASCII
74 bytes
[byteIndex
++] = (byte)c
;
// u2n stores two bytes per character: index c*2+1 supplies b1 and index
// c*2 supplies b2.
77 byte b1
= convert
.u2n
[((int)c
) * 2 + 1];
78 byte b2
= convert
.u2n
[((int)c
) * 2];
// Both table bytes zero is handled as "no mapping for this character".
79 if (b1
== 0 && b2
== 0) {
// The fallback helper receives the fallback buffer, both indices and both
// counts by ref, so it is able to update all of them.
81 HandleFallback (ref buffer
, chars
,
82 ref charIndex
, ref charCount
,
83 bytes
, ref byteIndex
, ref byteCount
);
// Writes a literal '?' byte.
// NOTE(review): the lines between the HandleFallback call and this write
// are not visible; presumably this is an alternative path for builds
// without fallback support - confirm.
85 bytes
[byteIndex
++] = (byte)'?';
// Mapped character: emit b1 first, then b2.
88 bytes
[byteIndex
++] = b1
;
89 bytes
[byteIndex
++] = b2
;
// Number of bytes produced by this call.
92 return byteIndex
- origIndex
;
95 // Get the characters that result from decoding a byte buffer.
// The final statement delegates to GetDecoder ().GetChars with the same
// arguments.
96 public override int GetChars(byte[] bytes
, int byteIndex
, int byteCount
,
97 char[] chars
, int charIndex
)
// NOTE(review): the statements numbered 100-132 below form an inline decode
// loop that uses lastByte without a visible declaration and ends in its own
// return, ahead of the delegating return at 135. This looks like a disabled
// earlier implementation (comment delimiters are not visible in this
// listing) - confirm before relying on it.
100 DbcsConvert convert = GetConvert ();
102 base.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
103 int origIndex = charIndex;
105 while (byteCount-- > 0) {
106 int b = bytes[byteIndex++];
108 if (b <= 0x80 || b == 0xFF) { // ASCII
109 chars[charIndex++] = (char)b;
110 } else if (b < 0xA1 || b >= 0xFA) {
111 // incorrect first byte.
112 chars[charIndex++] = '?';
113 byteCount--; // cut one more byte.
119 int ord = ((lastByte - 0xA1) * 191 + b - 0x40) * 2;
120 char c1 = ord < 0 || ord > convert.n2u.Length ?
122 (char)(convert.n2u[ord] + convert.n2u[ord + 1] * 256);
124 chars[charIndex++] = '?';
126 chars[charIndex++] = c1;
130 chars[charIndex++] = '?';
132 return charIndex - origIndex;
// Delegating path: decode through a decoder obtained from GetDecoder ().
135 return GetDecoder ().GetChars (bytes
, byteIndex
, byteCount
, chars
, charIndex
);
138 // Get a decoder that handles a rolling Big5 state.
// Each call constructs a new CP950Decoder over the conversion object
// returned by GetConvert ().
139 public override Decoder
GetDecoder()
141 return new CP950Decoder(GetConvert ());
144 // Get the mail body name for this encoding; always the literal "big5".
145 public override String BodyName
147 get { return "big5"; }
150 // Get the human-readable name for this encoding; always the literal
// "Chinese Traditional (Big5)".
151 public override String EncodingName
153 get { return "Chinese Traditional (Big5)"; }
156 // Get the mail agent header name for this encoding; always the literal "big5".
157 public override String HeaderName
159 get { return "big5"; }
162 // Get the IANA-preferred Web name for this encoding; always the literal "big5".
163 public override String WebName
165 get { return "big5"; }
169 // Get the Windows code page represented by this object.
170 public override int WindowsCodePage
// NOTE(review): BIG5_PAGE is not declared anywhere in the visible code; the
// constant defined by this class is BIG5_CODE_PAGE. Either this property is
// disabled in the full file or the identifier should be BIG5_CODE_PAGE -
// confirm.
172 get { return BIG5_PAGE; }
176 // Decoder that handles a rolling Big5 state.
// State is kept in the fields last_byte_count (used by GetCharCount) and
// last_byte_conv (used by GetChars), each loaded at the start of a call and
// stored back at the end.
177 private sealed class CP950Decoder
: DbcsDecoder
// Constructor: forwards the conversion object to the DbcsDecoder base class.
180 public CP950Decoder(DbcsConvert convert
) : base(convert
) {}
// Per-decoder state carried across calls. last_byte_count is read and written
// only by GetCharCount, last_byte_conv only by GetChars, so counting and
// converting each keep their own state.
// NOTE(review): presumably each holds the lead byte of a double-byte
// character whose trail byte has not arrived yet - confirm.
181 int last_byte_count
, last_byte_conv
;
// Public counting entry point: forwards to the four-argument overload with
// refresh set to false.
183 public override int GetCharCount (byte[] bytes
, int index
, int count
)
185 return GetCharCount (bytes
, index
, count
, false);
// Counting overload that carries state in last_byte_count: the field is
// loaded into lastByte on entry and stored back before the method ends.
// NOTE(review): refresh is not referenced in any visible line, and the
// length accumulation and return statement are not visible in this listing.
191 int GetCharCount (byte[] bytes
, int index
, int count
, bool refresh
)
// Argument check helper defined elsewhere (presumably validates the
// array / index / count combination - confirm).
193 CheckRange (bytes
, index
, count
);
// Resume from the state left by the previous counting call.
195 int lastByte
= last_byte_count
;
// One input byte is consumed per iteration.
198 while (count
-- > 0) {
199 int b
= bytes
[index
++];
// Bytes 0x00-0x80 and 0xFF take this branch; the range is wider than
// 7-bit ASCII despite the label.
201 if (b
<= 0x80 || b
== 0xFF) { // ASCII
// Bytes below 0xA1 or at/above 0xFA are rejected as lead bytes.
203 } else if (b
< 0xA1 || b
>= 0xFA) {
204 // incorrect first byte.
// NOTE(review): only the remaining count is reduced here; no matching
// advance of index is visible - confirm this is intended.
206 count
--; // cut one more byte.
// Table offset for the (lastByte, b) pair: 191 slots per lead byte starting
// at lead 0xA1, trail offset measured from 0x40, two table bytes per slot.
212 int ord
= ((lastByte
- 0xA1) * 191 + b
- 0x40) * 2;
// The character is assembled from two table bytes: n2u[ord] is the low byte
// and n2u[ord + 1] the high byte.
// NOTE(review): the range test uses "ord > Length", so ord == Length and
// ord == Length - 1 pass the test although n2u[ord + 1] would be out of
// range - confirm n2u sizing or tighten the test.
213 char c1
= ord
< 0 || ord
> convert
.n2u
.Length
?
215 (char)(convert
.n2u
[ord
] + convert
.n2u
[ord
+ 1] * 256);
// Persist the state for the next counting call.
229 last_byte_count
= lastByte
;
// Public decoding entry point: forwards to the six-argument overload with
// refresh set to false.
234 public override int GetChars(byte[] bytes
, int byteIndex
, int byteCount
,
235 char[] chars
, int charIndex
)
237 return GetChars (bytes
, byteIndex
, byteCount
, chars
, charIndex
, false);
// Decoding overload that carries state in last_byte_conv: the field is loaded
// into lastByte on entry and stored back before returning. Returns the number
// of characters written (charIndex - origIndex).
// NOTE(review): refresh is not referenced in any visible line, and several
// branch conditions and state updates are not visible in this listing.
243 int GetChars(byte[] bytes
, int byteIndex
, int byteCount
,
244 char[] chars
, int charIndex
, bool refresh
)
// Argument check helper defined elsewhere (presumably validates both
// buffers and their indices - confirm).
246 CheckRange (bytes
, byteIndex
, byteCount
, chars
, charIndex
);
// Remember the starting output position so the return value can be computed
// as a difference.
248 int origIndex
= charIndex
;
// Resume from the state left by the previous decoding call.
249 int lastByte
= last_byte_conv
;
// One input byte is consumed per iteration.
251 while (byteCount
-- > 0) {
252 int b
= bytes
[byteIndex
++];
// Bytes 0x00-0x80 and 0xFF are copied through as a single character; the
// range is wider than 7-bit ASCII despite the label.
254 if (b
<= 0x80 || b
== 0xFF) { // ASCII
255 chars
[charIndex
++] = (char)b
;
// Bytes below 0xA1 or at/above 0xFA are rejected as lead bytes and
// replaced by '?'.
256 } else if (b
< 0xA1 || b
>= 0xFA) {
257 // incorrect first byte.
258 chars
[charIndex
++] = '?';
// NOTE(review): only the remaining byteCount is reduced here; no matching
// advance of byteIndex is visible - confirm this is intended.
259 byteCount
--; // cut one more byte.
// Table offset for the (lastByte, b) pair: 191 slots per lead byte starting
// at lead 0xA1, trail offset measured from 0x40, two table bytes per slot.
265 int ord
= ((lastByte
- 0xA1) * 191 + b
- 0x40) * 2;
// The character is assembled from two table bytes: n2u[ord] is the low byte
// and n2u[ord + 1] the high byte.
// NOTE(review): the range test uses "ord > Length", so ord == Length and
// ord == Length - 1 pass the test although n2u[ord + 1] would be out of
// range - confirm n2u sizing or tighten the test.
266 char c1
= ord
< 0 || ord
> convert
.n2u
.Length
?
268 (char)(convert
.n2u
[ord
] + convert
.n2u
[ord
+ 1] * 256);
// Either '?' or the decoded character c1 is written; the test selecting
// between the two writes is not visible in this listing.
270 chars
[charIndex
++] = '?';
272 chars
[charIndex
++] = c1
;
// A further '?' write; its guarding condition is not visible in this listing.
278 chars
[charIndex
++] = '?';
// Persist the state for the next decoding call.
280 last_byte_conv
= lastByte
;
// Number of characters produced by this call.
282 return charIndex
- origIndex
;
288 internal class ENCbig5
: CP950