2010-04-15 Jb Evain <jbevain@novell.com>
[mcs.git] / class / I18N / CJK / CP936.cs
blob7b83448939c0ad9f93fb1f77339d243c53be0d81
//
// I18N.CJK.CP936.cs
//
// Author:
//	Atsushi Enomoto  <atsushi@ximian.com>
//
// (new implementation based on CP950.)
//
10 using System;
11 using System.Text;
12 using I18N.Common;
14 namespace I18N.CJK
// Double-byte encoding for Windows code page 936, built on the
// DbcsConvert.Gb2312 conversion tables.
[Serializable]
internal class CP936 : DbcsEncoding
{
	// Magic number used by Windows for the Gb2312 code page.
	private const int GB2312_CODE_PAGE = 936;

	// Constructor.
	public CP936 () : base (GB2312_CODE_PAGE)
	{
	}

	// Get the conversion tables used by both the encoder and the decoder.
	internal override DbcsConvert GetConvert ()
	{
		return DbcsConvert.Gb2312;
	}

	// Throw when fewer than "needed" bytes remain in an output buffer
	// that holds "capacity" bytes and has been filled up to "byteIndex".
	private static void EnsureSpace (int byteIndex, int needed, int capacity)
	{
		if (capacity - byteIndex < needed)
			throw new ArgumentException (
				"Insufficient space in the output buffer.", "bytes");
	}

	// Get the number of bytes needed to encode a character buffer.
	//
	// chars - pointer to the first character to measure.
	// count - number of characters to measure.
	//
	// Returns one byte for each character passed through unchanged
	// (c <= 0x80 or c == 0xFF), two bytes for each character that has
	// an entry in the u2n table, and one byte for each character that
	// has no entry.
	public unsafe override int GetByteCountImpl (
		char* chars, int count)
	{
		DbcsConvert gb2312 = GetConvert ();
		int length = 0;
		for (int i = 0; i < count; i++) {
			char c = chars [i];
			if (c <= 0x80 || c == 0xFF) { // ASCII
				length++;
				continue;
			}
			// u2n holds two bytes per UTF-16 code unit:
			// second byte at c * 2, first byte at c * 2 + 1.
			byte b1 = gb2312.u2n [((int) c) * 2 + 1];
			byte b2 = gb2312.u2n [((int) c) * 2];
			if (b1 == 0 && b2 == 0) {
				// Unmappable character. Counting zero bytes here made
				// the result smaller than what GetBytesImpl() writes
				// for the same input, so count the single byte that a
				// one-byte replacement such as '?' occupies.
				// FIXME: consult the encoder fallback so that longer
				// (or empty) replacements are counted exactly.
				length++;
			} else {
				length += 2;
			}
		}
		return length;
	}

	// Get the bytes that result from encoding a character buffer.
	//
	// chars     - pointer to the first character to encode.
	// charCount - number of characters to encode.
	// bytes     - pointer to the start of the output buffer.
	// byteCount - number of bytes available at "bytes".
	//
	// Returns the number of bytes written.
	// Throws ArgumentException when the output buffer is too small;
	// previously the method wrote past the end of the buffer instead.
	public unsafe override int GetBytesImpl (char* chars, int charCount,
						 byte* bytes, int byteCount)
	{
		DbcsConvert gb2312 = GetConvert ();
		int charIndex = 0;
		int byteIndex = 0;
		// Snapshot of the buffer size: byteCount itself is handed to
		// HandleFallback() by reference and may be changed there.
		int capacity = byteCount;
#if NET_2_0
		EncoderFallbackBuffer buffer = null;
#endif

		int origIndex = byteIndex;
		while (charCount-- > 0) {
			char c = chars [charIndex++];
			if (c <= 0x80 || c == 0xFF) { // ASCII
				EnsureSpace (byteIndex, 1, capacity);
				bytes [byteIndex++] = (byte) c;
				continue;
			}
			byte b1 = gb2312.u2n [((int) c) * 2 + 1];
			byte b2 = gb2312.u2n [((int) c) * 2];
			if (b1 == 0 && b2 == 0) {
#if NET_2_0
				// NOTE(review): charIndex already points past the
				// unmappable character here - confirm that this is
				// what HandleFallback() expects.
				HandleFallback (ref buffer, chars,
					ref charIndex, ref charCount,
					bytes, ref byteIndex, ref byteCount);
#else
				EnsureSpace (byteIndex, 1, capacity);
				bytes [byteIndex++] = (byte) '?';
#endif
			} else {
				EnsureSpace (byteIndex, 2, capacity);
				bytes [byteIndex++] = b1;
				bytes [byteIndex++] = b2;
			}
		}
		return byteIndex - origIndex;
	}

	// Get the number of characters that result from decoding a byte buffer.
	// A fresh decoder is used, so no state is carried between calls.
	public override int GetCharCount (byte [] bytes, int index, int count)
	{
		return GetDecoder ().GetCharCount (bytes, index, count);
	}

	// Get the characters that result from decoding a byte buffer.
	// A fresh decoder is used, so no state is carried between calls.
	public override int GetChars (byte [] bytes, int byteIndex, int byteCount,
				      char [] chars, int charIndex)
	{
		return GetDecoder ().GetChars (
			bytes, byteIndex, byteCount, chars, charIndex);
	}

	// Get a decoder that handles a rolling Gb2312 state.
	public override Decoder GetDecoder ()
	{
		return new CP936Decoder (GetConvert ());
	}

	// Get the mail body name for this encoding.
	public override String BodyName
	{
		get { return "gb2312"; }
	}

	// Get the human-readable name for this encoding.
	public override String EncodingName
	{
		get { return "Chinese Simplified (GB2312)"; }
	}

	// Get the mail agent header name for this encoding.
	public override String HeaderName
	{
		get { return "gb2312"; }
	}

	// Determine if this encoding can be displayed in a Web browser.
	public override bool IsBrowserDisplay
	{
		get { return true; }
	}

	// Determine if this encoding can be saved from a Web browser.
	public override bool IsBrowserSave
	{
		get { return true; }
	}

	// Determine if this encoding can be displayed in a mail/news agent.
	public override bool IsMailNewsDisplay
	{
		get { return true; }
	}

	// Determine if this encoding can be saved from a mail/news agent.
	public override bool IsMailNewsSave
	{
		get { return true; }
	}

	// Get the IANA-preferred Web name for this encoding.
	public override String WebName
	{
		get { return "gb2312"; }
	}
}
// Decoder that handles a rolling Gb2312 state.
//
// A lead byte that arrives at the end of one buffer is remembered and
// combined with the first byte of the next buffer. GetCharCount() and
// GetChars() keep separate pending-byte fields, so a count call does not
// consume the state that the following GetChars() call relies on.
sealed class CP936Decoder : DbcsEncoding.DbcsDecoder
{
	// Constructor.
	public CP936Decoder (DbcsConvert convert)
		: base (convert)
	{
	}

	// Pending lead byte (0 = none) for GetCharCount() and GetChars().
	int last_byte_count, last_byte_bytes;

	// Get the number of characters that result from decoding a byte buffer.
	// A trailing lead byte is kept pending rather than counted.
	public override int GetCharCount (byte [] bytes, int index, int count)
	{
		return GetCharCount (bytes, index, count, false);
	}

	// Get the number of characters that result from decoding a byte buffer.
	// When "refresh" is true a trailing lead byte is counted as one
	// character and the pending state is cleared; otherwise it is stored
	// for the next call.
#if NET_2_0
	public override
#endif
	int GetCharCount (byte [] bytes, int index, int count, bool refresh)
	{
		CheckRange (bytes, index, count);

		int lastByte = last_byte_count;
		last_byte_count = 0;
		int length = 0;
		while (count-- > 0) {
			int b = bytes [index++];
			if (lastByte == 0) {
				if (b <= 0x80 || b == 0xFF) // ASCII
					length++;
				else
					lastByte = b; // lead byte: wait for its trail byte
				continue;
			}
			// Lead + trail always produce exactly one character.
			length++;
			lastByte = 0;
		}

		if (lastByte != 0) {
			if (refresh) {
				length++;
				last_byte_count = 0;
			} else {
				last_byte_count = lastByte;
			}
		}

		return length;
	}

	// Get the characters that result from decoding a byte buffer.
	// A trailing lead byte is kept pending rather than decoded.
	public override int GetChars (byte [] bytes, int byteIndex, int byteCount,
				      char [] chars, int charIndex)
	{
		return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
	}

	// Get the characters that result from decoding a byte buffer.
	//
	// Bytes <= 0x80 and 0xFF are copied through as single characters.
	// Any other byte (0x81..0xFE) is a lead byte that is combined with
	// the following trail byte and looked up in the n2u table. Pairs
	// with no table entry, and pairs whose trail byte is outside
	// 0x40..0xFE, decode to '?'.
	//
	// When "refresh" is true a trailing lead byte is written as '?' and
	// the pending state is cleared; otherwise it is stored for the next
	// call. Returns the number of characters written.
#if NET_2_0
	public override
#endif
	int GetChars (byte [] bytes, int byteIndex, int byteCount,
		      char [] chars, int charIndex, bool refresh)
	{
		CheckRange (bytes, byteIndex, byteCount, chars, charIndex);

		int origIndex = charIndex;
		int lastByte = last_byte_bytes;
		last_byte_bytes = 0;
		while (byteCount-- > 0) {
			int b = bytes [byteIndex++];
			if (lastByte == 0) {
				if (b <= 0x80 || b == 0xFF) // ASCII
					chars [charIndex++] = (char) b;
				else
					lastByte = b; // 0x81..0xFE: lead byte
				continue;
			}

			// The table has 191 two-byte entries per lead byte, one
			// for each trail byte 0x40..0xFE. A trail byte outside
			// that range would index into a neighbouring row and
			// yield an unrelated character, so reject it up front.
			char c1 = '\0';
			if (b >= 0x40 && b != 0xFF) {
				int ord = ((lastByte - 0x81) * 191 + b - 0x40) * 2;
				// Both ord and ord + 1 are read below.
				if (ord + 1 < convert.n2u.Length)
					c1 = (char) (convert.n2u [ord] + convert.n2u [ord + 1] * 256);
			}
			chars [charIndex++] = c1 == 0 ? '?' : c1;
			lastByte = 0;
		}

		if (lastByte != 0) {
			if (refresh) {
				// FIXME: handle fallback
				chars [charIndex++] = '?';
				last_byte_bytes = 0;
			} else {
				last_byte_bytes = lastByte;
			}
		}

		return charIndex - origIndex;
	}
}
// CP936 under the class name ENCgb2312; adds no members of its own.
// NOTE(review): presumably lets the encoding be located by the name
// "gb2312" - confirm against the code that loads I18N encodings.
[Serializable]
internal class ENCgb2312 : CP936
{
	public ENCgb2312 () : base ()
	{
	}
}