5 // Atsushi Enomoto <atsushi@ximian.com>
8 using System
.Reflection
;
15 internal class ENCgb18030
: GB18030Encoding
17 public ENCgb18030 (): base () {}
21 public class CP54936
: GB18030Encoding { }
24 public class GB18030Encoding
: MonoEncoding
27 public GB18030Encoding ()
32 public override string EncodingName
{
33 get { return "Chinese Simplified (GB18030)"; }
36 public override string HeaderName
{
37 get { return "GB18030"; }
40 public override string BodyName
{
41 get { return "GB18030"; }
44 public override string WebName
{
45 get { return "GB18030"; }
48 public override bool IsMailNewsDisplay
{
52 public override bool IsMailNewsSave
{
56 public override bool IsBrowserDisplay
{
60 public override bool IsBrowserSave
{
64 public override int GetMaxByteCount (int len
)
66 // non-GB2312 characters in \u0080 - \uFFFF
70 public override int GetMaxCharCount (int len
)
75 public override int GetByteCount (char [] chars
, int index
, int length
)
77 return new GB18030Encoder (this).GetByteCount (chars
, index
, length
, true);
80 public unsafe override int GetByteCountImpl (char* chars
, int count
)
82 return new GB18030Encoder (this).GetByteCountImpl (chars
, count
, true);
85 public unsafe override int GetBytesImpl (char* chars
, int charCount
, byte* bytes
, int byteCount
)
87 return new GB18030Encoder (this).GetBytesImpl (chars
, charCount
, bytes
, byteCount
, true);
90 public override int GetCharCount (byte [] bytes
, int start
, int len
)
92 return new GB18030Decoder ().GetCharCount (bytes
, start
, len
);
95 public override int GetChars (byte [] bytes
, int byteIdx
, int srclen
, char [] chars
, int charIdx
)
97 return new GB18030Decoder ().GetChars (bytes
, byteIdx
, srclen
, chars
, charIdx
);
100 public override Encoder
GetEncoder ()
102 return new GB18030Encoder (this);
105 public override Decoder
GetDecoder ()
107 return new GB18030Decoder ();
111 class GB18030Decoder
: DbcsEncoding
.DbcsDecoder
113 static DbcsConvert gb2312
= DbcsConvert
.Gb2312
;
114 // for now incomplete block is not supported - should we?
115 // int incomplete1 = -1, incomplete2 = -1, incomplete3 = -1;
117 public GB18030Decoder ()
122 public override int GetCharCount (byte [] bytes
, int start
, int len
)
124 CheckRange (bytes
, start
, len
);
126 int end
= start
+ len
;
128 while (start
< end
) {
129 if (bytes
[start
] < 0x80) {
134 else if (bytes
[start
] == 0x80) {
135 // Euro sign - actually it is obsolete,
136 // now it's just reserved but not used
141 else if (bytes
[start
] == 0xFF) {
142 // invalid data - fill '?'
147 else if (start
+ 1 >= end
) {
148 // incomplete1 = bytes [start];
152 break; // incomplete tail.
155 byte second
= bytes
[start
+ 1];
156 if (second
== 0x7F || second
== 0xFF) {
162 else if (0x30 <= second
&& second
<= 0x39) {
164 if (start
+ 3 >= end
) {
166 // incomplete1 = bytes [start];
167 // incomplete2 = bytes [start + 1];
168 // if (start + 3 == end)
169 // incomplete3 = bytes [start + 2];
170 ret
+= start
+ 3 == end
? 3 : 2;
173 long value = GB18030Source
.FromGBX (bytes
, start
);
177 start
-= (int) value;
178 } else if (value >= 0x10000) {
196 public override int GetChars (byte [] bytes
, int byteIndex
, int byteCount
, char [] chars
, int charIndex
)
198 CheckRange (bytes
, byteIndex
, byteCount
, chars
, charIndex
);
200 int byteEnd
= byteIndex
+ byteCount
;
201 int charStart
= charIndex
;
203 while (byteIndex
< byteEnd
) {
204 if (bytes
[byteIndex
] < 0x80) {
205 chars
[charIndex
++] = (char) bytes
[byteIndex
++];
208 else if (bytes
[byteIndex
] == 0x80) {
209 // Euro sign - actually it is obsolete,
210 // now it's just reserved but not used
211 chars
[charIndex
++] = '\u20AC';
215 else if (bytes
[byteIndex
] == 0xFF) {
216 // invalid data - fill '?'
217 chars
[charIndex
++] = '?';
221 else if (byteIndex
+ 1 >= byteEnd
) {
222 //incomplete1 = bytes [byteIndex++];
225 break; // incomplete tail.
228 byte second
= bytes
[byteIndex
+ 1];
229 if (second
== 0x7F || second
== 0xFF) {
231 chars
[charIndex
++] = '?';
234 else if (0x30 <= second
&& second
<= 0x39) {
236 if (byteIndex
+ 3 >= byteEnd
) {
238 //incomplete1 = bytes [byteIndex];
239 //incomplete2 = bytes [byteIndex + 1];
240 //if (byteIndex + 3 == byteEnd)
241 // incomplete3 = bytes [byteIndex + 2];
244 long value = GB18030Source
.FromGBX (bytes
, byteIndex
);
247 chars
[charIndex
++] = '?';
248 byteIndex
-= (int) value;
249 } else if (value >= 0x10000) {
252 chars
[charIndex
++] = (char) (value / 0x400 + 0xD800);
253 chars
[charIndex
++] = (char) (value % 0x400 + 0xDC00);
257 chars
[charIndex
++] = (char) value;
261 byte first
= bytes
[byteIndex
];
262 int ord
= ((first
- 0x81) * 191 + second
- 0x40) * 2;
263 char c1
= ord
< 0 || ord
>= gb2312
.n2u
.Length
?
264 '\0' : (char) (gb2312
.n2u
[ord
] + gb2312
.n2u
[ord
+ 1] * 256);
266 chars
[charIndex
++] = '?';
268 chars
[charIndex
++] = c1
;
273 return charIndex
- charStart
;
277 class GB18030Encoder
: MonoEncoder
279 static DbcsConvert gb2312
= DbcsConvert
.Gb2312
;
281 public GB18030Encoder (MonoEncoding owner
)
286 char incomplete_byte_count
;
287 char incomplete_bytes
;
289 public unsafe override int GetByteCountImpl (char* chars
, int count
, bool refresh
)
294 while (start
< end
) {
295 char ch
= chars
[start
];
301 } else if (Char
.IsSurrogate (ch
)) {
303 if (start
+ 1 == end
) {
304 incomplete_byte_count
= ch
;
313 if (ch
< 0x80 || ch
== 0xFF) {
320 byte b1
= gb2312
.u2n
[((int) ch
) * 2 + 1];
321 byte b2
= gb2312
.u2n
[((int) ch
) * 2];
322 if (b1
!= 0 && b2
!= 0) {
330 long value = GB18030Source
.FromUCS (ch
);
339 if (incomplete_byte_count
!= char.MinValue
)
341 incomplete_byte_count
= char.MinValue
;
346 public unsafe override int GetBytesImpl (char* chars
, int charCount
, byte* bytes
, int byteCount
, bool refresh
)
351 int charEnd
= charIndex
+ charCount
;
352 int byteStart
= byteIndex
;
353 char ch
= incomplete_bytes
;
355 while (charIndex
< charEnd
) {
356 if (incomplete_bytes
== char.MinValue
)
357 ch
= chars
[charIndex
++];
359 incomplete_bytes
= char.MinValue
;
363 bytes
[byteIndex
++] = (byte) ch
;
365 } else if (Char
.IsSurrogate (ch
)) {
367 if (charIndex
== charEnd
) {
368 incomplete_bytes
= ch
;
371 char ch2
= chars
[charIndex
++];
372 if (!Char
.IsSurrogate (ch2
)) {
376 chars
, ref charIndex
, ref charCount
,
377 bytes
, ref byteIndex
, ref byteCount
);
379 bytes
[byteIndex
++] = (byte) '?';
383 int cp
= (ch
- 0xD800) * 0x400 + ch2
- 0xDC00;
384 GB18030Source
.Unlinear (bytes
+ byteIndex
, GB18030Source
.FromUCSSurrogate (cp
));
390 if (ch
<= 0x80 || ch
== 0xFF) {
391 // Character maps to itself
392 bytes
[byteIndex
++] = (byte) ch
;
396 byte b1
= gb2312
.u2n
[((int) ch
) * 2 + 1];
397 byte b2
= gb2312
.u2n
[((int) ch
) * 2];
398 if (b1
!= 0 && b2
!= 0) {
399 bytes
[byteIndex
++] = b1
;
400 bytes
[byteIndex
++] = b2
;
404 long value = GB18030Source
.FromUCS (ch
);
406 bytes
[byteIndex
++] = 0x3F; // invalid(?)
409 GB18030Source
.Unlinear (bytes
+ byteIndex
, value);
415 if (incomplete_bytes
!= char.MinValue
)
416 bytes
[byteIndex
++] = 0x3F; // incomplete
417 incomplete_bytes
= char.MinValue
;
420 return byteIndex
- byteStart
;