2 * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
7 #define USING_IMPORTED_MAPS
8 #define USING_BINARY_PAIR_SEARCH
9 #define EXTERN_JISX0213_PAIR
10 #define EMULATE_JISX0213_2000_ENCODE_INVALID MAP_UNMAPPABLE
11 #define EMULATE_JISX0213_2000_DECODE_INVALID MAP_UNMAPPABLE
13 #include "cjkcodecs.h"
14 #include "alg_jisx0201.h"
15 #include "emu_jisx0213_2000.h"
16 #include "mappings_jisx0213_pair.h"
24 |+-----+---- G0-3 Character Set
25 +----------- Is G0-3 double byte?
/* Upper bound on the scan for the end of an escape sequence: the loop in
 * iso2022processesc() examines offsets 1 .. MAX_ESCSEQLEN-1 for a byte
 * accepted by IS_ESCEND, and returns 1 ("unterminated escape sequence")
 * when the index reaches this limit. */
40 #define MAX_ESCSEQLEN 16
/* Character set identifiers.  These are the values kept in the G0-G3 state
 * slots and compared against iso2022_designation.mark.  The low 7 bits are
 * the byte written at the end of the designating escape sequence (see
 * ESCMARK); CHARSET_DBCS is OR-ed in for double-byte sets, so the same
 * letter can name two different sets (e.g. 'B' vs. 'B'|CHARSET_DBCS). */
/* single-byte sets */
42 #define CHARSET_ISO8859_1 'A'
43 #define CHARSET_ASCII 'B'
44 #define CHARSET_ISO8859_7 'F'
45 #define CHARSET_JISX0201_K 'I'
46 #define CHARSET_JISX0201_R 'J'
/* double-byte sets */
48 #define CHARSET_GB2312 ('A'|CHARSET_DBCS)
49 #define CHARSET_JISX0208 ('B'|CHARSET_DBCS)
50 #define CHARSET_KSX1001 ('C'|CHARSET_DBCS)
51 #define CHARSET_JISX0212 ('D'|CHARSET_DBCS)
52 #define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS)
53 #define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS)
54 #define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS)
55 #define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
56 #define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS)
57 #define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
58 #define CHARSET_JISX0208_O ('@'|CHARSET_DBCS)
/* Marker bit for double-byte sets.  It is defined after its users, which is
 * fine because macro bodies are only expanded at the point of use. */
60 #define CHARSET_DBCS 0x80
/* Strip the CHARSET_DBCS bit from a charset mark, leaving the byte the
 * encoder writes as the last character of a designation sequence. */
61 #define ESCMARK(mark) ((mark) & 0x7f)
/* True when c can terminate an escape sequence: 'A'..'Z' or '@'. */
63 #define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
/* True when c2 is one of the escape sequence introducers handled here:
 * '(' ')' '$' '.' '&'.  The decoder applies it to IN2 (presumably the byte
 * right after ESC -- confirm against the IN2 definition in cjkcodecs.h). */
64 #define IS_ISO2022ESC(c2) \
65 ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
66 (c2) == '.' || (c2) == '&')
67 /* This is not a complete list of ISO-2022 escape sequence headers,
68 * but it is enough to implement the CJK instances of ISO-2022. */
/* Sentinel results of the per-charset decoder/encoder callbacks. */
/* No mapping exists for the input. */
70 #define MAP_UNMAPPABLE 0xFFFF
/* Encoder only: returned by jisx0213_encoder for a first character; the
 * iso2022 encoder then calls the callback again with two input characters
 * (or with the same one when flushing). */
71 #define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */
/* Flag bits stored in state->c[4] (see STATE_SETFLAG / STATE_GETFLAG). */
/* Set by the encoder when it switches to a plane-1 (G1) designation and
 * cleared when it goes back to G0 or resets; the decoder tests it when
 * choosing the active charset.  NOTE(review): the lines that emit/consume
 * the shift bytes are not visible in this view -- confirm. */
73 #define F_SHIFTED 0x01
/* "ESC throughout" mode of the decoder: set for escape sequences that are
 * not ISO-2022 ones; while set, the next byte is written out unchanged
 * (treated as ISO-8859-1) and the flag is cleared again. */
74 #define F_ESCTHROUGHOUT 0x02
/* Accessors for the codec state.  All of them expect a variable named
 * `state` in scope: state->c[0..3] hold the charsets designated to G0..G3
 * and state->c[4] holds the F_* flag bits.
 * NOTE: the modifying macros (STATE_SETG*, STATE_SETFLAG, STATE_CLEARFLAG,
 * STATE_CLEARFLAGS) already end in ';', which is why call sites in this
 * file invoke them without a trailing semicolon.  Do not use them as the
 * unbraced body of an if/else. */
/* Designate charset v to slot dn (0..3). */
76 #define STATE_SETG(dn, v) ((state)->c[dn]) = (v);
/* Read the charset currently designated to slot dn. */
77 #define STATE_GETG(dn) ((state)->c[dn])
/* Per-slot shorthands. */
79 #define STATE_G0 STATE_GETG(0)
80 #define STATE_G1 STATE_GETG(1)
81 #define STATE_G2 STATE_GETG(2)
82 #define STATE_G3 STATE_GETG(3)
83 #define STATE_SETG0(v) STATE_SETG(0, v)
84 #define STATE_SETG1(v) STATE_SETG(1, v)
85 #define STATE_SETG2(v) STATE_SETG(2, v)
86 #define STATE_SETG3(v) STATE_SETG(3, v)
/* Flag helpers: set, test (non-zero when any bit of f is set), clear the
 * bits of f, and clear every flag. */
88 #define STATE_SETFLAG(f) ((state)->c[4]) |= (f);
89 #define STATE_GETFLAG(f) ((state)->c[4] & (f))
90 #define STATE_CLEARFLAG(f) ((state)->c[4]) &= ~(f);
91 #define STATE_CLEARFLAGS() ((state)->c[4]) = 0;
/* View the opaque `config` pointer that is in scope as this codec's
 * struct iso2022_config. */
93 #define ISO2022_CONFIG ((const struct iso2022_config *)config)
/* Non-zero when `flag` is set in the config's flags word. */
94 #define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag))
/* The config's designation table; loops over it stop at the entry whose
 * mark is 0. */
95 #define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations)
97 /* iso2022_config.flags */
/* Makes iso2022processesc() accept the prefixed designation
 * ESC & @ ESC $ B and treat it as 'B'|CHARSET_DBCS. */
100 #define USE_JISX0208_EXT 0x04
102 /*-*- internal data structures -*-*/
104 typedef int (*iso2022_init_func
)(void);
105 typedef ucs4_t (*iso2022_decode_func
)(const unsigned char *data
);
106 typedef DBCHAR (*iso2022_encode_func
)(const ucs4_t
*data
, Py_ssize_t
*length
);
108 struct iso2022_designation
{
112 iso2022_init_func initializer
;
113 iso2022_decode_func decoder
;
114 iso2022_encode_func encoder
;
117 struct iso2022_config
{
119 const struct iso2022_designation
*designations
; /* non-ascii desigs */
122 /*-*- iso-2022 codec implementation -*-*/
126 const struct iso2022_designation
*desig
= CONFIG_DESIGNATIONS
;
127 for (desig
= CONFIG_DESIGNATIONS
; desig
->mark
; desig
++)
128 if (desig
->initializer
!= NULL
&& desig
->initializer() != 0)
133 ENCODER_INIT(iso2022
)
136 STATE_SETG0(CHARSET_ASCII
)
137 STATE_SETG1(CHARSET_ASCII
)
141 ENCODER_RESET(iso2022
)
143 if (STATE_GETFLAG(F_SHIFTED
)) {
146 STATE_CLEARFLAG(F_SHIFTED
)
148 if (STATE_G0
!= CHARSET_ASCII
) {
149 WRITE3(ESC
, '(', 'B')
151 STATE_SETG0(CHARSET_ASCII
)
159 const struct iso2022_designation
*dsg
;
165 if (STATE_G0
!= CHARSET_ASCII
) {
166 WRITE3(ESC
, '(', 'B')
167 STATE_SETG0(CHARSET_ASCII
)
170 if (STATE_GETFLAG(F_SHIFTED
)) {
172 STATE_CLEARFLAG(F_SHIFTED
)
175 WRITE1((unsigned char)c
)
181 insize
= GET_INSIZE(c
);
183 encoded
= MAP_UNMAPPABLE
;
184 for (dsg
= CONFIG_DESIGNATIONS
; dsg
->mark
; dsg
++) {
185 Py_ssize_t length
= 1;
186 encoded
= dsg
->encoder(&c
, &length
);
187 if (encoded
== MAP_MULTIPLE_AVAIL
) {
188 /* this implementation won't work for pair
189 * of non-bmp characters. */
191 if (!(flags
& MBENC_FLUSH
))
197 #if Py_UNICODE_SIZE == 2
200 u4in
[0] = (ucs4_t
)IN1
;
201 u4in
[1] = (ucs4_t
)IN2
;
202 encoded
= dsg
->encoder(u4in
, &length
);
204 encoded
= dsg
->encoder(&c
, &length
);
206 encoded
= dsg
->encoder(*inbuf
, &length
);
208 if (encoded
!= MAP_UNMAPPABLE
) {
213 else if (encoded
!= MAP_UNMAPPABLE
)
219 assert(dsg
->width
== 1 || dsg
->width
== 2);
221 switch (dsg
->plane
) {
223 if (STATE_GETFLAG(F_SHIFTED
)) {
225 STATE_CLEARFLAG(F_SHIFTED
)
228 if (STATE_G0
!= dsg
->mark
) {
229 if (dsg
->width
== 1) {
230 WRITE3(ESC
, '(', ESCMARK(dsg
->mark
))
231 STATE_SETG0(dsg
->mark
)
234 else if (dsg
->mark
== CHARSET_JISX0208
) {
235 WRITE3(ESC
, '$', ESCMARK(dsg
->mark
))
236 STATE_SETG0(dsg
->mark
)
240 WRITE4(ESC
, '$', '(',
242 STATE_SETG0(dsg
->mark
)
248 if (STATE_G1
!= dsg
->mark
) {
249 if (dsg
->width
== 1) {
250 WRITE3(ESC
, ')', ESCMARK(dsg
->mark
))
251 STATE_SETG1(dsg
->mark
)
255 WRITE4(ESC
, '$', ')',
257 STATE_SETG1(dsg
->mark
)
261 if (!STATE_GETFLAG(F_SHIFTED
)) {
263 STATE_SETFLAG(F_SHIFTED
)
267 default: /* G2 and G3 is not supported: no encoding in
268 * CJKCodecs are using them yet */
269 return MBERR_INTERNAL
;
272 if (dsg
->width
== 1) {
273 WRITE1((unsigned char)encoded
)
277 WRITE2(encoded
>> 8, encoded
& 0xff)
286 DECODER_INIT(iso2022
)
289 STATE_SETG0(CHARSET_ASCII
)
290 STATE_SETG1(CHARSET_ASCII
)
291 STATE_SETG2(CHARSET_ASCII
)
295 DECODER_RESET(iso2022
)
297 STATE_SETG0(CHARSET_ASCII
)
298 STATE_CLEARFLAG(F_SHIFTED
)
303 iso2022processesc(const void *config
, MultibyteCodec_State
*state
,
304 const unsigned char **inbuf
, Py_ssize_t
*inleft
)
306 unsigned char charset
, designation
;
307 Py_ssize_t i
, esclen
;
309 for (i
= 1;i
< MAX_ESCSEQLEN
;i
++) {
312 if (IS_ESCEND((*inbuf
)[i
])) {
316 else if (CONFIG_ISSET(USE_JISX0208_EXT
) && i
+1 < *inleft
&&
317 (*inbuf
)[i
] == '&' && (*inbuf
)[i
+1] == '@')
321 if (i
>= MAX_ESCSEQLEN
)
322 return 1; /* unterminated escape sequence */
327 charset
= IN3
| CHARSET_DBCS
;
332 if (IN2
== '(') designation
= 0;
333 else if (IN2
== ')') designation
= 1;
334 else if (CONFIG_ISSET(USE_G2
) && IN2
== '.')
343 charset
= IN4
| CHARSET_DBCS
;
344 if (IN3
== '(') designation
= 0;
345 else if (IN3
== ')') designation
= 1;
348 case 6: /* designation with prefix */
349 if (CONFIG_ISSET(USE_JISX0208_EXT
) &&
350 (*inbuf
)[3] == ESC
&& (*inbuf
)[4] == '$' &&
351 (*inbuf
)[5] == 'B') {
352 charset
= 'B' | CHARSET_DBCS
;
362 /* raise error when the charset is not designated for this encoding */
363 if (charset
!= CHARSET_ASCII
) {
364 const struct iso2022_designation
*dsg
;
366 for (dsg
= CONFIG_DESIGNATIONS
; dsg
->mark
; dsg
++)
367 if (dsg
->mark
== charset
)
373 STATE_SETG(designation
, charset
)
379 #define ISO8859_7_DECODE(c, assi) \
380 if ((c) < 0xa0) (assi) = (c); \
381 else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \
383 else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \
384 (0xbffffd77L & (1L << ((c)-0xb4))))) \
385 (assi) = 0x02d0 + (c); \
386 else if ((c) == 0xa1) (assi) = 0x2018; \
387 else if ((c) == 0xa2) (assi) = 0x2019; \
388 else if ((c) == 0xaf) (assi) = 0x2015;
391 iso2022processg2(const void *config
, MultibyteCodec_State
*state
,
392 const unsigned char **inbuf
, Py_ssize_t
*inleft
,
393 Py_UNICODE
**outbuf
, Py_ssize_t
*outleft
)
395 /* not written to use encoder, decoder functions because only few
396 * encodings use G2 designations in CJKCodecs */
397 if (STATE_G2
== CHARSET_ISO8859_1
) {
403 else if (STATE_G2
== CHARSET_ISO8859_7
) {
404 ISO8859_7_DECODE(IN3
^ 0x80, **outbuf
)
407 else if (STATE_G2
== CHARSET_ASCII
) {
408 if (IN3
& 0x80) return 3;
412 return MBERR_INTERNAL
;
423 const struct iso2022_designation
*dsgcache
= NULL
;
426 unsigned char c
= IN1
;
429 if (STATE_GETFLAG(F_ESCTHROUGHOUT
)) {
430 /* ESC throughout mode:
431 * for non-iso2022 escape sequences */
432 WRITE1(c
) /* assume as ISO-8859-1 */
435 STATE_CLEARFLAG(F_ESCTHROUGHOUT
)
443 if (IS_ISO2022ESC(IN2
)) {
444 err
= iso2022processesc(config
, state
,
449 else if (CONFIG_ISSET(USE_G2
) && IN2
== 'N') {/* SS2 */
451 err
= iso2022processg2(config
, state
,
452 inbuf
, &inleft
, outbuf
, &outleft
);
458 STATE_SETFLAG(F_ESCTHROUGHOUT
)
463 if (CONFIG_ISSET(NO_SHIFT
))
465 STATE_CLEARFLAG(F_SHIFTED
)
469 if (CONFIG_ISSET(NO_SHIFT
))
471 STATE_SETFLAG(F_SHIFTED
)
475 STATE_CLEARFLAG(F_SHIFTED
)
480 if (c
< 0x20) /* C0 */
485 const struct iso2022_designation
*dsg
;
486 unsigned char charset
;
489 if (STATE_GETFLAG(F_SHIFTED
))
494 if (charset
== CHARSET_ASCII
) {
500 if (dsgcache
!= NULL
&&
501 dsgcache
->mark
== charset
)
504 for (dsg
= CONFIG_DESIGNATIONS
;
511 assert(dsg
->mark
!= '\0');
515 REQUIRE_INBUF(dsg
->width
)
516 decoded
= dsg
->decoder(*inbuf
);
517 if (decoded
== MAP_UNMAPPABLE
)
520 if (decoded
< 0x10000) {
524 else if (decoded
< 0x30000) {
527 else { /* JIS X 0213 pairs */
528 WRITE2(decoded
>> 16, decoded
& 0xffff)
539 /*-*- mapping table holders -*-*/
/* Declare a file-local pointer to an encode map (<enc>_encmap) or a decode
 * map (<enc>_decmap), initially NULL.  The expansion carries its own ';',
 * so uses such as DECMAP(jisx0213_1_bmp) take no trailing semicolon.  The
 * *_init functions pass the addresses of these pointers to IMPORT_MAP
 * (presumably to have them filled in -- IMPORT_MAP is defined elsewhere). */
541 #define ENCMAP(enc) static const encode_map *enc##_encmap = NULL;
542 #define DECMAP(enc) static const decode_map *enc##_decmap = NULL;
553 DECMAP(jisx0213_1_bmp
)
554 DECMAP(jisx0213_2_bmp
)
556 DECMAP(jisx0213_1_emp
)
557 DECMAP(jisx0213_2_emp
)
565 /*-*- mapping access functions -*-*/
570 static int initialized
= 0;
572 if (!initialized
&& (
573 IMPORT_MAP(kr
, cp949
, &cp949_encmap
, NULL
) ||
574 IMPORT_MAP(kr
, ksx1001
, NULL
, &ksx1001_decmap
)))
581 ksx1001_decoder(const unsigned char *data
)
584 TRYMAP_DEC(ksx1001
, u
, data
[0], data
[1])
587 return MAP_UNMAPPABLE
;
591 ksx1001_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
594 assert(*length
== 1);
595 if (*data
< 0x10000) {
596 TRYMAP_ENC(cp949
, coded
, *data
)
597 if (!(coded
& 0x8000))
600 return MAP_UNMAPPABLE
;
606 static int initialized
= 0;
608 if (!initialized
&& (
609 IMPORT_MAP(jp
, jisxcommon
, &jisxcommon_encmap
, NULL
) ||
610 IMPORT_MAP(jp
, jisx0208
, NULL
, &jisx0208_decmap
)))
617 jisx0208_decoder(const unsigned char *data
)
620 if (data
[0] == 0x21 && data
[1] == 0x40) /* F/W REVERSE SOLIDUS */
622 else TRYMAP_DEC(jisx0208
, u
, data
[0], data
[1])
625 return MAP_UNMAPPABLE
;
629 jisx0208_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
632 assert(*length
== 1);
633 if (*data
< 0x10000) {
634 if (*data
== 0xff3c) /* F/W REVERSE SOLIDUS */
636 else TRYMAP_ENC(jisxcommon
, coded
, *data
) {
637 if (!(coded
& 0x8000))
641 return MAP_UNMAPPABLE
;
647 static int initialized
= 0;
649 if (!initialized
&& (
650 IMPORT_MAP(jp
, jisxcommon
, &jisxcommon_encmap
, NULL
) ||
651 IMPORT_MAP(jp
, jisx0212
, NULL
, &jisx0212_decmap
)))
658 jisx0212_decoder(const unsigned char *data
)
661 TRYMAP_DEC(jisx0212
, u
, data
[0], data
[1])
664 return MAP_UNMAPPABLE
;
668 jisx0212_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
671 assert(*length
== 1);
672 if (*data
< 0x10000) {
673 TRYMAP_ENC(jisxcommon
, coded
, *data
) {
675 return coded
& 0x7fff;
678 return MAP_UNMAPPABLE
;
684 static int initialized
= 0;
686 if (!initialized
&& (
688 IMPORT_MAP(jp
, jisx0213_bmp
,
689 &jisx0213_bmp_encmap
, NULL
) ||
690 IMPORT_MAP(jp
, jisx0213_1_bmp
,
691 NULL
, &jisx0213_1_bmp_decmap
) ||
692 IMPORT_MAP(jp
, jisx0213_2_bmp
,
693 NULL
, &jisx0213_2_bmp_decmap
) ||
694 IMPORT_MAP(jp
, jisx0213_emp
,
695 &jisx0213_emp_encmap
, NULL
) ||
696 IMPORT_MAP(jp
, jisx0213_1_emp
,
697 NULL
, &jisx0213_1_emp_decmap
) ||
698 IMPORT_MAP(jp
, jisx0213_2_emp
,
699 NULL
, &jisx0213_2_emp_decmap
) ||
700 IMPORT_MAP(jp
, jisx0213_pair
, &jisx0213_pair_encmap
,
701 &jisx0213_pair_decmap
)))
707 #define config ((void *)2000)
709 jisx0213_2000_1_decoder(const unsigned char *data
)
712 EMULATE_JISX0213_2000_DECODE_PLANE1(u
, data
[0], data
[1])
713 else if (data
[0] == 0x21 && data
[1] == 0x40) /* F/W REVERSE SOLIDUS */
715 else TRYMAP_DEC(jisx0208
, u
, data
[0], data
[1]);
716 else TRYMAP_DEC(jisx0213_1_bmp
, u
, data
[0], data
[1]);
717 else TRYMAP_DEC(jisx0213_1_emp
, u
, data
[0], data
[1])
719 else TRYMAP_DEC(jisx0213_pair
, u
, data
[0], data
[1]);
721 return MAP_UNMAPPABLE
;
726 jisx0213_2000_2_decoder(const unsigned char *data
)
729 EMULATE_JISX0213_2000_DECODE_PLANE2(u
, data
[0], data
[1])
730 TRYMAP_DEC(jisx0213_2_bmp
, u
, data
[0], data
[1]);
731 else TRYMAP_DEC(jisx0213_2_emp
, u
, data
[0], data
[1])
734 return MAP_UNMAPPABLE
;
740 jisx0213_2004_1_decoder(const unsigned char *data
)
743 if (data
[0] == 0x21 && data
[1] == 0x40) /* F/W REVERSE SOLIDUS */
745 else TRYMAP_DEC(jisx0208
, u
, data
[0], data
[1]);
746 else TRYMAP_DEC(jisx0213_1_bmp
, u
, data
[0], data
[1]);
747 else TRYMAP_DEC(jisx0213_1_emp
, u
, data
[0], data
[1])
749 else TRYMAP_DEC(jisx0213_pair
, u
, data
[0], data
[1]);
751 return MAP_UNMAPPABLE
;
756 jisx0213_2004_2_decoder(const unsigned char *data
)
759 TRYMAP_DEC(jisx0213_2_bmp
, u
, data
[0], data
[1]);
760 else TRYMAP_DEC(jisx0213_2_emp
, u
, data
[0], data
[1])
763 return MAP_UNMAPPABLE
;
768 jisx0213_encoder(const ucs4_t
*data
, Py_ssize_t
*length
, void *config
)
773 case 1: /* first character */
774 if (*data
>= 0x10000) {
775 if ((*data
) >> 16 == 0x20000 >> 16) {
776 EMULATE_JISX0213_2000_ENCODE_EMP(coded
, *data
)
777 else TRYMAP_ENC(jisx0213_emp
, coded
,
781 return MAP_UNMAPPABLE
;
784 EMULATE_JISX0213_2000_ENCODE_BMP(coded
, *data
)
785 else TRYMAP_ENC(jisx0213_bmp
, coded
, *data
) {
787 return MAP_MULTIPLE_AVAIL
;
789 else TRYMAP_ENC(jisxcommon
, coded
, *data
) {
791 return MAP_UNMAPPABLE
;
794 return MAP_UNMAPPABLE
;
796 case 2: /* second character of unicode pair */
797 coded
= find_pairencmap((ucs2_t
)data
[0], (ucs2_t
)data
[1],
798 jisx0213_pair_encmap
, JISX0213_ENCPAIRS
);
799 if (coded
== DBCINV
) {
801 coded
= find_pairencmap((ucs2_t
)data
[0], 0,
802 jisx0213_pair_encmap
, JISX0213_ENCPAIRS
);
804 return MAP_UNMAPPABLE
;
808 case -1: /* flush unterminated */
810 coded
= find_pairencmap((ucs2_t
)data
[0], 0,
811 jisx0213_pair_encmap
, JISX0213_ENCPAIRS
);
813 return MAP_UNMAPPABLE
;
817 return MAP_UNMAPPABLE
;
822 jisx0213_2000_1_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
824 DBCHAR coded
= jisx0213_encoder(data
, length
, (void *)2000);
825 if (coded
== MAP_UNMAPPABLE
|| coded
== MAP_MULTIPLE_AVAIL
)
827 else if (coded
& 0x8000)
828 return MAP_UNMAPPABLE
;
834 jisx0213_2000_1_encoder_paironly(const ucs4_t
*data
, Py_ssize_t
*length
)
837 Py_ssize_t ilength
= *length
;
839 coded
= jisx0213_encoder(data
, length
, (void *)2000);
842 if (coded
== MAP_MULTIPLE_AVAIL
)
843 return MAP_MULTIPLE_AVAIL
;
845 return MAP_UNMAPPABLE
;
848 return MAP_UNMAPPABLE
;
852 return MAP_UNMAPPABLE
;
857 jisx0213_2000_2_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
859 DBCHAR coded
= jisx0213_encoder(data
, length
, (void *)2000);
860 if (coded
== MAP_UNMAPPABLE
|| coded
== MAP_MULTIPLE_AVAIL
)
862 else if (coded
& 0x8000)
863 return coded
& 0x7fff;
865 return MAP_UNMAPPABLE
;
869 jisx0213_2004_1_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
871 DBCHAR coded
= jisx0213_encoder(data
, length
, NULL
);
872 if (coded
== MAP_UNMAPPABLE
|| coded
== MAP_MULTIPLE_AVAIL
)
874 else if (coded
& 0x8000)
875 return MAP_UNMAPPABLE
;
881 jisx0213_2004_1_encoder_paironly(const ucs4_t
*data
, Py_ssize_t
*length
)
884 Py_ssize_t ilength
= *length
;
886 coded
= jisx0213_encoder(data
, length
, NULL
);
889 if (coded
== MAP_MULTIPLE_AVAIL
)
890 return MAP_MULTIPLE_AVAIL
;
892 return MAP_UNMAPPABLE
;
895 return MAP_UNMAPPABLE
;
899 return MAP_UNMAPPABLE
;
904 jisx0213_2004_2_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
906 DBCHAR coded
= jisx0213_encoder(data
, length
, NULL
);
907 if (coded
== MAP_UNMAPPABLE
|| coded
== MAP_MULTIPLE_AVAIL
)
909 else if (coded
& 0x8000)
910 return coded
& 0x7fff;
912 return MAP_UNMAPPABLE
;
916 jisx0201_r_decoder(const unsigned char *data
)
919 JISX0201_R_DECODE(*data
, u
)
920 else return MAP_UNMAPPABLE
;
925 jisx0201_r_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
928 JISX0201_R_ENCODE(*data
, coded
)
929 else return MAP_UNMAPPABLE
;
934 jisx0201_k_decoder(const unsigned char *data
)
937 JISX0201_K_DECODE(*data
^ 0x80, u
)
938 else return MAP_UNMAPPABLE
;
943 jisx0201_k_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
946 JISX0201_K_ENCODE(*data
, coded
)
947 else return MAP_UNMAPPABLE
;
954 static int initialized
= 0;
956 if (!initialized
&& (
957 IMPORT_MAP(cn
, gbcommon
, &gbcommon_encmap
, NULL
) ||
958 IMPORT_MAP(cn
, gb2312
, NULL
, &gb2312_decmap
)))
965 gb2312_decoder(const unsigned char *data
)
968 TRYMAP_DEC(gb2312
, u
, data
[0], data
[1])
971 return MAP_UNMAPPABLE
;
975 gb2312_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
978 assert(*length
== 1);
979 if (*data
< 0x10000) {
980 TRYMAP_ENC(gbcommon
, coded
, *data
) {
981 if (!(coded
& 0x8000))
985 return MAP_UNMAPPABLE
;
990 dummy_decoder(const unsigned char *data
)
992 return MAP_UNMAPPABLE
;
996 dummy_encoder(const ucs4_t
*data
, Py_ssize_t
*length
)
998 return MAP_UNMAPPABLE
;
1001 /*-*- registry tables -*-*/
1003 #define REGISTRY_KSX1001_G0 { CHARSET_KSX1001, 0, 2, \
1005 ksx1001_decoder, ksx1001_encoder }
1006 #define REGISTRY_KSX1001_G1 { CHARSET_KSX1001, 1, 2, \
1008 ksx1001_decoder, ksx1001_encoder }
1009 #define REGISTRY_JISX0201_R { CHARSET_JISX0201_R, 0, 1, \
1011 jisx0201_r_decoder, jisx0201_r_encoder }
1012 #define REGISTRY_JISX0201_K { CHARSET_JISX0201_K, 0, 1, \
1014 jisx0201_k_decoder, jisx0201_k_encoder }
1015 #define REGISTRY_JISX0208 { CHARSET_JISX0208, 0, 2, \
1017 jisx0208_decoder, jisx0208_encoder }
1018 #define REGISTRY_JISX0208_O { CHARSET_JISX0208_O, 0, 2, \
1020 jisx0208_decoder, jisx0208_encoder }
1021 #define REGISTRY_JISX0212 { CHARSET_JISX0212, 0, 2, \
1023 jisx0212_decoder, jisx0212_encoder }
1024 #define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2, \
1026 jisx0213_2000_1_decoder, \
1027 jisx0213_2000_1_encoder }
1028 #define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \
1030 jisx0213_2000_1_decoder, \
1031 jisx0213_2000_1_encoder_paironly }
1032 #define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2, \
1034 jisx0213_2000_2_decoder, \
1035 jisx0213_2000_2_encoder }
1036 #define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2, \
1038 jisx0213_2004_1_decoder, \
1039 jisx0213_2004_1_encoder }
1040 #define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \
1042 jisx0213_2004_1_decoder, \
1043 jisx0213_2004_1_encoder_paironly }
1044 #define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2, \
1046 jisx0213_2004_2_decoder, \
1047 jisx0213_2004_2_encoder }
1048 #define REGISTRY_GB2312 { CHARSET_GB2312, 0, 2, \
1050 gb2312_decoder, gb2312_encoder }
1051 #define REGISTRY_CNS11643_1 { CHARSET_CNS11643_1, 1, 2, \
1053 cns11643_1_decoder, cns11643_1_encoder }
1054 #define REGISTRY_CNS11643_2 { CHARSET_CNS11643_2, 2, 2, \
1056 cns11643_2_decoder, cns11643_2_encoder }
/* ISO-8859-1 / ISO-8859-7 designation entries.  The initializer slot is
 * NULL and the callbacks are the dummies that always return
 * MAP_UNMAPPABLE; G2 input is decoded directly in iso2022processg2()
 * instead of through the callbacks.  The leading fields are presumably
 * mark, plane (2 = G2) and width (1 byte) -- the struct lines declaring
 * them are not visible here, confirm against struct iso2022_designation. */
1057 #define REGISTRY_ISO8859_1 { CHARSET_ISO8859_1, 2, 1, \
1058 NULL, dummy_decoder, dummy_encoder }
1059 #define REGISTRY_ISO8859_7 { CHARSET_ISO8859_7, 2, 1, \
1060 NULL, dummy_decoder, dummy_encoder }
/* Table terminator: its first field is 0, which ends the `->mark` loops
 * over a designation table. */
1061 #define REGISTRY_SENTINEL { 0, }
1062 #define CONFIGDEF(var, attrs) \
1063 static const struct iso2022_config iso2022_##var##_config = { \
1064 attrs, iso2022_##var##_designations \
1067 static const struct iso2022_designation iso2022_kr_designations
[] = {
1068 REGISTRY_KSX1001_G1
, REGISTRY_SENTINEL
1072 static const struct iso2022_designation iso2022_jp_designations
[] = {
1073 REGISTRY_JISX0208
, REGISTRY_JISX0201_R
, REGISTRY_JISX0208_O
,
1076 CONFIGDEF(jp
, NO_SHIFT
| USE_JISX0208_EXT
)
1078 static const struct iso2022_designation iso2022_jp_1_designations
[] = {
1079 REGISTRY_JISX0208
, REGISTRY_JISX0212
, REGISTRY_JISX0201_R
,
1080 REGISTRY_JISX0208_O
, REGISTRY_SENTINEL
1082 CONFIGDEF(jp_1
, NO_SHIFT
| USE_JISX0208_EXT
)
1084 static const struct iso2022_designation iso2022_jp_2_designations
[] = {
1085 REGISTRY_JISX0208
, REGISTRY_JISX0212
, REGISTRY_KSX1001_G0
,
1086 REGISTRY_GB2312
, REGISTRY_JISX0201_R
, REGISTRY_JISX0208_O
,
1087 REGISTRY_ISO8859_1
, REGISTRY_ISO8859_7
, REGISTRY_SENTINEL
1089 CONFIGDEF(jp_2
, NO_SHIFT
| USE_G2
| USE_JISX0208_EXT
)
1091 static const struct iso2022_designation iso2022_jp_2004_designations
[] = {
1092 REGISTRY_JISX0213_2004_1_PAIRONLY
, REGISTRY_JISX0208
,
1093 REGISTRY_JISX0213_2004_1
, REGISTRY_JISX0213_2004_2
, REGISTRY_SENTINEL
1095 CONFIGDEF(jp_2004
, NO_SHIFT
| USE_JISX0208_EXT
)
1097 static const struct iso2022_designation iso2022_jp_3_designations
[] = {
1098 REGISTRY_JISX0213_2000_1_PAIRONLY
, REGISTRY_JISX0208
,
1099 REGISTRY_JISX0213_2000_1
, REGISTRY_JISX0213_2000_2
, REGISTRY_SENTINEL
1101 CONFIGDEF(jp_3
, NO_SHIFT
| USE_JISX0208_EXT
)
1103 static const struct iso2022_designation iso2022_jp_ext_designations
[] = {
1104 REGISTRY_JISX0208
, REGISTRY_JISX0212
, REGISTRY_JISX0201_R
,
1105 REGISTRY_JISX0201_K
, REGISTRY_JISX0208_O
, REGISTRY_SENTINEL
1107 CONFIGDEF(jp_ext
, NO_SHIFT
| USE_JISX0208_EXT
)
1111 /* no mapping table here */
1114 #define ISO2022_CODEC(variation) { \
1115 "iso2022_" #variation, \
1116 &iso2022_##variation##_config, \
1117 iso2022_codec_init, \
1118 _STATEFUL_METHODS(iso2022) \
1126 ISO2022_CODEC(jp_2004
)
1128 ISO2022_CODEC(jp_ext
)
1131 I_AM_A_MODULE_FOR(iso2022
)