2 * cjkcodecs.h: common header for cjkcodecs
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
10 #define PY_SSIZE_T_CLEAN
12 #include "multibytecodec.h"
15 /* a Unicode "undefined" codepoint */
18 /* internal-use DBCS codepoints which aren't used by any charsets */
23 /* shorter macros to save source size of mapping tables */
31 unsigned char bottom
, top
;
33 typedef struct dbcs_index decode_map
;
35 struct widedbcs_index
{
37 unsigned char bottom
, top
;
39 typedef struct widedbcs_index widedecode_map
;
43 unsigned char bottom
, top
;
45 typedef struct unim_index encode_map
;
47 struct unim_index_bytebased
{
48 const unsigned char *map
;
49 unsigned char bottom
, top
;
54 const struct unim_index
*encmap
;
55 const struct dbcs_index
*decmap
;
58 struct pair_encodemap
{
63 static const MultibyteCodec
*codec_list
;
64 static const struct dbcs_map
*mapping_list
;
/* Expands to the header of a per-encoding init function:
 *     static int <encoding>_codec_init(const void *config)
 * The expansion has no body and no semicolon, so the text that follows the
 * macro invocation completes the declaration or definition. */
66 #define CODEC_INIT(encoding) \
67 static int encoding##_codec_init(const void *config)
/* Expands to the header of <encoding>_encode_init(), a static function
 * returning int that receives the codec state and the opaque config
 * pointer.  No body is supplied by the macro. */
69 #define ENCODER_INIT(encoding) \
70 static int encoding##_encode_init( \
71 MultibyteCodec_State *state, const void *config)
/* Expands to the header of <encoding>_encode(), a static function returning
 * Py_ssize_t.  Besides state/config it declares:
 *   inbuf/inleft   - pointer to the Py_UNICODE input cursor and a Py_ssize_t
 *                    count for it,
 *   outbuf/outleft - pointer to the unsigned char output cursor and a
 *                    Py_ssize_t count for it,
 *   flags          - an int.
 * The parameter names matter: the IN1../OUT1.. helper macros in this header
 * dereference `inbuf` and `outbuf` by name. */
72 #define ENCODER(encoding) \
73 static Py_ssize_t encoding##_encode( \
74 MultibyteCodec_State *state, const void *config, \
75 const Py_UNICODE **inbuf, Py_ssize_t inleft, \
76 unsigned char **outbuf, Py_ssize_t outleft, int flags)
/* Expands to the header of <encoding>_encode_reset(), a static function
 * returning Py_ssize_t.  It takes state/config plus the output cursor
 * (outbuf/outleft) but no input buffer. */
77 #define ENCODER_RESET(encoding) \
78 static Py_ssize_t encoding##_encode_reset( \
79 MultibyteCodec_State *state, const void *config, \
80 unsigned char **outbuf, Py_ssize_t outleft)
/* Expands to the header of <encoding>_decode_init(), a static function
 * returning int that receives the codec state and the opaque config
 * pointer.  No body is supplied by the macro. */
82 #define DECODER_INIT(encoding) \
83 static int encoding##_decode_init( \
84 MultibyteCodec_State *state, const void *config)
/* Expands to the header of <encoding>_decode(), a static function returning
 * Py_ssize_t.  Mirror image of ENCODER: here the input cursor is
 * `const unsigned char **inbuf` and the output cursor is
 * `Py_UNICODE **outbuf`; there is no flags parameter. */
85 #define DECODER(encoding) \
86 static Py_ssize_t encoding##_decode( \
87 MultibyteCodec_State *state, const void *config, \
88 const unsigned char **inbuf, Py_ssize_t inleft, \
89 Py_UNICODE **outbuf, Py_ssize_t outleft)
/* Expands to the header of <encoding>_decode_reset(), a static function
 * returning Py_ssize_t that takes only the codec state and config. */
90 #define DECODER_RESET(encoding) \
91 static Py_ssize_t encoding##_decode_reset( \
92 MultibyteCodec_State *state, const void *config)
94 #if Py_UNICODE_SIZE == 4
95 #define UCS4INVALID(code) \
96 if ((code) > 0xFFFF) \
99 #define UCS4INVALID(code) \
106 #define NEXT_OUT(o) \
110 NEXT_IN(i) NEXT_OUT(o)
112 #define REQUIRE_INBUF(n) \
115 #define REQUIRE_OUTBUF(n) \
117 return MBERR_TOOSMALL;
/* IN1..IN4 read elements 0..3 of the buffer that `*inbuf` currently points
 * at.  They rely on a variable named `inbuf` being in scope (the ENCODER and
 * DECODER signature macros declare one) and perform no bounds checking
 * themselves. */
119 #define IN1 ((*inbuf)[0])
120 #define IN2 ((*inbuf)[1])
121 #define IN3 ((*inbuf)[2])
122 #define IN4 ((*inbuf)[3])
/* OUT1..OUT4 store `c` into elements 0..3 of the buffer that `*outbuf`
 * currently points at.  They do not advance the cursor and do no bounds
 * checking.
 * NOTE(review): each expansion already ends with ';', so the macro is a full
 * statement on its own.  Writing `OUT1(x);` yields an extra empty statement,
 * which breaks an unbraced `if (...) OUT1(x); else ...`. */
124 #define OUT1(c) ((*outbuf)[0]) = (c);
125 #define OUT2(c) ((*outbuf)[1]) = (c);
126 #define OUT3(c) ((*outbuf)[2]) = (c);
127 #define OUT4(c) ((*outbuf)[3]) = (c);
132 #define WRITE2(c1, c2) \
134 (*outbuf)[0] = (c1); \
136 #define WRITE3(c1, c2, c3) \
138 (*outbuf)[0] = (c1); \
139 (*outbuf)[1] = (c2); \
141 #define WRITE4(c1, c2, c3, c4) \
143 (*outbuf)[0] = (c1); \
144 (*outbuf)[1] = (c2); \
145 (*outbuf)[2] = (c3); \
148 #if Py_UNICODE_SIZE == 2
149 # define WRITEUCS4(c) \
151 (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
152 (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
155 # define WRITEUCS4(c) \
157 **outbuf = (Py_UNICODE)(c); \
/* Boolean expression: look `val` up in the index entry `m`.
 * It is true only when m->map is non-NULL, val lies within
 * [m->bottom, m->top], and the table entry map[val - bottom] is not NOCHAR.
 * Side effect: the table entry is assigned to `assi`; because of the &&
 * short-circuit this happens only after the NULL and range checks pass.
 * `m` and `val` are evaluated several times, so pass side-effect-free
 * arguments. */
161 #define _TRYMAP_ENC(m, assi, val) \
162 ((m)->map != NULL && (val) >= (m)->bottom && \
163 (val)<= (m)->top && ((assi) = (m)->map[(val) - \
164 (m)->bottom]) != NOCHAR)
/* Encode-lookup condition for a named charset: picks the index entry
 * <charset>_encmap[uni >> 8] and looks up the low byte (uni & 0xff) in it
 * via _TRYMAP_ENC.  On success the mapped value is left in `assi`. */
165 #define TRYMAP_ENC_COND(charset, assi, uni) \
166 _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
/* Statement prefix: expands to `if <TRYMAP_ENC_COND(...)>` with no body.
 * The statement or block written after the macro invocation becomes the
 * body of that `if` (and may be followed by `else`). */
167 #define TRYMAP_ENC(charset, assi, uni) \
168 if TRYMAP_ENC_COND(charset, assi, uni)
/* Decode-side counterpart of _TRYMAP_ENC: same NULL and range checks on the
 * index entry `m`, same assignment of map[val - bottom] to `assi`, but the
 * "no mapping" sentinel compared against is UNIINV instead of NOCHAR.
 * `m` and `val` are evaluated several times. */
170 #define _TRYMAP_DEC(m, assi, val) \
171 ((m)->map != NULL && (val) >= (m)->bottom && \
172 (val)<= (m)->top && ((assi) = (m)->map[(val) - \
173 (m)->bottom]) != UNIINV)
/* Statement prefix: expands to `if (...)` testing whether byte pair
 * (c1, c2) has a mapping in <charset>_decmap; c1 selects the index entry and
 * c2 is looked up inside it.  No body is supplied - the caller's next
 * statement is the `if` body.  On success the result is left in `assi`. */
174 #define TRYMAP_DEC(charset, assi, c1, c2) \
175 if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
/* Multi-plane encode lookup.  Each table slot occupies three consecutive
 * elements of m->map, at offset (val - bottom) * 3:
 *   [+0] -> assplane, [+1] -> asshi, [+2] -> asslo.
 * The expression is true when m->map is non-NULL, val is within
 * [bottom, top] and the first element (assplane) is non-zero.  The last two
 * assignments are wrapped as `(..., 1)` so that a zero byte stored in
 * asshi/asslo cannot make the whole condition false. */
177 #define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) \
178 ((m)->map != NULL && (val) >= (m)->bottom && \
179 (val)<= (m)->top && \
180 ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
181 (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
182 (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
/* Statement prefix: `if` on a multi-plane encode lookup of `uni` in
 * <charset>_encmap, indexed by uni >> 8 with the low byte looked up inside
 * the entry.  The caller's following statement is the `if` body. */
183 #define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
184 if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
185 assplane, asshi, asslo, (uni) & 0xff)
/* Statement prefix: like TRYMAP_DEC, but the decode map is two-dimensional
 * and is indexed as <charset>_decmap[plane][c1] before c2 is looked up. */
186 #define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) \
187 if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
189 #if Py_UNICODE_SIZE == 2
190 #define DECODE_SURROGATE(c) \
191 if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
193 if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
194 c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
195 ((ucs4_t)(IN2) - 0xdc00); \
/* Evaluates to 2 when c is above 0xffff, otherwise 1.
 * NOTE(review): this definition follows `#if Py_UNICODE_SIZE == 2`;
 * presumably it is the number of Py_UNICODE units the code point occupied
 * in the input (surrogate pair vs. single unit) - confirm against callers. */
198 #define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
/* Alternate definitions: DECODE_SURROGATE becomes an empty block and
 * GET_INSIZE is the constant 1.
 * NOTE(review): presumably the branch taken when Py_UNICODE_SIZE is not 2;
 * the preprocessor line selecting this branch is not visible here. */
200 #define DECODE_SURROGATE(c) {;}
201 #define GET_INSIZE(c) 1
/* Macros for building the per-module table of mapping data.
 *
 *   BEGIN_MAPPINGS_LIST   opens `static const struct dbcs_map _mapping_list[]`.
 *   MAPPING_ENCONLY(enc)  adds {"enc", enc_encmap, NULL}.
 *   MAPPING_DECONLY(enc)  adds {"enc", NULL, enc_decmap}.
 *   MAPPING_ENCDEC(enc)   adds {"enc", enc_encmap, enc_decmap}.
 *   END_MAPPINGS_LIST     appends the {"", NULL, NULL} sentinel, closes the
 *                         array and defines `mapping_list` to point at it.
 *
 * The name string is produced by stringizing the macro argument (#enc).
 * The empty-string sentinel is what register_maps() tests for to stop its
 * loop (charset[0] != '\0'). */
204 #define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
205 #define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
206 #define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
207 #define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
208 #define END_MAPPINGS_LIST \
209 {"", NULL, NULL} }; \
210 static const struct dbcs_map *mapping_list = \
211 (const struct dbcs_map *)_mapping_list;
/* Opens the initializer of the module's codec table,
 * `static const MultibyteCodec _codec_list[]`; entries and the closing
 * brace are supplied by the macros used after it. */
213 #define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
214 #define _STATEFUL_METHODS(enc) \
217 enc##_encode_reset, \
/* Initializer fragment listing the function-pointer slots of a stateless
 * codec: <enc>_encode followed by two NULLs, then <enc>_decode followed by
 * two NULLs.
 * NOTE(review): the NULL slots presumably correspond to the init/reset
 * hooks that a stateful codec fills in - confirm against the
 * MultibyteCodec struct layout in multibytecodec.h. */
221 #define _STATELESS_METHODS(enc) \
222 enc##_encode, NULL, NULL, \
223 enc##_decode, NULL, NULL,
224 #define CODEC_STATEFUL(enc) { \
226 _STATEFUL_METHODS(enc) \
228 #define CODEC_STATELESS(enc) { \
230 _STATELESS_METHODS(enc) \
232 #define CODEC_STATELESS_WINIT(enc) { \
235 _STATELESS_METHODS(enc) \
237 #define END_CODECS_LIST \
239 static const MultibyteCodec *codec_list = \
240 (const MultibyteCodec *)_codec_list;
/* Fetch the "__create_codec" callable from the "_multibytecodec" module.
 * The result is cached in the function-local static `cofunc`, so the import
 * and attribute lookup below are only attempted while the cache is NULL.
 * NOTE(review): error handling and the return statement are on lines not
 * visible in this extract. */
243 getmultibytecodec(void)
245 static PyObject
*cofunc
= NULL
;
247 if (cofunc
== NULL
) {
248 PyObject
*mod
= PyImport_ImportModuleNoBlock("_multibytecodec");
251 cofunc
= PyObject_GetAttrString(mod
, "__create_codec");
/* Module-level "getcodec" method: look up a codec by name and build a codec
 * object for it.
 *
 * Visible steps:
 *   1. `encoding` must pass PyString_Check, otherwise TypeError is set.
 *   2. The codec factory is obtained from getmultibytecodec().
 *   3. codec_list is scanned until the entry whose encoding name is empty
 *      (the sentinel); names are compared with strcmp.
 *   4. Landing on the sentinel means no match: LookupError is set.
 *   5. The matching MultibyteCodec is wrapped with PyCObject_FromVoidPtr and
 *      passed to the factory via PyObject_CallFunctionObjArgs.
 * NOTE(review): return statements and reference-count cleanup are on lines
 * not visible in this extract. */
258 getcodec(PyObject
*self
, PyObject
*encoding
)
260 PyObject
*codecobj
, *r
, *cofunc
;
261 const MultibyteCodec
*codec
;
264 if (!PyString_Check(encoding
)) {
265 PyErr_SetString(PyExc_TypeError
,
266 "encoding name must be a string.");
270 cofunc
= getmultibytecodec();
274 enc
= PyString_AS_STRING(encoding
);
275 for (codec
= codec_list
; codec
->encoding
[0]; codec
++)
276 if (strcmp(codec
->encoding
, enc
) == 0)
279 if (codec
->encoding
[0] == '\0') {
280 PyErr_SetString(PyExc_LookupError
,
281 "no such codec is supported.");
285 codecobj
= PyCObject_FromVoidPtr((void *)codec
, NULL
);
286 if (codecobj
== NULL
)
289 r
= PyObject_CallFunctionObjArgs(cofunc
, codecobj
, NULL
);
295 static struct PyMethodDef __methods
[] = {
296 {"getcodec", (PyCFunction
)getcodec
, METH_O
, ""},
/* Publish every entry of mapping_list on `module` as an attribute named
 * "__map_<charset>", whose value is a PyCObject wrapping the dbcs_map entry.
 * Iteration stops at the entry whose charset name is the empty string.
 *
 * NOTE(review): the attribute name is built with strcpy into a 256-byte
 * stack buffer after the 6-character "__map_" prefix, with no length check;
 * a charset name longer than 249 characters would overflow it.
 * NOTE(review): the handling of `r` and the return value are on lines not
 * visible in this extract. */
301 register_maps(PyObject
*module
)
303 const struct dbcs_map
*h
;
305 for (h
= mapping_list
; h
->charset
[0] != '\0'; h
++) {
306 char mhname
[256] = "__map_";
308 strcpy(mhname
+ sizeof("__map_") - 1, h
->charset
);
309 r
= PyModule_AddObject(module
, mhname
,
310 PyCObject_FromVoidPtr((void *)h
, NULL
));
317 #ifdef USING_BINARY_PAIR_SEARCH
/* Search `haystack` (haystacksize entries) for the pair (body, modifier).
 * The search key packs body into the bits above bit 15 and modifier into the
 * low bits, and is compared against each entry's `uniseq`.  The probe
 * position starts at haystacksize / 2 and is then recomputed as the midpoint
 * of min and max; on an exact match the entry's `code` is returned.
 *
 * NOTE(review): the comparisons only make sense if haystack is sorted by
 * uniseq in ascending order - confirm at the table definitions.
 * NOTE(review): if ucs2_t is narrower than int, `body` is promoted to
 * (signed) int before `<< 16`, which is not well-defined for
 * body >= 0x8000; confirm the typedef or cast to ucs4_t first.
 * NOTE(review): the min/max updates and the not-found return value are on
 * lines not visible in this extract. */
319 find_pairencmap(ucs2_t body
, ucs2_t modifier
,
320 const struct pair_encodemap
*haystack
, int haystacksize
)
323 ucs4_t value
= body
<< 16 | modifier
;
328 for (pos
= haystacksize
>> 1; min
!= max
; pos
= (min
+ max
) >> 1)
329 if (value
< haystack
[pos
].uniseq
) {
330 if (max
== pos
) break;
333 else if (value
> haystack
[pos
].uniseq
) {
334 if (min
== pos
) break;
340 if (value
== haystack
[pos
].uniseq
)
341 return haystack
[pos
].code
;
347 #ifdef USING_IMPORTED_MAPS
/* Convenience wrapper around importmap(): builds the module name
 * "_codecs_<locale>" and the attribute name "__map_<charset>" by stringizing
 * the arguments, and casts the two output pointers to `const void **`.
 * The "__map_" prefix is the same one register_maps() uses when it
 * publishes mapping tables on a module. */
348 #define IMPORT_MAP(locale, charset, encmap, decmap) \
349 importmap("_codecs_" #locale, "__map_" #charset, \
350 (const void**)encmap, (const void**)decmap)
/* Import mapping tables exported by another codec module.
 *
 * Imports module `modname`, reads its attribute `symbol`, and requires the
 * attribute to be a PyCObject (otherwise ValueError is set).  The wrapped
 * pointer is treated as a `struct dbcs_map`, and its encmap / decmap members
 * are stored through the `encmap` / `decmap` output pointers.
 *
 * NOTE(review): the conditions guarding the two stores (presumably NULL
 * checks on the output pointers), the import-failure handling and the
 * return values are on lines not visible in this extract. */
353 importmap(const char *modname
, const char *symbol
,
354 const void **encmap
, const void **decmap
)
358 mod
= PyImport_ImportModule((char *)modname
);
362 o
= PyObject_GetAttrString(mod
, (char*)symbol
);
365 else if (!PyCObject_Check(o
)) {
366 PyErr_SetString(PyExc_ValueError
,
367 "map data must be a CObject.");
371 struct dbcs_map
*map
;
372 map
= PyCObject_AsVoidPtr(o
);
374 *encmap
= map
->encmap
;
376 *decmap
= map
->decmap
;
389 #define I_AM_A_MODULE_FOR(loc) \
391 init_codecs_##loc(void) \
393 PyObject *m = Py_InitModule("_codecs_" #loc, __methods);\
395 (void)register_maps(m); \