Merged revisions 81782 via svnmerge from
[python/dscho.git] / Modules / _codecsmodule.c
blob0958f9cd0a34b1afdb6f7ac47f81e1e5d9248166
1 /* ------------------------------------------------------------------------
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
9 The codec registry is accessible via:
11 register(search_function) -> None
13 lookup(encoding) -> CodecInfo object
15 The builtin Unicode codecs use the following interface:
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
23 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
27 These <encoding>s are available: utf_7, utf_8, utf_16, utf_32,
28 unicode_escape, raw_unicode_escape, unicode_internal, latin_1,
29 ascii (7-bit), charmap, mbcs (on win32).
32 Written by Marc-Andre Lemburg (mal@lemburg.com).
34 Copyright (c) Corporation for National Research Initiatives.
36 ------------------------------------------------------------------------ */
38 #define PY_SSIZE_T_CLEAN
39 #include "Python.h"
41 /* --- Registry ----------------------------------------------------------- */
43 PyDoc_STRVAR(register__doc__,
44 "register(search_function)\n\
45 \n\
46 Register a codec search function. Search functions are expected to take\n\
47 one argument, the encoding name in all lower case letters, and return\n\
48 a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49 (or a CodecInfo object).");
51 static
52 PyObject *codec_register(PyObject *self, PyObject *search_function)
54 if (PyCodec_Register(search_function))
55 return NULL;
57 Py_RETURN_NONE;
60 PyDoc_STRVAR(lookup__doc__,
61 "lookup(encoding) -> CodecInfo\n\
62 \n\
63 Looks up a codec tuple in the Python codec registry and returns\n\
64 a CodecInfo object.");
66 static
67 PyObject *codec_lookup(PyObject *self, PyObject *args)
69 char *encoding;
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
72 return NULL;
74 return _PyCodec_Lookup(encoding);
77 PyDoc_STRVAR(encode__doc__,
78 "encode(obj, [encoding[,errors]]) -> object\n\
79 \n\
80 Encodes obj using the codec registered for encoding. encoding defaults\n\
81 to the default encoding. errors may be given to set a different error\n\
82 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83 a ValueError. Other possible values are 'ignore', 'replace' and\n\
84 'xmlcharrefreplace' as well as any other name registered with\n\
85 codecs.register_error that can handle ValueErrors.");
87 static PyObject *
88 codec_encode(PyObject *self, PyObject *args)
90 const char *encoding = NULL;
91 const char *errors = NULL;
92 PyObject *v;
94 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
97 if (encoding == NULL)
98 encoding = PyUnicode_GetDefaultEncoding();
100 /* Encode via the codec registry */
101 return PyCodec_Encode(v, encoding, errors);
104 PyDoc_STRVAR(decode__doc__,
105 "decode(obj, [encoding[,errors]]) -> object\n\
107 Decodes obj using the codec registered for encoding. encoding defaults\n\
108 to the default encoding. errors may be given to set a different error\n\
109 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
110 a ValueError. Other possible values are 'ignore' and 'replace'\n\
111 as well as any other name registered with codecs.register_error that is\n\
112 able to handle ValueErrors.");
114 static PyObject *
115 codec_decode(PyObject *self, PyObject *args)
117 const char *encoding = NULL;
118 const char *errors = NULL;
119 PyObject *v;
121 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
122 return NULL;
124 if (encoding == NULL)
125 encoding = PyUnicode_GetDefaultEncoding();
127 /* Decode via the codec registry */
128 return PyCodec_Decode(v, encoding, errors);
131 /* --- Helpers ------------------------------------------------------------ */
133 static
134 PyObject *codec_tuple(PyObject *unicode,
135 Py_ssize_t len)
137 PyObject *v;
138 if (unicode == NULL)
139 return NULL;
140 v = Py_BuildValue("On", unicode, len);
141 Py_DECREF(unicode);
142 return v;
145 /* --- String codecs ------------------------------------------------------ */
146 static PyObject *
147 escape_decode(PyObject *self,
148 PyObject *args)
150 const char *errors = NULL;
151 const char *data;
152 Py_ssize_t size;
154 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
155 &data, &size, &errors))
156 return NULL;
157 return codec_tuple(PyBytes_DecodeEscape(data, size, errors, 0, NULL),
158 size);
161 static PyObject *
162 escape_encode(PyObject *self,
163 PyObject *args)
165 static const char *hexdigits = "0123456789abcdef";
166 PyObject *str;
167 Py_ssize_t size;
168 Py_ssize_t newsize;
169 const char *errors = NULL;
170 PyObject *v;
172 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
173 &PyBytes_Type, &str, &errors))
174 return NULL;
176 size = PyBytes_GET_SIZE(str);
177 newsize = 4*size;
178 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != size) {
179 PyErr_SetString(PyExc_OverflowError,
180 "string is too large to encode");
181 return NULL;
183 v = PyBytes_FromStringAndSize(NULL, newsize);
185 if (v == NULL) {
186 return NULL;
188 else {
189 register Py_ssize_t i;
190 register char c;
191 register char *p = PyBytes_AS_STRING(v);
193 for (i = 0; i < size; i++) {
194 /* There's at least enough room for a hex escape */
195 assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
196 c = PyBytes_AS_STRING(str)[i];
197 if (c == '\'' || c == '\\')
198 *p++ = '\\', *p++ = c;
199 else if (c == '\t')
200 *p++ = '\\', *p++ = 't';
201 else if (c == '\n')
202 *p++ = '\\', *p++ = 'n';
203 else if (c == '\r')
204 *p++ = '\\', *p++ = 'r';
205 else if (c < ' ' || c >= 0x7f) {
206 *p++ = '\\';
207 *p++ = 'x';
208 *p++ = hexdigits[(c & 0xf0) >> 4];
209 *p++ = hexdigits[c & 0xf];
211 else
212 *p++ = c;
214 *p = '\0';
215 if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
216 return NULL;
220 return codec_tuple(v, PyBytes_Size(v));
223 /* --- Decoder ------------------------------------------------------------ */
225 static PyObject *
226 unicode_internal_decode(PyObject *self,
227 PyObject *args)
229 PyObject *obj;
230 const char *errors = NULL;
231 const char *data;
232 Py_ssize_t size;
234 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
235 &obj, &errors))
236 return NULL;
238 if (PyUnicode_Check(obj)) {
239 Py_INCREF(obj);
240 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
242 else {
243 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
244 return NULL;
246 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
247 size);
251 static PyObject *
252 utf_7_decode(PyObject *self,
253 PyObject *args)
255 Py_buffer pbuf;
256 const char *errors = NULL;
257 int final = 0;
258 Py_ssize_t consumed;
259 PyObject *decoded = NULL;
261 if (!PyArg_ParseTuple(args, "y*|zi:utf_7_decode",
262 &pbuf, &errors, &final))
263 return NULL;
264 consumed = pbuf.len;
266 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
267 final ? NULL : &consumed);
268 PyBuffer_Release(&pbuf);
269 if (decoded == NULL)
270 return NULL;
271 return codec_tuple(decoded, consumed);
274 static PyObject *
275 utf_8_decode(PyObject *self,
276 PyObject *args)
278 Py_buffer pbuf;
279 const char *errors = NULL;
280 int final = 0;
281 Py_ssize_t consumed;
282 PyObject *decoded = NULL;
284 if (!PyArg_ParseTuple(args, "y*|zi:utf_8_decode",
285 &pbuf, &errors, &final))
286 return NULL;
287 consumed = pbuf.len;
289 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
290 final ? NULL : &consumed);
291 PyBuffer_Release(&pbuf);
292 if (decoded == NULL)
293 return NULL;
294 return codec_tuple(decoded, consumed);
297 static PyObject *
298 utf_16_decode(PyObject *self,
299 PyObject *args)
301 Py_buffer pbuf;
302 const char *errors = NULL;
303 int byteorder = 0;
304 int final = 0;
305 Py_ssize_t consumed;
306 PyObject *decoded;
308 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_decode",
309 &pbuf, &errors, &final))
310 return NULL;
311 consumed = pbuf.len; /* This is overwritten unless final is true. */
312 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
313 &byteorder, final ? NULL : &consumed);
314 PyBuffer_Release(&pbuf);
315 if (decoded == NULL)
316 return NULL;
317 return codec_tuple(decoded, consumed);
320 static PyObject *
321 utf_16_le_decode(PyObject *self,
322 PyObject *args)
324 Py_buffer pbuf;
325 const char *errors = NULL;
326 int byteorder = -1;
327 int final = 0;
328 Py_ssize_t consumed;
329 PyObject *decoded = NULL;
331 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_le_decode",
332 &pbuf, &errors, &final))
333 return NULL;
335 consumed = pbuf.len; /* This is overwritten unless final is true. */
336 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
337 &byteorder, final ? NULL : &consumed);
338 PyBuffer_Release(&pbuf);
339 if (decoded == NULL)
340 return NULL;
341 return codec_tuple(decoded, consumed);
344 static PyObject *
345 utf_16_be_decode(PyObject *self,
346 PyObject *args)
348 Py_buffer pbuf;
349 const char *errors = NULL;
350 int byteorder = 1;
351 int final = 0;
352 Py_ssize_t consumed;
353 PyObject *decoded = NULL;
355 if (!PyArg_ParseTuple(args, "y*|zi:utf_16_be_decode",
356 &pbuf, &errors, &final))
357 return NULL;
359 consumed = pbuf.len; /* This is overwritten unless final is true. */
360 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
361 &byteorder, final ? NULL : &consumed);
362 PyBuffer_Release(&pbuf);
363 if (decoded == NULL)
364 return NULL;
365 return codec_tuple(decoded, consumed);
368 /* This non-standard version also provides access to the byteorder
369 parameter of the builtin UTF-16 codec.
371 It returns a tuple (unicode, bytesread, byteorder) with byteorder
372 being the value in effect at the end of data.
376 static PyObject *
377 utf_16_ex_decode(PyObject *self,
378 PyObject *args)
380 Py_buffer pbuf;
381 const char *errors = NULL;
382 int byteorder = 0;
383 PyObject *unicode, *tuple;
384 int final = 0;
385 Py_ssize_t consumed;
387 if (!PyArg_ParseTuple(args, "y*|zii:utf_16_ex_decode",
388 &pbuf, &errors, &byteorder, &final))
389 return NULL;
390 consumed = pbuf.len; /* This is overwritten unless final is true. */
391 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
392 &byteorder, final ? NULL : &consumed);
393 PyBuffer_Release(&pbuf);
394 if (unicode == NULL)
395 return NULL;
396 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
397 Py_DECREF(unicode);
398 return tuple;
401 static PyObject *
402 utf_32_decode(PyObject *self,
403 PyObject *args)
405 Py_buffer pbuf;
406 const char *errors = NULL;
407 int byteorder = 0;
408 int final = 0;
409 Py_ssize_t consumed;
410 PyObject *decoded;
412 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_decode",
413 &pbuf, &errors, &final))
414 return NULL;
415 consumed = pbuf.len; /* This is overwritten unless final is true. */
416 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
417 &byteorder, final ? NULL : &consumed);
418 PyBuffer_Release(&pbuf);
419 if (decoded == NULL)
420 return NULL;
421 return codec_tuple(decoded, consumed);
424 static PyObject *
425 utf_32_le_decode(PyObject *self,
426 PyObject *args)
428 Py_buffer pbuf;
429 const char *errors = NULL;
430 int byteorder = -1;
431 int final = 0;
432 Py_ssize_t consumed;
433 PyObject *decoded;
435 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_le_decode",
436 &pbuf, &errors, &final))
437 return NULL;
438 consumed = pbuf.len; /* This is overwritten unless final is true. */
439 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
440 &byteorder, final ? NULL : &consumed);
441 PyBuffer_Release(&pbuf);
442 if (decoded == NULL)
443 return NULL;
444 return codec_tuple(decoded, consumed);
447 static PyObject *
448 utf_32_be_decode(PyObject *self,
449 PyObject *args)
451 Py_buffer pbuf;
452 const char *errors = NULL;
453 int byteorder = 1;
454 int final = 0;
455 Py_ssize_t consumed;
456 PyObject *decoded;
458 if (!PyArg_ParseTuple(args, "y*|zi:utf_32_be_decode",
459 &pbuf, &errors, &final))
460 return NULL;
461 consumed = pbuf.len; /* This is overwritten unless final is true. */
462 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
463 &byteorder, final ? NULL : &consumed);
464 PyBuffer_Release(&pbuf);
465 if (decoded == NULL)
466 return NULL;
467 return codec_tuple(decoded, consumed);
470 /* This non-standard version also provides access to the byteorder
471 parameter of the builtin UTF-32 codec.
473 It returns a tuple (unicode, bytesread, byteorder) with byteorder
474 being the value in effect at the end of data.
478 static PyObject *
479 utf_32_ex_decode(PyObject *self,
480 PyObject *args)
482 Py_buffer pbuf;
483 const char *errors = NULL;
484 int byteorder = 0;
485 PyObject *unicode, *tuple;
486 int final = 0;
487 Py_ssize_t consumed;
489 if (!PyArg_ParseTuple(args, "y*|zii:utf_32_ex_decode",
490 &pbuf, &errors, &byteorder, &final))
491 return NULL;
492 consumed = pbuf.len; /* This is overwritten unless final is true. */
493 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
494 &byteorder, final ? NULL : &consumed);
495 PyBuffer_Release(&pbuf);
496 if (unicode == NULL)
497 return NULL;
498 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
499 Py_DECREF(unicode);
500 return tuple;
503 static PyObject *
504 unicode_escape_decode(PyObject *self,
505 PyObject *args)
507 Py_buffer pbuf;
508 const char *errors = NULL;
509 PyObject *unicode;
511 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
512 &pbuf, &errors))
513 return NULL;
515 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
516 PyBuffer_Release(&pbuf);
517 return codec_tuple(unicode, pbuf.len);
520 static PyObject *
521 raw_unicode_escape_decode(PyObject *self,
522 PyObject *args)
524 Py_buffer pbuf;
525 const char *errors = NULL;
526 PyObject *unicode;
528 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
529 &pbuf, &errors))
530 return NULL;
532 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
533 PyBuffer_Release(&pbuf);
534 return codec_tuple(unicode, pbuf.len);
537 static PyObject *
538 latin_1_decode(PyObject *self,
539 PyObject *args)
541 Py_buffer pbuf;
542 PyObject *unicode;
543 const char *errors = NULL;
545 if (!PyArg_ParseTuple(args, "y*|z:latin_1_decode",
546 &pbuf, &errors))
547 return NULL;
549 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
550 PyBuffer_Release(&pbuf);
551 return codec_tuple(unicode, pbuf.len);
554 static PyObject *
555 ascii_decode(PyObject *self,
556 PyObject *args)
558 Py_buffer pbuf;
559 PyObject *unicode;
560 const char *errors = NULL;
562 if (!PyArg_ParseTuple(args, "y*|z:ascii_decode",
563 &pbuf, &errors))
564 return NULL;
566 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
567 PyBuffer_Release(&pbuf);
568 return codec_tuple(unicode, pbuf.len);
571 static PyObject *
572 charmap_decode(PyObject *self,
573 PyObject *args)
575 Py_buffer pbuf;
576 PyObject *unicode;
577 const char *errors = NULL;
578 PyObject *mapping = NULL;
580 if (!PyArg_ParseTuple(args, "y*|zO:charmap_decode",
581 &pbuf, &errors, &mapping))
582 return NULL;
583 if (mapping == Py_None)
584 mapping = NULL;
586 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
587 PyBuffer_Release(&pbuf);
588 return codec_tuple(unicode, pbuf.len);
591 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
593 static PyObject *
594 mbcs_decode(PyObject *self,
595 PyObject *args)
597 Py_buffer pbuf;
598 const char *errors = NULL;
599 int final = 0;
600 Py_ssize_t consumed;
601 PyObject *decoded = NULL;
603 if (!PyArg_ParseTuple(args, "y*|zi:mbcs_decode",
604 &pbuf, &errors, &final))
605 return NULL;
606 consumed = pbuf.len;
608 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
609 final ? NULL : &consumed);
610 PyBuffer_Release(&pbuf);
611 if (decoded == NULL)
612 return NULL;
613 return codec_tuple(decoded, consumed);
616 #endif /* MS_WINDOWS */
618 /* --- Encoder ------------------------------------------------------------ */
620 static PyObject *
621 readbuffer_encode(PyObject *self,
622 PyObject *args)
624 Py_buffer pdata;
625 const char *data;
626 Py_ssize_t size;
627 const char *errors = NULL;
628 PyObject *result;
630 if (!PyArg_ParseTuple(args, "s*|z:readbuffer_encode",
631 &pdata, &errors))
632 return NULL;
633 data = pdata.buf;
634 size = pdata.len;
636 result = PyBytes_FromStringAndSize(data, size);
637 PyBuffer_Release(&pdata);
638 return codec_tuple(result, size);
641 static PyObject *
642 charbuffer_encode(PyObject *self,
643 PyObject *args)
645 const char *data;
646 Py_ssize_t size;
647 const char *errors = NULL;
649 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
650 &data, &size, &errors))
651 return NULL;
653 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
656 static PyObject *
657 unicode_internal_encode(PyObject *self,
658 PyObject *args)
660 PyObject *obj;
661 const char *errors = NULL;
662 const char *data;
663 Py_ssize_t size;
665 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
666 &obj, &errors))
667 return NULL;
669 if (PyUnicode_Check(obj)) {
670 data = PyUnicode_AS_DATA(obj);
671 size = PyUnicode_GET_DATA_SIZE(obj);
672 return codec_tuple(PyBytes_FromStringAndSize(data, size),
673 PyUnicode_GET_SIZE(obj));
675 else {
676 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
677 return NULL;
678 return codec_tuple(PyBytes_FromStringAndSize(data, size), size);
682 static PyObject *
683 utf_7_encode(PyObject *self,
684 PyObject *args)
686 PyObject *str, *v;
687 const char *errors = NULL;
689 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
690 &str, &errors))
691 return NULL;
693 str = PyUnicode_FromObject(str);
694 if (str == NULL)
695 return NULL;
696 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
697 PyUnicode_GET_SIZE(str),
700 errors),
701 PyUnicode_GET_SIZE(str));
702 Py_DECREF(str);
703 return v;
706 static PyObject *
707 utf_8_encode(PyObject *self,
708 PyObject *args)
710 PyObject *str, *v;
711 const char *errors = NULL;
713 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
714 &str, &errors))
715 return NULL;
717 str = PyUnicode_FromObject(str);
718 if (str == NULL)
719 return NULL;
720 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
721 PyUnicode_GET_SIZE(str),
722 errors),
723 PyUnicode_GET_SIZE(str));
724 Py_DECREF(str);
725 return v;
728 /* This version provides access to the byteorder parameter of the
729 builtin UTF-16 codecs as optional third argument. It defaults to 0
730 which means: use the native byte order and prepend the data with a
731 BOM mark.
735 static PyObject *
736 utf_16_encode(PyObject *self,
737 PyObject *args)
739 PyObject *str, *v;
740 const char *errors = NULL;
741 int byteorder = 0;
743 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
744 &str, &errors, &byteorder))
745 return NULL;
747 str = PyUnicode_FromObject(str);
748 if (str == NULL)
749 return NULL;
750 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
751 PyUnicode_GET_SIZE(str),
752 errors,
753 byteorder),
754 PyUnicode_GET_SIZE(str));
755 Py_DECREF(str);
756 return v;
759 static PyObject *
760 utf_16_le_encode(PyObject *self,
761 PyObject *args)
763 PyObject *str, *v;
764 const char *errors = NULL;
766 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
767 &str, &errors))
768 return NULL;
770 str = PyUnicode_FromObject(str);
771 if (str == NULL)
772 return NULL;
773 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
774 PyUnicode_GET_SIZE(str),
775 errors,
776 -1),
777 PyUnicode_GET_SIZE(str));
778 Py_DECREF(str);
779 return v;
782 static PyObject *
783 utf_16_be_encode(PyObject *self,
784 PyObject *args)
786 PyObject *str, *v;
787 const char *errors = NULL;
789 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
790 &str, &errors))
791 return NULL;
793 str = PyUnicode_FromObject(str);
794 if (str == NULL)
795 return NULL;
796 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
797 PyUnicode_GET_SIZE(str),
798 errors,
799 +1),
800 PyUnicode_GET_SIZE(str));
801 Py_DECREF(str);
802 return v;
805 /* This version provides access to the byteorder parameter of the
806 builtin UTF-32 codecs as optional third argument. It defaults to 0
807 which means: use the native byte order and prepend the data with a
808 BOM mark.
812 static PyObject *
813 utf_32_encode(PyObject *self,
814 PyObject *args)
816 PyObject *str, *v;
817 const char *errors = NULL;
818 int byteorder = 0;
820 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
821 &str, &errors, &byteorder))
822 return NULL;
824 str = PyUnicode_FromObject(str);
825 if (str == NULL)
826 return NULL;
827 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
828 PyUnicode_GET_SIZE(str),
829 errors,
830 byteorder),
831 PyUnicode_GET_SIZE(str));
832 Py_DECREF(str);
833 return v;
836 static PyObject *
837 utf_32_le_encode(PyObject *self,
838 PyObject *args)
840 PyObject *str, *v;
841 const char *errors = NULL;
843 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
844 &str, &errors))
845 return NULL;
847 str = PyUnicode_FromObject(str);
848 if (str == NULL)
849 return NULL;
850 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
851 PyUnicode_GET_SIZE(str),
852 errors,
853 -1),
854 PyUnicode_GET_SIZE(str));
855 Py_DECREF(str);
856 return v;
859 static PyObject *
860 utf_32_be_encode(PyObject *self,
861 PyObject *args)
863 PyObject *str, *v;
864 const char *errors = NULL;
866 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
867 &str, &errors))
868 return NULL;
870 str = PyUnicode_FromObject(str);
871 if (str == NULL)
872 return NULL;
873 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
874 PyUnicode_GET_SIZE(str),
875 errors,
876 +1),
877 PyUnicode_GET_SIZE(str));
878 Py_DECREF(str);
879 return v;
882 static PyObject *
883 unicode_escape_encode(PyObject *self,
884 PyObject *args)
886 PyObject *str, *v;
887 const char *errors = NULL;
889 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
890 &str, &errors))
891 return NULL;
893 str = PyUnicode_FromObject(str);
894 if (str == NULL)
895 return NULL;
896 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
897 PyUnicode_GET_SIZE(str)),
898 PyUnicode_GET_SIZE(str));
899 Py_DECREF(str);
900 return v;
903 static PyObject *
904 raw_unicode_escape_encode(PyObject *self,
905 PyObject *args)
907 PyObject *str, *v;
908 const char *errors = NULL;
910 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
911 &str, &errors))
912 return NULL;
914 str = PyUnicode_FromObject(str);
915 if (str == NULL)
916 return NULL;
917 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
918 PyUnicode_AS_UNICODE(str),
919 PyUnicode_GET_SIZE(str)),
920 PyUnicode_GET_SIZE(str));
921 Py_DECREF(str);
922 return v;
925 static PyObject *
926 latin_1_encode(PyObject *self,
927 PyObject *args)
929 PyObject *str, *v;
930 const char *errors = NULL;
932 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
933 &str, &errors))
934 return NULL;
936 str = PyUnicode_FromObject(str);
937 if (str == NULL)
938 return NULL;
939 v = codec_tuple(PyUnicode_EncodeLatin1(
940 PyUnicode_AS_UNICODE(str),
941 PyUnicode_GET_SIZE(str),
942 errors),
943 PyUnicode_GET_SIZE(str));
944 Py_DECREF(str);
945 return v;
948 static PyObject *
949 ascii_encode(PyObject *self,
950 PyObject *args)
952 PyObject *str, *v;
953 const char *errors = NULL;
955 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
956 &str, &errors))
957 return NULL;
959 str = PyUnicode_FromObject(str);
960 if (str == NULL)
961 return NULL;
962 v = codec_tuple(PyUnicode_EncodeASCII(
963 PyUnicode_AS_UNICODE(str),
964 PyUnicode_GET_SIZE(str),
965 errors),
966 PyUnicode_GET_SIZE(str));
967 Py_DECREF(str);
968 return v;
971 static PyObject *
972 charmap_encode(PyObject *self,
973 PyObject *args)
975 PyObject *str, *v;
976 const char *errors = NULL;
977 PyObject *mapping = NULL;
979 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
980 &str, &errors, &mapping))
981 return NULL;
982 if (mapping == Py_None)
983 mapping = NULL;
985 str = PyUnicode_FromObject(str);
986 if (str == NULL)
987 return NULL;
988 v = codec_tuple(PyUnicode_EncodeCharmap(
989 PyUnicode_AS_UNICODE(str),
990 PyUnicode_GET_SIZE(str),
991 mapping,
992 errors),
993 PyUnicode_GET_SIZE(str));
994 Py_DECREF(str);
995 return v;
998 static PyObject*
999 charmap_build(PyObject *self, PyObject *args)
1001 PyObject *map;
1002 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1003 return NULL;
1004 return PyUnicode_BuildEncodingMap(map);
1007 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1009 static PyObject *
1010 mbcs_encode(PyObject *self,
1011 PyObject *args)
1013 PyObject *str, *v;
1014 const char *errors = NULL;
1016 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
1017 &str, &errors))
1018 return NULL;
1020 str = PyUnicode_FromObject(str);
1021 if (str == NULL)
1022 return NULL;
1023 v = codec_tuple(PyUnicode_EncodeMBCS(
1024 PyUnicode_AS_UNICODE(str),
1025 PyUnicode_GET_SIZE(str),
1026 errors),
1027 PyUnicode_GET_SIZE(str));
1028 Py_DECREF(str);
1029 return v;
1032 #endif /* MS_WINDOWS */
1034 /* --- Error handler registry --------------------------------------------- */
1036 PyDoc_STRVAR(register_error__doc__,
1037 "register_error(errors, handler)\n\
1039 Register the specified error handler under the name\n\
1040 errors. handler must be a callable object, that\n\
1041 will be called with an exception instance containing\n\
1042 information about the location of the encoding/decoding\n\
1043 error and must return a (replacement, new position) tuple.");
1045 static PyObject *register_error(PyObject *self, PyObject *args)
1047 const char *name;
1048 PyObject *handler;
1050 if (!PyArg_ParseTuple(args, "sO:register_error",
1051 &name, &handler))
1052 return NULL;
1053 if (PyCodec_RegisterError(name, handler))
1054 return NULL;
1055 Py_RETURN_NONE;
1058 PyDoc_STRVAR(lookup_error__doc__,
1059 "lookup_error(errors) -> handler\n\
1061 Return the error handler for the specified error handling name\n\
1062 or raise a LookupError, if no handler exists under this name.");
1064 static PyObject *lookup_error(PyObject *self, PyObject *args)
1066 const char *name;
1068 if (!PyArg_ParseTuple(args, "s:lookup_error",
1069 &name))
1070 return NULL;
1071 return PyCodec_LookupError(name);
1074 /* --- Module API --------------------------------------------------------- */
1076 static PyMethodDef _codecs_functions[] = {
1077 {"register", codec_register, METH_O,
1078 register__doc__},
1079 {"lookup", codec_lookup, METH_VARARGS,
1080 lookup__doc__},
1081 {"encode", codec_encode, METH_VARARGS,
1082 encode__doc__},
1083 {"decode", codec_decode, METH_VARARGS,
1084 decode__doc__},
1085 {"escape_encode", escape_encode, METH_VARARGS},
1086 {"escape_decode", escape_decode, METH_VARARGS},
1087 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1088 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1089 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1090 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1091 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1092 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1093 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1094 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1095 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1096 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1097 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1098 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1099 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1100 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1101 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1102 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1103 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1104 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1105 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1106 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1107 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1108 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1109 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1110 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1111 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1112 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1113 {"ascii_encode", ascii_encode, METH_VARARGS},
1114 {"ascii_decode", ascii_decode, METH_VARARGS},
1115 {"charmap_encode", charmap_encode, METH_VARARGS},
1116 {"charmap_decode", charmap_decode, METH_VARARGS},
1117 {"charmap_build", charmap_build, METH_VARARGS},
1118 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1119 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
1120 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1121 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1122 {"mbcs_decode", mbcs_decode, METH_VARARGS},
1123 #endif
1124 {"register_error", register_error, METH_VARARGS,
1125 register_error__doc__},
1126 {"lookup_error", lookup_error, METH_VARARGS,
1127 lookup_error__doc__},
1128 {NULL, NULL} /* sentinel */
1131 static struct PyModuleDef codecsmodule = {
1132 PyModuleDef_HEAD_INIT,
1133 "_codecs",
1134 NULL,
1136 _codecs_functions,
1137 NULL,
1138 NULL,
1139 NULL,
1140 NULL
1143 PyMODINIT_FUNC
1144 PyInit__codecs(void)
1146 return PyModule_Create(&codecsmodule);