Added updates with respect to recent changes to TimedRotatingFileHandler.
[python.git] / Modules / _codecsmodule.c
blobd4eb0d5b9c0026cb5c8409ed9e7bd7087d153b03
1 /* ------------------------------------------------------------------------
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
9 The codec registry is accessible via:
11 register(search_function) -> None
13 lookup(encoding) -> CodecInfo object
15 The builtin Unicode codecs use the following interface:
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
23 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
27 These <encoding>s are available: utf_8, unicode_escape,
28 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
32 Written by Marc-Andre Lemburg (mal@lemburg.com).
34 Copyright (c) Corporation for National Research Initiatives.
36 ------------------------------------------------------------------------ */
38 #define PY_SSIZE_T_CLEAN
39 #include "Python.h"
41 /* --- Registry ----------------------------------------------------------- */
43 PyDoc_STRVAR(register__doc__,
44 "register(search_function)\n\
45 \n\
46 Register a codec search function. Search functions are expected to take\n\
47 one argument, the encoding name in all lower case letters, and return\n\
48 a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49 (or a CodecInfo object).");
51 static
52 PyObject *codec_register(PyObject *self, PyObject *search_function)
54 if (PyCodec_Register(search_function))
55 return NULL;
57 Py_RETURN_NONE;
60 PyDoc_STRVAR(lookup__doc__,
61 "lookup(encoding) -> CodecInfo\n\
62 \n\
63 Looks up a codec tuple in the Python codec registry and returns\n\
64 a tuple of function (or a CodecInfo object).");
66 static
67 PyObject *codec_lookup(PyObject *self, PyObject *args)
69 char *encoding;
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
72 return NULL;
74 return _PyCodec_Lookup(encoding);
77 PyDoc_STRVAR(encode__doc__,
78 "encode(obj, [encoding[,errors]]) -> object\n\
79 \n\
80 Encodes obj using the codec registered for encoding. encoding defaults\n\
81 to the default encoding. errors may be given to set a different error\n\
82 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83 a ValueError. Other possible values are 'ignore', 'replace' and\n\
84 'xmlcharrefreplace' as well as any other name registered with\n\
85 codecs.register_error that can handle ValueErrors.");
87 static PyObject *
88 codec_encode(PyObject *self, PyObject *args)
90 const char *encoding = NULL;
91 const char *errors = NULL;
92 PyObject *v;
94 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
97 #ifdef Py_USING_UNICODE
98 if (encoding == NULL)
99 encoding = PyUnicode_GetDefaultEncoding();
100 #else
101 if (encoding == NULL) {
102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
105 #endif
107 /* Encode via the codec registry */
108 return PyCodec_Encode(v, encoding, errors);
111 PyDoc_STRVAR(decode__doc__,
112 "decode(obj, [encoding[,errors]]) -> object\n\
114 Decodes obj using the codec registered for encoding. encoding defaults\n\
115 to the default encoding. errors may be given to set a different error\n\
116 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117 a ValueError. Other possible values are 'ignore' and 'replace'\n\
118 as well as any other name registerd with codecs.register_error that is\n\
119 able to handle ValueErrors.");
121 static PyObject *
122 codec_decode(PyObject *self, PyObject *args)
124 const char *encoding = NULL;
125 const char *errors = NULL;
126 PyObject *v;
128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
131 #ifdef Py_USING_UNICODE
132 if (encoding == NULL)
133 encoding = PyUnicode_GetDefaultEncoding();
134 #else
135 if (encoding == NULL) {
136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
139 #endif
141 /* Decode via the codec registry */
142 return PyCodec_Decode(v, encoding, errors);
145 /* --- Helpers ------------------------------------------------------------ */
147 static
148 PyObject *codec_tuple(PyObject *unicode,
149 Py_ssize_t len)
151 PyObject *v;
152 if (unicode == NULL)
153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
156 return v;
159 /* --- String codecs ------------------------------------------------------ */
160 static PyObject *
161 escape_decode(PyObject *self,
162 PyObject *args)
164 const char *errors = NULL;
165 const char *data;
166 Py_ssize_t size;
168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169 &data, &size, &errors))
170 return NULL;
171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
172 size);
175 static PyObject *
176 escape_encode(PyObject *self,
177 PyObject *args)
179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
182 Py_ssize_t len;
184 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
185 &PyString_Type, &str, &errors))
186 return NULL;
188 str = PyString_Repr(str, 0);
189 if (!str)
190 return NULL;
192 /* The string will be quoted. Unquote, similar to unicode-escape. */
193 buf = PyString_AS_STRING (str);
194 len = PyString_GET_SIZE (str);
195 memmove(buf, buf+1, len-2);
196 if (_PyString_Resize(&str, len-2) < 0)
197 return NULL;
199 return codec_tuple(str, PyString_Size(str));
202 #ifdef Py_USING_UNICODE
203 /* --- Decoder ------------------------------------------------------------ */
205 static PyObject *
206 unicode_internal_decode(PyObject *self,
207 PyObject *args)
209 PyObject *obj;
210 const char *errors = NULL;
211 const char *data;
212 Py_ssize_t size;
214 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
215 &obj, &errors))
216 return NULL;
218 if (PyUnicode_Check(obj)) {
219 Py_INCREF(obj);
220 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
222 else {
223 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
224 return NULL;
226 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
227 size);
231 static PyObject *
232 utf_7_decode(PyObject *self,
233 PyObject *args)
235 const char *data;
236 Py_ssize_t size;
237 const char *errors = NULL;
238 int final = 0;
239 Py_ssize_t consumed;
240 PyObject *decoded = NULL;
242 if (!PyArg_ParseTuple(args, "t#|zi:utf_7_decode",
243 &data, &size, &errors, &final))
244 return NULL;
245 consumed = size;
247 decoded = PyUnicode_DecodeUTF7Stateful(data, size, errors,
248 final ? NULL : &consumed);
249 if (decoded == NULL)
250 return NULL;
251 return codec_tuple(decoded, consumed);
254 static PyObject *
255 utf_8_decode(PyObject *self,
256 PyObject *args)
258 const char *data;
259 Py_ssize_t size;
260 const char *errors = NULL;
261 int final = 0;
262 Py_ssize_t consumed;
263 PyObject *decoded = NULL;
265 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
266 &data, &size, &errors, &final))
267 return NULL;
268 if (size < 0) {
269 PyErr_SetString(PyExc_ValueError, "negative argument");
270 return 0;
272 consumed = size;
274 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
275 final ? NULL : &consumed);
276 if (decoded == NULL)
277 return NULL;
278 return codec_tuple(decoded, consumed);
281 static PyObject *
282 utf_16_decode(PyObject *self,
283 PyObject *args)
285 const char *data;
286 Py_ssize_t size;
287 const char *errors = NULL;
288 int byteorder = 0;
289 int final = 0;
290 Py_ssize_t consumed;
291 PyObject *decoded;
293 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
294 &data, &size, &errors, &final))
295 return NULL;
296 if (size < 0) {
297 PyErr_SetString(PyExc_ValueError, "negative argument");
298 return 0;
300 consumed = size; /* This is overwritten unless final is true. */
301 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
302 final ? NULL : &consumed);
303 if (decoded == NULL)
304 return NULL;
305 return codec_tuple(decoded, consumed);
308 static PyObject *
309 utf_16_le_decode(PyObject *self,
310 PyObject *args)
312 const char *data;
313 Py_ssize_t size;
314 const char *errors = NULL;
315 int byteorder = -1;
316 int final = 0;
317 Py_ssize_t consumed;
318 PyObject *decoded = NULL;
320 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
321 &data, &size, &errors, &final))
322 return NULL;
324 if (size < 0) {
325 PyErr_SetString(PyExc_ValueError, "negative argument");
326 return 0;
328 consumed = size; /* This is overwritten unless final is true. */
329 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
330 &byteorder, final ? NULL : &consumed);
331 if (decoded == NULL)
332 return NULL;
333 return codec_tuple(decoded, consumed);
337 static PyObject *
338 utf_16_be_decode(PyObject *self,
339 PyObject *args)
341 const char *data;
342 Py_ssize_t size;
343 const char *errors = NULL;
344 int byteorder = 1;
345 int final = 0;
346 Py_ssize_t consumed;
347 PyObject *decoded = NULL;
349 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
350 &data, &size, &errors, &final))
351 return NULL;
352 if (size < 0) {
353 PyErr_SetString(PyExc_ValueError, "negative argument");
354 return 0;
356 consumed = size; /* This is overwritten unless final is true. */
357 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
358 &byteorder, final ? NULL : &consumed);
359 if (decoded == NULL)
360 return NULL;
361 return codec_tuple(decoded, consumed);
364 /* This non-standard version also provides access to the byteorder
365 parameter of the builtin UTF-16 codec.
367 It returns a tuple (unicode, bytesread, byteorder) with byteorder
368 being the value in effect at the end of data.
372 static PyObject *
373 utf_16_ex_decode(PyObject *self,
374 PyObject *args)
376 const char *data;
377 Py_ssize_t size;
378 const char *errors = NULL;
379 int byteorder = 0;
380 PyObject *unicode, *tuple;
381 int final = 0;
382 Py_ssize_t consumed;
384 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
385 &data, &size, &errors, &byteorder, &final))
386 return NULL;
387 if (size < 0) {
388 PyErr_SetString(PyExc_ValueError, "negative argument");
389 return 0;
391 consumed = size; /* This is overwritten unless final is true. */
392 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
393 final ? NULL : &consumed);
394 if (unicode == NULL)
395 return NULL;
396 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
397 Py_DECREF(unicode);
398 return tuple;
401 static PyObject *
402 utf_32_decode(PyObject *self,
403 PyObject *args)
405 const char *data;
406 Py_ssize_t size;
407 const char *errors = NULL;
408 int byteorder = 0;
409 int final = 0;
410 Py_ssize_t consumed;
411 PyObject *decoded;
413 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_decode",
414 &data, &size, &errors, &final))
415 return NULL;
416 if (size < 0) {
417 PyErr_SetString(PyExc_ValueError, "negative argument");
418 return 0;
420 consumed = size; /* This is overwritten unless final is true. */
421 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
422 final ? NULL : &consumed);
423 if (decoded == NULL)
424 return NULL;
425 return codec_tuple(decoded, consumed);
428 static PyObject *
429 utf_32_le_decode(PyObject *self,
430 PyObject *args)
432 const char *data;
433 Py_ssize_t size;
434 const char *errors = NULL;
435 int byteorder = -1;
436 int final = 0;
437 Py_ssize_t consumed;
438 PyObject *decoded = NULL;
440 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_le_decode",
441 &data, &size, &errors, &final))
442 return NULL;
444 if (size < 0) {
445 PyErr_SetString(PyExc_ValueError, "negative argument");
446 return 0;
448 consumed = size; /* This is overwritten unless final is true. */
449 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
450 &byteorder, final ? NULL : &consumed);
451 if (decoded == NULL)
452 return NULL;
453 return codec_tuple(decoded, consumed);
457 static PyObject *
458 utf_32_be_decode(PyObject *self,
459 PyObject *args)
461 const char *data;
462 Py_ssize_t size;
463 const char *errors = NULL;
464 int byteorder = 1;
465 int final = 0;
466 Py_ssize_t consumed;
467 PyObject *decoded = NULL;
469 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_be_decode",
470 &data, &size, &errors, &final))
471 return NULL;
472 if (size < 0) {
473 PyErr_SetString(PyExc_ValueError, "negative argument");
474 return 0;
476 consumed = size; /* This is overwritten unless final is true. */
477 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
478 &byteorder, final ? NULL : &consumed);
479 if (decoded == NULL)
480 return NULL;
481 return codec_tuple(decoded, consumed);
484 /* This non-standard version also provides access to the byteorder
485 parameter of the builtin UTF-32 codec.
487 It returns a tuple (unicode, bytesread, byteorder) with byteorder
488 being the value in effect at the end of data.
492 static PyObject *
493 utf_32_ex_decode(PyObject *self,
494 PyObject *args)
496 const char *data;
497 Py_ssize_t size;
498 const char *errors = NULL;
499 int byteorder = 0;
500 PyObject *unicode, *tuple;
501 int final = 0;
502 Py_ssize_t consumed;
504 if (!PyArg_ParseTuple(args, "t#|zii:utf_32_ex_decode",
505 &data, &size, &errors, &byteorder, &final))
506 return NULL;
507 if (size < 0) {
508 PyErr_SetString(PyExc_ValueError, "negative argument");
509 return 0;
511 consumed = size; /* This is overwritten unless final is true. */
512 unicode = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
513 final ? NULL : &consumed);
514 if (unicode == NULL)
515 return NULL;
516 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
517 Py_DECREF(unicode);
518 return tuple;
521 static PyObject *
522 unicode_escape_decode(PyObject *self,
523 PyObject *args)
525 const char *data;
526 Py_ssize_t size;
527 const char *errors = NULL;
529 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
530 &data, &size, &errors))
531 return NULL;
533 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
534 size);
537 static PyObject *
538 raw_unicode_escape_decode(PyObject *self,
539 PyObject *args)
541 const char *data;
542 Py_ssize_t size;
543 const char *errors = NULL;
545 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
546 &data, &size, &errors))
547 return NULL;
549 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
550 size);
553 static PyObject *
554 latin_1_decode(PyObject *self,
555 PyObject *args)
557 const char *data;
558 Py_ssize_t size;
559 const char *errors = NULL;
561 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
562 &data, &size, &errors))
563 return NULL;
565 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
566 size);
569 static PyObject *
570 ascii_decode(PyObject *self,
571 PyObject *args)
573 const char *data;
574 Py_ssize_t size;
575 const char *errors = NULL;
577 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
578 &data, &size, &errors))
579 return NULL;
581 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
582 size);
585 static PyObject *
586 charmap_decode(PyObject *self,
587 PyObject *args)
589 const char *data;
590 Py_ssize_t size;
591 const char *errors = NULL;
592 PyObject *mapping = NULL;
594 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
595 &data, &size, &errors, &mapping))
596 return NULL;
597 if (mapping == Py_None)
598 mapping = NULL;
600 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
601 size);
604 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
606 static PyObject *
607 mbcs_decode(PyObject *self,
608 PyObject *args)
610 const char *data;
611 Py_ssize_t size, consumed;
612 const char *errors = NULL;
613 int final = 0;
614 PyObject *decoded;
616 if (!PyArg_ParseTuple(args, "t#|zi:mbcs_decode",
617 &data, &size, &errors, &final))
618 return NULL;
620 decoded = PyUnicode_DecodeMBCSStateful(
621 data, size, errors, final ? NULL : &consumed);
622 if (!decoded)
623 return NULL;
624 return codec_tuple(decoded, final ? size : consumed);
627 #endif /* MS_WINDOWS */
629 /* --- Encoder ------------------------------------------------------------ */
631 static PyObject *
632 readbuffer_encode(PyObject *self,
633 PyObject *args)
635 const char *data;
636 Py_ssize_t size;
637 const char *errors = NULL;
639 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
640 &data, &size, &errors))
641 return NULL;
643 return codec_tuple(PyString_FromStringAndSize(data, size),
644 size);
647 static PyObject *
648 charbuffer_encode(PyObject *self,
649 PyObject *args)
651 const char *data;
652 Py_ssize_t size;
653 const char *errors = NULL;
655 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
656 &data, &size, &errors))
657 return NULL;
659 return codec_tuple(PyString_FromStringAndSize(data, size),
660 size);
663 static PyObject *
664 unicode_internal_encode(PyObject *self,
665 PyObject *args)
667 PyObject *obj;
668 const char *errors = NULL;
669 const char *data;
670 Py_ssize_t size;
672 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
673 &obj, &errors))
674 return NULL;
676 if (PyUnicode_Check(obj)) {
677 data = PyUnicode_AS_DATA(obj);
678 size = PyUnicode_GET_DATA_SIZE(obj);
679 return codec_tuple(PyString_FromStringAndSize(data, size),
680 size);
682 else {
683 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
684 return NULL;
685 return codec_tuple(PyString_FromStringAndSize(data, size),
686 size);
690 static PyObject *
691 utf_7_encode(PyObject *self,
692 PyObject *args)
694 PyObject *str, *v;
695 const char *errors = NULL;
697 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
698 &str, &errors))
699 return NULL;
701 str = PyUnicode_FromObject(str);
702 if (str == NULL)
703 return NULL;
704 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
705 PyUnicode_GET_SIZE(str),
708 errors),
709 PyUnicode_GET_SIZE(str));
710 Py_DECREF(str);
711 return v;
714 static PyObject *
715 utf_8_encode(PyObject *self,
716 PyObject *args)
718 PyObject *str, *v;
719 const char *errors = NULL;
721 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
722 &str, &errors))
723 return NULL;
725 str = PyUnicode_FromObject(str);
726 if (str == NULL)
727 return NULL;
728 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
729 PyUnicode_GET_SIZE(str),
730 errors),
731 PyUnicode_GET_SIZE(str));
732 Py_DECREF(str);
733 return v;
736 /* This version provides access to the byteorder parameter of the
737 builtin UTF-16 codecs as optional third argument. It defaults to 0
738 which means: use the native byte order and prepend the data with a
739 BOM mark.
743 static PyObject *
744 utf_16_encode(PyObject *self,
745 PyObject *args)
747 PyObject *str, *v;
748 const char *errors = NULL;
749 int byteorder = 0;
751 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
752 &str, &errors, &byteorder))
753 return NULL;
755 str = PyUnicode_FromObject(str);
756 if (str == NULL)
757 return NULL;
758 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
759 PyUnicode_GET_SIZE(str),
760 errors,
761 byteorder),
762 PyUnicode_GET_SIZE(str));
763 Py_DECREF(str);
764 return v;
767 static PyObject *
768 utf_16_le_encode(PyObject *self,
769 PyObject *args)
771 PyObject *str, *v;
772 const char *errors = NULL;
774 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
775 &str, &errors))
776 return NULL;
778 str = PyUnicode_FromObject(str);
779 if (str == NULL)
780 return NULL;
781 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
782 PyUnicode_GET_SIZE(str),
783 errors,
784 -1),
785 PyUnicode_GET_SIZE(str));
786 Py_DECREF(str);
787 return v;
790 static PyObject *
791 utf_16_be_encode(PyObject *self,
792 PyObject *args)
794 PyObject *str, *v;
795 const char *errors = NULL;
797 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
798 &str, &errors))
799 return NULL;
801 str = PyUnicode_FromObject(str);
802 if (str == NULL)
803 return NULL;
804 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
805 PyUnicode_GET_SIZE(str),
806 errors,
807 +1),
808 PyUnicode_GET_SIZE(str));
809 Py_DECREF(str);
810 return v;
813 /* This version provides access to the byteorder parameter of the
814 builtin UTF-32 codecs as optional third argument. It defaults to 0
815 which means: use the native byte order and prepend the data with a
816 BOM mark.
820 static PyObject *
821 utf_32_encode(PyObject *self,
822 PyObject *args)
824 PyObject *str, *v;
825 const char *errors = NULL;
826 int byteorder = 0;
828 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
829 &str, &errors, &byteorder))
830 return NULL;
832 str = PyUnicode_FromObject(str);
833 if (str == NULL)
834 return NULL;
835 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
836 PyUnicode_GET_SIZE(str),
837 errors,
838 byteorder),
839 PyUnicode_GET_SIZE(str));
840 Py_DECREF(str);
841 return v;
844 static PyObject *
845 utf_32_le_encode(PyObject *self,
846 PyObject *args)
848 PyObject *str, *v;
849 const char *errors = NULL;
851 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
852 &str, &errors))
853 return NULL;
855 str = PyUnicode_FromObject(str);
856 if (str == NULL)
857 return NULL;
858 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
859 PyUnicode_GET_SIZE(str),
860 errors,
861 -1),
862 PyUnicode_GET_SIZE(str));
863 Py_DECREF(str);
864 return v;
867 static PyObject *
868 utf_32_be_encode(PyObject *self,
869 PyObject *args)
871 PyObject *str, *v;
872 const char *errors = NULL;
874 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
875 &str, &errors))
876 return NULL;
878 str = PyUnicode_FromObject(str);
879 if (str == NULL)
880 return NULL;
881 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
882 PyUnicode_GET_SIZE(str),
883 errors,
884 +1),
885 PyUnicode_GET_SIZE(str));
886 Py_DECREF(str);
887 return v;
890 static PyObject *
891 unicode_escape_encode(PyObject *self,
892 PyObject *args)
894 PyObject *str, *v;
895 const char *errors = NULL;
897 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
898 &str, &errors))
899 return NULL;
901 str = PyUnicode_FromObject(str);
902 if (str == NULL)
903 return NULL;
904 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
905 PyUnicode_GET_SIZE(str)),
906 PyUnicode_GET_SIZE(str));
907 Py_DECREF(str);
908 return v;
911 static PyObject *
912 raw_unicode_escape_encode(PyObject *self,
913 PyObject *args)
915 PyObject *str, *v;
916 const char *errors = NULL;
918 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
919 &str, &errors))
920 return NULL;
922 str = PyUnicode_FromObject(str);
923 if (str == NULL)
924 return NULL;
925 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
926 PyUnicode_AS_UNICODE(str),
927 PyUnicode_GET_SIZE(str)),
928 PyUnicode_GET_SIZE(str));
929 Py_DECREF(str);
930 return v;
933 static PyObject *
934 latin_1_encode(PyObject *self,
935 PyObject *args)
937 PyObject *str, *v;
938 const char *errors = NULL;
940 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
941 &str, &errors))
942 return NULL;
944 str = PyUnicode_FromObject(str);
945 if (str == NULL)
946 return NULL;
947 v = codec_tuple(PyUnicode_EncodeLatin1(
948 PyUnicode_AS_UNICODE(str),
949 PyUnicode_GET_SIZE(str),
950 errors),
951 PyUnicode_GET_SIZE(str));
952 Py_DECREF(str);
953 return v;
956 static PyObject *
957 ascii_encode(PyObject *self,
958 PyObject *args)
960 PyObject *str, *v;
961 const char *errors = NULL;
963 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
964 &str, &errors))
965 return NULL;
967 str = PyUnicode_FromObject(str);
968 if (str == NULL)
969 return NULL;
970 v = codec_tuple(PyUnicode_EncodeASCII(
971 PyUnicode_AS_UNICODE(str),
972 PyUnicode_GET_SIZE(str),
973 errors),
974 PyUnicode_GET_SIZE(str));
975 Py_DECREF(str);
976 return v;
979 static PyObject *
980 charmap_encode(PyObject *self,
981 PyObject *args)
983 PyObject *str, *v;
984 const char *errors = NULL;
985 PyObject *mapping = NULL;
987 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
988 &str, &errors, &mapping))
989 return NULL;
990 if (mapping == Py_None)
991 mapping = NULL;
993 str = PyUnicode_FromObject(str);
994 if (str == NULL)
995 return NULL;
996 v = codec_tuple(PyUnicode_EncodeCharmap(
997 PyUnicode_AS_UNICODE(str),
998 PyUnicode_GET_SIZE(str),
999 mapping,
1000 errors),
1001 PyUnicode_GET_SIZE(str));
1002 Py_DECREF(str);
1003 return v;
1006 static PyObject*
1007 charmap_build(PyObject *self, PyObject *args)
1009 PyObject *map;
1010 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1011 return NULL;
1012 return PyUnicode_BuildEncodingMap(map);
1015 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1017 static PyObject *
1018 mbcs_encode(PyObject *self,
1019 PyObject *args)
1021 PyObject *str, *v;
1022 const char *errors = NULL;
1024 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
1025 &str, &errors))
1026 return NULL;
1028 str = PyUnicode_FromObject(str);
1029 if (str == NULL)
1030 return NULL;
1031 v = codec_tuple(PyUnicode_EncodeMBCS(
1032 PyUnicode_AS_UNICODE(str),
1033 PyUnicode_GET_SIZE(str),
1034 errors),
1035 PyUnicode_GET_SIZE(str));
1036 Py_DECREF(str);
1037 return v;
1040 #endif /* MS_WINDOWS */
1041 #endif /* Py_USING_UNICODE */
1043 /* --- Error handler registry --------------------------------------------- */
1045 PyDoc_STRVAR(register_error__doc__,
1046 "register_error(errors, handler)\n\
1048 Register the specified error handler under the name\n\
1049 errors. handler must be a callable object, that\n\
1050 will be called with an exception instance containing\n\
1051 information about the location of the encoding/decoding\n\
1052 error and must return a (replacement, new position) tuple.");
1054 static PyObject *register_error(PyObject *self, PyObject *args)
1056 const char *name;
1057 PyObject *handler;
1059 if (!PyArg_ParseTuple(args, "sO:register_error",
1060 &name, &handler))
1061 return NULL;
1062 if (PyCodec_RegisterError(name, handler))
1063 return NULL;
1064 Py_RETURN_NONE;
1067 PyDoc_STRVAR(lookup_error__doc__,
1068 "lookup_error(errors) -> handler\n\
1070 Return the error handler for the specified error handling name\n\
1071 or raise a LookupError, if no handler exists under this name.");
1073 static PyObject *lookup_error(PyObject *self, PyObject *args)
1075 const char *name;
1077 if (!PyArg_ParseTuple(args, "s:lookup_error",
1078 &name))
1079 return NULL;
1080 return PyCodec_LookupError(name);
1083 /* --- Module API --------------------------------------------------------- */
1085 static PyMethodDef _codecs_functions[] = {
1086 {"register", codec_register, METH_O,
1087 register__doc__},
1088 {"lookup", codec_lookup, METH_VARARGS,
1089 lookup__doc__},
1090 {"encode", codec_encode, METH_VARARGS,
1091 encode__doc__},
1092 {"decode", codec_decode, METH_VARARGS,
1093 decode__doc__},
1094 {"escape_encode", escape_encode, METH_VARARGS},
1095 {"escape_decode", escape_decode, METH_VARARGS},
1096 #ifdef Py_USING_UNICODE
1097 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1098 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1099 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1100 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1101 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1102 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1103 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1104 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1105 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1106 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1107 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1108 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1109 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1110 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1111 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1112 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1113 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1114 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1115 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1116 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1117 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1118 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1119 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1120 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1121 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1122 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1123 {"ascii_encode", ascii_encode, METH_VARARGS},
1124 {"ascii_decode", ascii_decode, METH_VARARGS},
1125 {"charmap_encode", charmap_encode, METH_VARARGS},
1126 {"charmap_decode", charmap_decode, METH_VARARGS},
1127 {"charmap_build", charmap_build, METH_VARARGS},
1128 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1129 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
1130 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1131 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1132 {"mbcs_decode", mbcs_decode, METH_VARARGS},
1133 #endif
1134 #endif /* Py_USING_UNICODE */
1135 {"register_error", register_error, METH_VARARGS,
1136 register_error__doc__},
1137 {"lookup_error", lookup_error, METH_VARARGS,
1138 lookup_error__doc__},
1139 {NULL, NULL} /* sentinel */
1142 PyMODINIT_FUNC
1143 init_codecs(void)
1145 Py_InitModule("_codecs", _codecs_functions);