Change to flush and close logic to fix #1760556.
[python.git] / Modules / _codecsmodule.c
blob0716f3a68d75b03feb4d5f9921c132cec752f8e3
1 /* ------------------------------------------------------------------------
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
9 The codec registry is accessible via:
11 register(search_function) -> None
13 lookup(encoding) -> CodecInfo object
15 The builtin Unicode codecs use the following interface:
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
23 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
27 These <encoding>s are available: utf_8, unicode_escape,
28 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
32 Written by Marc-Andre Lemburg (mal@lemburg.com).
34 Copyright (c) Corporation for National Research Initiatives.
36 ------------------------------------------------------------------------ */
38 #define PY_SSIZE_T_CLEAN
39 #include "Python.h"
41 /* --- Registry ----------------------------------------------------------- */
43 PyDoc_STRVAR(register__doc__,
44 "register(search_function)\n\
45 \n\
46 Register a codec search function. Search functions are expected to take\n\
47 one argument, the encoding name in all lower case letters, and return\n\
48 a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49 (or a CodecInfo object).");
51 static
52 PyObject *codec_register(PyObject *self, PyObject *search_function)
54 if (PyCodec_Register(search_function))
55 return NULL;
57 Py_RETURN_NONE;
60 PyDoc_STRVAR(lookup__doc__,
61 "lookup(encoding) -> CodecInfo\n\
62 \n\
63 Looks up a codec tuple in the Python codec registry and returns\n\
64 a tuple of function (or a CodecInfo object).");
66 static
67 PyObject *codec_lookup(PyObject *self, PyObject *args)
69 char *encoding;
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
72 return NULL;
74 return _PyCodec_Lookup(encoding);
77 PyDoc_STRVAR(encode__doc__,
78 "encode(obj, [encoding[,errors]]) -> object\n\
79 \n\
80 Encodes obj using the codec registered for encoding. encoding defaults\n\
81 to the default encoding. errors may be given to set a different error\n\
82 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83 a ValueError. Other possible values are 'ignore', 'replace' and\n\
84 'xmlcharrefreplace' as well as any other name registered with\n\
85 codecs.register_error that can handle ValueErrors.");
87 static PyObject *
88 codec_encode(PyObject *self, PyObject *args)
90 const char *encoding = NULL;
91 const char *errors = NULL;
92 PyObject *v;
94 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
97 #ifdef Py_USING_UNICODE
98 if (encoding == NULL)
99 encoding = PyUnicode_GetDefaultEncoding();
100 #else
101 if (encoding == NULL) {
102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
105 #endif
107 /* Encode via the codec registry */
108 return PyCodec_Encode(v, encoding, errors);
111 PyDoc_STRVAR(decode__doc__,
112 "decode(obj, [encoding[,errors]]) -> object\n\
114 Decodes obj using the codec registered for encoding. encoding defaults\n\
115 to the default encoding. errors may be given to set a different error\n\
116 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117 a ValueError. Other possible values are 'ignore' and 'replace'\n\
118 as well as any other name registerd with codecs.register_error that is\n\
119 able to handle ValueErrors.");
121 static PyObject *
122 codec_decode(PyObject *self, PyObject *args)
124 const char *encoding = NULL;
125 const char *errors = NULL;
126 PyObject *v;
128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
131 #ifdef Py_USING_UNICODE
132 if (encoding == NULL)
133 encoding = PyUnicode_GetDefaultEncoding();
134 #else
135 if (encoding == NULL) {
136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
139 #endif
141 /* Decode via the codec registry */
142 return PyCodec_Decode(v, encoding, errors);
145 /* --- Helpers ------------------------------------------------------------ */
147 static
148 PyObject *codec_tuple(PyObject *unicode,
149 Py_ssize_t len)
151 PyObject *v;
152 if (unicode == NULL)
153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
156 return v;
159 /* --- String codecs ------------------------------------------------------ */
160 static PyObject *
161 escape_decode(PyObject *self,
162 PyObject *args)
164 const char *errors = NULL;
165 const char *data;
166 Py_ssize_t size;
168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169 &data, &size, &errors))
170 return NULL;
171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
172 size);
175 static PyObject *
176 escape_encode(PyObject *self,
177 PyObject *args)
179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
182 Py_ssize_t len;
184 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
185 &PyString_Type, &str, &errors))
186 return NULL;
188 str = PyString_Repr(str, 0);
189 if (!str)
190 return NULL;
192 /* The string will be quoted. Unquote, similar to unicode-escape. */
193 buf = PyString_AS_STRING (str);
194 len = PyString_GET_SIZE (str);
195 memmove(buf, buf+1, len-2);
196 if (_PyString_Resize(&str, len-2) < 0)
197 return NULL;
199 return codec_tuple(str, PyString_Size(str));
202 #ifdef Py_USING_UNICODE
203 /* --- Decoder ------------------------------------------------------------ */
205 static PyObject *
206 unicode_internal_decode(PyObject *self,
207 PyObject *args)
209 PyObject *obj;
210 const char *errors = NULL;
211 const char *data;
212 Py_ssize_t size;
214 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
215 &obj, &errors))
216 return NULL;
218 if (PyUnicode_Check(obj)) {
219 Py_INCREF(obj);
220 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
222 else {
223 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
224 return NULL;
226 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
227 size);
231 static PyObject *
232 utf_7_decode(PyObject *self,
233 PyObject *args)
235 const char *data;
236 Py_ssize_t size;
237 const char *errors = NULL;
239 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
240 &data, &size, &errors))
241 return NULL;
243 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
244 size);
247 static PyObject *
248 utf_8_decode(PyObject *self,
249 PyObject *args)
251 const char *data;
252 Py_ssize_t size;
253 const char *errors = NULL;
254 int final = 0;
255 Py_ssize_t consumed;
256 PyObject *decoded = NULL;
258 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
259 &data, &size, &errors, &final))
260 return NULL;
261 if (size < 0) {
262 PyErr_SetString(PyExc_ValueError, "negative argument");
263 return 0;
265 consumed = size;
267 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
268 final ? NULL : &consumed);
269 if (decoded == NULL)
270 return NULL;
271 return codec_tuple(decoded, consumed);
274 static PyObject *
275 utf_16_decode(PyObject *self,
276 PyObject *args)
278 const char *data;
279 Py_ssize_t size;
280 const char *errors = NULL;
281 int byteorder = 0;
282 int final = 0;
283 Py_ssize_t consumed;
284 PyObject *decoded;
286 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
287 &data, &size, &errors, &final))
288 return NULL;
289 if (size < 0) {
290 PyErr_SetString(PyExc_ValueError, "negative argument");
291 return 0;
293 consumed = size; /* This is overwritten unless final is true. */
294 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
295 final ? NULL : &consumed);
296 if (decoded == NULL)
297 return NULL;
298 return codec_tuple(decoded, consumed);
301 static PyObject *
302 utf_16_le_decode(PyObject *self,
303 PyObject *args)
305 const char *data;
306 Py_ssize_t size;
307 const char *errors = NULL;
308 int byteorder = -1;
309 int final = 0;
310 Py_ssize_t consumed;
311 PyObject *decoded = NULL;
313 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
314 &data, &size, &errors, &final))
315 return NULL;
317 if (size < 0) {
318 PyErr_SetString(PyExc_ValueError, "negative argument");
319 return 0;
321 consumed = size; /* This is overwritten unless final is true. */
322 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
323 &byteorder, final ? NULL : &consumed);
324 if (decoded == NULL)
325 return NULL;
326 return codec_tuple(decoded, consumed);
330 static PyObject *
331 utf_16_be_decode(PyObject *self,
332 PyObject *args)
334 const char *data;
335 Py_ssize_t size;
336 const char *errors = NULL;
337 int byteorder = 1;
338 int final = 0;
339 Py_ssize_t consumed;
340 PyObject *decoded = NULL;
342 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
343 &data, &size, &errors, &final))
344 return NULL;
345 if (size < 0) {
346 PyErr_SetString(PyExc_ValueError, "negative argument");
347 return 0;
349 consumed = size; /* This is overwritten unless final is true. */
350 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
351 &byteorder, final ? NULL : &consumed);
352 if (decoded == NULL)
353 return NULL;
354 return codec_tuple(decoded, consumed);
357 /* This non-standard version also provides access to the byteorder
358 parameter of the builtin UTF-16 codec.
360 It returns a tuple (unicode, bytesread, byteorder) with byteorder
361 being the value in effect at the end of data.
365 static PyObject *
366 utf_16_ex_decode(PyObject *self,
367 PyObject *args)
369 const char *data;
370 Py_ssize_t size;
371 const char *errors = NULL;
372 int byteorder = 0;
373 PyObject *unicode, *tuple;
374 int final = 0;
375 Py_ssize_t consumed;
377 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
378 &data, &size, &errors, &byteorder, &final))
379 return NULL;
380 if (size < 0) {
381 PyErr_SetString(PyExc_ValueError, "negative argument");
382 return 0;
384 consumed = size; /* This is overwritten unless final is true. */
385 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
386 final ? NULL : &consumed);
387 if (unicode == NULL)
388 return NULL;
389 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
390 Py_DECREF(unicode);
391 return tuple;
394 static PyObject *
395 utf_32_decode(PyObject *self,
396 PyObject *args)
398 const char *data;
399 Py_ssize_t size;
400 const char *errors = NULL;
401 int byteorder = 0;
402 int final = 0;
403 Py_ssize_t consumed;
404 PyObject *decoded;
406 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_decode",
407 &data, &size, &errors, &final))
408 return NULL;
409 if (size < 0) {
410 PyErr_SetString(PyExc_ValueError, "negative argument");
411 return 0;
413 consumed = size; /* This is overwritten unless final is true. */
414 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
415 final ? NULL : &consumed);
416 if (decoded == NULL)
417 return NULL;
418 return codec_tuple(decoded, consumed);
421 static PyObject *
422 utf_32_le_decode(PyObject *self,
423 PyObject *args)
425 const char *data;
426 Py_ssize_t size;
427 const char *errors = NULL;
428 int byteorder = -1;
429 int final = 0;
430 Py_ssize_t consumed;
431 PyObject *decoded = NULL;
433 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_le_decode",
434 &data, &size, &errors, &final))
435 return NULL;
437 if (size < 0) {
438 PyErr_SetString(PyExc_ValueError, "negative argument");
439 return 0;
441 consumed = size; /* This is overwritten unless final is true. */
442 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
443 &byteorder, final ? NULL : &consumed);
444 if (decoded == NULL)
445 return NULL;
446 return codec_tuple(decoded, consumed);
450 static PyObject *
451 utf_32_be_decode(PyObject *self,
452 PyObject *args)
454 const char *data;
455 Py_ssize_t size;
456 const char *errors = NULL;
457 int byteorder = 1;
458 int final = 0;
459 Py_ssize_t consumed;
460 PyObject *decoded = NULL;
462 if (!PyArg_ParseTuple(args, "t#|zi:utf_32_be_decode",
463 &data, &size, &errors, &final))
464 return NULL;
465 if (size < 0) {
466 PyErr_SetString(PyExc_ValueError, "negative argument");
467 return 0;
469 consumed = size; /* This is overwritten unless final is true. */
470 decoded = PyUnicode_DecodeUTF32Stateful(data, size, errors,
471 &byteorder, final ? NULL : &consumed);
472 if (decoded == NULL)
473 return NULL;
474 return codec_tuple(decoded, consumed);
477 /* This non-standard version also provides access to the byteorder
478 parameter of the builtin UTF-32 codec.
480 It returns a tuple (unicode, bytesread, byteorder) with byteorder
481 being the value in effect at the end of data.
485 static PyObject *
486 utf_32_ex_decode(PyObject *self,
487 PyObject *args)
489 const char *data;
490 Py_ssize_t size;
491 const char *errors = NULL;
492 int byteorder = 0;
493 PyObject *unicode, *tuple;
494 int final = 0;
495 Py_ssize_t consumed;
497 if (!PyArg_ParseTuple(args, "t#|zii:utf_32_ex_decode",
498 &data, &size, &errors, &byteorder, &final))
499 return NULL;
500 if (size < 0) {
501 PyErr_SetString(PyExc_ValueError, "negative argument");
502 return 0;
504 consumed = size; /* This is overwritten unless final is true. */
505 unicode = PyUnicode_DecodeUTF32Stateful(data, size, errors, &byteorder,
506 final ? NULL : &consumed);
507 if (unicode == NULL)
508 return NULL;
509 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
510 Py_DECREF(unicode);
511 return tuple;
514 static PyObject *
515 unicode_escape_decode(PyObject *self,
516 PyObject *args)
518 const char *data;
519 Py_ssize_t size;
520 const char *errors = NULL;
522 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
523 &data, &size, &errors))
524 return NULL;
526 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
527 size);
530 static PyObject *
531 raw_unicode_escape_decode(PyObject *self,
532 PyObject *args)
534 const char *data;
535 Py_ssize_t size;
536 const char *errors = NULL;
538 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
539 &data, &size, &errors))
540 return NULL;
542 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
543 size);
546 static PyObject *
547 latin_1_decode(PyObject *self,
548 PyObject *args)
550 const char *data;
551 Py_ssize_t size;
552 const char *errors = NULL;
554 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
555 &data, &size, &errors))
556 return NULL;
558 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
559 size);
562 static PyObject *
563 ascii_decode(PyObject *self,
564 PyObject *args)
566 const char *data;
567 Py_ssize_t size;
568 const char *errors = NULL;
570 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
571 &data, &size, &errors))
572 return NULL;
574 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
575 size);
578 static PyObject *
579 charmap_decode(PyObject *self,
580 PyObject *args)
582 const char *data;
583 Py_ssize_t size;
584 const char *errors = NULL;
585 PyObject *mapping = NULL;
587 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
588 &data, &size, &errors, &mapping))
589 return NULL;
590 if (mapping == Py_None)
591 mapping = NULL;
593 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
594 size);
597 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
599 static PyObject *
600 mbcs_decode(PyObject *self,
601 PyObject *args)
603 const char *data;
604 Py_ssize_t size, consumed;
605 const char *errors = NULL;
606 int final = 0;
607 PyObject *decoded;
609 if (!PyArg_ParseTuple(args, "t#|zi:mbcs_decode",
610 &data, &size, &errors, &final))
611 return NULL;
613 decoded = PyUnicode_DecodeMBCSStateful(
614 data, size, errors, final ? NULL : &consumed);
615 if (!decoded)
616 return NULL;
617 return codec_tuple(decoded, final ? size : consumed);
620 #endif /* MS_WINDOWS */
622 /* --- Encoder ------------------------------------------------------------ */
624 static PyObject *
625 readbuffer_encode(PyObject *self,
626 PyObject *args)
628 const char *data;
629 Py_ssize_t size;
630 const char *errors = NULL;
632 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
633 &data, &size, &errors))
634 return NULL;
636 return codec_tuple(PyString_FromStringAndSize(data, size),
637 size);
640 static PyObject *
641 charbuffer_encode(PyObject *self,
642 PyObject *args)
644 const char *data;
645 Py_ssize_t size;
646 const char *errors = NULL;
648 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
649 &data, &size, &errors))
650 return NULL;
652 return codec_tuple(PyString_FromStringAndSize(data, size),
653 size);
656 static PyObject *
657 unicode_internal_encode(PyObject *self,
658 PyObject *args)
660 PyObject *obj;
661 const char *errors = NULL;
662 const char *data;
663 Py_ssize_t size;
665 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
666 &obj, &errors))
667 return NULL;
669 if (PyUnicode_Check(obj)) {
670 data = PyUnicode_AS_DATA(obj);
671 size = PyUnicode_GET_DATA_SIZE(obj);
672 return codec_tuple(PyString_FromStringAndSize(data, size),
673 size);
675 else {
676 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
677 return NULL;
678 return codec_tuple(PyString_FromStringAndSize(data, size),
679 size);
683 static PyObject *
684 utf_7_encode(PyObject *self,
685 PyObject *args)
687 PyObject *str, *v;
688 const char *errors = NULL;
690 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
691 &str, &errors))
692 return NULL;
694 str = PyUnicode_FromObject(str);
695 if (str == NULL)
696 return NULL;
697 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
698 PyUnicode_GET_SIZE(str),
701 errors),
702 PyUnicode_GET_SIZE(str));
703 Py_DECREF(str);
704 return v;
707 static PyObject *
708 utf_8_encode(PyObject *self,
709 PyObject *args)
711 PyObject *str, *v;
712 const char *errors = NULL;
714 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
715 &str, &errors))
716 return NULL;
718 str = PyUnicode_FromObject(str);
719 if (str == NULL)
720 return NULL;
721 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
722 PyUnicode_GET_SIZE(str),
723 errors),
724 PyUnicode_GET_SIZE(str));
725 Py_DECREF(str);
726 return v;
729 /* This version provides access to the byteorder parameter of the
730 builtin UTF-16 codecs as optional third argument. It defaults to 0
731 which means: use the native byte order and prepend the data with a
732 BOM mark.
736 static PyObject *
737 utf_16_encode(PyObject *self,
738 PyObject *args)
740 PyObject *str, *v;
741 const char *errors = NULL;
742 int byteorder = 0;
744 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
745 &str, &errors, &byteorder))
746 return NULL;
748 str = PyUnicode_FromObject(str);
749 if (str == NULL)
750 return NULL;
751 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
752 PyUnicode_GET_SIZE(str),
753 errors,
754 byteorder),
755 PyUnicode_GET_SIZE(str));
756 Py_DECREF(str);
757 return v;
760 static PyObject *
761 utf_16_le_encode(PyObject *self,
762 PyObject *args)
764 PyObject *str, *v;
765 const char *errors = NULL;
767 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
768 &str, &errors))
769 return NULL;
771 str = PyUnicode_FromObject(str);
772 if (str == NULL)
773 return NULL;
774 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
775 PyUnicode_GET_SIZE(str),
776 errors,
777 -1),
778 PyUnicode_GET_SIZE(str));
779 Py_DECREF(str);
780 return v;
783 static PyObject *
784 utf_16_be_encode(PyObject *self,
785 PyObject *args)
787 PyObject *str, *v;
788 const char *errors = NULL;
790 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
791 &str, &errors))
792 return NULL;
794 str = PyUnicode_FromObject(str);
795 if (str == NULL)
796 return NULL;
797 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
798 PyUnicode_GET_SIZE(str),
799 errors,
800 +1),
801 PyUnicode_GET_SIZE(str));
802 Py_DECREF(str);
803 return v;
806 /* This version provides access to the byteorder parameter of the
807 builtin UTF-32 codecs as optional third argument. It defaults to 0
808 which means: use the native byte order and prepend the data with a
809 BOM mark.
813 static PyObject *
814 utf_32_encode(PyObject *self,
815 PyObject *args)
817 PyObject *str, *v;
818 const char *errors = NULL;
819 int byteorder = 0;
821 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
822 &str, &errors, &byteorder))
823 return NULL;
825 str = PyUnicode_FromObject(str);
826 if (str == NULL)
827 return NULL;
828 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
829 PyUnicode_GET_SIZE(str),
830 errors,
831 byteorder),
832 PyUnicode_GET_SIZE(str));
833 Py_DECREF(str);
834 return v;
837 static PyObject *
838 utf_32_le_encode(PyObject *self,
839 PyObject *args)
841 PyObject *str, *v;
842 const char *errors = NULL;
844 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
845 &str, &errors))
846 return NULL;
848 str = PyUnicode_FromObject(str);
849 if (str == NULL)
850 return NULL;
851 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
852 PyUnicode_GET_SIZE(str),
853 errors,
854 -1),
855 PyUnicode_GET_SIZE(str));
856 Py_DECREF(str);
857 return v;
860 static PyObject *
861 utf_32_be_encode(PyObject *self,
862 PyObject *args)
864 PyObject *str, *v;
865 const char *errors = NULL;
867 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
868 &str, &errors))
869 return NULL;
871 str = PyUnicode_FromObject(str);
872 if (str == NULL)
873 return NULL;
874 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
875 PyUnicode_GET_SIZE(str),
876 errors,
877 +1),
878 PyUnicode_GET_SIZE(str));
879 Py_DECREF(str);
880 return v;
883 static PyObject *
884 unicode_escape_encode(PyObject *self,
885 PyObject *args)
887 PyObject *str, *v;
888 const char *errors = NULL;
890 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
891 &str, &errors))
892 return NULL;
894 str = PyUnicode_FromObject(str);
895 if (str == NULL)
896 return NULL;
897 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
898 PyUnicode_GET_SIZE(str)),
899 PyUnicode_GET_SIZE(str));
900 Py_DECREF(str);
901 return v;
904 static PyObject *
905 raw_unicode_escape_encode(PyObject *self,
906 PyObject *args)
908 PyObject *str, *v;
909 const char *errors = NULL;
911 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
912 &str, &errors))
913 return NULL;
915 str = PyUnicode_FromObject(str);
916 if (str == NULL)
917 return NULL;
918 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
919 PyUnicode_AS_UNICODE(str),
920 PyUnicode_GET_SIZE(str)),
921 PyUnicode_GET_SIZE(str));
922 Py_DECREF(str);
923 return v;
926 static PyObject *
927 latin_1_encode(PyObject *self,
928 PyObject *args)
930 PyObject *str, *v;
931 const char *errors = NULL;
933 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
934 &str, &errors))
935 return NULL;
937 str = PyUnicode_FromObject(str);
938 if (str == NULL)
939 return NULL;
940 v = codec_tuple(PyUnicode_EncodeLatin1(
941 PyUnicode_AS_UNICODE(str),
942 PyUnicode_GET_SIZE(str),
943 errors),
944 PyUnicode_GET_SIZE(str));
945 Py_DECREF(str);
946 return v;
949 static PyObject *
950 ascii_encode(PyObject *self,
951 PyObject *args)
953 PyObject *str, *v;
954 const char *errors = NULL;
956 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
957 &str, &errors))
958 return NULL;
960 str = PyUnicode_FromObject(str);
961 if (str == NULL)
962 return NULL;
963 v = codec_tuple(PyUnicode_EncodeASCII(
964 PyUnicode_AS_UNICODE(str),
965 PyUnicode_GET_SIZE(str),
966 errors),
967 PyUnicode_GET_SIZE(str));
968 Py_DECREF(str);
969 return v;
972 static PyObject *
973 charmap_encode(PyObject *self,
974 PyObject *args)
976 PyObject *str, *v;
977 const char *errors = NULL;
978 PyObject *mapping = NULL;
980 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
981 &str, &errors, &mapping))
982 return NULL;
983 if (mapping == Py_None)
984 mapping = NULL;
986 str = PyUnicode_FromObject(str);
987 if (str == NULL)
988 return NULL;
989 v = codec_tuple(PyUnicode_EncodeCharmap(
990 PyUnicode_AS_UNICODE(str),
991 PyUnicode_GET_SIZE(str),
992 mapping,
993 errors),
994 PyUnicode_GET_SIZE(str));
995 Py_DECREF(str);
996 return v;
999 static PyObject*
1000 charmap_build(PyObject *self, PyObject *args)
1002 PyObject *map;
1003 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
1004 return NULL;
1005 return PyUnicode_BuildEncodingMap(map);
1008 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1010 static PyObject *
1011 mbcs_encode(PyObject *self,
1012 PyObject *args)
1014 PyObject *str, *v;
1015 const char *errors = NULL;
1017 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
1018 &str, &errors))
1019 return NULL;
1021 str = PyUnicode_FromObject(str);
1022 if (str == NULL)
1023 return NULL;
1024 v = codec_tuple(PyUnicode_EncodeMBCS(
1025 PyUnicode_AS_UNICODE(str),
1026 PyUnicode_GET_SIZE(str),
1027 errors),
1028 PyUnicode_GET_SIZE(str));
1029 Py_DECREF(str);
1030 return v;
1033 #endif /* MS_WINDOWS */
1034 #endif /* Py_USING_UNICODE */
1036 /* --- Error handler registry --------------------------------------------- */
1038 PyDoc_STRVAR(register_error__doc__,
1039 "register_error(errors, handler)\n\
1041 Register the specified error handler under the name\n\
1042 errors. handler must be a callable object, that\n\
1043 will be called with an exception instance containing\n\
1044 information about the location of the encoding/decoding\n\
1045 error and must return a (replacement, new position) tuple.");
1047 static PyObject *register_error(PyObject *self, PyObject *args)
1049 const char *name;
1050 PyObject *handler;
1052 if (!PyArg_ParseTuple(args, "sO:register_error",
1053 &name, &handler))
1054 return NULL;
1055 if (PyCodec_RegisterError(name, handler))
1056 return NULL;
1057 Py_RETURN_NONE;
1060 PyDoc_STRVAR(lookup_error__doc__,
1061 "lookup_error(errors) -> handler\n\
1063 Return the error handler for the specified error handling name\n\
1064 or raise a LookupError, if no handler exists under this name.");
1066 static PyObject *lookup_error(PyObject *self, PyObject *args)
1068 const char *name;
1070 if (!PyArg_ParseTuple(args, "s:lookup_error",
1071 &name))
1072 return NULL;
1073 return PyCodec_LookupError(name);
1076 /* --- Module API --------------------------------------------------------- */
1078 static PyMethodDef _codecs_functions[] = {
1079 {"register", codec_register, METH_O,
1080 register__doc__},
1081 {"lookup", codec_lookup, METH_VARARGS,
1082 lookup__doc__},
1083 {"encode", codec_encode, METH_VARARGS,
1084 encode__doc__},
1085 {"decode", codec_decode, METH_VARARGS,
1086 decode__doc__},
1087 {"escape_encode", escape_encode, METH_VARARGS},
1088 {"escape_decode", escape_decode, METH_VARARGS},
1089 #ifdef Py_USING_UNICODE
1090 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1091 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1092 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1093 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1094 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1095 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1096 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1097 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1098 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1099 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1100 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1101 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1102 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1103 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1104 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1105 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1106 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1107 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1108 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1109 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1110 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1111 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1112 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1113 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1114 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1115 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1116 {"ascii_encode", ascii_encode, METH_VARARGS},
1117 {"ascii_decode", ascii_decode, METH_VARARGS},
1118 {"charmap_encode", charmap_encode, METH_VARARGS},
1119 {"charmap_decode", charmap_decode, METH_VARARGS},
1120 {"charmap_build", charmap_build, METH_VARARGS},
1121 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1122 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
1123 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1124 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1125 {"mbcs_decode", mbcs_decode, METH_VARARGS},
1126 #endif
1127 #endif /* Py_USING_UNICODE */
1128 {"register_error", register_error, METH_VARARGS,
1129 register_error__doc__},
1130 {"lookup_error", lookup_error, METH_VARARGS,
1131 lookup_error__doc__},
1132 {NULL, NULL} /* sentinel */
1135 PyMODINIT_FUNC
1136 init_codecs(void)
1138 Py_InitModule("_codecs", _codecs_functions);