Note in the intro to Extending... that ctypes can be a simpler, more portable solutio...
[python.git] / Modules / _codecsmodule.c
blob495e4ff53a46a8ea4ed9730fe3b085831c59305d
1 /* ------------------------------------------------------------------------
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
9 The codec registry is accessible via:
11 register(search_function) -> None
13 lookup(encoding) -> CodecInfo object
15 The builtin Unicode codecs use the following interface:
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
23 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
27 These <encoding>s are available: utf_8, unicode_escape,
28 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
32 Written by Marc-Andre Lemburg (mal@lemburg.com).
34 Copyright (c) Corporation for National Research Initiatives.
36 ------------------------------------------------------------------------ */
38 #define PY_SSIZE_T_CLEAN
39 #include "Python.h"
41 /* --- Registry ----------------------------------------------------------- */
43 PyDoc_STRVAR(register__doc__,
44 "register(search_function)\n\
45 \n\
46 Register a codec search function. Search functions are expected to take\n\
47 one argument, the encoding name in all lower case letters, and return\n\
48 a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49 (or a CodecInfo object).");
51 static
52 PyObject *codec_register(PyObject *self, PyObject *search_function)
54 if (PyCodec_Register(search_function))
55 return NULL;
57 Py_RETURN_NONE;
60 PyDoc_STRVAR(lookup__doc__,
61 "lookup(encoding) -> CodecInfo\n\
62 \n\
63 Looks up a codec tuple in the Python codec registry and returns\n\
64 a CodecInfo object.");
66 static
67 PyObject *codec_lookup(PyObject *self, PyObject *args)
69 char *encoding;
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
72 return NULL;
74 return _PyCodec_Lookup(encoding);
77 PyDoc_STRVAR(encode__doc__,
78 "encode(obj, [encoding[,errors]]) -> object\n\
79 \n\
80 Encodes obj using the codec registered for encoding. encoding defaults\n\
81 to the default encoding. errors may be given to set a different error\n\
82 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83 a ValueError. Other possible values are 'ignore', 'replace' and\n\
84 'xmlcharrefreplace' as well as any other name registered with\n\
85 codecs.register_error that can handle ValueErrors.");
87 static PyObject *
88 codec_encode(PyObject *self, PyObject *args)
90 const char *encoding = NULL;
91 const char *errors = NULL;
92 PyObject *v;
94 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
97 #ifdef Py_USING_UNICODE
98 if (encoding == NULL)
99 encoding = PyUnicode_GetDefaultEncoding();
100 #else
101 if (encoding == NULL) {
102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
105 #endif
107 /* Encode via the codec registry */
108 return PyCodec_Encode(v, encoding, errors);
111 PyDoc_STRVAR(decode__doc__,
112 "decode(obj, [encoding[,errors]]) -> object\n\
114 Decodes obj using the codec registered for encoding. encoding defaults\n\
115 to the default encoding. errors may be given to set a different error\n\
116 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117 a ValueError. Other possible values are 'ignore' and 'replace'\n\
118 as well as any other name registered with codecs.register_error that is\n\
119 able to handle ValueErrors.");
121 static PyObject *
122 codec_decode(PyObject *self, PyObject *args)
124 const char *encoding = NULL;
125 const char *errors = NULL;
126 PyObject *v;
128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
131 #ifdef Py_USING_UNICODE
132 if (encoding == NULL)
133 encoding = PyUnicode_GetDefaultEncoding();
134 #else
135 if (encoding == NULL) {
136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
139 #endif
141 /* Decode via the codec registry */
142 return PyCodec_Decode(v, encoding, errors);
145 /* --- Helpers ------------------------------------------------------------ */
147 static
148 PyObject *codec_tuple(PyObject *unicode,
149 Py_ssize_t len)
151 PyObject *v;
152 if (unicode == NULL)
153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
156 return v;
159 /* --- String codecs ------------------------------------------------------ */
160 static PyObject *
161 escape_decode(PyObject *self,
162 PyObject *args)
164 const char *errors = NULL;
165 const char *data;
166 Py_ssize_t size;
168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169 &data, &size, &errors))
170 return NULL;
171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
172 size);
175 static PyObject *
176 escape_encode(PyObject *self,
177 PyObject *args)
179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
182 Py_ssize_t len;
184 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
185 &PyString_Type, &str, &errors))
186 return NULL;
188 str = PyString_Repr(str, 0);
189 if (!str)
190 return NULL;
192 /* The string will be quoted. Unquote, similar to unicode-escape. */
193 buf = PyString_AS_STRING (str);
194 len = PyString_GET_SIZE (str);
195 memmove(buf, buf+1, len-2);
196 if (_PyString_Resize(&str, len-2) < 0)
197 return NULL;
199 return codec_tuple(str, PyString_Size(str));
202 #ifdef Py_USING_UNICODE
203 /* --- Decoder ------------------------------------------------------------ */
205 static PyObject *
206 unicode_internal_decode(PyObject *self,
207 PyObject *args)
209 PyObject *obj;
210 const char *errors = NULL;
211 const char *data;
212 Py_ssize_t size;
214 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
215 &obj, &errors))
216 return NULL;
218 if (PyUnicode_Check(obj)) {
219 Py_INCREF(obj);
220 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
222 else {
223 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
224 return NULL;
226 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
227 size);
231 static PyObject *
232 utf_7_decode(PyObject *self,
233 PyObject *args)
235 Py_buffer pbuf;
236 const char *errors = NULL;
237 int final = 0;
238 Py_ssize_t consumed;
239 PyObject *decoded = NULL;
241 if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
242 &pbuf, &errors, &final))
243 return NULL;
244 consumed = pbuf.len;
246 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
247 final ? NULL : &consumed);
248 PyBuffer_Release(&pbuf);
249 if (decoded == NULL)
250 return NULL;
251 return codec_tuple(decoded, consumed);
254 static PyObject *
255 utf_8_decode(PyObject *self,
256 PyObject *args)
258 Py_buffer pbuf;
259 const char *errors = NULL;
260 int final = 0;
261 Py_ssize_t consumed;
262 PyObject *decoded = NULL;
264 if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
265 &pbuf, &errors, &final))
266 return NULL;
267 consumed = pbuf.len;
269 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
270 final ? NULL : &consumed);
271 PyBuffer_Release(&pbuf);
272 if (decoded == NULL)
273 return NULL;
274 return codec_tuple(decoded, consumed);
277 static PyObject *
278 utf_16_decode(PyObject *self,
279 PyObject *args)
281 Py_buffer pbuf;
282 const char *errors = NULL;
283 int byteorder = 0;
284 int final = 0;
285 Py_ssize_t consumed;
286 PyObject *decoded;
288 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
289 &pbuf, &errors, &final))
290 return NULL;
291 consumed = pbuf.len; /* This is overwritten unless final is true. */
292 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
293 &byteorder, final ? NULL : &consumed);
294 PyBuffer_Release(&pbuf);
295 if (decoded == NULL)
296 return NULL;
297 return codec_tuple(decoded, consumed);
300 static PyObject *
301 utf_16_le_decode(PyObject *self,
302 PyObject *args)
304 Py_buffer pbuf;
305 const char *errors = NULL;
306 int byteorder = -1;
307 int final = 0;
308 Py_ssize_t consumed;
309 PyObject *decoded = NULL;
311 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
312 &pbuf, &errors, &final))
313 return NULL;
315 consumed = pbuf.len; /* This is overwritten unless final is true. */
316 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
317 &byteorder, final ? NULL : &consumed);
318 PyBuffer_Release(&pbuf);
319 if (decoded == NULL)
320 return NULL;
321 return codec_tuple(decoded, consumed);
324 static PyObject *
325 utf_16_be_decode(PyObject *self,
326 PyObject *args)
328 Py_buffer pbuf;
329 const char *errors = NULL;
330 int byteorder = 1;
331 int final = 0;
332 Py_ssize_t consumed;
333 PyObject *decoded = NULL;
335 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
336 &pbuf, &errors, &final))
337 return NULL;
339 consumed = pbuf.len; /* This is overwritten unless final is true. */
340 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
341 &byteorder, final ? NULL : &consumed);
342 PyBuffer_Release(&pbuf);
343 if (decoded == NULL)
344 return NULL;
345 return codec_tuple(decoded, consumed);
348 /* This non-standard version also provides access to the byteorder
349 parameter of the builtin UTF-16 codec.
351 It returns a tuple (unicode, bytesread, byteorder) with byteorder
352 being the value in effect at the end of data.
356 static PyObject *
357 utf_16_ex_decode(PyObject *self,
358 PyObject *args)
360 Py_buffer pbuf;
361 const char *errors = NULL;
362 int byteorder = 0;
363 PyObject *unicode, *tuple;
364 int final = 0;
365 Py_ssize_t consumed;
367 if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
368 &pbuf, &errors, &byteorder, &final))
369 return NULL;
370 consumed = pbuf.len; /* This is overwritten unless final is true. */
371 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
372 &byteorder, final ? NULL : &consumed);
373 PyBuffer_Release(&pbuf);
374 if (unicode == NULL)
375 return NULL;
376 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
377 Py_DECREF(unicode);
378 return tuple;
381 static PyObject *
382 utf_32_decode(PyObject *self,
383 PyObject *args)
385 Py_buffer pbuf;
386 const char *errors = NULL;
387 int byteorder = 0;
388 int final = 0;
389 Py_ssize_t consumed;
390 PyObject *decoded;
392 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
393 &pbuf, &errors, &final))
394 return NULL;
395 consumed = pbuf.len; /* This is overwritten unless final is true. */
396 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
397 &byteorder, final ? NULL : &consumed);
398 PyBuffer_Release(&pbuf);
399 if (decoded == NULL)
400 return NULL;
401 return codec_tuple(decoded, consumed);
404 static PyObject *
405 utf_32_le_decode(PyObject *self,
406 PyObject *args)
408 Py_buffer pbuf;
409 const char *errors = NULL;
410 int byteorder = -1;
411 int final = 0;
412 Py_ssize_t consumed;
413 PyObject *decoded;
415 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
416 &pbuf, &errors, &final))
417 return NULL;
418 consumed = pbuf.len; /* This is overwritten unless final is true. */
419 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
420 &byteorder, final ? NULL : &consumed);
421 PyBuffer_Release(&pbuf);
422 if (decoded == NULL)
423 return NULL;
424 return codec_tuple(decoded, consumed);
427 static PyObject *
428 utf_32_be_decode(PyObject *self,
429 PyObject *args)
431 Py_buffer pbuf;
432 const char *errors = NULL;
433 int byteorder = 1;
434 int final = 0;
435 Py_ssize_t consumed;
436 PyObject *decoded;
438 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
439 &pbuf, &errors, &final))
440 return NULL;
441 consumed = pbuf.len; /* This is overwritten unless final is true. */
442 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
443 &byteorder, final ? NULL : &consumed);
444 PyBuffer_Release(&pbuf);
445 if (decoded == NULL)
446 return NULL;
447 return codec_tuple(decoded, consumed);
450 /* This non-standard version also provides access to the byteorder
451 parameter of the builtin UTF-32 codec.
453 It returns a tuple (unicode, bytesread, byteorder) with byteorder
454 being the value in effect at the end of data.
458 static PyObject *
459 utf_32_ex_decode(PyObject *self,
460 PyObject *args)
462 Py_buffer pbuf;
463 const char *errors = NULL;
464 int byteorder = 0;
465 PyObject *unicode, *tuple;
466 int final = 0;
467 Py_ssize_t consumed;
469 if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
470 &pbuf, &errors, &byteorder, &final))
471 return NULL;
472 consumed = pbuf.len; /* This is overwritten unless final is true. */
473 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
474 &byteorder, final ? NULL : &consumed);
475 PyBuffer_Release(&pbuf);
476 if (unicode == NULL)
477 return NULL;
478 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
479 Py_DECREF(unicode);
480 return tuple;
483 static PyObject *
484 unicode_escape_decode(PyObject *self,
485 PyObject *args)
487 Py_buffer pbuf;
488 const char *errors = NULL;
489 PyObject *unicode;
491 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
492 &pbuf, &errors))
493 return NULL;
495 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
496 PyBuffer_Release(&pbuf);
497 return codec_tuple(unicode, pbuf.len);
500 static PyObject *
501 raw_unicode_escape_decode(PyObject *self,
502 PyObject *args)
504 Py_buffer pbuf;
505 const char *errors = NULL;
506 PyObject *unicode;
508 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
509 &pbuf, &errors))
510 return NULL;
512 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
513 PyBuffer_Release(&pbuf);
514 return codec_tuple(unicode, pbuf.len);
517 static PyObject *
518 latin_1_decode(PyObject *self,
519 PyObject *args)
521 Py_buffer pbuf;
522 PyObject *unicode;
523 const char *errors = NULL;
525 if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
526 &pbuf, &errors))
527 return NULL;
529 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
530 PyBuffer_Release(&pbuf);
531 return codec_tuple(unicode, pbuf.len);
534 static PyObject *
535 ascii_decode(PyObject *self,
536 PyObject *args)
538 Py_buffer pbuf;
539 PyObject *unicode;
540 const char *errors = NULL;
542 if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
543 &pbuf, &errors))
544 return NULL;
546 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
547 PyBuffer_Release(&pbuf);
548 return codec_tuple(unicode, pbuf.len);
551 static PyObject *
552 charmap_decode(PyObject *self,
553 PyObject *args)
555 Py_buffer pbuf;
556 PyObject *unicode;
557 const char *errors = NULL;
558 PyObject *mapping = NULL;
560 if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
561 &pbuf, &errors, &mapping))
562 return NULL;
563 if (mapping == Py_None)
564 mapping = NULL;
566 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
567 PyBuffer_Release(&pbuf);
568 return codec_tuple(unicode, pbuf.len);
571 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
573 static PyObject *
574 mbcs_decode(PyObject *self,
575 PyObject *args)
577 Py_buffer pbuf;
578 const char *errors = NULL;
579 int final = 0;
580 Py_ssize_t consumed;
581 PyObject *decoded = NULL;
583 if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
584 &pbuf, &errors, &final))
585 return NULL;
586 consumed = pbuf.len;
588 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
589 final ? NULL : &consumed);
590 PyBuffer_Release(&pbuf);
591 if (decoded == NULL)
592 return NULL;
593 return codec_tuple(decoded, consumed);
596 #endif /* MS_WINDOWS */
598 /* --- Encoder ------------------------------------------------------------ */
600 static PyObject *
601 readbuffer_encode(PyObject *self,
602 PyObject *args)
604 const char *data;
605 Py_ssize_t size;
606 const char *errors = NULL;
608 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
609 &data, &size, &errors))
610 return NULL;
612 return codec_tuple(PyString_FromStringAndSize(data, size),
613 size);
616 static PyObject *
617 charbuffer_encode(PyObject *self,
618 PyObject *args)
620 const char *data;
621 Py_ssize_t size;
622 const char *errors = NULL;
624 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
625 &data, &size, &errors))
626 return NULL;
628 return codec_tuple(PyString_FromStringAndSize(data, size),
629 size);
632 static PyObject *
633 unicode_internal_encode(PyObject *self,
634 PyObject *args)
636 PyObject *obj;
637 const char *errors = NULL;
638 const char *data;
639 Py_ssize_t size;
641 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
642 &obj, &errors))
643 return NULL;
645 if (PyUnicode_Check(obj)) {
646 data = PyUnicode_AS_DATA(obj);
647 size = PyUnicode_GET_DATA_SIZE(obj);
648 return codec_tuple(PyString_FromStringAndSize(data, size),
649 PyUnicode_GET_SIZE(obj));
651 else {
652 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
653 return NULL;
654 return codec_tuple(PyString_FromStringAndSize(data, size),
655 size);
659 static PyObject *
660 utf_7_encode(PyObject *self,
661 PyObject *args)
663 PyObject *str, *v;
664 const char *errors = NULL;
666 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
667 &str, &errors))
668 return NULL;
670 str = PyUnicode_FromObject(str);
671 if (str == NULL)
672 return NULL;
673 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
674 PyUnicode_GET_SIZE(str),
677 errors),
678 PyUnicode_GET_SIZE(str));
679 Py_DECREF(str);
680 return v;
683 static PyObject *
684 utf_8_encode(PyObject *self,
685 PyObject *args)
687 PyObject *str, *v;
688 const char *errors = NULL;
690 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
691 &str, &errors))
692 return NULL;
694 str = PyUnicode_FromObject(str);
695 if (str == NULL)
696 return NULL;
697 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
698 PyUnicode_GET_SIZE(str),
699 errors),
700 PyUnicode_GET_SIZE(str));
701 Py_DECREF(str);
702 return v;
705 /* This version provides access to the byteorder parameter of the
706 builtin UTF-16 codecs as optional third argument. It defaults to 0
707 which means: use the native byte order and prepend the data with a
708 BOM mark.
712 static PyObject *
713 utf_16_encode(PyObject *self,
714 PyObject *args)
716 PyObject *str, *v;
717 const char *errors = NULL;
718 int byteorder = 0;
720 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
721 &str, &errors, &byteorder))
722 return NULL;
724 str = PyUnicode_FromObject(str);
725 if (str == NULL)
726 return NULL;
727 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
728 PyUnicode_GET_SIZE(str),
729 errors,
730 byteorder),
731 PyUnicode_GET_SIZE(str));
732 Py_DECREF(str);
733 return v;
736 static PyObject *
737 utf_16_le_encode(PyObject *self,
738 PyObject *args)
740 PyObject *str, *v;
741 const char *errors = NULL;
743 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
744 &str, &errors))
745 return NULL;
747 str = PyUnicode_FromObject(str);
748 if (str == NULL)
749 return NULL;
750 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
751 PyUnicode_GET_SIZE(str),
752 errors,
753 -1),
754 PyUnicode_GET_SIZE(str));
755 Py_DECREF(str);
756 return v;
759 static PyObject *
760 utf_16_be_encode(PyObject *self,
761 PyObject *args)
763 PyObject *str, *v;
764 const char *errors = NULL;
766 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
767 &str, &errors))
768 return NULL;
770 str = PyUnicode_FromObject(str);
771 if (str == NULL)
772 return NULL;
773 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
774 PyUnicode_GET_SIZE(str),
775 errors,
776 +1),
777 PyUnicode_GET_SIZE(str));
778 Py_DECREF(str);
779 return v;
782 /* This version provides access to the byteorder parameter of the
783 builtin UTF-32 codecs as optional third argument. It defaults to 0
784 which means: use the native byte order and prepend the data with a
785 BOM mark.
789 static PyObject *
790 utf_32_encode(PyObject *self,
791 PyObject *args)
793 PyObject *str, *v;
794 const char *errors = NULL;
795 int byteorder = 0;
797 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
798 &str, &errors, &byteorder))
799 return NULL;
801 str = PyUnicode_FromObject(str);
802 if (str == NULL)
803 return NULL;
804 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
805 PyUnicode_GET_SIZE(str),
806 errors,
807 byteorder),
808 PyUnicode_GET_SIZE(str));
809 Py_DECREF(str);
810 return v;
813 static PyObject *
814 utf_32_le_encode(PyObject *self,
815 PyObject *args)
817 PyObject *str, *v;
818 const char *errors = NULL;
820 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
821 &str, &errors))
822 return NULL;
824 str = PyUnicode_FromObject(str);
825 if (str == NULL)
826 return NULL;
827 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
828 PyUnicode_GET_SIZE(str),
829 errors,
830 -1),
831 PyUnicode_GET_SIZE(str));
832 Py_DECREF(str);
833 return v;
836 static PyObject *
837 utf_32_be_encode(PyObject *self,
838 PyObject *args)
840 PyObject *str, *v;
841 const char *errors = NULL;
843 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
844 &str, &errors))
845 return NULL;
847 str = PyUnicode_FromObject(str);
848 if (str == NULL)
849 return NULL;
850 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
851 PyUnicode_GET_SIZE(str),
852 errors,
853 +1),
854 PyUnicode_GET_SIZE(str));
855 Py_DECREF(str);
856 return v;
859 static PyObject *
860 unicode_escape_encode(PyObject *self,
861 PyObject *args)
863 PyObject *str, *v;
864 const char *errors = NULL;
866 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
867 &str, &errors))
868 return NULL;
870 str = PyUnicode_FromObject(str);
871 if (str == NULL)
872 return NULL;
873 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
874 PyUnicode_GET_SIZE(str)),
875 PyUnicode_GET_SIZE(str));
876 Py_DECREF(str);
877 return v;
880 static PyObject *
881 raw_unicode_escape_encode(PyObject *self,
882 PyObject *args)
884 PyObject *str, *v;
885 const char *errors = NULL;
887 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
888 &str, &errors))
889 return NULL;
891 str = PyUnicode_FromObject(str);
892 if (str == NULL)
893 return NULL;
894 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
895 PyUnicode_AS_UNICODE(str),
896 PyUnicode_GET_SIZE(str)),
897 PyUnicode_GET_SIZE(str));
898 Py_DECREF(str);
899 return v;
902 static PyObject *
903 latin_1_encode(PyObject *self,
904 PyObject *args)
906 PyObject *str, *v;
907 const char *errors = NULL;
909 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
910 &str, &errors))
911 return NULL;
913 str = PyUnicode_FromObject(str);
914 if (str == NULL)
915 return NULL;
916 v = codec_tuple(PyUnicode_EncodeLatin1(
917 PyUnicode_AS_UNICODE(str),
918 PyUnicode_GET_SIZE(str),
919 errors),
920 PyUnicode_GET_SIZE(str));
921 Py_DECREF(str);
922 return v;
925 static PyObject *
926 ascii_encode(PyObject *self,
927 PyObject *args)
929 PyObject *str, *v;
930 const char *errors = NULL;
932 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
933 &str, &errors))
934 return NULL;
936 str = PyUnicode_FromObject(str);
937 if (str == NULL)
938 return NULL;
939 v = codec_tuple(PyUnicode_EncodeASCII(
940 PyUnicode_AS_UNICODE(str),
941 PyUnicode_GET_SIZE(str),
942 errors),
943 PyUnicode_GET_SIZE(str));
944 Py_DECREF(str);
945 return v;
948 static PyObject *
949 charmap_encode(PyObject *self,
950 PyObject *args)
952 PyObject *str, *v;
953 const char *errors = NULL;
954 PyObject *mapping = NULL;
956 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
957 &str, &errors, &mapping))
958 return NULL;
959 if (mapping == Py_None)
960 mapping = NULL;
962 str = PyUnicode_FromObject(str);
963 if (str == NULL)
964 return NULL;
965 v = codec_tuple(PyUnicode_EncodeCharmap(
966 PyUnicode_AS_UNICODE(str),
967 PyUnicode_GET_SIZE(str),
968 mapping,
969 errors),
970 PyUnicode_GET_SIZE(str));
971 Py_DECREF(str);
972 return v;
975 static PyObject*
976 charmap_build(PyObject *self, PyObject *args)
978 PyObject *map;
979 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
980 return NULL;
981 return PyUnicode_BuildEncodingMap(map);
984 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
986 static PyObject *
987 mbcs_encode(PyObject *self,
988 PyObject *args)
990 PyObject *str, *v;
991 const char *errors = NULL;
993 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
994 &str, &errors))
995 return NULL;
997 str = PyUnicode_FromObject(str);
998 if (str == NULL)
999 return NULL;
1000 v = codec_tuple(PyUnicode_EncodeMBCS(
1001 PyUnicode_AS_UNICODE(str),
1002 PyUnicode_GET_SIZE(str),
1003 errors),
1004 PyUnicode_GET_SIZE(str));
1005 Py_DECREF(str);
1006 return v;
1009 #endif /* MS_WINDOWS */
1010 #endif /* Py_USING_UNICODE */
1012 /* --- Error handler registry --------------------------------------------- */
1014 PyDoc_STRVAR(register_error__doc__,
1015 "register_error(errors, handler)\n\
1017 Register the specified error handler under the name\n\
1018 errors. handler must be a callable object, that\n\
1019 will be called with an exception instance containing\n\
1020 information about the location of the encoding/decoding\n\
1021 error and must return a (replacement, new position) tuple.");
1023 static PyObject *register_error(PyObject *self, PyObject *args)
1025 const char *name;
1026 PyObject *handler;
1028 if (!PyArg_ParseTuple(args, "sO:register_error",
1029 &name, &handler))
1030 return NULL;
1031 if (PyCodec_RegisterError(name, handler))
1032 return NULL;
1033 Py_RETURN_NONE;
1036 PyDoc_STRVAR(lookup_error__doc__,
1037 "lookup_error(errors) -> handler\n\
1039 Return the error handler for the specified error handling name\n\
1040 or raise a LookupError, if no handler exists under this name.");
1042 static PyObject *lookup_error(PyObject *self, PyObject *args)
1044 const char *name;
1046 if (!PyArg_ParseTuple(args, "s:lookup_error",
1047 &name))
1048 return NULL;
1049 return PyCodec_LookupError(name);
1052 /* --- Module API --------------------------------------------------------- */
1054 static PyMethodDef _codecs_functions[] = {
1055 {"register", codec_register, METH_O,
1056 register__doc__},
1057 {"lookup", codec_lookup, METH_VARARGS,
1058 lookup__doc__},
1059 {"encode", codec_encode, METH_VARARGS,
1060 encode__doc__},
1061 {"decode", codec_decode, METH_VARARGS,
1062 decode__doc__},
1063 {"escape_encode", escape_encode, METH_VARARGS},
1064 {"escape_decode", escape_decode, METH_VARARGS},
1065 #ifdef Py_USING_UNICODE
1066 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1067 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1068 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1069 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1070 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1071 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1072 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1073 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1074 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1075 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1076 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1077 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1078 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1079 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1080 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1081 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1082 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1083 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1084 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1085 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1086 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1087 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1088 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1089 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1090 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1091 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1092 {"ascii_encode", ascii_encode, METH_VARARGS},
1093 {"ascii_decode", ascii_decode, METH_VARARGS},
1094 {"charmap_encode", charmap_encode, METH_VARARGS},
1095 {"charmap_decode", charmap_decode, METH_VARARGS},
1096 {"charmap_build", charmap_build, METH_VARARGS},
1097 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1098 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
1099 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1100 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1101 {"mbcs_decode", mbcs_decode, METH_VARARGS},
1102 #endif
1103 #endif /* Py_USING_UNICODE */
1104 {"register_error", register_error, METH_VARARGS,
1105 register_error__doc__},
1106 {"lookup_error", lookup_error, METH_VARARGS,
1107 lookup_error__doc__},
1108 {NULL, NULL} /* sentinel */
1111 PyMODINIT_FUNC
1112 init_codecs(void)
1114 Py_InitModule("_codecs", _codecs_functions);