Modules/_codecsmodule.c

   1 /* ------------------------------------------------------------------------
   2
   3    _codecs -- Provides access to the codec registry and the builtin
   4               codecs.
   5
   6    This module should never be imported directly. The standard library
   7    module "codecs" wraps this builtin module for use within Python.
   8
   9    The codec registry is accessible via:
  10
  11      register(search_function) -> None
  12
  13      lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
  14
  15    The builtin Unicode codecs use the following interface:
  16
  17      <encoding>_encode(Unicode_object[,errors='strict']) ->
  18         (string object, bytes consumed)
  19
  20      <encoding>_decode(char_buffer_obj[,errors='strict']) ->
  21         (Unicode object, bytes consumed)
  22
  23    <encoding>_encode() interfaces also accept non-Unicode object as
  24    input. The objects are then converted to Unicode using
  25    PyUnicode_FromObject() prior to applying the conversion.
  26
  27    These <encoding>s are available: utf_8, unicode_escape,
  28    raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
  29    mbcs (on win32).
  30
  31
  32 Written by Marc-Andre Lemburg (mal@lemburg.com).
  33
  34 Copyright (c) Corporation for National Research Initiatives.
  35
  36    ------------------------------------------------------------------------ */
  37
  38 #define PY_SSIZE_T_CLEAN
  39 #include "Python.h"
  40
  41 /* --- Registry ----------------------------------------------------------- */
  42
  43 PyDoc_STRVAR(register__doc__,
  44 "register(search_function)\n\
  45 \n\
  46 Register a codec search function. Search functions are expected to take\n\
  47 one argument, the encoding name in all lower case letters, and return\n\
  48 a tuple of functions (encoder, decoder, stream_reader, stream_writer).");
  49
  50 static
  51 PyObject *codec_register(PyObject *self, PyObject *search_function)
  52 {
  53     if (PyCodec_Register(search_function))
  54         return NULL;
  55
  56     Py_RETURN_NONE;
  57 }
  58
  59 PyDoc_STRVAR(lookup__doc__,
  60 "lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)\n\
  61 \n\
  62 Looks up a codec tuple in the Python codec registry and returns\n\
  63 a tuple of functions.");
  64
  65 static
  66 PyObject *codec_lookup(PyObject *self, PyObject *args)
  67 {
  68     char *encoding;
  69
  70     if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
  71         return NULL;
  72
  73     return _PyCodec_Lookup(encoding);
  74 }
  75
  76 PyDoc_STRVAR(encode__doc__,
  77 "encode(obj, [encoding[,errors]]) -> object\n\
  78 \n\
  79 Encodes obj using the codec registered for encoding. encoding defaults\n\
  80 to the default encoding. errors may be given to set a different error\n\
  81 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
  82 a ValueError. Other possible values are 'ignore', 'replace' and\n\
  83 'xmlcharrefreplace' as well as any other name registered with\n\
  84 codecs.register_error that can handle ValueErrors.");
  85
  86 static PyObject *
  87 codec_encode(PyObject *self, PyObject *args)
  88 {
  89     const char *encoding = NULL;
  90     const char *errors = NULL;
  91     PyObject *v;
  92
  93     if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
  94         return NULL;
  95
  96 #ifdef Py_USING_UNICODE
  97     if (encoding == NULL)
  98         encoding = PyUnicode_GetDefaultEncoding();
  99 #else
 100     if (encoding == NULL) {
 101         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 102         return NULL;
 103     }
 104 #endif
 105
 106     /* Encode via the codec registry */
 107     return PyCodec_Encode(v, encoding, errors);
 108 }
 109
 110 PyDoc_STRVAR(decode__doc__,
 111 "decode(obj, [encoding[,errors]]) -> object\n\
 112 \n\
 113 Decodes obj using the codec registered for encoding. encoding defaults\n\
 114 to the default encoding. errors may be given to set a different error\n\
 115 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
 116 a ValueError. Other possible values are 'ignore' and 'replace'\n\
 117 as well as any other name registerd with codecs.register_error that is\n\
 118 able to handle ValueErrors.");
 119
 120 static PyObject *
 121 codec_decode(PyObject *self, PyObject *args)
 122 {
 123     const char *encoding = NULL;
 124     const char *errors = NULL;
 125     PyObject *v;
 126
 127     if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
 128         return NULL;
 129
 130 #ifdef Py_USING_UNICODE
 131     if (encoding == NULL)
 132         encoding = PyUnicode_GetDefaultEncoding();
 133 #else
 134     if (encoding == NULL) {
 135         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 136         return NULL;
 137     }
 138 #endif
 139
 140     /* Decode via the codec registry */
 141     return PyCodec_Decode(v, encoding, errors);
 142 }
 143
 144 /* --- Helpers ------------------------------------------------------------ */
 145
 146 static
 147 PyObject *codec_tuple(PyObject *unicode,
 148                       Py_ssize_t len)
 149 {
 150     PyObject *v;
 151     if (unicode == NULL)
 152         return NULL;
 153     v = Py_BuildValue("On", unicode, len);
 154     Py_DECREF(unicode);
 155     return v;
 156 }
 157
 158 /* --- String codecs ------------------------------------------------------ */
 159 static PyObject *
 160 escape_decode(PyObject *self,
 161               PyObject *args)
 162 {
 163     const char *errors = NULL;
 164     const char *data;
 165     Py_ssize_t size;
 166
 167     if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
 168                           &data, &size, &errors))
 169         return NULL;
 170     return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
 171                        size);
 172 }
 173
 174 static PyObject *
 175 escape_encode(PyObject *self,
 176               PyObject *args)
 177 {
 178         PyObject *str;
 179         const char *errors = NULL;
 180         char *buf;
 181         Py_ssize_t len;
 182
 183         if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
 184                               &PyString_Type, &str, &errors))
 185                 return NULL;
 186
 187         str = PyString_Repr(str, 0);
 188         if (!str)
 189                 return NULL;
 190
 191         /* The string will be quoted. Unquote, similar to unicode-escape. */
 192         buf = PyString_AS_STRING (str);
 193         len = PyString_GET_SIZE (str);
 194         memmove(buf, buf+1, len-2);
 195         _PyString_Resize(&str, len-2);
 196
 197         return codec_tuple(str, PyString_Size(str));
 198 }
 199
 200 #ifdef Py_USING_UNICODE
 201 /* --- Decoder ------------------------------------------------------------ */
 202
 203 static PyObject *
 204 unicode_internal_decode(PyObject *self,
 205                         PyObject *args)
 206 {
 207     PyObject *obj;
 208     const char *errors = NULL;
 209     const char *data;
 210     Py_ssize_t size;
 211
 212     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
 213                           &obj, &errors))
 214         return NULL;
 215
 216     if (PyUnicode_Check(obj)) {
 217         Py_INCREF(obj);
 218         return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
 219     }
 220     else {
 221         if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
 222             return NULL;
 223
 224         return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
 225                            size);
 226     }
 227 }
 228
 229 static PyObject *
 230 utf_7_decode(PyObject *self,
 231             PyObject *args)
 232 {
 233     const char *data;
 234     Py_ssize_t size;
 235     const char *errors = NULL;
 236
 237     if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
 238                           &data, &size, &errors))
 239         return NULL;
 240
 241     return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
 242                        size);
 243 }
 244
 245 static PyObject *
 246 utf_8_decode(PyObject *self,
 247             PyObject *args)
 248 {
 249     const char *data;
 250     Py_ssize_t size;
 251     const char *errors = NULL;
 252     int final = 0;
 253     Py_ssize_t consumed;
 254     PyObject *decoded = NULL;
 255
 256     if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
 257                           &data, &size, &errors, &final))
 258         return NULL;
 259     if (size < 0) {
 260             PyErr_SetString(PyExc_ValueError, "negative argument");
 261             return 0;
 262     }
 263     consumed = size;
 264
 265     decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
 266                                            final ? NULL : &consumed);
 267     if (decoded == NULL)
 268         return NULL;
 269     return codec_tuple(decoded, consumed);
 270 }
 271
 272 static PyObject *
 273 utf_16_decode(PyObject *self,
 274             PyObject *args)
 275 {
 276     const char *data;
 277     Py_ssize_t size;
 278     const char *errors = NULL;
 279     int byteorder = 0;
 280     int final = 0;
 281     Py_ssize_t consumed;
 282     PyObject *decoded;
 283
 284     if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
 285                           &data, &size, &errors, &final))
 286         return NULL;
 287     if (size < 0) {
 288             PyErr_SetString(PyExc_ValueError, "negative argument");
 289             return 0;
 290     }
 291     consumed = size; /* This is overwritten unless final is true. */
 292     decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
 293                                             final ? NULL : &consumed);
 294     if (decoded == NULL)
 295         return NULL;
 296     return codec_tuple(decoded, consumed);
 297 }
 298
 299 static PyObject *
 300 utf_16_le_decode(PyObject *self,
 301                  PyObject *args)
 302 {
 303     const char *data;
 304     Py_ssize_t size;
 305     const char *errors = NULL;
 306     int byteorder = -1;
 307     int final = 0;
 308     Py_ssize_t consumed;
 309     PyObject *decoded = NULL;
 310
 311     if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
 312                           &data, &size, &errors, &final))
 313         return NULL;
 314
 315     if (size < 0) {
 316           PyErr_SetString(PyExc_ValueError, "negative argument");
 317           return 0;
 318     }
 319     consumed = size; /* This is overwritten unless final is true. */
 320     decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
 321         &byteorder, final ? NULL : &consumed);
 322     if (decoded == NULL)
 323         return NULL;
 324     return codec_tuple(decoded, consumed);
 325
 326 }
 327
 328 static PyObject *
 329 utf_16_be_decode(PyObject *self,
 330                  PyObject *args)
 331 {
 332     const char *data;
 333     Py_ssize_t size;
 334     const char *errors = NULL;
 335     int byteorder = 1;
 336     int final = 0;
 337     Py_ssize_t consumed;
 338     PyObject *decoded = NULL;
 339
 340     if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
 341                           &data, &size, &errors, &final))
 342         return NULL;
 343     if (size < 0) {
 344           PyErr_SetString(PyExc_ValueError, "negative argument");
 345           return 0;
 346     }
 347     consumed = size; /* This is overwritten unless final is true. */
 348     decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
 349         &byteorder, final ? NULL : &consumed);
 350     if (decoded == NULL)
 351         return NULL;
 352     return codec_tuple(decoded, consumed);
 353 }
 354
 355 /* This non-standard version also provides access to the byteorder
 356    parameter of the builtin UTF-16 codec.
 357
 358    It returns a tuple (unicode, bytesread, byteorder) with byteorder
 359    being the value in effect at the end of data.
 360
 361 */
 362
 363 static PyObject *
 364 utf_16_ex_decode(PyObject *self,
 365                  PyObject *args)
 366 {
 367     const char *data;
 368     Py_ssize_t size;
 369     const char *errors = NULL;
 370     int byteorder = 0;
 371     PyObject *unicode, *tuple;
 372     int final = 0;
 373     Py_ssize_t consumed;
 374
 375     if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
 376                           &data, &size, &errors, &byteorder, &final))
 377         return NULL;
 378     if (size < 0) {
 379             PyErr_SetString(PyExc_ValueError, "negative argument");
 380             return 0;
 381     }
 382     consumed = size; /* This is overwritten unless final is true. */
 383     unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
 384                                             final ? NULL : &consumed);
 385     if (unicode == NULL)
 386         return NULL;
 387     tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
 388     Py_DECREF(unicode);
 389     return tuple;
 390 }
 391
 392 static PyObject *
 393 unicode_escape_decode(PyObject *self,
 394                      PyObject *args)
 395 {
 396     const char *data;
 397     Py_ssize_t size;
 398     const char *errors = NULL;
 399
 400     if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
 401                           &data, &size, &errors))
 402         return NULL;
 403
 404     return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
 405                        size);
 406 }
 407
 408 static PyObject *
 409 raw_unicode_escape_decode(PyObject *self,
 410                         PyObject *args)
 411 {
 412     const char *data;
 413     Py_ssize_t size;
 414     const char *errors = NULL;
 415
 416     if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
 417                           &data, &size, &errors))
 418         return NULL;
 419
 420     return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
 421                        size);
 422 }
 423
 424 static PyObject *
 425 latin_1_decode(PyObject *self,
 426                PyObject *args)
 427 {
 428     const char *data;
 429     Py_ssize_t size;
 430     const char *errors = NULL;
 431
 432     if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
 433                           &data, &size, &errors))
 434         return NULL;
 435
 436     return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
 437                        size);
 438 }
 439
 440 static PyObject *
 441 ascii_decode(PyObject *self,
 442              PyObject *args)
 443 {
 444     const char *data;
 445     Py_ssize_t size;
 446     const char *errors = NULL;
 447
 448     if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
 449                           &data, &size, &errors))
 450         return NULL;
 451
 452     return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
 453                        size);
 454 }
 455
 456 static PyObject *
 457 charmap_decode(PyObject *self,
 458                PyObject *args)
 459 {
 460     const char *data;
 461     Py_ssize_t size;
 462     const char *errors = NULL;
 463     PyObject *mapping = NULL;
 464
 465     if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
 466                           &data, &size, &errors, &mapping))
 467         return NULL;
 468     if (mapping == Py_None)
 469         mapping = NULL;
 470
 471     return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
 472                        size);
 473 }
 474
 475 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
 476
 477 static PyObject *
 478 mbcs_decode(PyObject *self,
 479             PyObject *args)
 480 {
 481     const char *data;
 482     Py_ssize_t size, consumed;
 483     const char *errors = NULL;
 484     int final = 1;
 485     PyObject *decoded;
 486
 487     if (!PyArg_ParseTuple(args, "t#|zi:mbcs_decode",
 488                           &data, &size, &errors, &final))
 489         return NULL;
 490
 491     decoded = PyUnicode_DecodeMBCSStateful(
 492         data, size, errors, final ? NULL : &consumed);
 493     if (!decoded)
 494         return NULL;
 495     return codec_tuple(decoded, final ? size : consumed);
 496 }
 497
 498 #endif /* MS_WINDOWS */
 499
 500 /* --- Encoder ------------------------------------------------------------ */
 501
 502 static PyObject *
 503 readbuffer_encode(PyObject *self,
 504                   PyObject *args)
 505 {
 506     const char *data;
 507     Py_ssize_t size;
 508     const char *errors = NULL;
 509
 510     if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
 511                           &data, &size, &errors))
 512         return NULL;
 513
 514     return codec_tuple(PyString_FromStringAndSize(data, size),
 515                        size);
 516 }
 517
 518 static PyObject *
 519 charbuffer_encode(PyObject *self,
 520                   PyObject *args)
 521 {
 522     const char *data;
 523     Py_ssize_t size;
 524     const char *errors = NULL;
 525
 526     if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
 527                           &data, &size, &errors))
 528         return NULL;
 529
 530     return codec_tuple(PyString_FromStringAndSize(data, size),
 531                        size);
 532 }
 533
 534 static PyObject *
 535 unicode_internal_encode(PyObject *self,
 536                         PyObject *args)
 537 {
 538     PyObject *obj;
 539     const char *errors = NULL;
 540     const char *data;
 541     Py_ssize_t size;
 542
 543     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
 544                           &obj, &errors))
 545         return NULL;
 546
 547     if (PyUnicode_Check(obj)) {
 548         data = PyUnicode_AS_DATA(obj);
 549         size = PyUnicode_GET_DATA_SIZE(obj);
 550         return codec_tuple(PyString_FromStringAndSize(data, size),
 551                            size);
 552     }
 553     else {
 554         if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
 555             return NULL;
 556         return codec_tuple(PyString_FromStringAndSize(data, size),
 557                            size);
 558     }
 559 }
 560
 561 static PyObject *
 562 utf_7_encode(PyObject *self,
 563             PyObject *args)
 564 {
 565     PyObject *str, *v;
 566     const char *errors = NULL;
 567
 568     if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
 569                           &str, &errors))
 570         return NULL;
 571
 572     str = PyUnicode_FromObject(str);
 573     if (str == NULL)
 574         return NULL;
 575     v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
 576                                          PyUnicode_GET_SIZE(str),
 577                                          0,
 578                                          0,
 579                                          errors),
 580                     PyUnicode_GET_SIZE(str));
 581     Py_DECREF(str);
 582     return v;
 583 }
 584
 585 static PyObject *
 586 utf_8_encode(PyObject *self,
 587             PyObject *args)
 588 {
 589     PyObject *str, *v;
 590     const char *errors = NULL;
 591
 592     if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
 593                           &str, &errors))
 594         return NULL;
 595
 596     str = PyUnicode_FromObject(str);
 597     if (str == NULL)
 598         return NULL;
 599     v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
 600                                          PyUnicode_GET_SIZE(str),
 601                                          errors),
 602                     PyUnicode_GET_SIZE(str));
 603     Py_DECREF(str);
 604     return v;
 605 }
 606
 607 /* This version provides access to the byteorder parameter of the
 608    builtin UTF-16 codecs as optional third argument. It defaults to 0
 609    which means: use the native byte order and prepend the data with a
 610    BOM mark.
 611
 612 */
 613
 614 static PyObject *
 615 utf_16_encode(PyObject *self,
 616             PyObject *args)
 617 {
 618     PyObject *str, *v;
 619     const char *errors = NULL;
 620     int byteorder = 0;
 621
 622     if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
 623                           &str, &errors, &byteorder))
 624         return NULL;
 625
 626     str = PyUnicode_FromObject(str);
 627     if (str == NULL)
 628         return NULL;
 629     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 630                                           PyUnicode_GET_SIZE(str),
 631                                           errors,
 632                                           byteorder),
 633                     PyUnicode_GET_SIZE(str));
 634     Py_DECREF(str);
 635     return v;
 636 }
 637
 638 static PyObject *
 639 utf_16_le_encode(PyObject *self,
 640                  PyObject *args)
 641 {
 642     PyObject *str, *v;
 643     const char *errors = NULL;
 644
 645     if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
 646                           &str, &errors))
 647         return NULL;
 648
 649     str = PyUnicode_FromObject(str);
 650     if (str == NULL)
 651         return NULL;
 652     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 653                                              PyUnicode_GET_SIZE(str),
 654                                              errors,
 655                                              -1),
 656                        PyUnicode_GET_SIZE(str));
 657     Py_DECREF(str);
 658     return v;
 659 }
 660
 661 static PyObject *
 662 utf_16_be_encode(PyObject *self,
 663                  PyObject *args)
 664 {
 665     PyObject *str, *v;
 666     const char *errors = NULL;
 667
 668     if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
 669                           &str, &errors))
 670         return NULL;
 671
 672     str = PyUnicode_FromObject(str);
 673     if (str == NULL)
 674         return NULL;
 675     v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
 676                                           PyUnicode_GET_SIZE(str),
 677                                           errors,
 678                                           +1),
 679                     PyUnicode_GET_SIZE(str));
 680     Py_DECREF(str);
 681     return v;
 682 }
 683
 684 static PyObject *
 685 unicode_escape_encode(PyObject *self,
 686                      PyObject *args)
 687 {
 688     PyObject *str, *v;
 689     const char *errors = NULL;
 690
 691     if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
 692                           &str, &errors))
 693         return NULL;
 694
 695     str = PyUnicode_FromObject(str);
 696     if (str == NULL)
 697         return NULL;
 698     v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
 699                                                   PyUnicode_GET_SIZE(str)),
 700                     PyUnicode_GET_SIZE(str));
 701     Py_DECREF(str);
 702     return v;
 703 }
 704
 705 static PyObject *
 706 raw_unicode_escape_encode(PyObject *self,
 707                         PyObject *args)
 708 {
 709     PyObject *str, *v;
 710     const char *errors = NULL;
 711
 712     if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
 713                           &str, &errors))
 714         return NULL;
 715
 716     str = PyUnicode_FromObject(str);
 717     if (str == NULL)
 718         return NULL;
 719     v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
 720                                PyUnicode_AS_UNICODE(str),
 721                                PyUnicode_GET_SIZE(str)),
 722                     PyUnicode_GET_SIZE(str));
 723     Py_DECREF(str);
 724     return v;
 725 }
 726
 727 static PyObject *
 728 latin_1_encode(PyObject *self,
 729                PyObject *args)
 730 {
 731     PyObject *str, *v;
 732     const char *errors = NULL;
 733
 734     if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
 735                           &str, &errors))
 736         return NULL;
 737
 738     str = PyUnicode_FromObject(str);
 739     if (str == NULL)
 740         return NULL;
 741     v = codec_tuple(PyUnicode_EncodeLatin1(
 742                                PyUnicode_AS_UNICODE(str),
 743                                PyUnicode_GET_SIZE(str),
 744                                errors),
 745                     PyUnicode_GET_SIZE(str));
 746     Py_DECREF(str);
 747     return v;
 748 }
 749
 750 static PyObject *
 751 ascii_encode(PyObject *self,
 752              PyObject *args)
 753 {
 754     PyObject *str, *v;
 755     const char *errors = NULL;
 756
 757     if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
 758                           &str, &errors))
 759         return NULL;
 760
 761     str = PyUnicode_FromObject(str);
 762     if (str == NULL)
 763         return NULL;
 764     v = codec_tuple(PyUnicode_EncodeASCII(
 765                                PyUnicode_AS_UNICODE(str),
 766                                PyUnicode_GET_SIZE(str),
 767                                errors),
 768                     PyUnicode_GET_SIZE(str));
 769     Py_DECREF(str);
 770     return v;
 771 }
 772
 773 static PyObject *
 774 charmap_encode(PyObject *self,
 775              PyObject *args)
 776 {
 777     PyObject *str, *v;
 778     const char *errors = NULL;
 779     PyObject *mapping = NULL;
 780
 781     if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
 782                           &str, &errors, &mapping))
 783         return NULL;
 784     if (mapping == Py_None)
 785         mapping = NULL;
 786
 787     str = PyUnicode_FromObject(str);
 788     if (str == NULL)
 789         return NULL;
 790     v = codec_tuple(PyUnicode_EncodeCharmap(
 791                                PyUnicode_AS_UNICODE(str),
 792                                PyUnicode_GET_SIZE(str),
 793                                mapping,
 794                                errors),
 795                     PyUnicode_GET_SIZE(str));
 796     Py_DECREF(str);
 797     return v;
 798 }
 799
 800 static PyObject*
 801 charmap_build(PyObject *self, PyObject *args)
 802 {
 803     PyObject *map;
 804     if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
 805         return NULL;
 806     return PyUnicode_BuildEncodingMap(map);
 807 }
 808
 809 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
 810
 811 static PyObject *
 812 mbcs_encode(PyObject *self,
 813             PyObject *args)
 814 {
 815     PyObject *str, *v;
 816     const char *errors = NULL;
 817
 818     if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
 819                           &str, &errors))
 820         return NULL;
 821
 822     str = PyUnicode_FromObject(str);
 823     if (str == NULL)
 824         return NULL;
 825     v = codec_tuple(PyUnicode_EncodeMBCS(
 826                                PyUnicode_AS_UNICODE(str),
 827                                PyUnicode_GET_SIZE(str),
 828                                errors),
 829                     PyUnicode_GET_SIZE(str));
 830     Py_DECREF(str);
 831     return v;
 832 }
 833
 834 #endif /* MS_WINDOWS */
 835 #endif /* Py_USING_UNICODE */
 836
 837 /* --- Error handler registry --------------------------------------------- */
 838
 839 PyDoc_STRVAR(register_error__doc__,
 840 "register_error(errors, handler)\n\
 841 \n\
 842 Register the specified error handler under the name\n\
 843 errors. handler must be a callable object, that\n\
 844 will be called with an exception instance containing\n\
 845 information about the location of the encoding/decoding\n\
 846 error and must return a (replacement, new position) tuple.");
 847
 848 static PyObject *register_error(PyObject *self, PyObject *args)
 849 {
 850     const char *name;
 851     PyObject *handler;
 852
 853     if (!PyArg_ParseTuple(args, "sO:register_error",
 854                           &name, &handler))
 855         return NULL;
 856     if (PyCodec_RegisterError(name, handler))
 857         return NULL;
 858     Py_RETURN_NONE;
 859 }
 860
 861 PyDoc_STRVAR(lookup_error__doc__,
 862 "lookup_error(errors) -> handler\n\
 863 \n\
 864 Return the error handler for the specified error handling name\n\
 865 or raise a LookupError, if no handler exists under this name.");
 866
 867 static PyObject *lookup_error(PyObject *self, PyObject *args)
 868 {
 869     const char *name;
 870
 871     if (!PyArg_ParseTuple(args, "s:lookup_error",
 872                           &name))
 873         return NULL;
 874     return PyCodec_LookupError(name);
 875 }
 876
 877 /* --- Module API --------------------------------------------------------- */
 878
 879 static PyMethodDef _codecs_functions[] = {
 880     {"register",                codec_register,                 METH_O,
 881         register__doc__},
 882     {"lookup",                  codec_lookup,                   METH_VARARGS,
 883         lookup__doc__},
 884     {"encode",                  codec_encode,                   METH_VARARGS,
 885         encode__doc__},
 886     {"decode",                  codec_decode,                   METH_VARARGS,
 887         decode__doc__},
 888     {"escape_encode",           escape_encode,                  METH_VARARGS},
 889     {"escape_decode",           escape_decode,                  METH_VARARGS},
 890 #ifdef Py_USING_UNICODE
 891     {"utf_8_encode",            utf_8_encode,                   METH_VARARGS},
 892     {"utf_8_decode",            utf_8_decode,                   METH_VARARGS},
 893     {"utf_7_encode",            utf_7_encode,                   METH_VARARGS},
 894     {"utf_7_decode",            utf_7_decode,                   METH_VARARGS},
 895     {"utf_16_encode",           utf_16_encode,                  METH_VARARGS},
 896     {"utf_16_le_encode",        utf_16_le_encode,               METH_VARARGS},
 897     {"utf_16_be_encode",        utf_16_be_encode,               METH_VARARGS},
 898     {"utf_16_decode",           utf_16_decode,                  METH_VARARGS},
 899     {"utf_16_le_decode",        utf_16_le_decode,               METH_VARARGS},
 900     {"utf_16_be_decode",        utf_16_be_decode,               METH_VARARGS},
 901     {"utf_16_ex_decode",        utf_16_ex_decode,               METH_VARARGS},
 902     {"unicode_escape_encode",   unicode_escape_encode,          METH_VARARGS},
 903     {"unicode_escape_decode",   unicode_escape_decode,          METH_VARARGS},
 904     {"unicode_internal_encode", unicode_internal_encode,        METH_VARARGS},
 905     {"unicode_internal_decode", unicode_internal_decode,        METH_VARARGS},
 906     {"raw_unicode_escape_encode", raw_unicode_escape_encode,    METH_VARARGS},
 907     {"raw_unicode_escape_decode", raw_unicode_escape_decode,    METH_VARARGS},
 908     {"latin_1_encode",          latin_1_encode,                 METH_VARARGS},
 909     {"latin_1_decode",          latin_1_decode,                 METH_VARARGS},
 910     {"ascii_encode",            ascii_encode,                   METH_VARARGS},
 911     {"ascii_decode",            ascii_decode,                   METH_VARARGS},
 912     {"charmap_encode",          charmap_encode,                 METH_VARARGS},
 913     {"charmap_decode",          charmap_decode,                 METH_VARARGS},
 914     {"charmap_build",           charmap_build,                  METH_VARARGS},
 915     {"readbuffer_encode",       readbuffer_encode,              METH_VARARGS},
 916     {"charbuffer_encode",       charbuffer_encode,              METH_VARARGS},
 917 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
 918     {"mbcs_encode",             mbcs_encode,                    METH_VARARGS},
 919     {"mbcs_decode",             mbcs_decode,                    METH_VARARGS},
 920 #endif
 921 #endif /* Py_USING_UNICODE */
 922     {"register_error",          register_error,                 METH_VARARGS,
 923         register_error__doc__},
 924     {"lookup_error",            lookup_error,                   METH_VARARGS,
 925         lookup_error__doc__},
 926     {NULL, NULL}                /* sentinel */
 927 };
 928
 929 PyMODINIT_FUNC
 930 init_codecs(void)
 931 {
 932     Py_InitModule("_codecs", _codecs_functions);
 933 }