Python/codecs.c

   1 /* ------------------------------------------------------------------------
   2
   3    Python Codec Registry and support functions
   4
   5 Written by Marc-Andre Lemburg (mal@lemburg.com).
   6
   7 Copyright (c) Corporation for National Research Initiatives.
   8
   9    ------------------------------------------------------------------------ */
  10
  11 #include "Python.h"
  12 #include <ctype.h>
  13
  14 /* --- Codec Registry ----------------------------------------------------- */
  15
  16 /* Import the standard encodings package which will register the first
  17    codec search function.
  18
  19    This is done in a lazy way so that the Unicode implementation does
  20    not downgrade startup time of scripts not needing it.
  21
  22    ImportErrors are silently ignored by this function. Only one try is
  23    made.
  24
  25 */
  26
  27 static int _PyCodecRegistry_Init(void); /* Forward */
  28
  29 int PyCodec_Register(PyObject *search_function)
  30 {
  31     PyInterpreterState *interp = PyThreadState_GET()->interp;
  32     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
  33         goto onError;
  34     if (search_function == NULL) {
  35         PyErr_BadArgument();
  36         goto onError;
  37     }
  38     if (!PyCallable_Check(search_function)) {
  39         PyErr_SetString(PyExc_TypeError, "argument must be callable");
  40         goto onError;
  41     }
  42     return PyList_Append(interp->codec_search_path, search_function);
  43
  44  onError:
  45     return -1;
  46 }
  47
  48 /* Convert a string to a normalized Python string: all characters are
  49    converted to lower case, spaces are replaced with underscores. */
  50
  51 static
  52 PyObject *normalizestring(const char *string)
  53 {
  54     register size_t i;
  55     size_t len = strlen(string);
  56     char *p;
  57     PyObject *v;
  58
  59         if (len > INT_MAX) {
  60                 PyErr_SetString(PyExc_OverflowError, "string is too large");
  61                 return NULL;
  62         }
  63
  64     v = PyString_FromStringAndSize(NULL, (int)len);
  65     if (v == NULL)
  66         return NULL;
  67     p = PyString_AS_STRING(v);
  68     for (i = 0; i < len; i++) {
  69         register char ch = string[i];
  70         if (ch == ' ')
  71             ch = '-';
  72         else
  73             ch = tolower(ch);
  74         p[i] = ch;
  75     }
  76     return v;
  77 }
  78
  79 /* Lookup the given encoding and return a tuple providing the codec
  80    facilities.
  81
  82    The encoding string is looked up converted to all lower-case
  83    characters. This makes encodings looked up through this mechanism
  84    effectively case-insensitive.
  85
  86    If no codec is found, a LookupError is set and NULL returned.
  87
  88    As side effect, this tries to load the encodings package, if not
  89    yet done. This is part of the lazy load strategy for the encodings
  90    package.
  91
  92 */
  93
  94 PyObject *_PyCodec_Lookup(const char *encoding)
  95 {
  96     PyInterpreterState *interp;
  97     PyObject *result, *args = NULL, *v;
  98     int i, len;
  99
 100     if (encoding == NULL) {
 101         PyErr_BadArgument();
 102         goto onError;
 103     }
 104
 105     interp = PyThreadState_GET()->interp;
 106     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 107         goto onError;
 108
 109     /* Convert the encoding to a normalized Python string: all
 110        characters are converted to lower case, spaces and hyphens are
 111        replaced with underscores. */
 112     v = normalizestring(encoding);
 113     if (v == NULL)
 114         goto onError;
 115     PyString_InternInPlace(&v);
 116
 117     /* First, try to lookup the name in the registry dictionary */
 118     result = PyDict_GetItem(interp->codec_search_cache, v);
 119     if (result != NULL) {
 120         Py_INCREF(result);
 121         Py_DECREF(v);
 122         return result;
 123     }
 124
 125     /* Next, scan the search functions in order of registration */
 126     args = PyTuple_New(1);
 127     if (args == NULL)
 128         goto onError;
 129     PyTuple_SET_ITEM(args,0,v);
 130
 131     len = PyList_Size(interp->codec_search_path);
 132     if (len < 0)
 133         goto onError;
 134     if (len == 0) {
 135         PyErr_SetString(PyExc_LookupError,
 136                         "no codec search functions registered: "
 137                         "can't find encoding");
 138         goto onError;
 139     }
 140
 141     for (i = 0; i < len; i++) {
 142         PyObject *func;
 143
 144         func = PyList_GetItem(interp->codec_search_path, i);
 145         if (func == NULL)
 146             goto onError;
 147         result = PyEval_CallObject(func, args);
 148         if (result == NULL)
 149             goto onError;
 150         if (result == Py_None) {
 151             Py_DECREF(result);
 152             continue;
 153         }
 154         if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
 155             PyErr_SetString(PyExc_TypeError,
 156                             "codec search functions must return 4-tuples");
 157             Py_DECREF(result);
 158             goto onError;
 159         }
 160         break;
 161     }
 162     if (i == len) {
 163         /* XXX Perhaps we should cache misses too ? */
 164         PyErr_Format(PyExc_LookupError,
 165                      "unknown encoding: %s", encoding);
 166         goto onError;
 167     }
 168
 169     /* Cache and return the result */
 170     PyDict_SetItem(interp->codec_search_cache, v, result);
 171     Py_DECREF(args);
 172     return result;
 173
 174  onError:
 175     Py_XDECREF(args);
 176     return NULL;
 177 }
 178
 179 static
 180 PyObject *args_tuple(PyObject *object,
 181                      const char *errors)
 182 {
 183     PyObject *args;
 184
 185     args = PyTuple_New(1 + (errors != NULL));
 186     if (args == NULL)
 187         return NULL;
 188     Py_INCREF(object);
 189     PyTuple_SET_ITEM(args,0,object);
 190     if (errors) {
 191         PyObject *v;
 192
 193         v = PyString_FromString(errors);
 194         if (v == NULL) {
 195             Py_DECREF(args);
 196             return NULL;
 197         }
 198         PyTuple_SET_ITEM(args, 1, v);
 199     }
 200     return args;
 201 }
 202
 203 /* Build a codec by calling factory(stream[,errors]) or just
 204    factory(errors) depending on whether the given parameters are
 205    non-NULL. */
 206
 207 static
 208 PyObject *build_stream_codec(PyObject *factory,
 209                              PyObject *stream,
 210                              const char *errors)
 211 {
 212     PyObject *args, *codec;
 213
 214     args = args_tuple(stream, errors);
 215     if (args == NULL)
 216         return NULL;
 217
 218     codec = PyEval_CallObject(factory, args);
 219     Py_DECREF(args);
 220     return codec;
 221 }
 222
 223 /* Convenience APIs to query the Codec registry.
 224
 225    All APIs return a codec object with incremented refcount.
 226
 227  */
 228
 229 PyObject *PyCodec_Encoder(const char *encoding)
 230 {
 231     PyObject *codecs;
 232     PyObject *v;
 233
 234     codecs = _PyCodec_Lookup(encoding);
 235     if (codecs == NULL)
 236         goto onError;
 237     v = PyTuple_GET_ITEM(codecs,0);
 238     Py_DECREF(codecs);
 239     Py_INCREF(v);
 240     return v;
 241
 242  onError:
 243     return NULL;
 244 }
 245
 246 PyObject *PyCodec_Decoder(const char *encoding)
 247 {
 248     PyObject *codecs;
 249     PyObject *v;
 250
 251     codecs = _PyCodec_Lookup(encoding);
 252     if (codecs == NULL)
 253         goto onError;
 254     v = PyTuple_GET_ITEM(codecs,1);
 255     Py_DECREF(codecs);
 256     Py_INCREF(v);
 257     return v;
 258
 259  onError:
 260     return NULL;
 261 }
 262
 263 PyObject *PyCodec_StreamReader(const char *encoding,
 264                                PyObject *stream,
 265                                const char *errors)
 266 {
 267     PyObject *codecs, *ret;
 268
 269     codecs = _PyCodec_Lookup(encoding);
 270     if (codecs == NULL)
 271         goto onError;
 272     ret = build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
 273     Py_DECREF(codecs);
 274     return ret;
 275
 276  onError:
 277     return NULL;
 278 }
 279
 280 PyObject *PyCodec_StreamWriter(const char *encoding,
 281                                PyObject *stream,
 282                                const char *errors)
 283 {
 284     PyObject *codecs, *ret;
 285
 286     codecs = _PyCodec_Lookup(encoding);
 287     if (codecs == NULL)
 288         goto onError;
 289     ret = build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
 290     Py_DECREF(codecs);
 291     return ret;
 292
 293  onError:
 294     return NULL;
 295 }
 296
 297 /* Encode an object (e.g. an Unicode object) using the given encoding
 298    and return the resulting encoded object (usually a Python string).
 299
 300    errors is passed to the encoder factory as argument if non-NULL. */
 301
 302 PyObject *PyCodec_Encode(PyObject *object,
 303                          const char *encoding,
 304                          const char *errors)
 305 {
 306     PyObject *encoder = NULL;
 307     PyObject *args = NULL, *result = NULL;
 308     PyObject *v;
 309
 310     encoder = PyCodec_Encoder(encoding);
 311     if (encoder == NULL)
 312         goto onError;
 313
 314     args = args_tuple(object, errors);
 315     if (args == NULL)
 316         goto onError;
 317
 318     result = PyEval_CallObject(encoder,args);
 319     if (result == NULL)
 320         goto onError;
 321
 322     if (!PyTuple_Check(result) ||
 323         PyTuple_GET_SIZE(result) != 2) {
 324         PyErr_SetString(PyExc_TypeError,
 325                         "encoder must return a tuple (object,integer)");
 326         goto onError;
 327     }
 328     v = PyTuple_GET_ITEM(result,0);
 329     Py_INCREF(v);
 330     /* We don't check or use the second (integer) entry. */
 331
 332     Py_DECREF(args);
 333     Py_DECREF(encoder);
 334     Py_DECREF(result);
 335     return v;
 336
 337  onError:
 338     Py_XDECREF(result);
 339     Py_XDECREF(args);
 340     Py_XDECREF(encoder);
 341     return NULL;
 342 }
 343
 344 /* Decode an object (usually a Python string) using the given encoding
 345    and return an equivalent object (e.g. an Unicode object).
 346
 347    errors is passed to the decoder factory as argument if non-NULL. */
 348
 349 PyObject *PyCodec_Decode(PyObject *object,
 350                          const char *encoding,
 351                          const char *errors)
 352 {
 353     PyObject *decoder = NULL;
 354     PyObject *args = NULL, *result = NULL;
 355     PyObject *v;
 356
 357     decoder = PyCodec_Decoder(encoding);
 358     if (decoder == NULL)
 359         goto onError;
 360
 361     args = args_tuple(object, errors);
 362     if (args == NULL)
 363         goto onError;
 364
 365     result = PyEval_CallObject(decoder,args);
 366     if (result == NULL)
 367         goto onError;
 368     if (!PyTuple_Check(result) ||
 369         PyTuple_GET_SIZE(result) != 2) {
 370         PyErr_SetString(PyExc_TypeError,
 371                         "decoder must return a tuple (object,integer)");
 372         goto onError;
 373     }
 374     v = PyTuple_GET_ITEM(result,0);
 375     Py_INCREF(v);
 376     /* We don't check or use the second (integer) entry. */
 377
 378     Py_DECREF(args);
 379     Py_DECREF(decoder);
 380     Py_DECREF(result);
 381     return v;
 382
 383  onError:
 384     Py_XDECREF(args);
 385     Py_XDECREF(decoder);
 386     Py_XDECREF(result);
 387     return NULL;
 388 }
 389
 390 /* Register the error handling callback function error under the name
 391    name. This function will be called by the codec when it encounters
 392    an unencodable characters/undecodable bytes and doesn't know the
 393    callback name, when name is specified as the error parameter
 394    in the call to the encode/decode function.
 395    Return 0 on success, -1 on error */
 396 int PyCodec_RegisterError(const char *name, PyObject *error)
 397 {
 398     PyInterpreterState *interp = PyThreadState_GET()->interp;
 399     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 400         return -1;
 401     if (!PyCallable_Check(error)) {
 402         PyErr_SetString(PyExc_TypeError, "handler must be callable");
 403         return -1;
 404     }
 405     return PyDict_SetItemString(interp->codec_error_registry,
 406                                 (char *)name, error);
 407 }
 408
 409 /* Lookup the error handling callback function registered under the
 410    name error. As a special case NULL can be passed, in which case
 411    the error handling callback for strict encoding will be returned. */
 412 PyObject *PyCodec_LookupError(const char *name)
 413 {
 414     PyObject *handler = NULL;
 415
 416     PyInterpreterState *interp = PyThreadState_GET()->interp;
 417     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 418         return NULL;
 419
 420     if (name==NULL)
 421         name = "strict";
 422     handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
 423     if (!handler)
 424         PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
 425     else
 426         Py_INCREF(handler);
 427     return handler;
 428 }
 429
 430 static void wrong_exception_type(PyObject *exc)
 431 {
 432     PyObject *type = PyObject_GetAttrString(exc, "__class__");
 433     if (type != NULL) {
 434         PyObject *name = PyObject_GetAttrString(type, "__name__");
 435         Py_DECREF(type);
 436         if (name != NULL) {
 437             PyObject *string = PyObject_Str(name);
 438             Py_DECREF(name);
 439             if (string != NULL) {
 440                 PyErr_Format(PyExc_TypeError,
 441                     "don't know how to handle %.400s in error callback",
 442                     PyString_AS_STRING(string));
 443                 Py_DECREF(string);
 444             }
 445         }
 446     }
 447 }
 448
 449 PyObject *PyCodec_StrictErrors(PyObject *exc)
 450 {
 451     if (PyInstance_Check(exc))
 452         PyErr_SetObject((PyObject*)((PyInstanceObject*)exc)->in_class,
 453             exc);
 454     else
 455         PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
 456     return NULL;
 457 }
 458
 459
 460 #ifdef Py_USING_UNICODE
 461 PyObject *PyCodec_IgnoreErrors(PyObject *exc)
 462 {
 463     int end;
 464     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 465         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 466             return NULL;
 467     }
 468     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 469         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 470             return NULL;
 471     }
 472     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 473         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 474             return NULL;
 475     }
 476     else {
 477         wrong_exception_type(exc);
 478         return NULL;
 479     }
 480     /* ouch: passing NULL, 0, pos gives None instead of u'' */
 481     return Py_BuildValue("(u#i)", &end, 0, end);
 482 }
 483
 484
 485 PyObject *PyCodec_ReplaceErrors(PyObject *exc)
 486 {
 487     PyObject *restuple;
 488     int start;
 489     int end;
 490     int i;
 491
 492     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 493         PyObject *res;
 494         Py_UNICODE *p;
 495         if (PyUnicodeEncodeError_GetStart(exc, &start))
 496             return NULL;
 497         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 498             return NULL;
 499         res = PyUnicode_FromUnicode(NULL, end-start);
 500         if (res == NULL)
 501             return NULL;
 502         for (p = PyUnicode_AS_UNICODE(res), i = start;
 503             i<end; ++p, ++i)
 504             *p = '?';
 505         restuple = Py_BuildValue("(Oi)", res, end);
 506         Py_DECREF(res);
 507         return restuple;
 508     }
 509     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 510         Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
 511         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 512             return NULL;
 513         return Py_BuildValue("(u#i)", &res, 1, end);
 514     }
 515     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 516         PyObject *res;
 517         Py_UNICODE *p;
 518         if (PyUnicodeTranslateError_GetStart(exc, &start))
 519             return NULL;
 520         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 521             return NULL;
 522         res = PyUnicode_FromUnicode(NULL, end-start);
 523         if (res == NULL)
 524             return NULL;
 525         for (p = PyUnicode_AS_UNICODE(res), i = start;
 526             i<end; ++p, ++i)
 527             *p = Py_UNICODE_REPLACEMENT_CHARACTER;
 528         restuple = Py_BuildValue("(Oi)", res, end);
 529         Py_DECREF(res);
 530         return restuple;
 531     }
 532     else {
 533         wrong_exception_type(exc);
 534         return NULL;
 535     }
 536 }
 537
 538 PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
 539 {
 540     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 541         PyObject *restuple;
 542         PyObject *object;
 543         int start;
 544         int end;
 545         PyObject *res;
 546         Py_UNICODE *p;
 547         Py_UNICODE *startp;
 548         Py_UNICODE *outp;
 549         int ressize;
 550         if (PyUnicodeEncodeError_GetStart(exc, &start))
 551             return NULL;
 552         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 553             return NULL;
 554         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 555             return NULL;
 556         startp = PyUnicode_AS_UNICODE(object);
 557         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
 558             if (*p<10)
 559                 ressize += 2+1+1;
 560             else if (*p<100)
 561                 ressize += 2+2+1;
 562             else if (*p<1000)
 563                 ressize += 2+3+1;
 564             else if (*p<10000)
 565                 ressize += 2+4+1;
 566 #ifndef Py_UNICODE_WIDE
 567             else
 568                 ressize += 2+5+1;
 569 #else
 570             else if (*p<100000)
 571                 ressize += 2+5+1;
 572             else if (*p<1000000)
 573                 ressize += 2+6+1;
 574             else
 575                 ressize += 2+7+1;
 576 #endif
 577         }
 578         /* allocate replacement */
 579         res = PyUnicode_FromUnicode(NULL, ressize);
 580         if (res == NULL) {
 581             Py_DECREF(object);
 582             return NULL;
 583         }
 584         /* generate replacement */
 585         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
 586             p < startp+end; ++p) {
 587             Py_UNICODE c = *p;
 588             int digits;
 589             int base;
 590             *outp++ = '&';
 591             *outp++ = '#';
 592             if (*p<10) {
 593                 digits = 1;
 594                 base = 1;
 595             }
 596             else if (*p<100) {
 597                 digits = 2;
 598                 base = 10;
 599             }
 600             else if (*p<1000) {
 601                 digits = 3;
 602                 base = 100;
 603             }
 604             else if (*p<10000) {
 605                 digits = 4;
 606                 base = 1000;
 607             }
 608 #ifndef Py_UNICODE_WIDE
 609             else {
 610                 digits = 5;
 611                 base = 10000;
 612             }
 613 #else
 614             else if (*p<100000) {
 615                 digits = 5;
 616                 base = 10000;
 617             }
 618             else if (*p<1000000) {
 619                 digits = 6;
 620                 base = 100000;
 621             }
 622             else {
 623                 digits = 7;
 624                 base = 1000000;
 625             }
 626 #endif
 627             while (digits-->0) {
 628                 *outp++ = '0' + c/base;
 629                 c %= base;
 630                 base /= 10;
 631             }
 632             *outp++ = ';';
 633         }
 634         restuple = Py_BuildValue("(Oi)", res, end);
 635         Py_DECREF(res);
 636         Py_DECREF(object);
 637         return restuple;
 638     }
 639     else {
 640         wrong_exception_type(exc);
 641         return NULL;
 642     }
 643 }
 644
 645 static Py_UNICODE hexdigits[] = {
 646     '0', '1', '2', '3', '4', '5', '6', '7',
 647     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
 648 };
 649
 650 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 651 {
 652     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 653         PyObject *restuple;
 654         PyObject *object;
 655         int start;
 656         int end;
 657         PyObject *res;
 658         Py_UNICODE *p;
 659         Py_UNICODE *startp;
 660         Py_UNICODE *outp;
 661         int ressize;
 662         if (PyUnicodeEncodeError_GetStart(exc, &start))
 663             return NULL;
 664         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 665             return NULL;
 666         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 667             return NULL;
 668         startp = PyUnicode_AS_UNICODE(object);
 669         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
 670 #ifdef Py_UNICODE_WIDE
 671             if (*p >= 0x00010000)
 672                 ressize += 1+1+8;
 673             else
 674 #endif
 675             if (*p >= 0x100) {
 676                 ressize += 1+1+4;
 677             }
 678             else
 679                 ressize += 1+1+2;
 680         }
 681         res = PyUnicode_FromUnicode(NULL, ressize);
 682         if (res==NULL)
 683             return NULL;
 684         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
 685             p < startp+end; ++p) {
 686             Py_UNICODE c = *p;
 687             *outp++ = '\\';
 688 #ifdef Py_UNICODE_WIDE
 689             if (c >= 0x00010000) {
 690                 *outp++ = 'U';
 691                 *outp++ = hexdigits[(c>>28)&0xf];
 692                 *outp++ = hexdigits[(c>>24)&0xf];
 693                 *outp++ = hexdigits[(c>>20)&0xf];
 694                 *outp++ = hexdigits[(c>>16)&0xf];
 695                 *outp++ = hexdigits[(c>>12)&0xf];
 696                 *outp++ = hexdigits[(c>>8)&0xf];
 697             }
 698             else
 699 #endif
 700             if (c >= 0x100) {
 701                 *outp++ = 'u';
 702                 *outp++ = hexdigits[(c>>12)&0xf];
 703                 *outp++ = hexdigits[(c>>8)&0xf];
 704             }
 705             else
 706                 *outp++ = 'x';
 707             *outp++ = hexdigits[(c>>4)&0xf];
 708             *outp++ = hexdigits[c&0xf];
 709         }
 710
 711         restuple = Py_BuildValue("(Oi)", res, end);
 712         Py_DECREF(res);
 713         Py_DECREF(object);
 714         return restuple;
 715     }
 716     else {
 717         wrong_exception_type(exc);
 718         return NULL;
 719     }
 720 }
 721 #endif
 722
 723 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 724 {
 725     return PyCodec_StrictErrors(exc);
 726 }
 727
 728
 729 #ifdef Py_USING_UNICODE
 730 static PyObject *ignore_errors(PyObject *self, PyObject *exc)
 731 {
 732     return PyCodec_IgnoreErrors(exc);
 733 }
 734
 735
 736 static PyObject *replace_errors(PyObject *self, PyObject *exc)
 737 {
 738     return PyCodec_ReplaceErrors(exc);
 739 }
 740
 741
 742 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
 743 {
 744     return PyCodec_XMLCharRefReplaceErrors(exc);
 745 }
 746
 747
 748 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
 749 {
 750     return PyCodec_BackslashReplaceErrors(exc);
 751 }
 752 #endif
 753
 754 static int _PyCodecRegistry_Init(void)
 755 {
 756     static struct {
 757         char *name;
 758         PyMethodDef def;
 759     } methods[] =
 760     {
 761         {
 762             "strict",
 763             {
 764                 "strict_errors",
 765                 strict_errors,
 766                 METH_O
 767             }
 768         },
 769 #ifdef Py_USING_UNICODE
 770         {
 771             "ignore",
 772             {
 773                 "ignore_errors",
 774                 ignore_errors,
 775                 METH_O
 776             }
 777         },
 778         {
 779             "replace",
 780             {
 781                 "replace_errors",
 782                 replace_errors,
 783                 METH_O
 784             }
 785         },
 786         {
 787             "xmlcharrefreplace",
 788             {
 789                 "xmlcharrefreplace_errors",
 790                 xmlcharrefreplace_errors,
 791                 METH_O
 792             }
 793         },
 794         {
 795             "backslashreplace",
 796             {
 797                 "backslashreplace_errors",
 798                 backslashreplace_errors,
 799                 METH_O
 800             }
 801         }
 802 #endif
 803     };
 804
 805     PyInterpreterState *interp = PyThreadState_GET()->interp;
 806     PyObject *mod;
 807     unsigned i;
 808
 809     if (interp->codec_search_path != NULL)
 810         return 0;
 811
 812     interp->codec_search_path = PyList_New(0);
 813     interp->codec_search_cache = PyDict_New();
 814     interp->codec_error_registry = PyDict_New();
 815
 816     if (interp->codec_error_registry) {
 817         for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
 818             PyObject *func = PyCFunction_New(&methods[i].def, NULL);
 819             int res;
 820             if (!func)
 821                 Py_FatalError("can't initialize codec error registry");
 822             res = PyCodec_RegisterError(methods[i].name, func);
 823             Py_DECREF(func);
 824             if (res)
 825                 Py_FatalError("can't initialize codec error registry");
 826         }
 827     }
 828
 829     if (interp->codec_search_path == NULL ||
 830         interp->codec_search_cache == NULL ||
 831         interp->codec_error_registry == NULL)
 832         Py_FatalError("can't initialize codec registry");
 833
 834     mod = PyImport_ImportModuleEx("encodings", NULL, NULL, NULL);
 835     if (mod == NULL) {
 836         if (PyErr_ExceptionMatches(PyExc_ImportError)) {
 837             /* Ignore ImportErrors... this is done so that
 838                distributions can disable the encodings package. Note
 839                that other errors are not masked, e.g. SystemErrors
 840                raised to inform the user of an error in the Python
 841                configuration are still reported back to the user. */
 842             PyErr_Clear();
 843             return 0;
 844         }
 845         return -1;
 846     }
 847     Py_DECREF(mod);
 848     return 0;
 849 }