Python/codecs.c

   1 /* ------------------------------------------------------------------------
   2
   3    Python Codec Registry and support functions
   4
   5 Written by Marc-Andre Lemburg (mal@lemburg.com).
   6
   7 Copyright (c) Corporation for National Research Initiatives.
   8
   9    ------------------------------------------------------------------------ */
  10
  11 #include "Python.h"
  12 #include <ctype.h>
  13
  14 /* --- Codec Registry ----------------------------------------------------- */
  15
  16 /* Import the standard encodings package which will register the first
  17    codec search function.
  18
  19    This is done in a lazy way so that the Unicode implementation does
  20    not downgrade startup time of scripts not needing it.
  21
  22    ImportErrors are silently ignored by this function. Only one try is
  23    made.
  24
  25 */
  26
  27 static int _PyCodecRegistry_Init(void); /* Forward */
  28
  29 int PyCodec_Register(PyObject *search_function)
  30 {
  31     PyInterpreterState *interp = PyThreadState_GET()->interp;
  32     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
  33         goto onError;
  34     if (search_function == NULL) {
  35         PyErr_BadArgument();
  36         goto onError;
  37     }
  38     if (!PyCallable_Check(search_function)) {
  39         PyErr_SetString(PyExc_TypeError, "argument must be callable");
  40         goto onError;
  41     }
  42     return PyList_Append(interp->codec_search_path, search_function);
  43
  44  onError:
  45     return -1;
  46 }
  47
  48 /* Convert a string to a normalized Python string: all characters are
   49    converted to lower case, spaces are replaced with hyphens. */
  50
  51 static
  52 PyObject *normalizestring(const char *string)
  53 {
  54     register size_t i;
  55     size_t len = strlen(string);
  56     char *p;
  57     PyObject *v;
  58
  59     if (len > PY_SSIZE_T_MAX) {
  60         PyErr_SetString(PyExc_OverflowError, "string is too large");
  61         return NULL;
  62     }
  63
  64     v = PyString_FromStringAndSize(NULL, len);
  65     if (v == NULL)
  66         return NULL;
  67     p = PyString_AS_STRING(v);
  68     for (i = 0; i < len; i++) {
  69         register char ch = string[i];
  70         if (ch == ' ')
  71             ch = '-';
  72         else
  73             ch = tolower(Py_CHARMASK(ch));
  74         p[i] = ch;
  75     }
  76     return v;
  77 }
  78
  79 /* Lookup the given encoding and return a tuple providing the codec
  80    facilities.
  81
  82    The encoding string is looked up converted to all lower-case
  83    characters. This makes encodings looked up through this mechanism
  84    effectively case-insensitive.
  85
  86    If no codec is found, a LookupError is set and NULL returned.
  87
  88    As side effect, this tries to load the encodings package, if not
  89    yet done. This is part of the lazy load strategy for the encodings
  90    package.
  91
  92 */
  93
  94 PyObject *_PyCodec_Lookup(const char *encoding)
  95 {
  96     PyInterpreterState *interp;
  97     PyObject *result, *args = NULL, *v;
  98     Py_ssize_t i, len;
  99
 100     if (encoding == NULL) {
 101         PyErr_BadArgument();
 102         goto onError;
 103     }
 104
 105     interp = PyThreadState_GET()->interp;
 106     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 107         goto onError;
 108
 109     /* Convert the encoding to a normalized Python string: all
 110        characters are converted to lower case, spaces and hyphens are
 111        replaced with underscores. */
 112     v = normalizestring(encoding);
 113     if (v == NULL)
 114         goto onError;
 115     PyString_InternInPlace(&v);
 116
 117     /* First, try to lookup the name in the registry dictionary */
 118     result = PyDict_GetItem(interp->codec_search_cache, v);
 119     if (result != NULL) {
 120         Py_INCREF(result);
 121         Py_DECREF(v);
 122         return result;
 123     }
 124
 125     /* Next, scan the search functions in order of registration */
 126     args = PyTuple_New(1);
 127     if (args == NULL)
 128         goto onError;
 129     PyTuple_SET_ITEM(args,0,v);
 130
 131     len = PyList_Size(interp->codec_search_path);
 132     if (len < 0)
 133         goto onError;
 134     if (len == 0) {
 135         PyErr_SetString(PyExc_LookupError,
 136                         "no codec search functions registered: "
 137                         "can't find encoding");
 138         goto onError;
 139     }
 140
 141     for (i = 0; i < len; i++) {
 142         PyObject *func;
 143
 144         func = PyList_GetItem(interp->codec_search_path, i);
 145         if (func == NULL)
 146             goto onError;
 147         result = PyEval_CallObject(func, args);
 148         if (result == NULL)
 149             goto onError;
 150         if (result == Py_None) {
 151             Py_DECREF(result);
 152             continue;
 153         }
 154         if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
 155             PyErr_SetString(PyExc_TypeError,
 156                             "codec search functions must return 4-tuples");
 157             Py_DECREF(result);
 158             goto onError;
 159         }
 160         break;
 161     }
 162     if (i == len) {
 163         /* XXX Perhaps we should cache misses too ? */
 164         PyErr_Format(PyExc_LookupError,
 165                      "unknown encoding: %s", encoding);
 166         goto onError;
 167     }
 168
 169     /* Cache and return the result */
 170     PyDict_SetItem(interp->codec_search_cache, v, result);
 171     Py_DECREF(args);
 172     return result;
 173
 174  onError:
 175     Py_XDECREF(args);
 176     return NULL;
 177 }
 178
 179 static
 180 PyObject *args_tuple(PyObject *object,
 181                      const char *errors)
 182 {
 183     PyObject *args;
 184
 185     args = PyTuple_New(1 + (errors != NULL));
 186     if (args == NULL)
 187         return NULL;
 188     Py_INCREF(object);
 189     PyTuple_SET_ITEM(args,0,object);
 190     if (errors) {
 191         PyObject *v;
 192
 193         v = PyString_FromString(errors);
 194         if (v == NULL) {
 195             Py_DECREF(args);
 196             return NULL;
 197         }
 198         PyTuple_SET_ITEM(args, 1, v);
 199     }
 200     return args;
 201 }
 202
 203 /* Helper function to get a codec item */
 204
 205 static
 206 PyObject *codec_getitem(const char *encoding, int index)
 207 {
 208     PyObject *codecs;
 209     PyObject *v;
 210
 211     codecs = _PyCodec_Lookup(encoding);
 212     if (codecs == NULL)
 213         return NULL;
 214     v = PyTuple_GET_ITEM(codecs, index);
 215     Py_DECREF(codecs);
 216     Py_INCREF(v);
 217     return v;
 218 }
 219
 220 /* Helper function to create an incremental codec. */
 221
 222 static
 223 PyObject *codec_getincrementalcodec(const char *encoding,
 224                                     const char *errors,
 225                                     const char *attrname)
 226 {
 227     PyObject *codecs, *ret, *inccodec;
 228
 229     codecs = _PyCodec_Lookup(encoding);
 230     if (codecs == NULL)
 231         return NULL;
 232     inccodec = PyObject_GetAttrString(codecs, attrname);
 233     Py_DECREF(codecs);
 234     if (inccodec == NULL)
 235         return NULL;
 236     if (errors)
 237         ret = PyObject_CallFunction(inccodec, "s", errors);
 238     else
 239         ret = PyObject_CallFunction(inccodec, NULL);
 240     Py_DECREF(inccodec);
 241     return ret;
 242 }
 243
 244 /* Helper function to create a stream codec. */
 245
 246 static
 247 PyObject *codec_getstreamcodec(const char *encoding,
 248                                PyObject *stream,
 249                                const char *errors,
 250                                const int index)
 251 {
 252     PyObject *codecs, *streamcodec, *codeccls;
 253
 254     codecs = _PyCodec_Lookup(encoding);
 255     if (codecs == NULL)
 256         return NULL;
 257
 258     codeccls = PyTuple_GET_ITEM(codecs, index);
 259     if (errors != NULL)
 260         streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
 261     else
 262         streamcodec = PyObject_CallFunction(codeccls, "O", stream);
 263     Py_DECREF(codecs);
 264     return streamcodec;
 265 }
 266
 267 /* Convenience APIs to query the Codec registry.
 268
 269    All APIs return a codec object with incremented refcount.
 270
 271  */
 272
 273 PyObject *PyCodec_Encoder(const char *encoding)
 274 {
 275     return codec_getitem(encoding, 0);
 276 }
 277
 278 PyObject *PyCodec_Decoder(const char *encoding)
 279 {
 280     return codec_getitem(encoding, 1);
 281 }
 282
 283 PyObject *PyCodec_IncrementalEncoder(const char *encoding,
 284                                      const char *errors)
 285 {
 286     return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
 287 }
 288
 289 PyObject *PyCodec_IncrementalDecoder(const char *encoding,
 290                                      const char *errors)
 291 {
 292     return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
 293 }
 294
 295 PyObject *PyCodec_StreamReader(const char *encoding,
 296                                PyObject *stream,
 297                                const char *errors)
 298 {
 299     return codec_getstreamcodec(encoding, stream, errors, 2);
 300 }
 301
 302 PyObject *PyCodec_StreamWriter(const char *encoding,
 303                                PyObject *stream,
 304                                const char *errors)
 305 {
 306     return codec_getstreamcodec(encoding, stream, errors, 3);
 307 }
 308
 309 /* Encode an object (e.g. an Unicode object) using the given encoding
 310    and return the resulting encoded object (usually a Python string).
 311
 312    errors is passed to the encoder factory as argument if non-NULL. */
 313
 314 PyObject *PyCodec_Encode(PyObject *object,
 315                          const char *encoding,
 316                          const char *errors)
 317 {
 318     PyObject *encoder = NULL;
 319     PyObject *args = NULL, *result = NULL;
 320     PyObject *v;
 321
 322     encoder = PyCodec_Encoder(encoding);
 323     if (encoder == NULL)
 324         goto onError;
 325
 326     args = args_tuple(object, errors);
 327     if (args == NULL)
 328         goto onError;
 329
 330     result = PyEval_CallObject(encoder,args);
 331     if (result == NULL)
 332         goto onError;
 333
 334     if (!PyTuple_Check(result) ||
 335         PyTuple_GET_SIZE(result) != 2) {
 336         PyErr_SetString(PyExc_TypeError,
 337                         "encoder must return a tuple (object,integer)");
 338         goto onError;
 339     }
 340     v = PyTuple_GET_ITEM(result,0);
 341     Py_INCREF(v);
 342     /* We don't check or use the second (integer) entry. */
 343
 344     Py_DECREF(args);
 345     Py_DECREF(encoder);
 346     Py_DECREF(result);
 347     return v;
 348
 349  onError:
 350     Py_XDECREF(result);
 351     Py_XDECREF(args);
 352     Py_XDECREF(encoder);
 353     return NULL;
 354 }
 355
 356 /* Decode an object (usually a Python string) using the given encoding
 357    and return an equivalent object (e.g. an Unicode object).
 358
 359    errors is passed to the decoder factory as argument if non-NULL. */
 360
 361 PyObject *PyCodec_Decode(PyObject *object,
 362                          const char *encoding,
 363                          const char *errors)
 364 {
 365     PyObject *decoder = NULL;
 366     PyObject *args = NULL, *result = NULL;
 367     PyObject *v;
 368
 369     decoder = PyCodec_Decoder(encoding);
 370     if (decoder == NULL)
 371         goto onError;
 372
 373     args = args_tuple(object, errors);
 374     if (args == NULL)
 375         goto onError;
 376
 377     result = PyEval_CallObject(decoder,args);
 378     if (result == NULL)
 379         goto onError;
 380     if (!PyTuple_Check(result) ||
 381         PyTuple_GET_SIZE(result) != 2) {
 382         PyErr_SetString(PyExc_TypeError,
 383                         "decoder must return a tuple (object,integer)");
 384         goto onError;
 385     }
 386     v = PyTuple_GET_ITEM(result,0);
 387     Py_INCREF(v);
 388     /* We don't check or use the second (integer) entry. */
 389
 390     Py_DECREF(args);
 391     Py_DECREF(decoder);
 392     Py_DECREF(result);
 393     return v;
 394
 395  onError:
 396     Py_XDECREF(args);
 397     Py_XDECREF(decoder);
 398     Py_XDECREF(result);
 399     return NULL;
 400 }
 401
 402 /* Register the error handling callback function error under the name
 403    name. This function will be called by the codec when it encounters
  404    unencodable characters/undecodable bytes and doesn't know the
 405    callback name, when name is specified as the error parameter
 406    in the call to the encode/decode function.
 407    Return 0 on success, -1 on error */
 408 int PyCodec_RegisterError(const char *name, PyObject *error)
 409 {
 410     PyInterpreterState *interp = PyThreadState_GET()->interp;
 411     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 412         return -1;
 413     if (!PyCallable_Check(error)) {
 414         PyErr_SetString(PyExc_TypeError, "handler must be callable");
 415         return -1;
 416     }
 417     return PyDict_SetItemString(interp->codec_error_registry,
 418                                 (char *)name, error);
 419 }
 420
 421 /* Lookup the error handling callback function registered under the
 422    name error. As a special case NULL can be passed, in which case
 423    the error handling callback for strict encoding will be returned. */
 424 PyObject *PyCodec_LookupError(const char *name)
 425 {
 426     PyObject *handler = NULL;
 427
 428     PyInterpreterState *interp = PyThreadState_GET()->interp;
 429     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 430         return NULL;
 431
 432     if (name==NULL)
 433         name = "strict";
 434     handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
 435     if (!handler)
 436         PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
 437     else
 438         Py_INCREF(handler);
 439     return handler;
 440 }
 441
 442 static void wrong_exception_type(PyObject *exc)
 443 {
 444     PyObject *type = PyObject_GetAttrString(exc, "__class__");
 445     if (type != NULL) {
 446         PyObject *name = PyObject_GetAttrString(type, "__name__");
 447         Py_DECREF(type);
 448         if (name != NULL) {
 449             PyObject *string = PyObject_Str(name);
 450             Py_DECREF(name);
 451             if (string != NULL) {
 452                 PyErr_Format(PyExc_TypeError,
 453                     "don't know how to handle %.400s in error callback",
 454                     PyString_AS_STRING(string));
 455                 Py_DECREF(string);
 456             }
 457         }
 458     }
 459 }
 460
 461 PyObject *PyCodec_StrictErrors(PyObject *exc)
 462 {
 463     if (PyExceptionInstance_Check(exc))
 464         PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
 465     else
 466         PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
 467     return NULL;
 468 }
 469
 470
 471 #ifdef Py_USING_UNICODE
 472 PyObject *PyCodec_IgnoreErrors(PyObject *exc)
 473 {
 474     Py_ssize_t end;
 475     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 476         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 477             return NULL;
 478     }
 479     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 480         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 481             return NULL;
 482     }
 483     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 484         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 485             return NULL;
 486     }
 487     else {
 488         wrong_exception_type(exc);
 489         return NULL;
 490     }
 491     /* ouch: passing NULL, 0, pos gives None instead of u'' */
 492     return Py_BuildValue("(u#n)", &end, 0, end);
 493 }
 494
 495
 496 PyObject *PyCodec_ReplaceErrors(PyObject *exc)
 497 {
 498     PyObject *restuple;
 499     Py_ssize_t start;
 500     Py_ssize_t end;
 501     Py_ssize_t i;
 502
 503     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 504         PyObject *res;
 505         Py_UNICODE *p;
 506         if (PyUnicodeEncodeError_GetStart(exc, &start))
 507             return NULL;
 508         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 509             return NULL;
 510         res = PyUnicode_FromUnicode(NULL, end-start);
 511         if (res == NULL)
 512             return NULL;
 513         for (p = PyUnicode_AS_UNICODE(res), i = start;
 514             i<end; ++p, ++i)
 515             *p = '?';
 516         restuple = Py_BuildValue("(On)", res, end);
 517         Py_DECREF(res);
 518         return restuple;
 519     }
 520     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 521         Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
 522         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 523             return NULL;
 524         return Py_BuildValue("(u#n)", &res, 1, end);
 525     }
 526     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 527         PyObject *res;
 528         Py_UNICODE *p;
 529         if (PyUnicodeTranslateError_GetStart(exc, &start))
 530             return NULL;
 531         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 532             return NULL;
 533         res = PyUnicode_FromUnicode(NULL, end-start);
 534         if (res == NULL)
 535             return NULL;
 536         for (p = PyUnicode_AS_UNICODE(res), i = start;
 537             i<end; ++p, ++i)
 538             *p = Py_UNICODE_REPLACEMENT_CHARACTER;
 539         restuple = Py_BuildValue("(On)", res, end);
 540         Py_DECREF(res);
 541         return restuple;
 542     }
 543     else {
 544         wrong_exception_type(exc);
 545         return NULL;
 546     }
 547 }
 548
 549 PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
 550 {
 551     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 552         PyObject *restuple;
 553         PyObject *object;
 554         Py_ssize_t start;
 555         Py_ssize_t end;
 556         PyObject *res;
 557         Py_UNICODE *p;
 558         Py_UNICODE *startp;
 559         Py_UNICODE *outp;
 560         int ressize;
 561         if (PyUnicodeEncodeError_GetStart(exc, &start))
 562             return NULL;
 563         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 564             return NULL;
 565         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 566             return NULL;
 567         startp = PyUnicode_AS_UNICODE(object);
 568         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
 569             if (*p<10)
 570                 ressize += 2+1+1;
 571             else if (*p<100)
 572                 ressize += 2+2+1;
 573             else if (*p<1000)
 574                 ressize += 2+3+1;
 575             else if (*p<10000)
 576                 ressize += 2+4+1;
 577 #ifndef Py_UNICODE_WIDE
 578             else
 579                 ressize += 2+5+1;
 580 #else
 581             else if (*p<100000)
 582                 ressize += 2+5+1;
 583             else if (*p<1000000)
 584                 ressize += 2+6+1;
 585             else
 586                 ressize += 2+7+1;
 587 #endif
 588         }
 589         /* allocate replacement */
 590         res = PyUnicode_FromUnicode(NULL, ressize);
 591         if (res == NULL) {
 592             Py_DECREF(object);
 593             return NULL;
 594         }
 595         /* generate replacement */
 596         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
 597             p < startp+end; ++p) {
 598             Py_UNICODE c = *p;
 599             int digits;
 600             int base;
 601             *outp++ = '&';
 602             *outp++ = '#';
 603             if (*p<10) {
 604                 digits = 1;
 605                 base = 1;
 606             }
 607             else if (*p<100) {
 608                 digits = 2;
 609                 base = 10;
 610             }
 611             else if (*p<1000) {
 612                 digits = 3;
 613                 base = 100;
 614             }
 615             else if (*p<10000) {
 616                 digits = 4;
 617                 base = 1000;
 618             }
 619 #ifndef Py_UNICODE_WIDE
 620             else {
 621                 digits = 5;
 622                 base = 10000;
 623             }
 624 #else
 625             else if (*p<100000) {
 626                 digits = 5;
 627                 base = 10000;
 628             }
 629             else if (*p<1000000) {
 630                 digits = 6;
 631                 base = 100000;
 632             }
 633             else {
 634                 digits = 7;
 635                 base = 1000000;
 636             }
 637 #endif
 638             while (digits-->0) {
 639                 *outp++ = '0' + c/base;
 640                 c %= base;
 641                 base /= 10;
 642             }
 643             *outp++ = ';';
 644         }
 645         restuple = Py_BuildValue("(On)", res, end);
 646         Py_DECREF(res);
 647         Py_DECREF(object);
 648         return restuple;
 649     }
 650     else {
 651         wrong_exception_type(exc);
 652         return NULL;
 653     }
 654 }
 655
 656 static Py_UNICODE hexdigits[] = {
 657     '0', '1', '2', '3', '4', '5', '6', '7',
 658     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
 659 };
 660
 661 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 662 {
 663     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 664         PyObject *restuple;
 665         PyObject *object;
 666         Py_ssize_t start;
 667         Py_ssize_t end;
 668         PyObject *res;
 669         Py_UNICODE *p;
 670         Py_UNICODE *startp;
 671         Py_UNICODE *outp;
 672         int ressize;
 673         if (PyUnicodeEncodeError_GetStart(exc, &start))
 674             return NULL;
 675         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 676             return NULL;
 677         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 678             return NULL;
 679         startp = PyUnicode_AS_UNICODE(object);
 680         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
 681 #ifdef Py_UNICODE_WIDE
 682             if (*p >= 0x00010000)
 683                 ressize += 1+1+8;
 684             else
 685 #endif
 686             if (*p >= 0x100) {
 687                 ressize += 1+1+4;
 688             }
 689             else
 690                 ressize += 1+1+2;
 691         }
 692         res = PyUnicode_FromUnicode(NULL, ressize);
 693         if (res==NULL)
 694             return NULL;
 695         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
 696             p < startp+end; ++p) {
 697             Py_UNICODE c = *p;
 698             *outp++ = '\\';
 699 #ifdef Py_UNICODE_WIDE
 700             if (c >= 0x00010000) {
 701                 *outp++ = 'U';
 702                 *outp++ = hexdigits[(c>>28)&0xf];
 703                 *outp++ = hexdigits[(c>>24)&0xf];
 704                 *outp++ = hexdigits[(c>>20)&0xf];
 705                 *outp++ = hexdigits[(c>>16)&0xf];
 706                 *outp++ = hexdigits[(c>>12)&0xf];
 707                 *outp++ = hexdigits[(c>>8)&0xf];
 708             }
 709             else
 710 #endif
 711             if (c >= 0x100) {
 712                 *outp++ = 'u';
 713                 *outp++ = hexdigits[(c>>12)&0xf];
 714                 *outp++ = hexdigits[(c>>8)&0xf];
 715             }
 716             else
 717                 *outp++ = 'x';
 718             *outp++ = hexdigits[(c>>4)&0xf];
 719             *outp++ = hexdigits[c&0xf];
 720         }
 721
 722         restuple = Py_BuildValue("(On)", res, end);
 723         Py_DECREF(res);
 724         Py_DECREF(object);
 725         return restuple;
 726     }
 727     else {
 728         wrong_exception_type(exc);
 729         return NULL;
 730     }
 731 }
 732 #endif
 733
 734 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 735 {
 736     return PyCodec_StrictErrors(exc);
 737 }
 738
 739
 740 #ifdef Py_USING_UNICODE
 741 static PyObject *ignore_errors(PyObject *self, PyObject *exc)
 742 {
 743     return PyCodec_IgnoreErrors(exc);
 744 }
 745
 746
 747 static PyObject *replace_errors(PyObject *self, PyObject *exc)
 748 {
 749     return PyCodec_ReplaceErrors(exc);
 750 }
 751
 752
 753 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
 754 {
 755     return PyCodec_XMLCharRefReplaceErrors(exc);
 756 }
 757
 758
 759 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
 760 {
 761     return PyCodec_BackslashReplaceErrors(exc);
 762 }
 763 #endif
 764
 765 static int _PyCodecRegistry_Init(void)
 766 {
 767     static struct {
 768         char *name;
 769         PyMethodDef def;
 770     } methods[] =
 771     {
 772         {
 773             "strict",
 774             {
 775                 "strict_errors",
 776                 strict_errors,
 777                 METH_O,
 778                 PyDoc_STR("Implements the 'strict' error handling, which "
 779                           "raises a UnicodeError on coding errors.")
 780             }
 781         },
 782 #ifdef Py_USING_UNICODE
 783         {
 784             "ignore",
 785             {
 786                 "ignore_errors",
 787                 ignore_errors,
 788                 METH_O,
 789                 PyDoc_STR("Implements the 'ignore' error handling, which "
 790                           "ignores malformed data and continues.")
 791             }
 792         },
 793         {
 794             "replace",
 795             {
 796                 "replace_errors",
 797                 replace_errors,
 798                 METH_O,
 799                 PyDoc_STR("Implements the 'replace' error handling, which "
 800                           "replaces malformed data with a replacement marker.")
 801             }
 802         },
 803         {
 804             "xmlcharrefreplace",
 805             {
 806                 "xmlcharrefreplace_errors",
 807                 xmlcharrefreplace_errors,
 808                 METH_O,
 809                 PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
 810                           "which replaces an unencodable character with the "
 811                           "appropriate XML character reference.")
 812             }
 813         },
 814         {
 815             "backslashreplace",
 816             {
 817                 "backslashreplace_errors",
 818                 backslashreplace_errors,
 819                 METH_O,
 820                 PyDoc_STR("Implements the 'backslashreplace' error handling, "
 821                           "which replaces an unencodable character with a "
 822                           "backslashed escape sequence.")
 823             }
 824         }
 825 #endif
 826     };
 827
 828     PyInterpreterState *interp = PyThreadState_GET()->interp;
 829     PyObject *mod;
 830     unsigned i;
 831
 832     if (interp->codec_search_path != NULL)
 833         return 0;
 834
 835     interp->codec_search_path = PyList_New(0);
 836     interp->codec_search_cache = PyDict_New();
 837     interp->codec_error_registry = PyDict_New();
 838
 839     if (interp->codec_error_registry) {
 840         for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
 841             PyObject *func = PyCFunction_New(&methods[i].def, NULL);
 842             int res;
 843             if (!func)
 844                 Py_FatalError("can't initialize codec error registry");
 845             res = PyCodec_RegisterError(methods[i].name, func);
 846             Py_DECREF(func);
 847             if (res)
 848                 Py_FatalError("can't initialize codec error registry");
 849         }
 850     }
 851
 852     if (interp->codec_search_path == NULL ||
 853         interp->codec_search_cache == NULL ||
 854         interp->codec_error_registry == NULL)
 855         Py_FatalError("can't initialize codec registry");
 856
 857     mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
 858     if (mod == NULL) {
 859         if (PyErr_ExceptionMatches(PyExc_ImportError)) {
 860             /* Ignore ImportErrors... this is done so that
 861                distributions can disable the encodings package. Note
 862                that other errors are not masked, e.g. SystemErrors
 863                raised to inform the user of an error in the Python
 864                configuration are still reported back to the user. */
 865             PyErr_Clear();
 866             return 0;
 867         }
 868         return -1;
 869     }
 870     Py_DECREF(mod);
 871     return 0;
 872 }