Python/codecs.c

   1 /* ------------------------------------------------------------------------
   2
   3    Python Codec Registry and support functions
   4
   5 Written by Marc-Andre Lemburg (mal@lemburg.com).
   6
   7 Copyright (c) Corporation for National Research Initiatives.
   8
   9    ------------------------------------------------------------------------ */
  10
  11 #include "Python.h"
  12 #include <ctype.h>
  13
  14 /* --- Codec Registry ----------------------------------------------------- */
  15
  16 /* Import the standard encodings package which will register the first
  17    codec search function.
  18
  19    This is done in a lazy way so that the Unicode implementation does
  20    not downgrade startup time of scripts not needing it.
  21
  22    ImportErrors are silently ignored by this function. Only one try is
  23    made.
  24
  25 */
  26
  27 static int _PyCodecRegistry_Init(void); /* Forward */
  28
  29 int PyCodec_Register(PyObject *search_function)
  30 {
  31     PyInterpreterState *interp = PyThreadState_GET()->interp;
  32     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
  33         goto onError;
  34     if (search_function == NULL) {
  35         PyErr_BadArgument();
  36         goto onError;
  37     }
  38     if (!PyCallable_Check(search_function)) {
  39         PyErr_SetString(PyExc_TypeError, "argument must be callable");
  40         goto onError;
  41     }
  42     return PyList_Append(interp->codec_search_path, search_function);
  43
  44  onError:
  45     return -1;
  46 }
  47
  48 /* Convert a string to a normalized Python string: all characters are
  49    converted to lower case, spaces are replaced with underscores. */
  50
  51 static
  52 PyObject *normalizestring(const char *string)
  53 {
  54     register size_t i;
  55     size_t len = strlen(string);
  56     char *p;
  57     PyObject *v;
  58
  59         if (len > INT_MAX) {
  60                 PyErr_SetString(PyExc_OverflowError, "string is too large");
  61                 return NULL;
  62         }
  63
  64     v = PyString_FromStringAndSize(NULL, (int)len);
  65     if (v == NULL)
  66         return NULL;
  67     p = PyString_AS_STRING(v);
  68     for (i = 0; i < len; i++) {
  69         register char ch = string[i];
  70         if (ch == ' ')
  71             ch = '-';
  72         else
  73             ch = tolower(ch);
  74         p[i] = ch;
  75     }
  76     return v;
  77 }
  78
  79 /* Lookup the given encoding and return a tuple providing the codec
  80    facilities.
  81
  82    The encoding string is looked up converted to all lower-case
  83    characters. This makes encodings looked up through this mechanism
  84    effectively case-insensitive.
  85
  86    If no codec is found, a LookupError is set and NULL returned.
  87
  88    As side effect, this tries to load the encodings package, if not
  89    yet done. This is part of the lazy load strategy for the encodings
  90    package.
  91
  92 */
  93
  94 PyObject *_PyCodec_Lookup(const char *encoding)
  95 {
  96     PyInterpreterState *interp;
  97     PyObject *result, *args = NULL, *v;
  98     int i, len;
  99
 100     if (encoding == NULL) {
 101         PyErr_BadArgument();
 102         goto onError;
 103     }
 104
 105     interp = PyThreadState_GET()->interp;
 106     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 107         goto onError;
 108
 109     /* Convert the encoding to a normalized Python string: all
 110        characters are converted to lower case, spaces and hyphens are
 111        replaced with underscores. */
 112     v = normalizestring(encoding);
 113     if (v == NULL)
 114         goto onError;
 115     PyString_InternInPlace(&v);
 116
 117     /* First, try to lookup the name in the registry dictionary */
 118     result = PyDict_GetItem(interp->codec_search_cache, v);
 119     if (result != NULL) {
 120         Py_INCREF(result);
 121         Py_DECREF(v);
 122         return result;
 123     }
 124
 125     /* Next, scan the search functions in order of registration */
 126     args = PyTuple_New(1);
 127     if (args == NULL)
 128         goto onError;
 129     PyTuple_SET_ITEM(args,0,v);
 130
 131     len = PyList_Size(interp->codec_search_path);
 132     if (len < 0)
 133         goto onError;
 134     if (len == 0) {
 135         PyErr_SetString(PyExc_LookupError,
 136                         "no codec search functions registered: "
 137                         "can't find encoding");
 138         goto onError;
 139     }
 140
 141     for (i = 0; i < len; i++) {
 142         PyObject *func;
 143
 144         func = PyList_GetItem(interp->codec_search_path, i);
 145         if (func == NULL)
 146             goto onError;
 147         result = PyEval_CallObject(func, args);
 148         if (result == NULL)
 149             goto onError;
 150         if (result == Py_None) {
 151             Py_DECREF(result);
 152             continue;
 153         }
 154         if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
 155             PyErr_SetString(PyExc_TypeError,
 156                             "codec search functions must return 4-tuples");
 157             Py_DECREF(result);
 158             goto onError;
 159         }
 160         break;
 161     }
 162     if (i == len) {
 163         /* XXX Perhaps we should cache misses too ? */
 164         PyErr_Format(PyExc_LookupError,
 165                      "unknown encoding: %s", encoding);
 166         goto onError;
 167     }
 168
 169     /* Cache and return the result */
 170     PyDict_SetItem(interp->codec_search_cache, v, result);
 171     Py_DECREF(args);
 172     return result;
 173
 174  onError:
 175     Py_XDECREF(args);
 176     return NULL;
 177 }
 178
 179 static
 180 PyObject *args_tuple(PyObject *object,
 181                      const char *errors)
 182 {
 183     PyObject *args;
 184
 185     args = PyTuple_New(1 + (errors != NULL));
 186     if (args == NULL)
 187         return NULL;
 188     Py_INCREF(object);
 189     PyTuple_SET_ITEM(args,0,object);
 190     if (errors) {
 191         PyObject *v;
 192
 193         v = PyString_FromString(errors);
 194         if (v == NULL) {
 195             Py_DECREF(args);
 196             return NULL;
 197         }
 198         PyTuple_SET_ITEM(args, 1, v);
 199     }
 200     return args;
 201 }
 202
 203 /* Build a codec by calling factory(stream[,errors]) or just
 204    factory(errors) depending on whether the given parameters are
 205    non-NULL. */
 206
 207 static
 208 PyObject *build_stream_codec(PyObject *factory,
 209                              PyObject *stream,
 210                              const char *errors)
 211 {
 212     PyObject *args, *codec;
 213
 214     args = args_tuple(stream, errors);
 215     if (args == NULL)
 216         return NULL;
 217
 218     codec = PyEval_CallObject(factory, args);
 219     Py_DECREF(args);
 220     return codec;
 221 }
 222
 223 /* Convenience APIs to query the Codec registry.
 224
 225    All APIs return a codec object with incremented refcount.
 226
 227  */
 228
 229 PyObject *PyCodec_Encoder(const char *encoding)
 230 {
 231     PyObject *codecs;
 232     PyObject *v;
 233
 234     codecs = _PyCodec_Lookup(encoding);
 235     if (codecs == NULL)
 236         goto onError;
 237     v = PyTuple_GET_ITEM(codecs,0);
 238     Py_DECREF(codecs);
 239     Py_INCREF(v);
 240     return v;
 241
 242  onError:
 243     return NULL;
 244 }
 245
 246 PyObject *PyCodec_Decoder(const char *encoding)
 247 {
 248     PyObject *codecs;
 249     PyObject *v;
 250
 251     codecs = _PyCodec_Lookup(encoding);
 252     if (codecs == NULL)
 253         goto onError;
 254     v = PyTuple_GET_ITEM(codecs,1);
 255     Py_DECREF(codecs);
 256     Py_INCREF(v);
 257     return v;
 258
 259  onError:
 260     return NULL;
 261 }
 262
 263 PyObject *PyCodec_IncrementalEncoder(const char *encoding,
 264                                      const char *errors)
 265 {
 266     PyObject *codecs, *ret, *encoder;
 267
 268     codecs = _PyCodec_Lookup(encoding);
 269     if (codecs == NULL)
 270         goto onError;
 271     encoder = PyObject_GetAttrString(codecs, "incrementalencoder");
 272     if (encoder == NULL) {
 273         Py_DECREF(codecs);
 274         return NULL;
 275     }
 276     if (errors)
 277         ret = PyObject_CallFunction(encoder, "O", errors);
 278     else
 279         ret = PyObject_CallFunction(encoder, NULL);
 280     Py_DECREF(encoder);
 281     Py_DECREF(codecs);
 282     return ret;
 283
 284  onError:
 285     return NULL;
 286 }
 287
 288 PyObject *PyCodec_IncrementalDecoder(const char *encoding,
 289                                      const char *errors)
 290 {
 291     PyObject *codecs, *ret, *decoder;
 292
 293     codecs = _PyCodec_Lookup(encoding);
 294     if (codecs == NULL)
 295         goto onError;
 296     decoder = PyObject_GetAttrString(codecs, "incrementaldecoder");
 297     if (decoder == NULL) {
 298         Py_DECREF(codecs);
 299         return NULL;
 300     }
 301     if (errors)
 302         ret = PyObject_CallFunction(decoder, "O", errors);
 303     else
 304         ret = PyObject_CallFunction(decoder, NULL);
 305     Py_DECREF(decoder);
 306     Py_DECREF(codecs);
 307     return ret;
 308
 309  onError:
 310     return NULL;
 311 }
 312
 313 PyObject *PyCodec_StreamReader(const char *encoding,
 314                                PyObject *stream,
 315                                const char *errors)
 316 {
 317     PyObject *codecs, *ret;
 318
 319     codecs = _PyCodec_Lookup(encoding);
 320     if (codecs == NULL)
 321         goto onError;
 322     ret = build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
 323     Py_DECREF(codecs);
 324     return ret;
 325
 326  onError:
 327     return NULL;
 328 }
 329
 330 PyObject *PyCodec_StreamWriter(const char *encoding,
 331                                PyObject *stream,
 332                                const char *errors)
 333 {
 334     PyObject *codecs, *ret;
 335
 336     codecs = _PyCodec_Lookup(encoding);
 337     if (codecs == NULL)
 338         goto onError;
 339     ret = build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
 340     Py_DECREF(codecs);
 341     return ret;
 342
 343  onError:
 344     return NULL;
 345 }
 346
 347 /* Encode an object (e.g. an Unicode object) using the given encoding
 348    and return the resulting encoded object (usually a Python string).
 349
 350    errors is passed to the encoder factory as argument if non-NULL. */
 351
 352 PyObject *PyCodec_Encode(PyObject *object,
 353                          const char *encoding,
 354                          const char *errors)
 355 {
 356     PyObject *encoder = NULL;
 357     PyObject *args = NULL, *result = NULL;
 358     PyObject *v;
 359
 360     encoder = PyCodec_Encoder(encoding);
 361     if (encoder == NULL)
 362         goto onError;
 363
 364     args = args_tuple(object, errors);
 365     if (args == NULL)
 366         goto onError;
 367
 368     result = PyEval_CallObject(encoder,args);
 369     if (result == NULL)
 370         goto onError;
 371
 372     if (!PyTuple_Check(result) ||
 373         PyTuple_GET_SIZE(result) != 2) {
 374         PyErr_SetString(PyExc_TypeError,
 375                         "encoder must return a tuple (object,integer)");
 376         goto onError;
 377     }
 378     v = PyTuple_GET_ITEM(result,0);
 379     Py_INCREF(v);
 380     /* We don't check or use the second (integer) entry. */
 381
 382     Py_DECREF(args);
 383     Py_DECREF(encoder);
 384     Py_DECREF(result);
 385     return v;
 386
 387  onError:
 388     Py_XDECREF(result);
 389     Py_XDECREF(args);
 390     Py_XDECREF(encoder);
 391     return NULL;
 392 }
 393
 394 /* Decode an object (usually a Python string) using the given encoding
 395    and return an equivalent object (e.g. an Unicode object).
 396
 397    errors is passed to the decoder factory as argument if non-NULL. */
 398
 399 PyObject *PyCodec_Decode(PyObject *object,
 400                          const char *encoding,
 401                          const char *errors)
 402 {
 403     PyObject *decoder = NULL;
 404     PyObject *args = NULL, *result = NULL;
 405     PyObject *v;
 406
 407     decoder = PyCodec_Decoder(encoding);
 408     if (decoder == NULL)
 409         goto onError;
 410
 411     args = args_tuple(object, errors);
 412     if (args == NULL)
 413         goto onError;
 414
 415     result = PyEval_CallObject(decoder,args);
 416     if (result == NULL)
 417         goto onError;
 418     if (!PyTuple_Check(result) ||
 419         PyTuple_GET_SIZE(result) != 2) {
 420         PyErr_SetString(PyExc_TypeError,
 421                         "decoder must return a tuple (object,integer)");
 422         goto onError;
 423     }
 424     v = PyTuple_GET_ITEM(result,0);
 425     Py_INCREF(v);
 426     /* We don't check or use the second (integer) entry. */
 427
 428     Py_DECREF(args);
 429     Py_DECREF(decoder);
 430     Py_DECREF(result);
 431     return v;
 432
 433  onError:
 434     Py_XDECREF(args);
 435     Py_XDECREF(decoder);
 436     Py_XDECREF(result);
 437     return NULL;
 438 }
 439
 440 /* Register the error handling callback function error under the name
 441    name. This function will be called by the codec when it encounters
 442    an unencodable characters/undecodable bytes and doesn't know the
 443    callback name, when name is specified as the error parameter
 444    in the call to the encode/decode function.
 445    Return 0 on success, -1 on error */
 446 int PyCodec_RegisterError(const char *name, PyObject *error)
 447 {
 448     PyInterpreterState *interp = PyThreadState_GET()->interp;
 449     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 450         return -1;
 451     if (!PyCallable_Check(error)) {
 452         PyErr_SetString(PyExc_TypeError, "handler must be callable");
 453         return -1;
 454     }
 455     return PyDict_SetItemString(interp->codec_error_registry,
 456                                 (char *)name, error);
 457 }
 458
 459 /* Lookup the error handling callback function registered under the
 460    name error. As a special case NULL can be passed, in which case
 461    the error handling callback for strict encoding will be returned. */
 462 PyObject *PyCodec_LookupError(const char *name)
 463 {
 464     PyObject *handler = NULL;
 465
 466     PyInterpreterState *interp = PyThreadState_GET()->interp;
 467     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 468         return NULL;
 469
 470     if (name==NULL)
 471         name = "strict";
 472     handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
 473     if (!handler)
 474         PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
 475     else
 476         Py_INCREF(handler);
 477     return handler;
 478 }
 479
 480 static void wrong_exception_type(PyObject *exc)
 481 {
 482     PyObject *type = PyObject_GetAttrString(exc, "__class__");
 483     if (type != NULL) {
 484         PyObject *name = PyObject_GetAttrString(type, "__name__");
 485         Py_DECREF(type);
 486         if (name != NULL) {
 487             PyObject *string = PyObject_Str(name);
 488             Py_DECREF(name);
 489             if (string != NULL) {
 490                 PyErr_Format(PyExc_TypeError,
 491                     "don't know how to handle %.400s in error callback",
 492                     PyString_AS_STRING(string));
 493                 Py_DECREF(string);
 494             }
 495         }
 496     }
 497 }
 498
 499 PyObject *PyCodec_StrictErrors(PyObject *exc)
 500 {
 501     if (PyExceptionInstance_Check(exc))
 502         PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
 503     else
 504         PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
 505     return NULL;
 506 }
 507
 508
 509 #ifdef Py_USING_UNICODE
 510 PyObject *PyCodec_IgnoreErrors(PyObject *exc)
 511 {
 512     Py_ssize_t end;
 513     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 514         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 515             return NULL;
 516     }
 517     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 518         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 519             return NULL;
 520     }
 521     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 522         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 523             return NULL;
 524     }
 525     else {
 526         wrong_exception_type(exc);
 527         return NULL;
 528     }
 529     /* ouch: passing NULL, 0, pos gives None instead of u'' */
 530     return Py_BuildValue("(u#n)", &end, 0, end);
 531 }
 532
 533
 534 PyObject *PyCodec_ReplaceErrors(PyObject *exc)
 535 {
 536     PyObject *restuple;
 537     Py_ssize_t start;
 538     Py_ssize_t end;
 539     Py_ssize_t i;
 540
 541     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 542         PyObject *res;
 543         Py_UNICODE *p;
 544         if (PyUnicodeEncodeError_GetStart(exc, &start))
 545             return NULL;
 546         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 547             return NULL;
 548         res = PyUnicode_FromUnicode(NULL, end-start);
 549         if (res == NULL)
 550             return NULL;
 551         for (p = PyUnicode_AS_UNICODE(res), i = start;
 552             i<end; ++p, ++i)
 553             *p = '?';
 554         restuple = Py_BuildValue("(On)", res, end);
 555         Py_DECREF(res);
 556         return restuple;
 557     }
 558     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 559         Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
 560         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 561             return NULL;
 562         return Py_BuildValue("(u#n)", &res, 1, end);
 563     }
 564     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 565         PyObject *res;
 566         Py_UNICODE *p;
 567         if (PyUnicodeTranslateError_GetStart(exc, &start))
 568             return NULL;
 569         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 570             return NULL;
 571         res = PyUnicode_FromUnicode(NULL, end-start);
 572         if (res == NULL)
 573             return NULL;
 574         for (p = PyUnicode_AS_UNICODE(res), i = start;
 575             i<end; ++p, ++i)
 576             *p = Py_UNICODE_REPLACEMENT_CHARACTER;
 577         restuple = Py_BuildValue("(On)", res, end);
 578         Py_DECREF(res);
 579         return restuple;
 580     }
 581     else {
 582         wrong_exception_type(exc);
 583         return NULL;
 584     }
 585 }
 586
 587 PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
 588 {
 589     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 590         PyObject *restuple;
 591         PyObject *object;
 592         Py_ssize_t start;
 593         Py_ssize_t end;
 594         PyObject *res;
 595         Py_UNICODE *p;
 596         Py_UNICODE *startp;
 597         Py_UNICODE *outp;
 598         int ressize;
 599         if (PyUnicodeEncodeError_GetStart(exc, &start))
 600             return NULL;
 601         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 602             return NULL;
 603         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 604             return NULL;
 605         startp = PyUnicode_AS_UNICODE(object);
 606         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
 607             if (*p<10)
 608                 ressize += 2+1+1;
 609             else if (*p<100)
 610                 ressize += 2+2+1;
 611             else if (*p<1000)
 612                 ressize += 2+3+1;
 613             else if (*p<10000)
 614                 ressize += 2+4+1;
 615 #ifndef Py_UNICODE_WIDE
 616             else
 617                 ressize += 2+5+1;
 618 #else
 619             else if (*p<100000)
 620                 ressize += 2+5+1;
 621             else if (*p<1000000)
 622                 ressize += 2+6+1;
 623             else
 624                 ressize += 2+7+1;
 625 #endif
 626         }
 627         /* allocate replacement */
 628         res = PyUnicode_FromUnicode(NULL, ressize);
 629         if (res == NULL) {
 630             Py_DECREF(object);
 631             return NULL;
 632         }
 633         /* generate replacement */
 634         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
 635             p < startp+end; ++p) {
 636             Py_UNICODE c = *p;
 637             int digits;
 638             int base;
 639             *outp++ = '&';
 640             *outp++ = '#';
 641             if (*p<10) {
 642                 digits = 1;
 643                 base = 1;
 644             }
 645             else if (*p<100) {
 646                 digits = 2;
 647                 base = 10;
 648             }
 649             else if (*p<1000) {
 650                 digits = 3;
 651                 base = 100;
 652             }
 653             else if (*p<10000) {
 654                 digits = 4;
 655                 base = 1000;
 656             }
 657 #ifndef Py_UNICODE_WIDE
 658             else {
 659                 digits = 5;
 660                 base = 10000;
 661             }
 662 #else
 663             else if (*p<100000) {
 664                 digits = 5;
 665                 base = 10000;
 666             }
 667             else if (*p<1000000) {
 668                 digits = 6;
 669                 base = 100000;
 670             }
 671             else {
 672                 digits = 7;
 673                 base = 1000000;
 674             }
 675 #endif
 676             while (digits-->0) {
 677                 *outp++ = '0' + c/base;
 678                 c %= base;
 679                 base /= 10;
 680             }
 681             *outp++ = ';';
 682         }
 683         restuple = Py_BuildValue("(On)", res, end);
 684         Py_DECREF(res);
 685         Py_DECREF(object);
 686         return restuple;
 687     }
 688     else {
 689         wrong_exception_type(exc);
 690         return NULL;
 691     }
 692 }
 693
 694 static Py_UNICODE hexdigits[] = {
 695     '0', '1', '2', '3', '4', '5', '6', '7',
 696     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
 697 };
 698
 699 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 700 {
 701     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 702         PyObject *restuple;
 703         PyObject *object;
 704         Py_ssize_t start;
 705         Py_ssize_t end;
 706         PyObject *res;
 707         Py_UNICODE *p;
 708         Py_UNICODE *startp;
 709         Py_UNICODE *outp;
 710         int ressize;
 711         if (PyUnicodeEncodeError_GetStart(exc, &start))
 712             return NULL;
 713         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 714             return NULL;
 715         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 716             return NULL;
 717         startp = PyUnicode_AS_UNICODE(object);
 718         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
 719 #ifdef Py_UNICODE_WIDE
 720             if (*p >= 0x00010000)
 721                 ressize += 1+1+8;
 722             else
 723 #endif
 724             if (*p >= 0x100) {
 725                 ressize += 1+1+4;
 726             }
 727             else
 728                 ressize += 1+1+2;
 729         }
 730         res = PyUnicode_FromUnicode(NULL, ressize);
 731         if (res==NULL)
 732             return NULL;
 733         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
 734             p < startp+end; ++p) {
 735             Py_UNICODE c = *p;
 736             *outp++ = '\\';
 737 #ifdef Py_UNICODE_WIDE
 738             if (c >= 0x00010000) {
 739                 *outp++ = 'U';
 740                 *outp++ = hexdigits[(c>>28)&0xf];
 741                 *outp++ = hexdigits[(c>>24)&0xf];
 742                 *outp++ = hexdigits[(c>>20)&0xf];
 743                 *outp++ = hexdigits[(c>>16)&0xf];
 744                 *outp++ = hexdigits[(c>>12)&0xf];
 745                 *outp++ = hexdigits[(c>>8)&0xf];
 746             }
 747             else
 748 #endif
 749             if (c >= 0x100) {
 750                 *outp++ = 'u';
 751                 *outp++ = hexdigits[(c>>12)&0xf];
 752                 *outp++ = hexdigits[(c>>8)&0xf];
 753             }
 754             else
 755                 *outp++ = 'x';
 756             *outp++ = hexdigits[(c>>4)&0xf];
 757             *outp++ = hexdigits[c&0xf];
 758         }
 759
 760         restuple = Py_BuildValue("(On)", res, end);
 761         Py_DECREF(res);
 762         Py_DECREF(object);
 763         return restuple;
 764     }
 765     else {
 766         wrong_exception_type(exc);
 767         return NULL;
 768     }
 769 }
 770 #endif
 771
 772 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 773 {
 774     return PyCodec_StrictErrors(exc);
 775 }
 776
 777
 778 #ifdef Py_USING_UNICODE
 779 static PyObject *ignore_errors(PyObject *self, PyObject *exc)
 780 {
 781     return PyCodec_IgnoreErrors(exc);
 782 }
 783
 784
 785 static PyObject *replace_errors(PyObject *self, PyObject *exc)
 786 {
 787     return PyCodec_ReplaceErrors(exc);
 788 }
 789
 790
 791 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
 792 {
 793     return PyCodec_XMLCharRefReplaceErrors(exc);
 794 }
 795
 796
 797 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
 798 {
 799     return PyCodec_BackslashReplaceErrors(exc);
 800 }
 801 #endif
 802
 803 static int _PyCodecRegistry_Init(void)
 804 {
 805     static struct {
 806         char *name;
 807         PyMethodDef def;
 808     } methods[] =
 809     {
 810         {
 811             "strict",
 812             {
 813                 "strict_errors",
 814                 strict_errors,
 815                 METH_O
 816             }
 817         },
 818 #ifdef Py_USING_UNICODE
 819         {
 820             "ignore",
 821             {
 822                 "ignore_errors",
 823                 ignore_errors,
 824                 METH_O
 825             }
 826         },
 827         {
 828             "replace",
 829             {
 830                 "replace_errors",
 831                 replace_errors,
 832                 METH_O
 833             }
 834         },
 835         {
 836             "xmlcharrefreplace",
 837             {
 838                 "xmlcharrefreplace_errors",
 839                 xmlcharrefreplace_errors,
 840                 METH_O
 841             }
 842         },
 843         {
 844             "backslashreplace",
 845             {
 846                 "backslashreplace_errors",
 847                 backslashreplace_errors,
 848                 METH_O
 849             }
 850         }
 851 #endif
 852     };
 853
 854     PyInterpreterState *interp = PyThreadState_GET()->interp;
 855     PyObject *mod;
 856     unsigned i;
 857
 858     if (interp->codec_search_path != NULL)
 859         return 0;
 860
 861     interp->codec_search_path = PyList_New(0);
 862     interp->codec_search_cache = PyDict_New();
 863     interp->codec_error_registry = PyDict_New();
 864
 865     if (interp->codec_error_registry) {
 866         for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
 867             PyObject *func = PyCFunction_New(&methods[i].def, NULL);
 868             int res;
 869             if (!func)
 870                 Py_FatalError("can't initialize codec error registry");
 871             res = PyCodec_RegisterError(methods[i].name, func);
 872             Py_DECREF(func);
 873             if (res)
 874                 Py_FatalError("can't initialize codec error registry");
 875         }
 876     }
 877
 878     if (interp->codec_search_path == NULL ||
 879         interp->codec_search_cache == NULL ||
 880         interp->codec_error_registry == NULL)
 881         Py_FatalError("can't initialize codec registry");
 882
 883     mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
 884     if (mod == NULL) {
 885         if (PyErr_ExceptionMatches(PyExc_ImportError)) {
 886             /* Ignore ImportErrors... this is done so that
 887                distributions can disable the encodings package. Note
 888                that other errors are not masked, e.g. SystemErrors
 889                raised to inform the user of an error in the Python
 890                configuration are still reported back to the user. */
 891             PyErr_Clear();
 892             return 0;
 893         }
 894         return -1;
 895     }
 896     Py_DECREF(mod);
 897     return 0;
 898 }