Lib/test/test_codeccallbacks.py

   1 import test.test_support, unittest
   2 import sys, codecs, htmlentitydefs, unicodedata
   3
   4 class PosReturn:
   5     # this can be used for configurable callbacks
   6
   7     def __init__(self):
   8         self.pos = 0
   9
  10     def handle(self, exc):
  11         oldpos = self.pos
  12         realpos = oldpos
  13         if realpos<0:
  14             realpos = len(exc.object) + realpos
  15         # if we don't advance this time, terminate on the next call
  16         # otherwise we'd get an endless loop
  17         if realpos <= exc.start:
  18             self.pos = len(exc.object)
  19         return (u"<?>", oldpos)
  20
  21 # A UnicodeEncodeError object with a bad start attribute
  22 class BadStartUnicodeEncodeError(UnicodeEncodeError):
  23     def __init__(self):
  24         UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
  25         self.start = []
  26
  27 # A UnicodeEncodeError object with a bad object attribute
  28 class BadObjectUnicodeEncodeError(UnicodeEncodeError):
  29     def __init__(self):
  30         UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
  31         self.object = []
  32
  33 # A UnicodeDecodeError object without an end attribute
  34 class NoEndUnicodeDecodeError(UnicodeDecodeError):
  35     def __init__(self):
  36         UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
  37         del self.end
  38
  39 # A UnicodeDecodeError object with a bad object attribute
  40 class BadObjectUnicodeDecodeError(UnicodeDecodeError):
  41     def __init__(self):
  42         UnicodeDecodeError.__init__(self, "ascii", "", 0, 1, "bad")
  43         self.object = []
  44
  45 # A UnicodeTranslateError object without a start attribute
  46 class NoStartUnicodeTranslateError(UnicodeTranslateError):
  47     def __init__(self):
  48         UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
  49         del self.start
  50
  51 # A UnicodeTranslateError object without an end attribute
  52 class NoEndUnicodeTranslateError(UnicodeTranslateError):
  53     def __init__(self):
  54         UnicodeTranslateError.__init__(self,  u"", 0, 1, "bad")
  55         del self.end
  56
  57 # A UnicodeTranslateError object without an object attribute
  58 class NoObjectUnicodeTranslateError(UnicodeTranslateError):
  59     def __init__(self):
  60         UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
  61         del self.object
  62
  63 class CodecCallbackTest(unittest.TestCase):
  64
  65     def test_xmlcharrefreplace(self):
  66         # replace unencodable characters which numeric character entities.
  67         # For ascii, latin-1 and charmaps this is completely implemented
  68         # in C and should be reasonably fast.
  69         s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
  70         self.assertEqual(
  71             s.encode("ascii", "xmlcharrefreplace"),
  72             "&#12473;&#12497;&#12514; &#228;nd eggs"
  73         )
  74         self.assertEqual(
  75             s.encode("latin-1", "xmlcharrefreplace"),
  76             "&#12473;&#12497;&#12514; \xe4nd eggs"
  77         )
  78
  79     def test_xmlcharnamereplace(self):
  80         # This time use a named character entity for unencodable
  81         # characters, if one is available.
  82
  83         def xmlcharnamereplace(exc):
  84             if not isinstance(exc, UnicodeEncodeError):
  85                 raise TypeError("don't know how to handle %r" % exc)
  86             l = []
  87             for c in exc.object[exc.start:exc.end]:
  88                 try:
  89                     l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
  90                 except KeyError:
  91                     l.append(u"&#%d;" % ord(c))
  92             return (u"".join(l), exc.end)
  93
  94         codecs.register_error(
  95             "test.xmlcharnamereplace", xmlcharnamereplace)
  96
  97         sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
  98         sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
  99         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
 100         sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
 101         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
 102         sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
 103         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
 104
 105     def test_uninamereplace(self):
 106         # We're using the names from the unicode database this time,
 107         # and we're doing "syntax highlighting" here, i.e. we include
 108         # the replaced text in ANSI escape sequences. For this it is
 109         # useful that the error handler is not called for every single
 110         # unencodable character, but for a complete sequence of
 111         # unencodable characters, otherwise we would output many
 112         # unnecessary escape sequences.
 113
 114         def uninamereplace(exc):
 115             if not isinstance(exc, UnicodeEncodeError):
 116                 raise TypeError("don't know how to handle %r" % exc)
 117             l = []
 118             for c in exc.object[exc.start:exc.end]:
 119                 l.append(unicodedata.name(c, u"0x%x" % ord(c)))
 120             return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
 121
 122         codecs.register_error(
 123             "test.uninamereplace", uninamereplace)
 124
 125         sin = u"\xac\u1234\u20ac\u8000"
 126         sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
 127         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
 128
 129         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
 130         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
 131
 132         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
 133         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
 134
 135     def test_backslashescape(self):
 136         # Does the same as the "unicode-escape" encoding, but with different
 137         # base encodings.
 138         sin = u"a\xac\u1234\u20ac\u8000"
 139         if sys.maxunicode > 0xffff:
 140             sin += unichr(sys.maxunicode)
 141         sout = "a\\xac\\u1234\\u20ac\\u8000"
 142         if sys.maxunicode > 0xffff:
 143             sout += "\\U%08x" % sys.maxunicode
 144         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
 145
 146         sout = "a\xac\\u1234\\u20ac\\u8000"
 147         if sys.maxunicode > 0xffff:
 148             sout += "\\U%08x" % sys.maxunicode
 149         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
 150
 151         sout = "a\xac\\u1234\xa4\\u8000"
 152         if sys.maxunicode > 0xffff:
 153             sout += "\\U%08x" % sys.maxunicode
 154         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
 155
 156     def test_decoderelaxedutf8(self):
 157         # This is the test for a decoding callback handler,
 158         # that relaxes the UTF-8 minimal encoding restriction.
 159         # A null byte that is encoded as "\xc0\x80" will be
 160         # decoded as a null byte. All other illegal sequences
 161         # will be handled strictly.
 162         def relaxedutf8(exc):
 163             if not isinstance(exc, UnicodeDecodeError):
 164                 raise TypeError("don't know how to handle %r" % exc)
 165             if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
 166                 return (u"\x00", exc.start+2) # retry after two bytes
 167             else:
 168                 raise exc
 169
 170         codecs.register_error(
 171             "test.relaxedutf8", relaxedutf8)
 172
 173         sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
 174         sout = u"a\x00b\x00c\xfc\x00\x00"
 175         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
 176         sin = "\xc0\x80\xc0\x81"
 177         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
 178
 179     def test_charmapencode(self):
 180         # For charmap encodings the replacement string will be
 181         # mapped through the encoding again. This means, that
 182         # to be able to use e.g. the "replace" handler, the
 183         # charmap has to have a mapping for "?".
 184         charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
 185         sin = u"abc"
 186         sout = "AABBCC"
 187         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
 188
 189         sin = u"abcA"
 190         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
 191
 192         charmap[ord("?")] = "XYZ"
 193         sin = u"abcDEF"
 194         sout = "AABBCCXYZXYZXYZ"
 195         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
 196
 197         charmap[ord("?")] = u"XYZ"
 198         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
 199
 200         charmap[ord("?")] = u"XYZ"
 201         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
 202
 203     def test_decodeunicodeinternal(self):
 204         self.assertRaises(
 205             UnicodeDecodeError,
 206             "\x00\x00\x00\x00\x00".decode,
 207             "unicode-internal",
 208         )
 209         if sys.maxunicode > 0xffff:
 210             def handler_unicodeinternal(exc):
 211                 if not isinstance(exc, UnicodeDecodeError):
 212                     raise TypeError("don't know how to handle %r" % exc)
 213                 return (u"\x01", 1)
 214
 215             self.assertEqual(
 216                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
 217                 u"\u0000"
 218             )
 219
 220             self.assertEqual(
 221                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
 222                 u"\u0000\ufffd"
 223             )
 224
 225             codecs.register_error("test.hui", handler_unicodeinternal)
 226
 227             self.assertEqual(
 228                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
 229                 u"\u0000\u0001\u0000"
 230             )
 231
 232     def test_callbacks(self):
 233         def handler1(exc):
 234             if not isinstance(exc, UnicodeEncodeError) \
 235                and not isinstance(exc, UnicodeDecodeError):
 236                 raise TypeError("don't know how to handle %r" % exc)
 237             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
 238             return (u"[%s]" % u"".join(l), exc.end)
 239
 240         codecs.register_error("test.handler1", handler1)
 241
 242         def handler2(exc):
 243             if not isinstance(exc, UnicodeDecodeError):
 244                 raise TypeError("don't know how to handle %r" % exc)
 245             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
 246             return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
 247
 248         codecs.register_error("test.handler2", handler2)
 249
 250         s = "\x00\x81\x7f\x80\xff"
 251
 252         self.assertEqual(
 253             s.decode("ascii", "test.handler1"),
 254             u"\x00[<129>]\x7f[<128>][<255>]"
 255         )
 256         self.assertEqual(
 257             s.decode("ascii", "test.handler2"),
 258             u"\x00[<129>][<128>]"
 259         )
 260
 261         self.assertEqual(
 262             "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
 263             u"\u3042[<92><117><51><120>]xx"
 264         )
 265
 266         self.assertEqual(
 267             "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
 268             u"\u3042[<92><117><51><120><120>]"
 269         )
 270
 271         self.assertEqual(
 272             codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
 273             u"z[<98>][<99>]"
 274         )
 275
 276         self.assertEqual(
 277             u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
 278             u"g[<252><223>]rk"
 279         )
 280
 281         self.assertEqual(
 282             u"g\xfc\xdf".encode("ascii", "test.handler1"),
 283             u"g[<252><223>]"
 284         )
 285
 286     def test_longstrings(self):
 287         # test long strings to check for memory overflow problems
 288         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace",
 289                    "backslashreplace"]
 290         # register the handlers under different names,
 291         # to prevent the codec from recognizing the name
 292         for err in errors:
 293             codecs.register_error("test." + err, codecs.lookup_error(err))
 294         l = 1000
 295         errors += [ "test." + err for err in errors ]
 296         for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
 297             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
 298                         "utf-8", "utf-7", "utf-16", "utf-32"):
 299                 for err in errors:
 300                     try:
 301                         uni.encode(enc, err)
 302                     except UnicodeError:
 303                         pass
 304
 305     def check_exceptionobjectargs(self, exctype, args, msg):
 306         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
 307         # check with one missing argument
 308         self.assertRaises(TypeError, exctype, *args[:-1])
 309         # check with one argument too much
 310         self.assertRaises(TypeError, exctype, *(args + ["too much"]))
 311         # check with one argument of the wrong type
 312         wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
 313         for i in xrange(len(args)):
 314             for wrongarg in wrongargs:
 315                 if type(wrongarg) is type(args[i]):
 316                     continue
 317                 # build argument array
 318                 callargs = []
 319                 for j in xrange(len(args)):
 320                     if i==j:
 321                         callargs.append(wrongarg)
 322                     else:
 323                         callargs.append(args[i])
 324                 self.assertRaises(TypeError, exctype, *callargs)
 325
 326         # check with the correct number and type of arguments
 327         exc = exctype(*args)
 328         self.assertEquals(str(exc), msg)
 329
 330     def test_unicodeencodeerror(self):
 331         self.check_exceptionobjectargs(
 332             UnicodeEncodeError,
 333             ["ascii", u"g\xfcrk", 1, 2, "ouch"],
 334             "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
 335         )
 336         self.check_exceptionobjectargs(
 337             UnicodeEncodeError,
 338             ["ascii", u"g\xfcrk", 1, 4, "ouch"],
 339             "'ascii' codec can't encode characters in position 1-3: ouch"
 340         )
 341         self.check_exceptionobjectargs(
 342             UnicodeEncodeError,
 343             ["ascii", u"\xfcx", 0, 1, "ouch"],
 344             "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
 345         )
 346         self.check_exceptionobjectargs(
 347             UnicodeEncodeError,
 348             ["ascii", u"\u0100x", 0, 1, "ouch"],
 349             "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
 350         )
 351         self.check_exceptionobjectargs(
 352             UnicodeEncodeError,
 353             ["ascii", u"\uffffx", 0, 1, "ouch"],
 354             "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
 355         )
 356         if sys.maxunicode > 0xffff:
 357             self.check_exceptionobjectargs(
 358                 UnicodeEncodeError,
 359                 ["ascii", u"\U00010000x", 0, 1, "ouch"],
 360                 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
 361             )
 362
 363     def test_unicodedecodeerror(self):
 364         self.check_exceptionobjectargs(
 365             UnicodeDecodeError,
 366             ["ascii", "g\xfcrk", 1, 2, "ouch"],
 367             "'ascii' codec can't decode byte 0xfc in position 1: ouch"
 368         )
 369         self.check_exceptionobjectargs(
 370             UnicodeDecodeError,
 371             ["ascii", "g\xfcrk", 1, 3, "ouch"],
 372             "'ascii' codec can't decode bytes in position 1-2: ouch"
 373         )
 374
 375     def test_unicodetranslateerror(self):
 376         self.check_exceptionobjectargs(
 377             UnicodeTranslateError,
 378             [u"g\xfcrk", 1, 2, "ouch"],
 379             "can't translate character u'\\xfc' in position 1: ouch"
 380         )
 381         self.check_exceptionobjectargs(
 382             UnicodeTranslateError,
 383             [u"g\u0100rk", 1, 2, "ouch"],
 384             "can't translate character u'\\u0100' in position 1: ouch"
 385         )
 386         self.check_exceptionobjectargs(
 387             UnicodeTranslateError,
 388             [u"g\uffffrk", 1, 2, "ouch"],
 389             "can't translate character u'\\uffff' in position 1: ouch"
 390         )
 391         if sys.maxunicode > 0xffff:
 392             self.check_exceptionobjectargs(
 393                 UnicodeTranslateError,
 394                 [u"g\U00010000rk", 1, 2, "ouch"],
 395                 "can't translate character u'\\U00010000' in position 1: ouch"
 396             )
 397         self.check_exceptionobjectargs(
 398             UnicodeTranslateError,
 399             [u"g\xfcrk", 1, 3, "ouch"],
 400             "can't translate characters in position 1-2: ouch"
 401         )
 402
 403     def test_badandgoodstrictexceptions(self):
 404         # "strict" complains about a non-exception passed in
 405         self.assertRaises(
 406             TypeError,
 407             codecs.strict_errors,
 408             42
 409         )
 410         # "strict" complains about the wrong exception type
 411         self.assertRaises(
 412             Exception,
 413             codecs.strict_errors,
 414             Exception("ouch")
 415         )
 416
 417         # If the correct exception is passed in, "strict" raises it
 418         self.assertRaises(
 419             UnicodeEncodeError,
 420             codecs.strict_errors,
 421             UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
 422         )
 423
 424     def test_badandgoodignoreexceptions(self):
 425         # "ignore" complains about a non-exception passed in
 426         self.assertRaises(
 427            TypeError,
 428            codecs.ignore_errors,
 429            42
 430         )
 431         # "ignore" complains about the wrong exception type
 432         self.assertRaises(
 433            TypeError,
 434            codecs.ignore_errors,
 435            UnicodeError("ouch")
 436         )
 437         # If the correct exception is passed in, "ignore" returns an empty replacement
 438         self.assertEquals(
 439             codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
 440             (u"", 1)
 441         )
 442         self.assertEquals(
 443             codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
 444             (u"", 1)
 445         )
 446         self.assertEquals(
 447             codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
 448             (u"", 1)
 449         )
 450
 451     def test_badandgoodreplaceexceptions(self):
 452         # "replace" complains about a non-exception passed in
 453         self.assertRaises(
 454            TypeError,
 455            codecs.replace_errors,
 456            42
 457         )
 458         # "replace" complains about the wrong exception type
 459         self.assertRaises(
 460            TypeError,
 461            codecs.replace_errors,
 462            UnicodeError("ouch")
 463         )
 464         self.assertRaises(
 465             TypeError,
 466             codecs.replace_errors,
 467             BadObjectUnicodeEncodeError()
 468         )
 469         self.assertRaises(
 470             TypeError,
 471             codecs.replace_errors,
 472             BadObjectUnicodeDecodeError()
 473         )
 474         # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
 475         self.assertEquals(
 476             codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
 477             (u"?", 1)
 478         )
 479         self.assertEquals(
 480             codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
 481             (u"\ufffd", 1)
 482         )
 483         self.assertEquals(
 484             codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
 485             (u"\ufffd", 1)
 486         )
 487
 488     def test_badandgoodxmlcharrefreplaceexceptions(self):
 489         # "xmlcharrefreplace" complains about a non-exception passed in
 490         self.assertRaises(
 491            TypeError,
 492            codecs.xmlcharrefreplace_errors,
 493            42
 494         )
 495         # "xmlcharrefreplace" complains about the wrong exception types
 496         self.assertRaises(
 497            TypeError,
 498            codecs.xmlcharrefreplace_errors,
 499            UnicodeError("ouch")
 500         )
 501         # "xmlcharrefreplace" can only be used for encoding
 502         self.assertRaises(
 503             TypeError,
 504             codecs.xmlcharrefreplace_errors,
 505             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
 506         )
 507         self.assertRaises(
 508             TypeError,
 509             codecs.xmlcharrefreplace_errors,
 510             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
 511         )
 512         # Use the correct exception
 513         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
 514         s = "".join(unichr(c) for c in cs)
 515         self.assertEquals(
 516             codecs.xmlcharrefreplace_errors(
 517                 UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
 518             ),
 519             (u"".join(u"&#%d;" % ord(c) for c in s), len(s))
 520         )
 521
 522     def test_badandgoodbackslashreplaceexceptions(self):
 523         # "backslashreplace" complains about a non-exception passed in
 524         self.assertRaises(
 525            TypeError,
 526            codecs.backslashreplace_errors,
 527            42
 528         )
 529         # "backslashreplace" complains about the wrong exception types
 530         self.assertRaises(
 531            TypeError,
 532            codecs.backslashreplace_errors,
 533            UnicodeError("ouch")
 534         )
 535         # "backslashreplace" can only be used for encoding
 536         self.assertRaises(
 537             TypeError,
 538             codecs.backslashreplace_errors,
 539             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
 540         )
 541         self.assertRaises(
 542             TypeError,
 543             codecs.backslashreplace_errors,
 544             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
 545         )
 546         # Use the correct exception
 547         self.assertEquals(
 548             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
 549             (u"\\u3042", 1)
 550         )
 551         self.assertEquals(
 552             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
 553             (u"\\x00", 1)
 554         )
 555         self.assertEquals(
 556             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
 557             (u"\\xff", 1)
 558         )
 559         self.assertEquals(
 560             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
 561             (u"\\u0100", 1)
 562         )
 563         self.assertEquals(
 564             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
 565             (u"\\uffff", 1)
 566         )
 567         if sys.maxunicode>0xffff:
 568             self.assertEquals(
 569                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
 570                 (u"\\U00010000", 1)
 571             )
 572             self.assertEquals(
 573                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
 574                 (u"\\U0010ffff", 1)
 575             )
 576
 577     def test_badhandlerresults(self):
 578         results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
 579         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
 580
 581         for res in results:
 582             codecs.register_error("test.badhandler", lambda x: res)
 583             for enc in encs:
 584                 self.assertRaises(
 585                     TypeError,
 586                     u"\u3042".encode,
 587                     enc,
 588                     "test.badhandler"
 589                 )
 590             for (enc, bytes) in (
 591                 ("ascii", "\xff"),
 592                 ("utf-8", "\xff"),
 593                 ("utf-7", "+x-"),
 594                 ("unicode-internal", "\x00"),
 595             ):
 596                 self.assertRaises(
 597                     TypeError,
 598                     bytes.decode,
 599                     enc,
 600                     "test.badhandler"
 601                 )
 602
 603     def test_lookup(self):
 604         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
 605         self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
 606         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
 607         self.assertEquals(
 608             codecs.xmlcharrefreplace_errors,
 609             codecs.lookup_error("xmlcharrefreplace")
 610         )
 611         self.assertEquals(
 612             codecs.backslashreplace_errors,
 613             codecs.lookup_error("backslashreplace")
 614         )
 615
 616     def test_unencodablereplacement(self):
 617         def unencrepl(exc):
 618             if isinstance(exc, UnicodeEncodeError):
 619                 return (u"\u4242", exc.end)
 620             else:
 621                 raise TypeError("don't know how to handle %r" % exc)
 622         codecs.register_error("test.unencreplhandler", unencrepl)
 623         for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
 624             self.assertRaises(
 625                 UnicodeEncodeError,
 626                 u"\u4242".encode,
 627                 enc,
 628                 "test.unencreplhandler"
 629             )
 630
 631     def test_badregistercall(self):
 632         # enhance coverage of:
 633         # Modules/_codecsmodule.c::register_error()
 634         # Python/codecs.c::PyCodec_RegisterError()
 635         self.assertRaises(TypeError, codecs.register_error, 42)
 636         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
 637
 638     def test_badlookupcall(self):
 639         # enhance coverage of:
 640         # Modules/_codecsmodule.c::lookup_error()
 641         self.assertRaises(TypeError, codecs.lookup_error)
 642
 643     def test_unknownhandler(self):
 644         # enhance coverage of:
 645         # Modules/_codecsmodule.c::lookup_error()
 646         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
 647
 648     def test_xmlcharrefvalues(self):
 649         # enhance coverage of:
 650         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
 651         # and inline implementations
 652         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
 653         if sys.maxunicode>=100000:
 654             v += (100000, 500000, 1000000)
 655         s = u"".join([unichr(x) for x in v])
 656         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
 657         for enc in ("ascii", "iso-8859-15"):
 658             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
 659                 s.encode(enc, err)
 660
 661     def test_decodehelper(self):
 662         # enhance coverage of:
 663         # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
 664         # and callers
 665         self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")
 666
 667         def baddecodereturn1(exc):
 668             return 42
 669         codecs.register_error("test.baddecodereturn1", baddecodereturn1)
 670         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
 671         self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
 672         self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
 673         self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
 674         self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
 675         self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
 676
 677         def baddecodereturn2(exc):
 678             return (u"?", None)
 679         codecs.register_error("test.baddecodereturn2", baddecodereturn2)
 680         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")
 681
 682         handler = PosReturn()
 683         codecs.register_error("test.posreturn", handler.handle)
 684
 685         # Valid negative position
 686         handler.pos = -1
 687         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
 688
 689         # Valid negative position
 690         handler.pos = -2
 691         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
 692
 693         # Negative position out of bounds
 694         handler.pos = -3
 695         self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
 696
 697         # Valid positive position
 698         handler.pos = 1
 699         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
 700
 701         # Largest valid positive position (one beyond end of input)
 702         handler.pos = 2
 703         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>")
 704
 705         # Invalid positive position
 706         handler.pos = 3
 707         self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
 708
 709         # Restart at the "0"
 710         handler.pos = 6
 711         self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
 712
 713         class D(dict):
 714             def __getitem__(self, key):
 715                 raise ValueError
 716         self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
 717         self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
 718         self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})
 719
 720     def test_encodehelper(self):
 721         # enhance coverage of:
 722         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
 723         # and callers
 724         self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")
 725
 726         def badencodereturn1(exc):
 727             return 42
 728         codecs.register_error("test.badencodereturn1", badencodereturn1)
 729         self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")
 730
 731         def badencodereturn2(exc):
 732             return (u"?", None)
 733         codecs.register_error("test.badencodereturn2", badencodereturn2)
 734         self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")
 735
 736         handler = PosReturn()
 737         codecs.register_error("test.posreturn", handler.handle)
 738
 739         # Valid negative position
 740         handler.pos = -1
 741         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
 742
 743         # Valid negative position
 744         handler.pos = -2
 745         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
 746
 747         # Negative position out of bounds
 748         handler.pos = -3
 749         self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
 750
 751         # Valid positive position
 752         handler.pos = 1
 753         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
 754
 755         # Largest valid positive position (one beyond end of input
 756         handler.pos = 2
 757         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
 758
 759         # Invalid positive position
 760         handler.pos = 3
 761         self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
 762
 763         handler.pos = 0
 764
 765         class D(dict):
 766             def __getitem__(self, key):
 767                 raise ValueError
 768         for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
 769             self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
 770             self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
 771             self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
 772
 773     def test_translatehelper(self):
 774         # enhance coverage of:
 775         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
 776         # and callers
 777         # (Unfortunately the errors argument is not directly accessible
 778         # from Python, so we can't test that much)
 779         class D(dict):
 780             def __getitem__(self, key):
 781                 raise ValueError
 782         self.assertRaises(ValueError, u"\xff".translate, D())
 783         self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
 784         self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
 785
 786     def test_bug828737(self):
 787         charmap = {
 788             ord("&"): u"&amp;",
 789             ord("<"): u"&lt;",
 790             ord(">"): u"&gt;",
 791             ord('"'): u"&quot;",
 792         }
 793
 794         for n in (1, 10, 100, 1000):
 795             text = u'abc<def>ghi'*n
 796             text.translate(charmap)
 797
 798 def test_main():
 799     test.test_support.run_unittest(CodecCallbackTest)
 800
 801 if __name__ == "__main__":
 802     test_main()