1 import test
.test_support
, unittest
2 import sys
, codecs
, htmlentitydefs
, unicodedata
5 # this can be used for configurable callbacks
10 def handle(self
, exc
):
14 realpos
= len(exc
.object) + realpos
15 # if we don't advance this time, terminate on the next call
16 # otherwise we'd get an endless loop
17 if realpos
<= exc
.start
:
18 self
.pos
= len(exc
.object)
19 return (u
"<?>", oldpos
)
21 # A UnicodeEncodeError object with a bad start attribute
22 class BadStartUnicodeEncodeError(UnicodeEncodeError):
24 UnicodeEncodeError.__init
__(self
, "ascii", u
"", 0, 1, "bad")
27 # A UnicodeEncodeError object with a bad object attribute
28 class BadObjectUnicodeEncodeError(UnicodeEncodeError):
30 UnicodeEncodeError.__init
__(self
, "ascii", u
"", 0, 1, "bad")
33 # A UnicodeDecodeError object without an end attribute
34 class NoEndUnicodeDecodeError(UnicodeDecodeError):
36 UnicodeDecodeError.__init
__(self
, "ascii", "", 0, 1, "bad")
39 # A UnicodeDecodeError object with a bad object attribute
40 class BadObjectUnicodeDecodeError(UnicodeDecodeError):
42 UnicodeDecodeError.__init
__(self
, "ascii", "", 0, 1, "bad")
45 # A UnicodeTranslateError object without a start attribute
46 class NoStartUnicodeTranslateError(UnicodeTranslateError):
48 UnicodeTranslateError.__init
__(self
, u
"", 0, 1, "bad")
51 # A UnicodeTranslateError object without an end attribute
52 class NoEndUnicodeTranslateError(UnicodeTranslateError):
54 UnicodeTranslateError.__init
__(self
, u
"", 0, 1, "bad")
57 # A UnicodeTranslateError object without an object attribute
58 class NoObjectUnicodeTranslateError(UnicodeTranslateError):
60 UnicodeTranslateError.__init
__(self
, u
"", 0, 1, "bad")
63 class CodecCallbackTest(unittest
.TestCase
):
65 def test_xmlcharrefreplace(self
):
66 # replace unencodable characters which numeric character entities.
67 # For ascii, latin-1 and charmaps this is completely implemented
68 # in C and should be reasonably fast.
69 s
= u
"\u30b9\u30d1\u30e2 \xe4nd eggs"
71 s
.encode("ascii", "xmlcharrefreplace"),
72 "スパモ änd eggs"
75 s
.encode("latin-1", "xmlcharrefreplace"),
76 "スパモ \xe4nd eggs"
79 def test_xmlcharnamereplace(self
):
80 # This time use a named character entity for unencodable
81 # characters, if one is available.
83 def xmlcharnamereplace(exc
):
84 if not isinstance(exc
, UnicodeEncodeError):
85 raise TypeError("don't know how to handle %r" % exc
)
87 for c
in exc
.object[exc
.start
:exc
.end
]:
89 l
.append(u
"&%s;" % htmlentitydefs
.codepoint2name
[ord(c
)])
91 l
.append(u
"&#%d;" % ord(c
))
92 return (u
"".join(l
), exc
.end
)
94 codecs
.register_error(
95 "test.xmlcharnamereplace", xmlcharnamereplace
)
97 sin
= u
"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
98 sout
= "«ℜ» = ⟨ሴ€⟩"
99 self
.assertEqual(sin
.encode("ascii", "test.xmlcharnamereplace"), sout
)
100 sout
= "\xabℜ\xbb = ⟨ሴ€⟩"
101 self
.assertEqual(sin
.encode("latin-1", "test.xmlcharnamereplace"), sout
)
102 sout
= "\xabℜ\xbb = ⟨ሴ\xa4⟩"
103 self
.assertEqual(sin
.encode("iso-8859-15", "test.xmlcharnamereplace"), sout
)
105 def test_uninamereplace(self
):
106 # We're using the names from the unicode database this time,
107 # and we're doing "syntax highlighting" here, i.e. we include
108 # the replaced text in ANSI escape sequences. For this it is
109 # useful that the error handler is not called for every single
110 # unencodable character, but for a complete sequence of
111 # unencodable characters, otherwise we would output many
112 # unnecessary escape sequences.
114 def uninamereplace(exc
):
115 if not isinstance(exc
, UnicodeEncodeError):
116 raise TypeError("don't know how to handle %r" % exc
)
118 for c
in exc
.object[exc
.start
:exc
.end
]:
119 l
.append(unicodedata
.name(c
, u
"0x%x" % ord(c
)))
120 return (u
"\033[1m%s\033[0m" % u
", ".join(l
), exc
.end
)
122 codecs
.register_error(
123 "test.uninamereplace", uninamereplace
)
125 sin
= u
"\xac\u1234\u20ac\u8000"
126 sout
= "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
127 self
.assertEqual(sin
.encode("ascii", "test.uninamereplace"), sout
)
129 sout
= "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
130 self
.assertEqual(sin
.encode("latin-1", "test.uninamereplace"), sout
)
132 sout
= "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
133 self
.assertEqual(sin
.encode("iso-8859-15", "test.uninamereplace"), sout
)
135 def test_backslashescape(self
):
136 # Does the same as the "unicode-escape" encoding, but with different
138 sin
= u
"a\xac\u1234\u20ac\u8000"
139 if sys
.maxunicode
> 0xffff:
140 sin
+= unichr(sys
.maxunicode
)
141 sout
= "a\\xac\\u1234\\u20ac\\u8000"
142 if sys
.maxunicode
> 0xffff:
143 sout
+= "\\U%08x" % sys
.maxunicode
144 self
.assertEqual(sin
.encode("ascii", "backslashreplace"), sout
)
146 sout
= "a\xac\\u1234\\u20ac\\u8000"
147 if sys
.maxunicode
> 0xffff:
148 sout
+= "\\U%08x" % sys
.maxunicode
149 self
.assertEqual(sin
.encode("latin-1", "backslashreplace"), sout
)
151 sout
= "a\xac\\u1234\xa4\\u8000"
152 if sys
.maxunicode
> 0xffff:
153 sout
+= "\\U%08x" % sys
.maxunicode
154 self
.assertEqual(sin
.encode("iso-8859-15", "backslashreplace"), sout
)
156 def test_decoding_callbacks(self
):
157 # This is a test for a decoding callback handler
158 # that allows the decoding of the invalid sequence
159 # "\xc0\x80" and returns "\x00" instead of raising an error.
160 # All other illegal sequences will be handled strictly.
161 def relaxedutf8(exc
):
162 if not isinstance(exc
, UnicodeDecodeError):
163 raise TypeError("don't know how to handle %r" % exc
)
164 if exc
.object[exc
.start
:exc
.start
+2] == "\xc0\x80":
165 return (u
"\x00", exc
.start
+2) # retry after two bytes
169 codecs
.register_error("test.relaxedutf8", relaxedutf8
)
171 # all the "\xc0\x80" will be decoded to "\x00"
172 sin
= "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
173 sout
= u
"a\x00b\x00c\xfc\x00\x00"
174 self
.assertEqual(sin
.decode("utf-8", "test.relaxedutf8"), sout
)
176 # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised
177 sin
= "\xc0\x80\xc0\x81"
178 self
.assertRaises(UnicodeDecodeError, sin
.decode
,
179 "utf-8", "test.relaxedutf8")
181 def test_charmapencode(self
):
182 # For charmap encodings the replacement string will be
183 # mapped through the encoding again. This means, that
184 # to be able to use e.g. the "replace" handler, the
185 # charmap has to have a mapping for "?".
186 charmap
= dict([ (ord(c
), 2*c
.upper()) for c
in "abcdefgh"])
189 self
.assertEquals(codecs
.charmap_encode(sin
, "strict", charmap
)[0], sout
)
192 self
.assertRaises(UnicodeError, codecs
.charmap_encode
, sin
, "strict", charmap
)
194 charmap
[ord("?")] = "XYZ"
196 sout
= "AABBCCXYZXYZXYZ"
197 self
.assertEquals(codecs
.charmap_encode(sin
, "replace", charmap
)[0], sout
)
199 charmap
[ord("?")] = u
"XYZ"
200 self
.assertRaises(TypeError, codecs
.charmap_encode
, sin
, "replace", charmap
)
202 charmap
[ord("?")] = u
"XYZ"
203 self
.assertRaises(TypeError, codecs
.charmap_encode
, sin
, "replace", charmap
)
205 def test_decodeunicodeinternal(self
):
208 "\x00\x00\x00\x00\x00".decode
,
211 if sys
.maxunicode
> 0xffff:
212 def handler_unicodeinternal(exc
):
213 if not isinstance(exc
, UnicodeDecodeError):
214 raise TypeError("don't know how to handle %r" % exc
)
218 "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
223 "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
227 codecs
.register_error("test.hui", handler_unicodeinternal
)
230 "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
231 u
"\u0000\u0001\u0000"
234 def test_callbacks(self
):
236 if not isinstance(exc
, UnicodeEncodeError) \
237 and not isinstance(exc
, UnicodeDecodeError):
238 raise TypeError("don't know how to handle %r" % exc
)
239 l
= [u
"<%d>" % ord(exc
.object[pos
]) for pos
in xrange(exc
.start
, exc
.end
)]
240 return (u
"[%s]" % u
"".join(l
), exc
.end
)
242 codecs
.register_error("test.handler1", handler1
)
245 if not isinstance(exc
, UnicodeDecodeError):
246 raise TypeError("don't know how to handle %r" % exc
)
247 l
= [u
"<%d>" % ord(exc
.object[pos
]) for pos
in xrange(exc
.start
, exc
.end
)]
248 return (u
"[%s]" % u
"".join(l
), exc
.end
+1) # skip one character
250 codecs
.register_error("test.handler2", handler2
)
252 s
= "\x00\x81\x7f\x80\xff"
255 s
.decode("ascii", "test.handler1"),
256 u
"\x00[<129>]\x7f[<128>][<255>]"
259 s
.decode("ascii", "test.handler2"),
260 u
"\x00[<129>][<128>]"
264 "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
265 u
"\u3042[<92><117><51><120>]xx"
269 "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
270 u
"\u3042[<92><117><51><120><120>]"
274 codecs
.charmap_decode("abc", "test.handler1", {ord("a"): u
"z"})[0],
279 u
"g\xfc\xdfrk".encode("ascii", "test.handler1"),
284 u
"g\xfc\xdf".encode("ascii", "test.handler1"),
288 def test_longstrings(self
):
289 # test long strings to check for memory overflow problems
290 errors
= [ "strict", "ignore", "replace", "xmlcharrefreplace",
292 # register the handlers under different names,
293 # to prevent the codec from recognizing the name
295 codecs
.register_error("test." + err
, codecs
.lookup_error(err
))
297 errors
+= [ "test." + err
for err
in errors
]
298 for uni
in [ s
*l
for s
in (u
"x", u
"\u3042", u
"a\xe4") ]:
299 for enc
in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
300 "utf-8", "utf-7", "utf-16", "utf-32"):
307 def check_exceptionobjectargs(self
, exctype
, args
, msg
):
308 # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
309 # check with one missing argument
310 self
.assertRaises(TypeError, exctype
, *args
[:-1])
311 # check with one argument too much
312 self
.assertRaises(TypeError, exctype
, *(args
+ ["too much"]))
313 # check with one argument of the wrong type
314 wrongargs
= [ "spam", u
"eggs", 42, 1.0, None ]
315 for i
in xrange(len(args
)):
316 for wrongarg
in wrongargs
:
317 if type(wrongarg
) is type(args
[i
]):
319 # build argument array
321 for j
in xrange(len(args
)):
323 callargs
.append(wrongarg
)
325 callargs
.append(args
[i
])
326 self
.assertRaises(TypeError, exctype
, *callargs
)
328 # check with the correct number and type of arguments
330 self
.assertEquals(str(exc
), msg
)
332 def test_unicodeencodeerror(self
):
333 self
.check_exceptionobjectargs(
335 ["ascii", u
"g\xfcrk", 1, 2, "ouch"],
336 "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
338 self
.check_exceptionobjectargs(
340 ["ascii", u
"g\xfcrk", 1, 4, "ouch"],
341 "'ascii' codec can't encode characters in position 1-3: ouch"
343 self
.check_exceptionobjectargs(
345 ["ascii", u
"\xfcx", 0, 1, "ouch"],
346 "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
348 self
.check_exceptionobjectargs(
350 ["ascii", u
"\u0100x", 0, 1, "ouch"],
351 "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
353 self
.check_exceptionobjectargs(
355 ["ascii", u
"\uffffx", 0, 1, "ouch"],
356 "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
358 if sys
.maxunicode
> 0xffff:
359 self
.check_exceptionobjectargs(
361 ["ascii", u
"\U00010000x", 0, 1, "ouch"],
362 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
365 def test_unicodedecodeerror(self
):
366 self
.check_exceptionobjectargs(
368 ["ascii", "g\xfcrk", 1, 2, "ouch"],
369 "'ascii' codec can't decode byte 0xfc in position 1: ouch"
371 self
.check_exceptionobjectargs(
373 ["ascii", "g\xfcrk", 1, 3, "ouch"],
374 "'ascii' codec can't decode bytes in position 1-2: ouch"
377 def test_unicodetranslateerror(self
):
378 self
.check_exceptionobjectargs(
379 UnicodeTranslateError,
380 [u
"g\xfcrk", 1, 2, "ouch"],
381 "can't translate character u'\\xfc' in position 1: ouch"
383 self
.check_exceptionobjectargs(
384 UnicodeTranslateError,
385 [u
"g\u0100rk", 1, 2, "ouch"],
386 "can't translate character u'\\u0100' in position 1: ouch"
388 self
.check_exceptionobjectargs(
389 UnicodeTranslateError,
390 [u
"g\uffffrk", 1, 2, "ouch"],
391 "can't translate character u'\\uffff' in position 1: ouch"
393 if sys
.maxunicode
> 0xffff:
394 self
.check_exceptionobjectargs(
395 UnicodeTranslateError,
396 [u
"g\U00010000rk", 1, 2, "ouch"],
397 "can't translate character u'\\U00010000' in position 1: ouch"
399 self
.check_exceptionobjectargs(
400 UnicodeTranslateError,
401 [u
"g\xfcrk", 1, 3, "ouch"],
402 "can't translate characters in position 1-2: ouch"
405 def test_badandgoodstrictexceptions(self
):
406 # "strict" complains about a non-exception passed in
409 codecs
.strict_errors
,
412 # "strict" complains about the wrong exception type
415 codecs
.strict_errors
,
419 # If the correct exception is passed in, "strict" raises it
422 codecs
.strict_errors
,
423 UnicodeEncodeError("ascii", u
"\u3042", 0, 1, "ouch")
426 def test_badandgoodignoreexceptions(self
):
427 # "ignore" complains about a non-exception passed in
430 codecs
.ignore_errors
,
433 # "ignore" complains about the wrong exception type
436 codecs
.ignore_errors
,
439 # If the correct exception is passed in, "ignore" returns an empty replacement
441 codecs
.ignore_errors(UnicodeEncodeError("ascii", u
"\u3042", 0, 1, "ouch")),
445 codecs
.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
449 codecs
.ignore_errors(UnicodeTranslateError(u
"\u3042", 0, 1, "ouch")),
453 def test_badandgoodreplaceexceptions(self
):
454 # "replace" complains about a non-exception passed in
457 codecs
.replace_errors
,
460 # "replace" complains about the wrong exception type
463 codecs
.replace_errors
,
468 codecs
.replace_errors
,
469 BadObjectUnicodeEncodeError()
473 codecs
.replace_errors
,
474 BadObjectUnicodeDecodeError()
476 # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
478 codecs
.replace_errors(UnicodeEncodeError("ascii", u
"\u3042", 0, 1, "ouch")),
482 codecs
.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
486 codecs
.replace_errors(UnicodeTranslateError(u
"\u3042", 0, 1, "ouch")),
490 def test_badandgoodxmlcharrefreplaceexceptions(self
):
491 # "xmlcharrefreplace" complains about a non-exception passed in
494 codecs
.xmlcharrefreplace_errors
,
497 # "xmlcharrefreplace" complains about the wrong exception types
500 codecs
.xmlcharrefreplace_errors
,
503 # "xmlcharrefreplace" can only be used for encoding
506 codecs
.xmlcharrefreplace_errors
,
507 UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
511 codecs
.xmlcharrefreplace_errors
,
512 UnicodeTranslateError(u
"\u3042", 0, 1, "ouch")
514 # Use the correct exception
515 cs
= (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
516 s
= "".join(unichr(c
) for c
in cs
)
518 codecs
.xmlcharrefreplace_errors(
519 UnicodeEncodeError("ascii", s
, 0, len(s
), "ouch")
521 (u
"".join(u
"&#%d;" % ord(c
) for c
in s
), len(s
))
524 def test_badandgoodbackslashreplaceexceptions(self
):
525 # "backslashreplace" complains about a non-exception passed in
528 codecs
.backslashreplace_errors
,
531 # "backslashreplace" complains about the wrong exception types
534 codecs
.backslashreplace_errors
,
537 # "backslashreplace" can only be used for encoding
540 codecs
.backslashreplace_errors
,
541 UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
545 codecs
.backslashreplace_errors
,
546 UnicodeTranslateError(u
"\u3042", 0, 1, "ouch")
548 # Use the correct exception
550 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\u3042", 0, 1, "ouch")),
554 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\x00", 0, 1, "ouch")),
558 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\xff", 0, 1, "ouch")),
562 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\u0100", 0, 1, "ouch")),
566 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\uffff", 0, 1, "ouch")),
569 if sys
.maxunicode
>0xffff:
571 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\U00010000", 0, 1, "ouch")),
575 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\U0010ffff", 0, 1, "ouch")),
579 def test_badhandlerresults(self
):
580 results
= ( 42, u
"foo", (1,2,3), (u
"foo", 1, 3), (u
"foo", None), (u
"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
581 encs
= ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
584 codecs
.register_error("test.badhandler", lambda x
: res
)
592 for (enc
, bytes
) in (
596 ("unicode-internal", "\x00"),
605 def test_lookup(self
):
606 self
.assertEquals(codecs
.strict_errors
, codecs
.lookup_error("strict"))
607 self
.assertEquals(codecs
.ignore_errors
, codecs
.lookup_error("ignore"))
608 self
.assertEquals(codecs
.strict_errors
, codecs
.lookup_error("strict"))
610 codecs
.xmlcharrefreplace_errors
,
611 codecs
.lookup_error("xmlcharrefreplace")
614 codecs
.backslashreplace_errors
,
615 codecs
.lookup_error("backslashreplace")
618 def test_unencodablereplacement(self
):
620 if isinstance(exc
, UnicodeEncodeError):
621 return (u
"\u4242", exc
.end
)
623 raise TypeError("don't know how to handle %r" % exc
)
624 codecs
.register_error("test.unencreplhandler", unencrepl
)
625 for enc
in ("ascii", "iso-8859-1", "iso-8859-15"):
630 "test.unencreplhandler"
633 def test_badregistercall(self
):
634 # enhance coverage of:
635 # Modules/_codecsmodule.c::register_error()
636 # Python/codecs.c::PyCodec_RegisterError()
637 self
.assertRaises(TypeError, codecs
.register_error
, 42)
638 self
.assertRaises(TypeError, codecs
.register_error
, "test.dummy", 42)
640 def test_badlookupcall(self
):
641 # enhance coverage of:
642 # Modules/_codecsmodule.c::lookup_error()
643 self
.assertRaises(TypeError, codecs
.lookup_error
)
645 def test_unknownhandler(self
):
646 # enhance coverage of:
647 # Modules/_codecsmodule.c::lookup_error()
648 self
.assertRaises(LookupError, codecs
.lookup_error
, "test.unknown")
650 def test_xmlcharrefvalues(self
):
651 # enhance coverage of:
652 # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
653 # and inline implementations
654 v
= (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
655 if sys
.maxunicode
>=100000:
656 v
+= (100000, 500000, 1000000)
657 s
= u
"".join([unichr(x
) for x
in v
])
658 codecs
.register_error("test.xmlcharrefreplace", codecs
.xmlcharrefreplace_errors
)
659 for enc
in ("ascii", "iso-8859-15"):
660 for err
in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
663 def test_decodehelper(self
):
664 # enhance coverage of:
665 # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
667 self
.assertRaises(LookupError, "\xff".decode
, "ascii", "test.unknown")
669 def baddecodereturn1(exc
):
671 codecs
.register_error("test.baddecodereturn1", baddecodereturn1
)
672 self
.assertRaises(TypeError, "\xff".decode
, "ascii", "test.baddecodereturn1")
673 self
.assertRaises(TypeError, "\\".decode
, "unicode-escape", "test.baddecodereturn1")
674 self
.assertRaises(TypeError, "\\x0".decode
, "unicode-escape", "test.baddecodereturn1")
675 self
.assertRaises(TypeError, "\\x0y".decode
, "unicode-escape", "test.baddecodereturn1")
676 self
.assertRaises(TypeError, "\\Uffffeeee".decode
, "unicode-escape", "test.baddecodereturn1")
677 self
.assertRaises(TypeError, "\\uyyyy".decode
, "raw-unicode-escape", "test.baddecodereturn1")
679 def baddecodereturn2(exc
):
681 codecs
.register_error("test.baddecodereturn2", baddecodereturn2
)
682 self
.assertRaises(TypeError, "\xff".decode
, "ascii", "test.baddecodereturn2")
684 handler
= PosReturn()
685 codecs
.register_error("test.posreturn", handler
.handle
)
687 # Valid negative position
689 self
.assertEquals("\xff0".decode("ascii", "test.posreturn"), u
"<?>0")
691 # Valid negative position
693 self
.assertEquals("\xff0".decode("ascii", "test.posreturn"), u
"<?><?>")
695 # Negative position out of bounds
697 self
.assertRaises(IndexError, "\xff0".decode
, "ascii", "test.posreturn")
699 # Valid positive position
701 self
.assertEquals("\xff0".decode("ascii", "test.posreturn"), u
"<?>0")
703 # Largest valid positive position (one beyond end of input)
705 self
.assertEquals("\xff0".decode("ascii", "test.posreturn"), u
"<?>")
707 # Invalid positive position
709 self
.assertRaises(IndexError, "\xff0".decode
, "ascii", "test.posreturn")
713 self
.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u
"<?>0")
716 def __getitem__(self
, key
):
718 self
.assertRaises(UnicodeError, codecs
.charmap_decode
, "\xff", "strict", {0xff: None})
719 self
.assertRaises(ValueError, codecs
.charmap_decode
, "\xff", "strict", D())
720 self
.assertRaises(TypeError, codecs
.charmap_decode
, "\xff", "strict", {0xff: sys
.maxunicode
+1})
722 def test_encodehelper(self
):
723 # enhance coverage of:
724 # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
726 self
.assertRaises(LookupError, u
"\xff".encode
, "ascii", "test.unknown")
728 def badencodereturn1(exc
):
730 codecs
.register_error("test.badencodereturn1", badencodereturn1
)
731 self
.assertRaises(TypeError, u
"\xff".encode
, "ascii", "test.badencodereturn1")
733 def badencodereturn2(exc
):
735 codecs
.register_error("test.badencodereturn2", badencodereturn2
)
736 self
.assertRaises(TypeError, u
"\xff".encode
, "ascii", "test.badencodereturn2")
738 handler
= PosReturn()
739 codecs
.register_error("test.posreturn", handler
.handle
)
741 # Valid negative position
743 self
.assertEquals(u
"\xff0".encode("ascii", "test.posreturn"), "<?>0")
745 # Valid negative position
747 self
.assertEquals(u
"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
749 # Negative position out of bounds
751 self
.assertRaises(IndexError, u
"\xff0".encode
, "ascii", "test.posreturn")
753 # Valid positive position
755 self
.assertEquals(u
"\xff0".encode("ascii", "test.posreturn"), "<?>0")
757 # Largest valid positive position (one beyond end of input)
759 self
.assertEquals(u
"\xff0".encode("ascii", "test.posreturn"), "<?>")
761 # Invalid positive position
763 self
.assertRaises(IndexError, u
"\xff0".encode
, "ascii", "test.posreturn")
768 def __getitem__(self
, key
):
770 for err
in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
771 self
.assertRaises(UnicodeError, codecs
.charmap_encode
, u
"\xff", err
, {0xff: None})
772 self
.assertRaises(ValueError, codecs
.charmap_encode
, u
"\xff", err
, D())
773 self
.assertRaises(TypeError, codecs
.charmap_encode
, u
"\xff", err
, {0xff: 300})
775 def test_translatehelper(self
):
776 # enhance coverage of:
777 # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
779 # (Unfortunately the errors argument is not directly accessible
780 # from Python, so we can't test that much)
782 def __getitem__(self
, key
):
784 self
.assertRaises(ValueError, u
"\xff".translate
, D())
785 self
.assertRaises(TypeError, u
"\xff".translate
, {0xff: sys
.maxunicode
+1})
786 self
.assertRaises(TypeError, u
"\xff".translate
, {0xff: ()})
788 def test_bug828737(self
):
796 for n
in (1, 10, 100, 1000):
797 text
= u
'abc<def>ghi'*n
798 text
.translate(charmap
)
801 test
.test_support
.run_unittest(CodecCallbackTest
)
803 if __name__
== "__main__":