1 import test
.test_support
, unittest
2 import sys
, codecs
, htmlentitydefs
, unicodedata
5 # this can be used for configurable callbacks
10 def handle(self
, exc
):
14 realpos
= len(exc
.object) + realpos
15 # if we don't advance this time, terminate on the next call
16 # otherwise we'd get an endless loop
17 if realpos
<= exc
.start
:
18 self
.pos
= len(exc
.object)
19 return (u
"<?>", oldpos
)
21 # A UnicodeEncodeError object with a bad start attribute
22 class BadStartUnicodeEncodeError(UnicodeEncodeError):
24 UnicodeEncodeError.__init
__(self
, "ascii", u
"", 0, 1, "bad")
27 # A UnicodeEncodeError object with a bad object attribute
28 class BadObjectUnicodeEncodeError(UnicodeEncodeError):
30 UnicodeEncodeError.__init
__(self
, "ascii", u
"", 0, 1, "bad")
33 # A UnicodeDecodeError object without an end attribute
34 class NoEndUnicodeDecodeError(UnicodeDecodeError):
36 UnicodeDecodeError.__init
__(self
, "ascii", "", 0, 1, "bad")
39 # A UnicodeDecodeError object with a bad object attribute
40 class BadObjectUnicodeDecodeError(UnicodeDecodeError):
42 UnicodeDecodeError.__init
__(self
, "ascii", "", 0, 1, "bad")
45 # A UnicodeTranslateError object without a start attribute
46 class NoStartUnicodeTranslateError(UnicodeTranslateError):
48 UnicodeTranslateError.__init
__(self
, u
"", 0, 1, "bad")
51 # A UnicodeTranslateError object without an end attribute
52 class NoEndUnicodeTranslateError(UnicodeTranslateError):
54 UnicodeTranslateError.__init
__(self
, u
"", 0, 1, "bad")
57 # A UnicodeTranslateError object without an object attribute
58 class NoObjectUnicodeTranslateError(UnicodeTranslateError):
60 UnicodeTranslateError.__init
__(self
, u
"", 0, 1, "bad")
63 class CodecCallbackTest(unittest
.TestCase
):
65 def test_xmlcharrefreplace(self
):
66 # replace unencodable characters with numeric character entities.
67 # For ascii, latin-1 and charmaps this is completely implemented
68 # in C and should be reasonably fast.
69 s
= u
"\u30b9\u30d1\u30e2 \xe4nd eggs"
71 s
.encode("ascii", "xmlcharrefreplace"),
72 "スパモ änd eggs"
75 s
.encode("latin-1", "xmlcharrefreplace"),
76 "スパモ \xe4nd eggs"
79 def test_xmlcharnamereplace(self
):
80 # This time use a named character entity for unencodable
81 # characters, if one is available.
83 def xmlcharnamereplace(exc
):
84 if not isinstance(exc
, UnicodeEncodeError):
85 raise TypeError("don't know how to handle %r" % exc
)
87 for c
in exc
.object[exc
.start
:exc
.end
]:
89 l
.append(u
"&%s;" % htmlentitydefs
.codepoint2name
[ord(c
)])
91 l
.append(u
"&#%d;" % ord(c
))
92 return (u
"".join(l
), exc
.end
)
94 codecs
.register_error(
95 "test.xmlcharnamereplace", xmlcharnamereplace
)
97 sin
= u
"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
98 sout
= "«ℜ» = ⟨ሴ€⟩"
99 self
.assertEqual(sin
.encode("ascii", "test.xmlcharnamereplace"), sout
)
100 sout
= "\xabℜ\xbb = ⟨ሴ€⟩"
101 self
.assertEqual(sin
.encode("latin-1", "test.xmlcharnamereplace"), sout
)
102 sout
= "\xabℜ\xbb = ⟨ሴ\xa4⟩"
103 self
.assertEqual(sin
.encode("iso-8859-15", "test.xmlcharnamereplace"), sout
)
105 def test_uninamereplace(self
):
106 # We're using the names from the unicode database this time,
107 # and we're doing "syntax highlighting" here, i.e. we include
108 # the replaced text in ANSI escape sequences. For this it is
109 # useful that the error handler is not called for every single
110 # unencodable character, but for a complete sequence of
111 # unencodable characters, otherwise we would output many
112 # unnecessary escape sequences.
114 def uninamereplace(exc
):
115 if not isinstance(exc
, UnicodeEncodeError):
116 raise TypeError("don't know how to handle %r" % exc
)
118 for c
in exc
.object[exc
.start
:exc
.end
]:
119 l
.append(unicodedata
.name(c
, u
"0x%x" % ord(c
)))
120 return (u
"\033[1m%s\033[0m" % u
", ".join(l
), exc
.end
)
122 codecs
.register_error(
123 "test.uninamereplace", uninamereplace
)
125 sin
= u
"\xac\u1234\u20ac\u8000"
126 sout
= "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
127 self
.assertEqual(sin
.encode("ascii", "test.uninamereplace"), sout
)
129 sout
= "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
130 self
.assertEqual(sin
.encode("latin-1", "test.uninamereplace"), sout
)
132 sout
= "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
133 self
.assertEqual(sin
.encode("iso-8859-15", "test.uninamereplace"), sout
)
135 def test_backslashescape(self
):
136 # Does the same as the "unicode-escape" encoding, but with different
138 sin
= u
"a\xac\u1234\u20ac\u8000"
139 if sys
.maxunicode
> 0xffff:
140 sin
+= unichr(sys
.maxunicode
)
141 sout
= "a\\xac\\u1234\\u20ac\\u8000"
142 if sys
.maxunicode
> 0xffff:
143 sout
+= "\\U%08x" % sys
.maxunicode
144 self
.assertEqual(sin
.encode("ascii", "backslashreplace"), sout
)
146 sout
= "a\xac\\u1234\\u20ac\\u8000"
147 if sys
.maxunicode
> 0xffff:
148 sout
+= "\\U%08x" % sys
.maxunicode
149 self
.assertEqual(sin
.encode("latin-1", "backslashreplace"), sout
)
151 sout
= "a\xac\\u1234\xa4\\u8000"
152 if sys
.maxunicode
> 0xffff:
153 sout
+= "\\U%08x" % sys
.maxunicode
154 self
.assertEqual(sin
.encode("iso-8859-15", "backslashreplace"), sout
)
156 def test_decoderelaxedutf8(self
):
157 # This is the test for a decoding callback handler,
158 # that relaxes the UTF-8 minimal encoding restriction.
159 # A null byte that is encoded as "\xc0\x80" will be
160 # decoded as a null byte. All other illegal sequences
161 # will be handled strictly.
162 def relaxedutf8(exc
):
163 if not isinstance(exc
, UnicodeDecodeError):
164 raise TypeError("don't know how to handle %r" % exc
)
165 if exc
.object[exc
.start
:exc
.end
].startswith("\xc0\x80"):
166 return (u
"\x00", exc
.start
+2) # retry after two bytes
170 codecs
.register_error(
171 "test.relaxedutf8", relaxedutf8
)
173 sin
= "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
174 sout
= u
"a\x00b\x00c\xfc\x00\x00"
175 self
.assertEqual(sin
.decode("utf-8", "test.relaxedutf8"), sout
)
176 sin
= "\xc0\x80\xc0\x81"
177 self
.assertRaises(UnicodeError, sin
.decode
, "utf-8", "test.relaxedutf8")
179 def test_charmapencode(self
):
180 # For charmap encodings the replacement string will be
181 # mapped through the encoding again. This means, that
182 # to be able to use e.g. the "replace" handler, the
183 # charmap has to have a mapping for "?".
184 charmap
= dict([ (ord(c
), 2*c
.upper()) for c
in "abcdefgh"])
187 self
.assertEquals(codecs
.charmap_encode(sin
, "strict", charmap
)[0], sout
)
190 self
.assertRaises(UnicodeError, codecs
.charmap_encode
, sin
, "strict", charmap
)
192 charmap
[ord("?")] = "XYZ"
194 sout
= "AABBCCXYZXYZXYZ"
195 self
.assertEquals(codecs
.charmap_encode(sin
, "replace", charmap
)[0], sout
)
197 charmap
[ord("?")] = u
"XYZ"
198 self
.assertRaises(TypeError, codecs
.charmap_encode
, sin
, "replace", charmap
)
200 charmap
[ord("?")] = u
"XYZ"
201 self
.assertRaises(TypeError, codecs
.charmap_encode
, sin
, "replace", charmap
)
203 def test_decodeunicodeinternal(self
):
206 "\x00\x00\x00\x00\x00".decode
,
209 if sys
.maxunicode
> 0xffff:
210 def handler_unicodeinternal(exc
):
211 if not isinstance(exc
, UnicodeDecodeError):
212 raise TypeError("don't know how to handle %r" % exc
)
216 "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
221 "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
225 codecs
.register_error("test.hui", handler_unicodeinternal
)
228 "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
229 u
"\u0000\u0001\u0000"
232 def test_callbacks(self
):
234 if not isinstance(exc
, UnicodeEncodeError) \
235 and not isinstance(exc
, UnicodeDecodeError):
236 raise TypeError("don't know how to handle %r" % exc
)
237 l
= [u
"<%d>" % ord(exc
.object[pos
]) for pos
in xrange(exc
.start
, exc
.end
)]
238 return (u
"[%s]" % u
"".join(l
), exc
.end
)
240 codecs
.register_error("test.handler1", handler1
)
243 if not isinstance(exc
, UnicodeDecodeError):
244 raise TypeError("don't know how to handle %r" % exc
)
245 l
= [u
"<%d>" % ord(exc
.object[pos
]) for pos
in xrange(exc
.start
, exc
.end
)]
246 return (u
"[%s]" % u
"".join(l
), exc
.end
+1) # skip one character
248 codecs
.register_error("test.handler2", handler2
)
250 s
= "\x00\x81\x7f\x80\xff"
253 s
.decode("ascii", "test.handler1"),
254 u
"\x00[<129>]\x7f[<128>][<255>]"
257 s
.decode("ascii", "test.handler2"),
258 u
"\x00[<129>][<128>]"
262 "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
263 u
"\u3042[<92><117><51><120>]xx"
267 "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
268 u
"\u3042[<92><117><51><120><120>]"
272 codecs
.charmap_decode("abc", "test.handler1", {ord("a"): u
"z"})[0],
277 u
"g\xfc\xdfrk".encode("ascii", "test.handler1"),
282 u
"g\xfc\xdf".encode("ascii", "test.handler1"),
286 def test_longstrings(self
):
287 # test long strings to check for memory overflow problems
288 errors
= [ "strict", "ignore", "replace", "xmlcharrefreplace",
290 # register the handlers under different names,
291 # to prevent the codec from recognizing the name
293 codecs
.register_error("test." + err
, codecs
.lookup_error(err
))
295 errors
+= [ "test." + err
for err
in errors
]
296 for uni
in [ s
*l
for s
in (u
"x", u
"\u3042", u
"a\xe4") ]:
297 for enc
in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
298 "utf-8", "utf-7", "utf-16", "utf-32"):
305 def check_exceptionobjectargs(self
, exctype
, args
, msg
):
306 # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
307 # check with one missing argument
308 self
.assertRaises(TypeError, exctype
, *args
[:-1])
309 # check with one argument too many
310 self
.assertRaises(TypeError, exctype
, *(args
+ ["too much"]))
311 # check with one argument of the wrong type
312 wrongargs
= [ "spam", u
"eggs", 42, 1.0, None ]
313 for i
in xrange(len(args
)):
314 for wrongarg
in wrongargs
:
315 if type(wrongarg
) is type(args
[i
]):
317 # build argument array
319 for j
in xrange(len(args
)):
321 callargs
.append(wrongarg
)
323 callargs
.append(args
[i
])
324 self
.assertRaises(TypeError, exctype
, *callargs
)
326 # check with the correct number and type of arguments
328 self
.assertEquals(str(exc
), msg
)
330 def test_unicodeencodeerror(self
):
331 self
.check_exceptionobjectargs(
333 ["ascii", u
"g\xfcrk", 1, 2, "ouch"],
334 "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
336 self
.check_exceptionobjectargs(
338 ["ascii", u
"g\xfcrk", 1, 4, "ouch"],
339 "'ascii' codec can't encode characters in position 1-3: ouch"
341 self
.check_exceptionobjectargs(
343 ["ascii", u
"\xfcx", 0, 1, "ouch"],
344 "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
346 self
.check_exceptionobjectargs(
348 ["ascii", u
"\u0100x", 0, 1, "ouch"],
349 "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
351 self
.check_exceptionobjectargs(
353 ["ascii", u
"\uffffx", 0, 1, "ouch"],
354 "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
356 if sys
.maxunicode
> 0xffff:
357 self
.check_exceptionobjectargs(
359 ["ascii", u
"\U00010000x", 0, 1, "ouch"],
360 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
363 def test_unicodedecodeerror(self
):
364 self
.check_exceptionobjectargs(
366 ["ascii", "g\xfcrk", 1, 2, "ouch"],
367 "'ascii' codec can't decode byte 0xfc in position 1: ouch"
369 self
.check_exceptionobjectargs(
371 ["ascii", "g\xfcrk", 1, 3, "ouch"],
372 "'ascii' codec can't decode bytes in position 1-2: ouch"
375 def test_unicodetranslateerror(self
):
376 self
.check_exceptionobjectargs(
377 UnicodeTranslateError,
378 [u
"g\xfcrk", 1, 2, "ouch"],
379 "can't translate character u'\\xfc' in position 1: ouch"
381 self
.check_exceptionobjectargs(
382 UnicodeTranslateError,
383 [u
"g\u0100rk", 1, 2, "ouch"],
384 "can't translate character u'\\u0100' in position 1: ouch"
386 self
.check_exceptionobjectargs(
387 UnicodeTranslateError,
388 [u
"g\uffffrk", 1, 2, "ouch"],
389 "can't translate character u'\\uffff' in position 1: ouch"
391 if sys
.maxunicode
> 0xffff:
392 self
.check_exceptionobjectargs(
393 UnicodeTranslateError,
394 [u
"g\U00010000rk", 1, 2, "ouch"],
395 "can't translate character u'\\U00010000' in position 1: ouch"
397 self
.check_exceptionobjectargs(
398 UnicodeTranslateError,
399 [u
"g\xfcrk", 1, 3, "ouch"],
400 "can't translate characters in position 1-2: ouch"
403 def test_badandgoodstrictexceptions(self
):
404 # "strict" complains about a non-exception passed in
407 codecs
.strict_errors
,
410 # "strict" complains about the wrong exception type
413 codecs
.strict_errors
,
417 # If the correct exception is passed in, "strict" raises it
420 codecs
.strict_errors
,
421 UnicodeEncodeError("ascii", u
"\u3042", 0, 1, "ouch")
424 def test_badandgoodignoreexceptions(self
):
425 # "ignore" complains about a non-exception passed in
428 codecs
.ignore_errors
,
431 # "ignore" complains about the wrong exception type
434 codecs
.ignore_errors
,
437 # If the correct exception is passed in, "ignore" returns an empty replacement
439 codecs
.ignore_errors(UnicodeEncodeError("ascii", u
"\u3042", 0, 1, "ouch")),
443 codecs
.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
447 codecs
.ignore_errors(UnicodeTranslateError(u
"\u3042", 0, 1, "ouch")),
451 def test_badandgoodreplaceexceptions(self
):
452 # "replace" complains about a non-exception passed in
455 codecs
.replace_errors
,
458 # "replace" complains about the wrong exception type
461 codecs
.replace_errors
,
466 codecs
.replace_errors
,
467 BadObjectUnicodeEncodeError()
471 codecs
.replace_errors
,
472 BadObjectUnicodeDecodeError()
474 # With the correct exception, "replace" returns a "?" or u"\ufffd" replacement
476 codecs
.replace_errors(UnicodeEncodeError("ascii", u
"\u3042", 0, 1, "ouch")),
480 codecs
.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
484 codecs
.replace_errors(UnicodeTranslateError(u
"\u3042", 0, 1, "ouch")),
488 def test_badandgoodxmlcharrefreplaceexceptions(self
):
489 # "xmlcharrefreplace" complains about a non-exception passed in
492 codecs
.xmlcharrefreplace_errors
,
495 # "xmlcharrefreplace" complains about the wrong exception types
498 codecs
.xmlcharrefreplace_errors
,
501 # "xmlcharrefreplace" can only be used for encoding
504 codecs
.xmlcharrefreplace_errors
,
505 UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
509 codecs
.xmlcharrefreplace_errors
,
510 UnicodeTranslateError(u
"\u3042", 0, 1, "ouch")
512 # Use the correct exception
513 cs
= (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
514 s
= "".join(unichr(c
) for c
in cs
)
516 codecs
.xmlcharrefreplace_errors(
517 UnicodeEncodeError("ascii", s
, 0, len(s
), "ouch")
519 (u
"".join(u
"&#%d;" % ord(c
) for c
in s
), len(s
))
522 def test_badandgoodbackslashreplaceexceptions(self
):
523 # "backslashreplace" complains about a non-exception passed in
526 codecs
.backslashreplace_errors
,
529 # "backslashreplace" complains about the wrong exception types
532 codecs
.backslashreplace_errors
,
535 # "backslashreplace" can only be used for encoding
538 codecs
.backslashreplace_errors
,
539 UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
543 codecs
.backslashreplace_errors
,
544 UnicodeTranslateError(u
"\u3042", 0, 1, "ouch")
546 # Use the correct exception
548 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\u3042", 0, 1, "ouch")),
552 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\x00", 0, 1, "ouch")),
556 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\xff", 0, 1, "ouch")),
560 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\u0100", 0, 1, "ouch")),
564 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\uffff", 0, 1, "ouch")),
567 if sys
.maxunicode
>0xffff:
569 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\U00010000", 0, 1, "ouch")),
573 codecs
.backslashreplace_errors(UnicodeEncodeError("ascii", u
"\U0010ffff", 0, 1, "ouch")),
577 def test_badhandlerresults(self
):
578 results
= ( 42, u
"foo", (1,2,3), (u
"foo", 1, 3), (u
"foo", None), (u
"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
579 encs
= ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
582 codecs
.register_error("test.badhandler", lambda x
: res
)
590 for (enc
, bytes
) in (
594 ("unicode-internal", "\x00"),
603 def test_lookup(self
):
604 self
.assertEquals(codecs
.strict_errors
, codecs
.lookup_error("strict"))
605 self
.assertEquals(codecs
.ignore_errors
, codecs
.lookup_error("ignore"))
606 self
.assertEquals(codecs
.strict_errors
, codecs
.lookup_error("strict"))
608 codecs
.xmlcharrefreplace_errors
,
609 codecs
.lookup_error("xmlcharrefreplace")
612 codecs
.backslashreplace_errors
,
613 codecs
.lookup_error("backslashreplace")
616 def test_unencodablereplacement(self
):
618 if isinstance(exc
, UnicodeEncodeError):
619 return (u
"\u4242", exc
.end
)
621 raise TypeError("don't know how to handle %r" % exc
)
622 codecs
.register_error("test.unencreplhandler", unencrepl
)
623 for enc
in ("ascii", "iso-8859-1", "iso-8859-15"):
628 "test.unencreplhandler"
631 def test_badregistercall(self
):
632 # enhance coverage of:
633 # Modules/_codecsmodule.c::register_error()
634 # Python/codecs.c::PyCodec_RegisterError()
635 self
.assertRaises(TypeError, codecs
.register_error
, 42)
636 self
.assertRaises(TypeError, codecs
.register_error
, "test.dummy", 42)
638 def test_badlookupcall(self
):
639 # enhance coverage of:
640 # Modules/_codecsmodule.c::lookup_error()
641 self
.assertRaises(TypeError, codecs
.lookup_error
)
643 def test_unknownhandler(self
):
644 # enhance coverage of:
645 # Modules/_codecsmodule.c::lookup_error()
646 self
.assertRaises(LookupError, codecs
.lookup_error
, "test.unknown")
648 def test_xmlcharrefvalues(self
):
649 # enhance coverage of:
650 # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
651 # and inline implementations
652 v
= (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
653 if sys
.maxunicode
>=100000:
654 v
+= (100000, 500000, 1000000)
655 s
= u
"".join([unichr(x
) for x
in v
])
656 codecs
.register_error("test.xmlcharrefreplace", codecs
.xmlcharrefreplace_errors
)
657 for enc
in ("ascii", "iso-8859-15"):
658 for err
in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
661 def test_decodehelper(self
):
662 # enhance coverage of:
663 # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
665 self
.assertRaises(LookupError, "\xff".decode
, "ascii", "test.unknown")
667 def baddecodereturn1(exc
):
669 codecs
.register_error("test.baddecodereturn1", baddecodereturn1
)
670 self
.assertRaises(TypeError, "\xff".decode
, "ascii", "test.baddecodereturn1")
671 self
.assertRaises(TypeError, "\\".decode
, "unicode-escape", "test.baddecodereturn1")
672 self
.assertRaises(TypeError, "\\x0".decode
, "unicode-escape", "test.baddecodereturn1")
673 self
.assertRaises(TypeError, "\\x0y".decode
, "unicode-escape", "test.baddecodereturn1")
674 self
.assertRaises(TypeError, "\\Uffffeeee".decode
, "unicode-escape", "test.baddecodereturn1")
675 self
.assertRaises(TypeError, "\\uyyyy".decode
, "raw-unicode-escape", "test.baddecodereturn1")
677 def baddecodereturn2(exc
):
679 codecs
.register_error("test.baddecodereturn2", baddecodereturn2
)
680 self
.assertRaises(TypeError, "\xff".decode
, "ascii", "test.baddecodereturn2")
682 handler
= PosReturn()
683 codecs
.register_error("test.posreturn", handler
.handle
)
685 # Valid negative position
687 self
.assertEquals("\xff0".decode("ascii", "test.posreturn"), u
"<?>0")
689 # Valid negative position
691 self
.assertEquals("\xff0".decode("ascii", "test.posreturn"), u
"<?><?>")
693 # Negative position out of bounds
695 self
.assertRaises(IndexError, "\xff0".decode
, "ascii", "test.posreturn")
697 # Valid positive position
699 self
.assertEquals("\xff0".decode("ascii", "test.posreturn"), u
"<?>0")
701 # Largest valid positive position (one beyond end of input)
703 self
.assertEquals("\xff0".decode("ascii", "test.posreturn"), u
"<?>")
705 # Invalid positive position
707 self
.assertRaises(IndexError, "\xff0".decode
, "ascii", "test.posreturn")
711 self
.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u
"<?>0")
714 def __getitem__(self
, key
):
716 self
.assertRaises(UnicodeError, codecs
.charmap_decode
, "\xff", "strict", {0xff: None})
717 self
.assertRaises(ValueError, codecs
.charmap_decode
, "\xff", "strict", D())
718 self
.assertRaises(TypeError, codecs
.charmap_decode
, "\xff", "strict", {0xff: sys
.maxunicode
+1})
720 def test_encodehelper(self
):
721 # enhance coverage of:
722 # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
724 self
.assertRaises(LookupError, u
"\xff".encode
, "ascii", "test.unknown")
726 def badencodereturn1(exc
):
728 codecs
.register_error("test.badencodereturn1", badencodereturn1
)
729 self
.assertRaises(TypeError, u
"\xff".encode
, "ascii", "test.badencodereturn1")
731 def badencodereturn2(exc
):
733 codecs
.register_error("test.badencodereturn2", badencodereturn2
)
734 self
.assertRaises(TypeError, u
"\xff".encode
, "ascii", "test.badencodereturn2")
736 handler
= PosReturn()
737 codecs
.register_error("test.posreturn", handler
.handle
)
739 # Valid negative position
741 self
.assertEquals(u
"\xff0".encode("ascii", "test.posreturn"), "<?>0")
743 # Valid negative position
745 self
.assertEquals(u
"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
747 # Negative position out of bounds
749 self
.assertRaises(IndexError, u
"\xff0".encode
, "ascii", "test.posreturn")
751 # Valid positive position
753 self
.assertEquals(u
"\xff0".encode("ascii", "test.posreturn"), "<?>0")
755 # Largest valid positive position (one beyond end of input)
757 self
.assertEquals(u
"\xff0".encode("ascii", "test.posreturn"), "<?>")
759 # Invalid positive position
761 self
.assertRaises(IndexError, u
"\xff0".encode
, "ascii", "test.posreturn")
766 def __getitem__(self
, key
):
768 for err
in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
769 self
.assertRaises(UnicodeError, codecs
.charmap_encode
, u
"\xff", err
, {0xff: None})
770 self
.assertRaises(ValueError, codecs
.charmap_encode
, u
"\xff", err
, D())
771 self
.assertRaises(TypeError, codecs
.charmap_encode
, u
"\xff", err
, {0xff: 300})
773 def test_translatehelper(self
):
774 # enhance coverage of:
775 # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
777 # (Unfortunately the errors argument is not directly accessible
778 # from Python, so we can't test that much)
780 def __getitem__(self
, key
):
782 self
.assertRaises(ValueError, u
"\xff".translate
, D())
783 self
.assertRaises(TypeError, u
"\xff".translate
, {0xff: sys
.maxunicode
+1})
784 self
.assertRaises(TypeError, u
"\xff".translate
, {0xff: ()})
786 def test_bug828737(self
):
794 for n
in (1, 10, 100, 1000):
795 text
= u
'abc<def>ghi'*n
796 text
.translate(charmap
)
799 test
.test_support
.run_unittest(CodecCallbackTest
)
801 if __name__
== "__main__":