Change a variable type to avoid signed overflow; replace repeated '19999' constant...
[python.git] / Lib / test / test_unicode.py
blob d67a2e13e8f05129f5d6eb85f6c4acc45e82ba85
1 # -*- coding: iso-8859-1 -*-
2 """ Test script for the Unicode implementation.
4 Written by Marc-Andre Lemburg (mal@lemburg.com).
6 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
8 """#"
9 import sys, struct, codecs
10 from test import test_support, string_tests
# Error handling (bad decoder return)
def search_function(encoding):
    # Codec search hook handing out deliberately broken codecs:
    #   "test.unicode1" -> encoder/decoder returning 42 (not a tuple)
    #   "test.unicode2" -> encoder/decoder returning (42, 42) (no unicode)
    # Every other encoding name yields None.
    def decode1(input, errors="strict"):
        return 42 # not a tuple

    def encode1(input, errors="strict"):
        return 42 # not a tuple

    def encode2(input, errors="strict"):
        return (42, 42) # no unicode

    def decode2(input, errors="strict"):
        return (42, 42) # no unicode

    broken_codecs = {
        "test.unicode1": (encode1, decode1, None, None),
        "test.unicode2": (encode2, decode2, None, None),
    }
    return broken_codecs.get(encoding)


codecs.register(search_function)
30 class UnicodeTest(
31 string_tests.CommonTest,
32 string_tests.MixinStrUnicodeUserStringTest,
33 string_tests.MixinStrUnicodeTest,
35 type2test = unicode
def checkequalnofix(self, result, object, methodname, *args):
    # Call object.methodname(*args) and require that the outcome equals
    # `result` and has exactly the same type as `result`.
    actual = getattr(object, methodname)(*args)
    self.assertEqual(actual, result)
    self.assertTrue(type(actual) is type(result))

    if actual is not object:
        return

    # The method handed back the very object it was called on: make sure
    # that this doesn't happen with subclasses.
    class usub(unicode):
        def __repr__(self):
            return 'usub(%r)' % unicode.__repr__(self)

    wrapped = usub(object)
    actual = getattr(wrapped, methodname)(*args)
    self.assertEqual(actual, result)
    self.assertTrue(wrapped is not actual)
def test_literals(self):
    # Equivalent escape spellings must denote the same string.
    self.assertEqual(u'\xff', u'\u00ff')
    self.assertEqual(u'\uffff', u'\U0000ffff')
    # Evaluating these literals must be rejected with SyntaxError.
    rejected = (
        'u\'\\Ufffffffe\'',
        'u\'\\Uffffffff\'',
        'u\'\\U%08x\'' % 0x110000,
    )
    for source in rejected:
        self.assertRaises(SyntaxError, eval, source)
def test_repr(self):
    # repr() checks; nothing is checked when sys.platform starts with 'java'.
    if sys.platform.startswith('java'):
        return

    # Test basic sanity of repr()
    sanity_cases = (
        (u'abc', "u'abc'"),
        (u'ab\\c', "u'ab\\\\c'"),
        (u'ab\\', "u'ab\\\\'"),
        (u'\\c', "u'\\\\c'"),
        (u'\\', "u'\\\\'"),
        (u'\n', "u'\\n'"),
        (u'\r', "u'\\r'"),
        (u'\t', "u'\\t'"),
        (u'\b', "u'\\x08'"),
        (u"'\"", """u'\\'"'"""),
        (u"'\"", """u'\\'"'"""),
        (u"'", '''u"'"'''),
        (u'"', """u'"'"""),
    )
    for text, expected in sanity_cases:
        self.assertEqual(repr(text), expected)

    # Expected repr() of the 256 code points 0..255 joined into one string.
    latin1repr = (
        "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
        "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
        "\\x1b\\x1c\\x1d\\x1e\\x1f !\"#$%&\\'()*+,-./0123456789:;<=>?@ABCDEFGHI"
        "JKLMNOPQRSTUVWXYZ[\\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f"
        "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8a\\x8b\\x8c\\x8d"
        "\\x8e\\x8f\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9a\\x9b"
        "\\x9c\\x9d\\x9e\\x9f\\xa0\\xa1\\xa2\\xa3\\xa4\\xa5\\xa6\\xa7\\xa8\\xa9"
        "\\xaa\\xab\\xac\\xad\\xae\\xaf\\xb0\\xb1\\xb2\\xb3\\xb4\\xb5\\xb6\\xb7"
        "\\xb8\\xb9\\xba\\xbb\\xbc\\xbd\\xbe\\xbf\\xc0\\xc1\\xc2\\xc3\\xc4\\xc5"
        "\\xc6\\xc7\\xc8\\xc9\\xca\\xcb\\xcc\\xcd\\xce\\xcf\\xd0\\xd1\\xd2\\xd3"
        "\\xd4\\xd5\\xd6\\xd7\\xd8\\xd9\\xda\\xdb\\xdc\\xdd\\xde\\xdf\\xe0\\xe1"
        "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
        "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
        "\\xfe\\xff'")
    all_latin1 = u''.join(map(unichr, xrange(256)))
    self.assertEqual(repr(all_latin1), latin1repr)

    # Test repr works on wide unicode escapes without overflow.
    wide = u"\U00010000" * 39 + u"\uffff" * 4096
    self.assertEqual(repr(wide), repr(wide))
def test_count(self):
    # Shared count() checks first, then str/unicode mixtures.
    string_tests.CommonTest.test_count(self)
    # check mixed argument types
    mixed_cases = (
        (3, 'aaa', (u'a',)),
        (0, 'aaa', (u'b',)),
        (3, u'aaa', ('a',)),
        (0, u'aaa', ('b',)),
        (0, u'aaa', ('b',)),
        (1, u'aaa', ('a', -1)),
        (3, u'aaa', ('a', -10)),
        (2, u'aaa', ('a', 0, -1)),
        (0, u'aaa', ('a', 0, -10)),
    )
    for expected, subject, call_args in mixed_cases:
        self.checkequalnofix(expected, subject, 'count', *call_args)
def test_find(self):
    # find() on a unicode subject, followed by argument-type errors.
    haystack = u'abcdefghiabc'
    for expected, call_args in ((0, (u'abc',)),
                                (9, (u'abc', 1)),
                                (-1, (u'def', 4))):
        self.checkequalnofix(expected, haystack, 'find', *call_args)

    find = u'hello'.find
    self.assertRaises(TypeError, find)
    self.assertRaises(TypeError, u'hello'.find, 42)
def test_rfind(self):
    # Shared rfind() checks first, then str/unicode mixtures.
    string_tests.CommonTest.test_rfind(self)
    # check mixed argument types
    for expected, subject, needle in ((9, 'abcdefghiabc', u'abc'),
                                      (12, 'abcdefghiabc', u''),
                                      (12, u'abcdefghiabc', '')):
        self.checkequalnofix(expected, subject, 'rfind', needle)
def test_index(self):
    # Shared index() checks first, then every str/unicode pairing.
    string_tests.CommonTest.test_index(self)
    # check mixed argument types
    found = (
        (0, '', ()),
        (3, 'def', ()),
        (0, 'abc', ()),
        (9, 'abc', (1,)),
    )
    not_found = (
        ('abcdefghiabc', 'hib', ()),
        ('abcdefghiab', 'abc', (1,)),
        ('abcdefghi', 'ghi', (8,)),
        ('abcdefghi', 'ghi', (-1,)),
    )
    for t1, t2 in ((str, unicode), (unicode, str)):
        for expected, needle, bounds in found:
            self.checkequalnofix(expected, t1('abcdefghiabc'), 'index',
                                 t2(needle), *bounds)
        for subject, needle, bounds in not_found:
            self.assertRaises(ValueError, t1(subject).index,
                              t2(needle), *bounds)
def test_rindex(self):
    # Shared rindex() checks first, then every str/unicode pairing.
    string_tests.CommonTest.test_rindex(self)
    # check mixed argument types
    found = (
        (12, '', ()),
        (3, 'def', ()),
        (9, 'abc', ()),
        (0, 'abc', (0, -1)),
    )
    not_found = (
        ('abcdefghiabc', 'hib', ()),
        ('defghiabc', 'def', (1,)),
        ('defghiabc', 'abc', (0, -1)),
        ('abcdefghi', 'ghi', (0, 8)),
        ('abcdefghi', 'ghi', (0, -1)),
    )
    for t1, t2 in ((str, unicode), (unicode, str)):
        for expected, needle, bounds in found:
            self.checkequalnofix(expected, t1('abcdefghiabc'), 'rindex',
                                 t2(needle), *bounds)
        for subject, needle, bounds in not_found:
            self.assertRaises(ValueError, t1(subject).rindex,
                              t2(needle), *bounds)
def test_translate(self):
    # translate() with mapping values of None, an ordinal, and unicode strings.
    a, b, c, z = ord('a'), ord('b'), ord('c'), ord('z')
    cases = (
        (u'bbbc', u'abababc', {a: None}),
        (u'iiic', u'abababc', {a: None, b: ord('i')}),
        (u'iiix', u'abababc', {a: None, b: ord('i'), c: u'x'}),
        (u'<i><i><i>c', u'abababc', {a: None, b: u'<i>'}),
        (u'c', u'abababc', {a: None, b: u''}),
        (u'xyyx', u'xzx', {z: u'yy'}),
    )
    for expected, subject, table in cases:
        self.checkequalnofix(expected, subject, 'translate', table)

    self.assertRaises(TypeError, u'hello'.translate)
    self.assertRaises(TypeError, u'abababc'.translate, {a: ''})
def test_split(self):
    # Shared split() checks first, then str/unicode mixtures.
    string_tests.CommonTest.test_split(self)

    # Mixed arguments
    cases = (
        ([u'a', u'b', u'c', u'd'], u'a//b//c//d', '//'),
        ([u'a', u'b', u'c', u'd'], 'a//b//c//d', u'//'),
        ([u'endcase ', u''], u'endcase test', 'test'),
    )
    for expected, subject, separator in cases:
        self.checkequalnofix(expected, subject, 'split', separator)
def test_join(self):
    # Shared join() checks first, then str/unicode mixtures of separator
    # and items.
    string_tests.MixinStrUnicodeUserStringTest.test_join(self)

    # mixed arguments
    cases = (
        (u'a b c d', u' ', ['a', 'b', u'c', u'd']),
        (u'abcd', u'', (u'a', u'b', u'c', u'd')),
        (u'w x y z', u' ', string_tests.Sequence('wxyz')),
        (u'a b c d', ' ', [u'a', u'b', u'c', u'd']),
        (u'a b c d', ' ', ['a', 'b', u'c', u'd']),
        (u'abcd', '', (u'a', u'b', u'c', u'd')),
        (u'w x y z', ' ', string_tests.Sequence(u'wxyz')),
    )
    for expected, separator, items in cases:
        self.checkequalnofix(expected, separator, 'join', items)
def test_strip(self):
    # Shared strip() checks, then stripping with the byte string "\xff"
    # must raise UnicodeError.
    string_tests.CommonTest.test_strip(self)
    strip = u"hello".strip
    self.assertRaises(UnicodeError, strip, "\xff")
def test_replace(self):
    # Shared replace() checks, then a str subject with unicode arguments.
    string_tests.CommonTest.test_replace(self)

    # method call forwarded from str implementation because of unicode argument
    expected = u'one@two!three!'
    self.checkequalnofix(expected, 'one!two!three!', 'replace', u'!', u'@', 1)
    replace = 'replace'.replace
    self.assertRaises(TypeError, replace, u"r", 42)
def test_comparison(self):
    # Equality and ordering across str/unicode operand mixtures.
    # Comparisons:
    for left, right in ((u'abc', 'abc'), ('abc', u'abc'), (u'abc', u'abc')):
        self.assertEqual(left, right)
    for longer, shorter in ((u'abcd', 'abc'), ('abcd', u'abc'),
                            (u'abcd', u'abc')):
        self.assertTrue(longer > shorter)
    for shorter, longer in ((u'abc', 'abcd'), ('abc', u'abcd'),
                            (u'abc', u'abcd')):
        self.assertTrue(shorter < longer)

    # Deliberately disabled: the branch below never runs.
    if 0:
        # Move these tests to a Unicode collation module test...
        # Testing UTF-16 code point order comparisons...

        # No surrogates, no fixup required.
        self.assertTrue(u'\u0061' < u'\u20ac')
        # Non surrogate below surrogate value, no fixup required
        self.assertTrue(u'\u0061' < u'\ud800\udc02')

        # Non surrogate above surrogate value, fixup required
        surrogate_pairs = (
            u'\ud800\udc01', u'\ud900\udc01', u'\uda00\udc01', u'\udb00\udc01',
            u'\ud800\udd01', u'\ud900\udd01', u'\uda00\udd01', u'\udb00\udd01',
            u'\ud800\ude01', u'\ud900\ude01', u'\uda00\ude01', u'\udb00\ude01',
            u'\ud800\udfff', u'\ud900\udfff', u'\uda00\udfff', u'\udb00\udfff',
        )
        for s in (u'\ue000', u'\uff61'):
            for s2 in surrogate_pairs:
                self.assertTrue(s < s2)

        # Surrogates on both sides, no fixup required
        self.assertTrue(u'\ud800\udc02' < u'\ud84d\udc56')
def test_islower(self):
    # Shared islower() checks, then one unicode-only character.
    string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
    char = u'\u1FFc'
    self.checkequalnofix(False, char, 'islower')
def test_isupper(self):
    # Shared isupper() checks; the unicode-only character is not checked
    # when sys.platform starts with 'java'.
    string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
    if sys.platform.startswith('java'):
        return
    self.checkequalnofix(False, u'\u1FFc', 'isupper')
def test_istitle(self):
    # Shared istitle() checks, then unicode-only titlecase cases.
    # The inherited check invoked here is test_istitle (previously
    # test_title), matching the other is*() tests of this class, each of
    # which delegates to the mixin test of the same name.
    # NOTE(review): assumes MixinStrUnicodeUserStringTest defines
    # test_istitle -- confirm against string_tests.
    string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
    self.checkequalnofix(True, u'\u1FFc', 'istitle')
    self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
def test_isspace(self):
    # Shared isspace() checks, then unicode-only characters.
    string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
    for expected, char in ((True, u'\u2000'),
                           (True, u'\u200a'),
                           (False, u'\u2014')):
        self.checkequalnofix(expected, char, 'isspace')
def test_isalpha(self):
    # Shared isalpha() checks, then one unicode-only character.
    string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
    char = u'\u1FFc'
    self.checkequalnofix(True, char, 'isalpha')
def test_isdecimal(self):
    # isdecimal() results for a table of unicode subjects, then rejection
    # of a surplus argument.
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (False, u'\u2460'), # CIRCLED DIGIT ONE
        (False, u'\xbc'), # VULGAR FRACTION ONE QUARTER
        (True, u'\u0660'), # ARABIC-INDIC DIGIT ZERO
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, subject in cases:
        self.checkequalnofix(expected, subject, 'isdecimal')

    self.checkraises(TypeError, 'abc', 'isdecimal', 42)
def test_isdigit(self):
    # Shared isdigit() checks, then unicode-only characters.
    string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
    for expected, char in ((True, u'\u2460'),
                           (False, u'\xbc'),
                           (True, u'\u0660')):
        self.checkequalnofix(expected, char, 'isdigit')
def test_isnumeric(self):
    # isnumeric() results for a table of unicode subjects, then rejection
    # of a surplus argument.
    cases = (
        (False, u''),
        (False, u'a'),
        (True, u'0'),
        (True, u'\u2460'),
        (True, u'\xbc'),
        (True, u'\u0660'),
        (True, u'0123456789'),
        (False, u'0123456789a'),
    )
    for expected, subject in cases:
        self.checkequalnofix(expected, subject, 'isnumeric')

    isnumeric = u"abc".isnumeric
    self.assertRaises(TypeError, isnumeric, 42)
def test_contains(self):
    # Testing Unicode contains method
    def check(cases):
        # Each case is (needle, container, expected membership).
        for needle, container, present in cases:
            if present:
                self.assertTrue(needle in container)
            else:
                self.assertTrue(needle not in container)

    check((
        ('a', u'abdb', True),
        ('a', u'bdab', True),
        ('a', u'bdaba', True),
        ('a', u'bdba', True),
        ('a', u'bdba', True),
        (u'a', u'bdba', True),
        (u'a', u'bdb', False),
        (u'a', 'bdb', False),
        (u'a', 'bdba', True),
        (u'a', ('a',1,None), True),
        (u'a', (1,None,'a'), True),
        (u'a', (1,None,u'a'), True),
        ('a', ('a',1,None), True),
        ('a', (1,None,'a'), True),
        ('a', (1,None,u'a'), True),
        ('a', ('x',1,u'y'), False),
        ('a', ('x',1,None), False),
        (u'abcd', u'abcxxxx', False),
        (u'ab', u'abcd', True),
        ('ab', u'abc', True),
        (u'ab', 'abc', True),
        (u'ab', (1,None,u'ab'), True),
        (u'', u'abc', True),
        ('', u'abc', True),
    ))

    # If the following fails either
    # the contains operator does not propagate UnicodeErrors or
    # someone has changed the default encoding
    self.assertRaises(UnicodeDecodeError, 'g\xe2teau'.__contains__, u'\xe2')
    self.assertRaises(UnicodeDecodeError, u'g\xe2teau'.__contains__, '\xe2')

    check((
        (u'', '', True),
        ('', u'', True),
        (u'', u'', True),
        (u'', 'abc', True),
        ('', u'abc', True),
        (u'', u'abc', True),
        (u'\0', 'abc', False),
        ('\0', u'abc', False),
        (u'\0', u'abc', False),
        (u'\0', '\0abc', True),
        ('\0', u'\0abc', True),
        (u'\0', u'\0abc', True),
        (u'\0', 'abc\0', True),
        ('\0', u'abc\0', True),
        (u'\0', u'abc\0', True),
        (u'a', '\0abc', True),
        ('a', u'\0abc', True),
        (u'a', u'\0abc', True),
        (u'asdf', 'asdf', True),
        ('asdf', u'asdf', True),
        (u'asdf', u'asdf', True),
        (u'asdf', 'asd', False),
        ('asdf', u'asd', False),
        (u'asdf', u'asd', False),
        (u'asdf', '', False),
        ('asdf', u'', False),
        (u'asdf', u'', False),
    ))

    self.assertRaises(TypeError, u"abc".__contains__)
    self.assertRaises(TypeError, u"abc".__contains__, object())
def test_formatting(self):
    # Shared %-formatting checks first, then unicode-specific ones.
    # Expected values for padded conversions (%5.2f, %*s, %*.*s) carry the
    # full field width, i.e. the padding spaces are part of the expected
    # strings.
    string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
    # Testing Unicode formatting strings...
    self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
    # %5.2f pads to five columns, so ' 3.00' follows the ', ' separator.
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
    # '1003.57' is already wider than five columns: no padding.
    self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
    if not sys.platform.startswith('java'):
        self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
    self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
    self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')

    self.assertEqual(u'%c' % 0x1234, u'\u1234')
    self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))

    # formatting jobs delegated from the string implementation:
    self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123}, u'...abc...')
    self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
    self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
    self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
    self.assertEqual('...%s...' % u"abc", u'...abc...')
    # A '*' width of 5 (or -5) yields a five-column field, right-justified
    # (or left-justified for the negative width).
    self.assertEqual('%*s' % (5,u'abc',), u'  abc')
    self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
    self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
    self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
    self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
    self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
    self.assertEqual('%c' % u'a', u'a')
    # An old-style class whose __str__ returns a unicode object.
    class Wrapper:
        def __str__(self):
            return u'\u1234'
    self.assertEqual('%s' % Wrapper(), u'\u1234')
@test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
def test_format_float(self):
    # should not format with a comma, but always with C locale
    formatted = u'%.1f' % 1.0
    self.assertEqual(u'1.0', formatted)
425 def test_constructor(self):
426 # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
428 self.assertEqual(
429 unicode(u'unicode remains unicode'),
430 u'unicode remains unicode'
433 class UnicodeSubclass(unicode):
434 pass
436 self.assertEqual(
437 unicode(UnicodeSubclass('unicode subclass becomes unicode')),
438 u'unicode subclass becomes unicode'
441 self.assertEqual(
442 unicode('strings are converted to unicode'),
443 u'strings are converted to unicode'
446 class UnicodeCompat:
447 def __init__(self, x):
448 self.x = x
449 def __unicode__(self):
450 return self.x
452 self.assertEqual(
453 unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
454 u'__unicode__ compatible objects are recognized')
456 class StringCompat:
457 def __init__(self, x):
458 self.x = x
459 def __str__(self):
460 return self.x
462 self.assertEqual(
463 unicode(StringCompat('__str__ compatible objects are recognized')),
464 u'__str__ compatible objects are recognized'
467 # unicode(obj) is compatible to str():
469 o = StringCompat('unicode(obj) is compatible to str()')
470 self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
471 self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
473 # %-formatting and .__unicode__()
474 self.assertEqual(u'%s' %
475 UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"),
476 u"u'%s' % obj uses obj.__unicode__()")
477 self.assertEqual(u'%s' %
478 UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"),
479 u"u'%s' % obj falls back to obj.__str__()")
481 for obj in (123, 123.45, 123L):
482 self.assertEqual(unicode(obj), unicode(str(obj)))
484 # unicode(obj, encoding, error) tests (this maps to
485 # PyUnicode_FromEncodedObject() at C level)
487 if not sys.platform.startswith('java'):
488 self.assertRaises(
489 TypeError,
490 unicode,
491 u'decoding unicode is not supported',
492 'utf-8',
493 'strict'
496 self.assertEqual(
497 unicode('strings are decoded to unicode', 'utf-8', 'strict'),
498 u'strings are decoded to unicode'
501 if not sys.platform.startswith('java'):
502 # Silence Py3k warning
503 with test_support.check_warnings():
504 buf = buffer('character buffers are decoded to unicode')
505 self.assertEqual(
506 unicode(
507 buf,
508 'utf-8',
509 'strict'
511 u'character buffers are decoded to unicode'
514 self.assertRaises(TypeError, unicode, 42, 42, 42)
516 def test_codecs_utf7(self):
517 utfTests = [
518 (u'A\u2262\u0391.', 'A+ImIDkQ.'), # RFC2152 example
519 (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'), # RFC2152 example
520 (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'), # RFC2152 example
521 (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
522 (u'+', '+-'),
523 (u'+-', '+--'),
524 (u'+?', '+-?'),
525 (u'\?', '+AFw?'),
526 (u'+?', '+-?'),
527 (ur'\\?', '+AFwAXA?'),
528 (ur'\\\?', '+AFwAXABc?'),
529 (ur'++--', '+-+---'),
530 (u'\U000abcde', '+2m/c3g-'), # surrogate pairs
531 (u'/', '/'),
534 for (x, y) in utfTests:
535 self.assertEqual(x.encode('utf-7'), y)
537 # Unpaired surrogates not supported
538 self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
540 self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd\ufffd')
542 # Direct encoded characters
543 set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
544 # Optional direct characters
545 set_o = '!"#$%&*;<=>@[]^_`{|}'
546 for c in set_d:
547 self.assertEqual(c.encode('utf7'), c.encode('ascii'))
548 self.assertEqual(c.encode('ascii').decode('utf7'), c)
549 for c in set_o:
550 self.assertEqual(c.encode('ascii').decode('utf7'), c)
def test_codecs_utf8(self):
    # UTF-8 encoding of the empty string, a BMP character, surrogate pairs
    # and lone surrogates.
    self.assertEqual(u''.encode('utf-8'), '')
    self.assertEqual(u'\u20ac'.encode('utf-8'), '\xe2\x82\xac')
    self.assertEqual(u'\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82')
    self.assertEqual(u'\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96')
    self.assertEqual(u'\ud800'.encode('utf-8'), '\xed\xa0\x80')
    self.assertEqual(u'\udc00'.encode('utf-8'), '\xed\xb0\x80')
    self.assertEqual(
        (u'\ud800\udc02'*1000).encode('utf-8'),
        '\xf0\x90\x80\x82'*1000
    )
    # A longer text mixing non-ASCII and ASCII characters.
    self.assertEqual(
        u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
        u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
        u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
        u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
        u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
        u' Nunstuck git und'.encode('utf-8'),
        '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
        '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
        '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
        '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
        '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
        '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
        '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
        '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
        '\xe3\x80\x8cWenn ist das Nunstuck git und'
    )

    # UTF-8 specific decoding tests
    self.assertEqual(unicode('\xf0\xa3\x91\x96', 'utf-8'), u'\U00023456' )
    self.assertEqual(unicode('\xf0\x90\x80\x82', 'utf-8'), u'\U00010002' )
    self.assertEqual(unicode('\xe2\x82\xac', 'utf-8'), u'\u20ac' )

    # Other possible utf-8 test cases:
    # * strict decoding testing for all of the
    #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
def test_codecs_idna(self):
    # Test whether trailing dot is preserved
    encoded = u"www.python.org.".encode("idna")
    self.assertEqual(encoded, "www.python.org.")
def test_codecs_errors(self):
    # Error handling (encoding)
    self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii')
    self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
    self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
    self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
    # Positional and keyword spellings of the arguments must agree.
    self.assertEqual(u'Andr\202 x'.encode('ascii', 'replace'),
                     u'Andr\202 x'.encode('ascii', errors='replace'))
    self.assertEqual(u'Andr\202 x'.encode('ascii', 'ignore'),
                     u'Andr\202 x'.encode(encoding='ascii', errors='ignore'))

    # Error handling (decoding)
    self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
    self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
    self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
    self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
    self.assertEqual(u'abcde'.decode('ascii', 'ignore'),
                     u'abcde'.decode('ascii', errors='ignore'))
    self.assertEqual(u'abcde'.decode('ascii', 'replace'),
                     u'abcde'.decode(encoding='ascii', errors='replace'))

    # Error handling (unknown character names)
    self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")

    # Error handling (truncated escape sequence)
    self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")

    # The "test.unicode1"/"test.unicode2" codecs return malformed results,
    # which must surface as TypeError.
    self.assertRaises(TypeError, "hello".decode, "test.unicode1")
    self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
    self.assertRaises(TypeError, u"hello".encode, "test.unicode2")
    # executes PyUnicode_Encode()
    import imp
    self.assertRaises(
        ImportError,
        imp.find_module,
        "non-existing module",
        [u"non-existing dir"]
    )

    # Error handling (wrong arguments)
    self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)

    # Error handling (PyUnicode_EncodeDecimal())
    self.assertRaises(UnicodeError, int, u"\u0200")
def test_codecs(self):
    # Encoding
    for encoding, encoded in (
            ('ascii', 'hello'),
            ('utf-7', 'hello'),
            ('utf-8', 'hello'),
            ('utf8', 'hello'),
            ('utf-16-le', 'h\000e\000l\000l\000o\000'),
            ('utf-16-be', '\000h\000e\000l\000l\000o'),
            ('latin-1', 'hello')):
        self.assertEqual(u'hello'.encode(encoding), encoded)

    def check_roundtrip(text, encodings):
        # Encoding and decoding again must give back the same text.
        for encoding in encodings:
            self.assertEqual(unicode(text.encode(encoding), encoding), text)

    # Roundtrip safety for BMP (just the first 1024 chars)
    bmp_encodings = ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
                     'utf-16-be', 'raw_unicode_escape',
                     'unicode_escape', 'unicode_internal')
    for c in xrange(1024):
        check_roundtrip(unichr(c), bmp_encodings)

    # Roundtrip safety for BMP (just the first 256 chars)
    for c in xrange(256):
        check_roundtrip(unichr(c), ('latin-1',))

    # Roundtrip safety for BMP (just the first 128 chars)
    for c in xrange(128):
        check_roundtrip(unichr(c), ('ascii',))

    # Roundtrip safety for non-BMP (just a few chars)
    check_roundtrip(u'\U00010001\U00020002\U00030003\U00040004\U00050005',
                    ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                     #'raw_unicode_escape',
                     'unicode_escape', 'unicode_internal'))

    # UTF-8 must be roundtrip safe for all UCS-2 code points
    # This excludes surrogates: in the full range, there would be
    # a surrogate pair (\udbff\udc00), which gets converted back
    # to a non-BMP character (\U0010fc00)
    all_ucs2 = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
    check_roundtrip(all_ucs2, ('utf-8',))
def test_codecs_charmap(self):
    # Decoding and re-encoding must give back the original bytes for each
    # listed charmap codec, separately for the low and high byte halves.
    # 0-127
    s = ''.join(map(chr, xrange(128)))
    for encoding in (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
        'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        'cp1256', 'cp1257', 'cp1258',
        'cp856', 'cp857', 'cp864', 'cp869', 'cp874',

        'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
        'cp1006', 'iso8859_8',

        ### These have undefined mappings:
        #'cp424',

        ### These fail the round-trip:
        #'cp875'

        ):
        self.assertEqual(unicode(s, encoding).encode(encoding), s)

    # 128-255
    s = ''.join(map(chr, xrange(128, 256)))
    for encoding in (
        'cp037', 'cp1026',
        'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
        'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
        'cp863', 'cp865', 'cp866',
        'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
        'iso8859_2', 'iso8859_4', 'iso8859_5',
        'iso8859_9', 'koi8_r', 'latin_1',
        'mac_cyrillic', 'mac_latin2',

        ### These have undefined mappings:
        #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
        #'cp1256', 'cp1257', 'cp1258',
        #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
        #'iso8859_3', 'iso8859_6', 'iso8859_7',
        #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',

        ### These fail the round-trip:
        #'cp1006', 'cp875', 'iso8859_8',

        ):
        self.assertEqual(unicode(s, encoding).encode(encoding), s)
def test_concatenation(self):
    # Adjacent-literal concatenation mixing str and unicode literals.
    for joined, expected in (
            ((u"abc" u"def"), u"abcdef"),
            (("abc" u"def"), u"abcdef"),
            ((u"abc" "def"), u"abcdef"),
            ((u"abc" u"def" "ghi"), u"abcdefghi"),
            (("abc" "def" u"ghi"), u"abcdefghi")):
        self.assertEqual(joined, expected)
def test_printing(self):
    # Print str/unicode mixtures to a sink whose write() discards its input.
    class NullSink:
        def write(self, text):
            pass

    sink = NullSink()
    print >>sink, u'abc'
    print >>sink, u'abc', u'def'
    print >>sink, u'abc', 'def'
    print >>sink, 'abc', u'def'
    print >>sink, u'abc\n'
    print >>sink, u'abc\n',
    print >>sink, u'abc\n',
    print >>sink, u'def\n'
    print >>sink, u'def\n'
763 def test_ucs4(self):
764 x = u'\U00100000'
765 y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
766 self.assertEqual(x, y)
768 y = r'\U00100000'
769 x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
770 self.assertEqual(x, y)
771 y = r'\U00010000'
772 x = y.decode("raw-unicode-escape").encode("raw-unicode-escape")
773 self.assertEqual(x, y)
775 try:
776 '\U11111111'.decode("raw-unicode-escape")
777 except UnicodeDecodeError as e:
778 self.assertEqual(e.start, 0)
779 self.assertEqual(e.end, 10)
780 else:
781 self.fail("Should have raised UnicodeDecodeError")
def test_conversion(self):
    # Make sure __unicode__() works properly
    class Foo0:
        def __str__(self):
            return "foo"

    class Foo1:
        def __unicode__(self):
            return u"foo"

    class Foo2(object):
        def __unicode__(self):
            return u"foo"

    class Foo3(object):
        def __unicode__(self):
            return "foo"

    class Foo4(str):
        def __unicode__(self):
            return "foo"

    class Foo5(unicode):
        def __unicode__(self):
            return "foo"

    class Foo6(str):
        def __str__(self):
            return "foos"

        def __unicode__(self):
            return u"foou"

    class Foo7(unicode):
        def __str__(self):
            return "foos"

        def __unicode__(self):
            return u"foou"

    class Foo8(unicode):
        def __new__(cls, content=""):
            return unicode.__new__(cls, 2*content)

        def __unicode__(self):
            return self

    class Foo9(unicode):
        def __str__(self):
            return "string"

        def __unicode__(self):
            return "not unicode"

    conversions = (
        (Foo0(), u"foo"),
        (Foo1(), u"foo"),
        (Foo2(), u"foo"),
        (Foo3(), u"foo"),
        (Foo4("bar"), u"foo"),
        (Foo5("bar"), u"foo"),
        (Foo6("bar"), u"foou"),
        (Foo7("bar"), u"foou"),
        (Foo8("foo"), u"foofoo"),
    )
    for instance, expected in conversions:
        self.assertEqual(unicode(instance), expected)
    self.assertEqual(str(Foo9("foo")), "string")
    self.assertEqual(unicode(Foo9("foo")), u"not unicode")
846 def test_unicode_repr(self):
847 class s1:
848 def __repr__(self):
849 return '\\n'
851 class s2:
852 def __repr__(self):
853 return u'\\n'
855 self.assertEqual(repr(s1()), '\\n')
856 self.assertEqual(repr(s2()), '\\n')
858 def test_expandtabs_overflows_gracefully(self):
859 # This test only affects 32-bit platforms because expandtabs can only take
860 # an int as the max value, not a 64-bit C long. If expandtabs is changed
861 # to take a 64-bit long, this test should apply to all platforms.
862 if sys.maxint > (1 << 32) or struct.calcsize('P') != 4:
863 return
864 self.assertRaises(OverflowError, u't\tt\t'.expandtabs, sys.maxint)
866 def test__format__(self):
867 def test(value, format, expected):
868 # test both with and without the trailing 's'
869 self.assertEqual(value.__format__(format), expected)
870 self.assertEqual(value.__format__(format + u's'), expected)
872 test(u'', u'', u'')
873 test(u'abc', u'', u'abc')
874 test(u'abc', u'.3', u'abc')
875 test(u'ab', u'.3', u'ab')
876 test(u'abcdef', u'.3', u'abc')
877 test(u'abcdef', u'.0', u'')
878 test(u'abc', u'3.3', u'abc')
879 test(u'abc', u'2.3', u'abc')
880 test(u'abc', u'2.2', u'ab')
881 test(u'abc', u'3.2', u'ab ')
882 test(u'result', u'x<0', u'result')
883 test(u'result', u'x<5', u'result')
884 test(u'result', u'x<6', u'result')
885 test(u'result', u'x<7', u'resultx')
886 test(u'result', u'x<8', u'resultxx')
887 test(u'result', u' <7', u'result ')
888 test(u'result', u'<7', u'result ')
889 test(u'result', u'>7', u' result')
890 test(u'result', u'>8', u' result')
891 test(u'result', u'^8', u' result ')
892 test(u'result', u'^9', u' result ')
893 test(u'result', u'^10', u' result ')
894 test(u'a', u'10000', u'a' + u' ' * 9999)
895 test(u'', u'10000', u' ' * 10000)
896 test(u'', u'10000000', u' ' * 10000000)
898 # test mixing unicode and str
899 self.assertEqual(u'abc'.__format__('s'), u'abc')
900 self.assertEqual(u'abc'.__format__('->10s'), u'-------abc')
902 def test_format(self):
903 self.assertEqual(u''.format(), u'')
904 self.assertEqual(u'a'.format(), u'a')
905 self.assertEqual(u'ab'.format(), u'ab')
906 self.assertEqual(u'a{{'.format(), u'a{')
907 self.assertEqual(u'a}}'.format(), u'a}')
908 self.assertEqual(u'{{b'.format(), u'{b')
909 self.assertEqual(u'}}b'.format(), u'}b')
910 self.assertEqual(u'a{{b'.format(), u'a{b')
912 # examples from the PEP:
913 import datetime
914 self.assertEqual(u"My name is {0}".format(u'Fred'), u"My name is Fred")
915 self.assertEqual(u"My name is {0[name]}".format(dict(name=u'Fred')),
916 u"My name is Fred")
917 self.assertEqual(u"My name is {0} :-{{}}".format(u'Fred'),
918 u"My name is Fred :-{}")
920 # datetime.__format__ doesn't work with unicode
921 #d = datetime.date(2007, 8, 18)
922 #self.assertEqual("The year is {0.year}".format(d),
923 # "The year is 2007")
925 # classes we'll use for testing
926 class C:
927 def __init__(self, x=100):
928 self._x = x
929 def __format__(self, spec):
930 return spec
932 class D:
933 def __init__(self, x):
934 self.x = x
935 def __format__(self, spec):
936 return str(self.x)
938 # class with __str__, but no __format__
939 class E:
940 def __init__(self, x):
941 self.x = x
942 def __str__(self):
943 return u'E(' + self.x + u')'
945 # class with __repr__, but no __format__ or __str__
946 class F:
947 def __init__(self, x):
948 self.x = x
949 def __repr__(self):
950 return u'F(' + self.x + u')'
952 # class with __format__ that forwards to string, for some format_spec's
953 class G:
954 def __init__(self, x):
955 self.x = x
956 def __str__(self):
957 return u"string is " + self.x
958 def __format__(self, format_spec):
959 if format_spec == 'd':
960 return u'G(' + self.x + u')'
961 return object.__format__(self, format_spec)
963 # class that returns a bad type from __format__
964 class H:
965 def __format__(self, format_spec):
966 return 1.0
968 class I(datetime.date):
969 def __format__(self, format_spec):
970 return self.strftime(format_spec)
972 class J(int):
973 def __format__(self, format_spec):
974 return int.__format__(self * 2, format_spec)
977 self.assertEqual(u''.format(), u'')
978 self.assertEqual(u'abc'.format(), u'abc')
979 self.assertEqual(u'{0}'.format(u'abc'), u'abc')
980 self.assertEqual(u'{0:}'.format(u'abc'), u'abc')
981 self.assertEqual(u'X{0}'.format(u'abc'), u'Xabc')
982 self.assertEqual(u'{0}X'.format(u'abc'), u'abcX')
983 self.assertEqual(u'X{0}Y'.format(u'abc'), u'XabcY')
984 self.assertEqual(u'{1}'.format(1, u'abc'), u'abc')
985 self.assertEqual(u'X{1}'.format(1, u'abc'), u'Xabc')
986 self.assertEqual(u'{1}X'.format(1, u'abc'), u'abcX')
987 self.assertEqual(u'X{1}Y'.format(1, u'abc'), u'XabcY')
988 self.assertEqual(u'{0}'.format(-15), u'-15')
989 self.assertEqual(u'{0}{1}'.format(-15, u'abc'), u'-15abc')
990 self.assertEqual(u'{0}X{1}'.format(-15, u'abc'), u'-15Xabc')
991 self.assertEqual(u'{{'.format(), u'{')
992 self.assertEqual(u'}}'.format(), u'}')
993 self.assertEqual(u'{{}}'.format(), u'{}')
994 self.assertEqual(u'{{x}}'.format(), u'{x}')
995 self.assertEqual(u'{{{0}}}'.format(123), u'{123}')
996 self.assertEqual(u'{{{{0}}}}'.format(), u'{{0}}')
997 self.assertEqual(u'}}{{'.format(), u'}{')
998 self.assertEqual(u'}}x{{'.format(), u'}x{')
1000 # weird field names
1001 self.assertEqual(u"{0[foo-bar]}".format({u'foo-bar':u'baz'}), u'baz')
1002 self.assertEqual(u"{0[foo bar]}".format({u'foo bar':u'baz'}), u'baz')
1003 self.assertEqual(u"{0[ ]}".format({u' ':3}), u'3')
1005 self.assertEqual(u'{foo._x}'.format(foo=C(20)), u'20')
1006 self.assertEqual(u'{1}{0}'.format(D(10), D(20)), u'2010')
1007 self.assertEqual(u'{0._x.x}'.format(C(D(u'abc'))), u'abc')
1008 self.assertEqual(u'{0[0]}'.format([u'abc', u'def']), u'abc')
1009 self.assertEqual(u'{0[1]}'.format([u'abc', u'def']), u'def')
1010 self.assertEqual(u'{0[1][0]}'.format([u'abc', [u'def']]), u'def')
1011 self.assertEqual(u'{0[1][0].x}'.format(['abc', [D(u'def')]]), u'def')
1013 # strings
1014 self.assertEqual(u'{0:.3s}'.format(u'abc'), u'abc')
1015 self.assertEqual(u'{0:.3s}'.format(u'ab'), u'ab')
1016 self.assertEqual(u'{0:.3s}'.format(u'abcdef'), u'abc')
1017 self.assertEqual(u'{0:.0s}'.format(u'abcdef'), u'')
1018 self.assertEqual(u'{0:3.3s}'.format(u'abc'), u'abc')
1019 self.assertEqual(u'{0:2.3s}'.format(u'abc'), u'abc')
1020 self.assertEqual(u'{0:2.2s}'.format(u'abc'), u'ab')
1021 self.assertEqual(u'{0:3.2s}'.format(u'abc'), u'ab ')
1022 self.assertEqual(u'{0:x<0s}'.format(u'result'), u'result')
1023 self.assertEqual(u'{0:x<5s}'.format(u'result'), u'result')
1024 self.assertEqual(u'{0:x<6s}'.format(u'result'), u'result')
1025 self.assertEqual(u'{0:x<7s}'.format(u'result'), u'resultx')
1026 self.assertEqual(u'{0:x<8s}'.format(u'result'), u'resultxx')
1027 self.assertEqual(u'{0: <7s}'.format(u'result'), u'result ')
1028 self.assertEqual(u'{0:<7s}'.format(u'result'), u'result ')
1029 self.assertEqual(u'{0:>7s}'.format(u'result'), u' result')
1030 self.assertEqual(u'{0:>8s}'.format(u'result'), u' result')
1031 self.assertEqual(u'{0:^8s}'.format(u'result'), u' result ')
1032 self.assertEqual(u'{0:^9s}'.format(u'result'), u' result ')
1033 self.assertEqual(u'{0:^10s}'.format(u'result'), u' result ')
1034 self.assertEqual(u'{0:10000}'.format(u'a'), u'a' + u' ' * 9999)
1035 self.assertEqual(u'{0:10000}'.format(u''), u' ' * 10000)
1036 self.assertEqual(u'{0:10000000}'.format(u''), u' ' * 10000000)
1038 # format specifiers for user defined type
1039 self.assertEqual(u'{0:abc}'.format(C()), u'abc')
1041 # !r and !s coersions
1042 self.assertEqual(u'{0!s}'.format(u'Hello'), u'Hello')
1043 self.assertEqual(u'{0!s:}'.format(u'Hello'), u'Hello')
1044 self.assertEqual(u'{0!s:15}'.format(u'Hello'), u'Hello ')
1045 self.assertEqual(u'{0!s:15s}'.format(u'Hello'), u'Hello ')
1046 self.assertEqual(u'{0!r}'.format(u'Hello'), u"u'Hello'")
1047 self.assertEqual(u'{0!r:}'.format(u'Hello'), u"u'Hello'")
1048 self.assertEqual(u'{0!r}'.format(F(u'Hello')), u'F(Hello)')
1050 # test fallback to object.__format__
1051 self.assertEqual(u'{0}'.format({}), u'{}')
1052 self.assertEqual(u'{0}'.format([]), u'[]')
1053 self.assertEqual(u'{0}'.format([1]), u'[1]')
1054 self.assertEqual(u'{0}'.format(E(u'data')), u'E(data)')
1055 self.assertEqual(u'{0:^10}'.format(E(u'data')), u' E(data) ')
1056 self.assertEqual(u'{0:^10s}'.format(E(u'data')), u' E(data) ')
1057 self.assertEqual(u'{0:d}'.format(G(u'data')), u'G(data)')
1058 self.assertEqual(u'{0:>15s}'.format(G(u'data')), u' string is data')
1059 self.assertEqual(u'{0!s}'.format(G(u'data')), u'string is data')
1061 self.assertEqual(u"{0:date: %Y-%m-%d}".format(I(year=2007,
1062 month=8,
1063 day=27)),
1064 u"date: 2007-08-27")
1066 # test deriving from a builtin type and overriding __format__
1067 self.assertEqual(u"{0}".format(J(10)), u"20")
1070 # string format specifiers
1071 self.assertEqual(u'{0:}'.format('a'), u'a')
1073 # computed format specifiers
1074 self.assertEqual(u"{0:.{1}}".format(u'hello world', 5), u'hello')
1075 self.assertEqual(u"{0:.{1}s}".format(u'hello world', 5), u'hello')
1076 self.assertEqual(u"{0:.{precision}s}".format('hello world', precision=5), u'hello')
1077 self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width=10, precision=5), u'hello ')
1078 self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), u'hello ')
1080 # test various errors
1081 self.assertRaises(ValueError, u'{'.format)
1082 self.assertRaises(ValueError, u'}'.format)
1083 self.assertRaises(ValueError, u'a{'.format)
1084 self.assertRaises(ValueError, u'a}'.format)
1085 self.assertRaises(ValueError, u'{a'.format)
1086 self.assertRaises(ValueError, u'}a'.format)
1087 self.assertRaises(IndexError, u'{0}'.format)
1088 self.assertRaises(IndexError, u'{1}'.format, u'abc')
1089 self.assertRaises(KeyError, u'{x}'.format)
1090 self.assertRaises(ValueError, u"}{".format)
1091 self.assertRaises(ValueError, u"{".format)
1092 self.assertRaises(ValueError, u"}".format)
1093 self.assertRaises(ValueError, u"abc{0:{}".format)
1094 self.assertRaises(ValueError, u"{0".format)
1095 self.assertRaises(IndexError, u"{0.}".format)
1096 self.assertRaises(ValueError, u"{0.}".format, 0)
1097 self.assertRaises(IndexError, u"{0[}".format)
1098 self.assertRaises(ValueError, u"{0[}".format, [])
1099 self.assertRaises(KeyError, u"{0]}".format)
1100 self.assertRaises(ValueError, u"{0.[]}".format, 0)
1101 self.assertRaises(ValueError, u"{0..foo}".format, 0)
1102 self.assertRaises(ValueError, u"{0[0}".format, 0)
1103 self.assertRaises(ValueError, u"{0[0:foo}".format, 0)
1104 self.assertRaises(KeyError, u"{c]}".format)
1105 self.assertRaises(ValueError, u"{{ {{{0}}".format, 0)
1106 self.assertRaises(ValueError, u"{0}}".format, 0)
1107 self.assertRaises(KeyError, u"{foo}".format, bar=3)
1108 self.assertRaises(ValueError, u"{0!x}".format, 3)
1109 self.assertRaises(ValueError, u"{0!}".format, 0)
1110 self.assertRaises(ValueError, u"{0!rs}".format, 0)
1111 self.assertRaises(ValueError, u"{!}".format)
1112 self.assertRaises(IndexError, u"{:}".format)
1113 self.assertRaises(IndexError, u"{:s}".format)
1114 self.assertRaises(IndexError, u"{}".format)
1116 # issue 6089
1117 self.assertRaises(ValueError, u"{0[0]x}".format, [None])
1118 self.assertRaises(ValueError, u"{0[0](10)}".format, [None])
1120 # can't have a replacement on the field name portion
1121 self.assertRaises(TypeError, u'{0[{1}]}'.format, u'abcdefg', 4)
1123 # exceed maximum recursion depth
1124 self.assertRaises(ValueError, u"{0:{1:{2}}}".format, u'abc', u's', u'')
1125 self.assertRaises(ValueError, u"{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
1126 0, 1, 2, 3, 4, 5, 6, 7)
1128 # string format spec errors
1129 self.assertRaises(ValueError, u"{0:-s}".format, u'')
1130 self.assertRaises(ValueError, format, u"", u"-")
1131 self.assertRaises(ValueError, u"{0:=s}".format, u'')
1133 # test combining string and unicode
1134 self.assertEqual(u"foo{0}".format('bar'), u'foobar')
1135 # This will try to convert the argument from unicode to str, which
1136 # will succeed
1137 self.assertEqual("foo{0}".format(u'bar'), 'foobar')
1138 # This will try to convert the argument from unicode to str, which
1139 # will fail
1140 self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar')
1142 def test_format_auto_numbering(self):
1143 class C:
1144 def __init__(self, x=100):
1145 self._x = x
1146 def __format__(self, spec):
1147 return spec
1149 self.assertEqual(u'{}'.format(10), u'10')
1150 self.assertEqual(u'{:5}'.format('s'), u's ')
1151 self.assertEqual(u'{!r}'.format('s'), u"'s'")
1152 self.assertEqual(u'{._x}'.format(C(10)), u'10')
1153 self.assertEqual(u'{[1]}'.format([1, 2]), u'2')
1154 self.assertEqual(u'{[a]}'.format({'a':4, 'b':2}), u'4')
1155 self.assertEqual(u'a{}b{}c'.format(0, 1), u'a0b1c')
1157 self.assertEqual(u'a{:{}}b'.format('x', '^10'), u'a x b')
1158 self.assertEqual(u'a{:{}x}b'.format(20, '#'), u'a0x14b')
1160 # can't mix and match numbering and auto-numbering
1161 self.assertRaises(ValueError, u'{}{1}'.format, 1, 2)
1162 self.assertRaises(ValueError, u'{1}{}'.format, 1, 2)
1163 self.assertRaises(ValueError, u'{:{1}}'.format, 1, 2)
1164 self.assertRaises(ValueError, u'{0:{}}'.format, 1, 2)
1166 # can mix and match auto-numbering and named
1167 self.assertEqual(u'{f}{}'.format(4, f='test'), u'test4')
1168 self.assertEqual(u'{}{f}'.format(4, f='test'), u'4test')
1169 self.assertEqual(u'{:{f}}{g}{}'.format(1, 3, g='g', f=2), u' 1g3')
1170 self.assertEqual(u'{f:{}}{}{g}'.format(2, 4, f=1, g='g'), u' 14g')
1172 def test_raiseMemError(self):
1173 # Ensure that the freelist contains a consistent object, even
1174 # when a string allocation fails with a MemoryError.
1175 # This used to crash the interpreter,
1176 # or leak references when the number was smaller.
1177 charwidth = 4 if sys.maxunicode >= 0x10000 else 2
1178 # Note: sys.maxsize is half of the actual max allocation because of
1179 # the signedness of Py_ssize_t.
1180 alloc = lambda: u"a" * (sys.maxsize // charwidth * 2)
1181 self.assertRaises(MemoryError, alloc)
1182 self.assertRaises(MemoryError, alloc)
def test_main():
    """Hand this module's name to test_support.run_unittest()."""
    module_name = __name__
    test_support.run_unittest(module_name)


# Run the suite when the file is executed directly as a script.
if __name__ == "__main__":
    test_main()