# Source: python/dscho.git, Lib/test/string_tests.py
# (blob 7cfe65be35eaab42fe4ca125da819d2f35f36473)
"""
Common tests shared by test_str, test_unicode, test_userstring and test_string.
"""
import string
import struct
import sys
import unittest
from test import test_support
from UserList import UserList
class Sequence:
    """Minimal sequence wrapper exposing only __len__ and __getitem__.

    The wrapped object is stored in ``self.seq`` (default ``'wxyz'``) and
    both special methods simply delegate to it.
    """

    def __init__(self, seq='wxyz'):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, i):
        return self.seq[i]
14 class BadSeq1(Sequence):
15 def __init__(self): self.seq = [7, 'hello', 123L]
class BadSeq2(Sequence):
    """Sequence that reports a length (8) larger than its 3 real items."""

    def __init__(self):
        self.seq = ['a', 'b', 'c']

    def __len__(self):
        return 8
21 class CommonTest(unittest.TestCase):
22 # This testcase contains test that can be used in all
23 # stringlike classes. Currently this is str, unicode
24 # UserString and the string module.
26 # The type to be tested
27 # Change in subclasses to change the behaviour of fixtesttype()
28 type2test = None
30 # All tests pass their arguments to the testing methods
31 # as str objects. fixtesttype() can be used to propagate
32 # these arguments to the appropriate type
33 def fixtype(self, obj):
34 if isinstance(obj, str):
35 return self.__class__.type2test(obj)
36 elif isinstance(obj, list):
37 return [self.fixtype(x) for x in obj]
38 elif isinstance(obj, tuple):
39 return tuple([self.fixtype(x) for x in obj])
40 elif isinstance(obj, dict):
41 return dict([
42 (self.fixtype(key), self.fixtype(value))
43 for (key, value) in obj.iteritems()
45 else:
46 return obj
48 # check that object.method(*args) returns result
49 def checkequal(self, result, object, methodname, *args):
50 result = self.fixtype(result)
51 object = self.fixtype(object)
52 args = self.fixtype(args)
53 realresult = getattr(object, methodname)(*args)
54 self.assertEqual(
55 result,
56 realresult
58 # if the original is returned make sure that
59 # this doesn't happen with subclasses
60 if object == realresult:
61 class subtype(self.__class__.type2test):
62 pass
63 object = subtype(object)
64 realresult = getattr(object, methodname)(*args)
65 self.assert_(object is not realresult)
67 # check that object.method(*args) raises exc
68 def checkraises(self, exc, object, methodname, *args):
69 object = self.fixtype(object)
70 args = self.fixtype(args)
71 self.assertRaises(
72 exc,
73 getattr(object, methodname),
74 *args
77 # call object.method(*args) without any checks
78 def checkcall(self, object, methodname, *args):
79 object = self.fixtype(object)
80 args = self.fixtype(args)
81 getattr(object, methodname)(*args)
83 def test_hash(self):
84 # SF bug 1054139: += optimization was not invalidating cached hash value
85 a = self.type2test('DNSSEC')
86 b = self.type2test('')
87 for c in a:
88 b += c
89 hash(b)
90 self.assertEqual(hash(a), hash(b))
92 def test_capitalize(self):
93 self.checkequal(' hello ', ' hello ', 'capitalize')
94 self.checkequal('Hello ', 'Hello ','capitalize')
95 self.checkequal('Hello ', 'hello ','capitalize')
96 self.checkequal('Aaaa', 'aaaa', 'capitalize')
97 self.checkequal('Aaaa', 'AaAa', 'capitalize')
99 self.checkraises(TypeError, 'hello', 'capitalize', 42)
101 def test_count(self):
102 self.checkequal(3, 'aaa', 'count', 'a')
103 self.checkequal(0, 'aaa', 'count', 'b')
104 self.checkequal(3, 'aaa', 'count', 'a')
105 self.checkequal(0, 'aaa', 'count', 'b')
106 self.checkequal(3, 'aaa', 'count', 'a')
107 self.checkequal(0, 'aaa', 'count', 'b')
108 self.checkequal(0, 'aaa', 'count', 'b')
109 self.checkequal(2, 'aaa', 'count', 'a', 1)
110 self.checkequal(0, 'aaa', 'count', 'a', 10)
111 self.checkequal(1, 'aaa', 'count', 'a', -1)
112 self.checkequal(3, 'aaa', 'count', 'a', -10)
113 self.checkequal(1, 'aaa', 'count', 'a', 0, 1)
114 self.checkequal(3, 'aaa', 'count', 'a', 0, 10)
115 self.checkequal(2, 'aaa', 'count', 'a', 0, -1)
116 self.checkequal(0, 'aaa', 'count', 'a', 0, -10)
117 self.checkequal(3, 'aaa', 'count', '', 1)
118 self.checkequal(1, 'aaa', 'count', '', 3)
119 self.checkequal(0, 'aaa', 'count', '', 10)
120 self.checkequal(2, 'aaa', 'count', '', -1)
121 self.checkequal(4, 'aaa', 'count', '', -10)
123 self.checkequal(1, '', 'count', '')
124 self.checkequal(0, '', 'count', '', 1, 1)
125 self.checkequal(0, '', 'count', '', sys.maxint, 0)
127 self.checkequal(0, '', 'count', 'xx')
128 self.checkequal(0, '', 'count', 'xx', 1, 1)
129 self.checkequal(0, '', 'count', 'xx', sys.maxint, 0)
131 self.checkraises(TypeError, 'hello', 'count')
132 self.checkraises(TypeError, 'hello', 'count', 42)
134 # For a variety of combinations,
135 # verify that str.count() matches an equivalent function
136 # replacing all occurrences and then differencing the string lengths
137 charset = ['', 'a', 'b']
138 digits = 7
139 base = len(charset)
140 teststrings = set()
141 for i in xrange(base ** digits):
142 entry = []
143 for j in xrange(digits):
144 i, m = divmod(i, base)
145 entry.append(charset[m])
146 teststrings.add(''.join(entry))
147 teststrings = list(teststrings)
148 for i in teststrings:
149 i = self.fixtype(i)
150 n = len(i)
151 for j in teststrings:
152 r1 = i.count(j)
153 if j:
154 r2, rem = divmod(n - len(i.replace(j, '')), len(j))
155 else:
156 r2, rem = len(i)+1, 0
157 if rem or r1 != r2:
158 self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i))
159 self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i))
161 def test_find(self):
162 self.checkequal(0, 'abcdefghiabc', 'find', 'abc')
163 self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1)
164 self.checkequal(-1, 'abcdefghiabc', 'find', 'def', 4)
166 self.checkequal(0, 'abc', 'find', '', 0)
167 self.checkequal(3, 'abc', 'find', '', 3)
168 self.checkequal(-1, 'abc', 'find', '', 4)
170 # to check the ability to pass None as defaults
171 self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a')
172 self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4)
173 self.checkequal(-1, 'rrarrrrrrrrra', 'find', 'a', 4, 6)
174 self.checkequal(12, 'rrarrrrrrrrra', 'find', 'a', 4, None)
175 self.checkequal( 2, 'rrarrrrrrrrra', 'find', 'a', None, 6)
177 self.checkraises(TypeError, 'hello', 'find')
178 self.checkraises(TypeError, 'hello', 'find', 42)
180 self.checkequal(0, '', 'find', '')
181 self.checkequal(-1, '', 'find', '', 1, 1)
182 self.checkequal(-1, '', 'find', '', sys.maxint, 0)
184 self.checkequal(-1, '', 'find', 'xx')
185 self.checkequal(-1, '', 'find', 'xx', 1, 1)
186 self.checkequal(-1, '', 'find', 'xx', sys.maxint, 0)
188 # issue 7458
189 self.checkequal(-1, 'ab', 'find', 'xxx', sys.maxsize + 1, 0)
191 # For a variety of combinations,
192 # verify that str.find() matches __contains__
193 # and that the found substring is really at that location
194 charset = ['', 'a', 'b', 'c']
195 digits = 5
196 base = len(charset)
197 teststrings = set()
198 for i in xrange(base ** digits):
199 entry = []
200 for j in xrange(digits):
201 i, m = divmod(i, base)
202 entry.append(charset[m])
203 teststrings.add(''.join(entry))
204 teststrings = list(teststrings)
205 for i in teststrings:
206 i = self.fixtype(i)
207 for j in teststrings:
208 loc = i.find(j)
209 r1 = (loc != -1)
210 r2 = j in i
211 self.assertEqual(r1, r2)
212 if loc != -1:
213 self.assertEqual(i[loc:loc+len(j)], j)
215 def test_rfind(self):
216 self.checkequal(9, 'abcdefghiabc', 'rfind', 'abc')
217 self.checkequal(12, 'abcdefghiabc', 'rfind', '')
218 self.checkequal(0, 'abcdefghiabc', 'rfind', 'abcd')
219 self.checkequal(-1, 'abcdefghiabc', 'rfind', 'abcz')
221 self.checkequal(3, 'abc', 'rfind', '', 0)
222 self.checkequal(3, 'abc', 'rfind', '', 3)
223 self.checkequal(-1, 'abc', 'rfind', '', 4)
225 # to check the ability to pass None as defaults
226 self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a')
227 self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4)
228 self.checkequal(-1, 'rrarrrrrrrrra', 'rfind', 'a', 4, 6)
229 self.checkequal(12, 'rrarrrrrrrrra', 'rfind', 'a', 4, None)
230 self.checkequal( 2, 'rrarrrrrrrrra', 'rfind', 'a', None, 6)
232 self.checkraises(TypeError, 'hello', 'rfind')
233 self.checkraises(TypeError, 'hello', 'rfind', 42)
235 # For a variety of combinations,
236 # verify that str.rfind() matches __contains__
237 # and that the found substring is really at that location
238 charset = ['', 'a', 'b', 'c']
239 digits = 5
240 base = len(charset)
241 teststrings = set()
242 for i in xrange(base ** digits):
243 entry = []
244 for j in xrange(digits):
245 i, m = divmod(i, base)
246 entry.append(charset[m])
247 teststrings.add(''.join(entry))
248 teststrings = list(teststrings)
249 for i in teststrings:
250 i = self.fixtype(i)
251 for j in teststrings:
252 loc = i.rfind(j)
253 r1 = (loc != -1)
254 r2 = j in i
255 self.assertEqual(r1, r2)
256 if loc != -1:
257 self.assertEqual(i[loc:loc+len(j)], j)
259 # issue 7458
260 self.checkequal(-1, 'ab', 'rfind', 'xxx', sys.maxsize + 1, 0)
262 def test_index(self):
263 self.checkequal(0, 'abcdefghiabc', 'index', '')
264 self.checkequal(3, 'abcdefghiabc', 'index', 'def')
265 self.checkequal(0, 'abcdefghiabc', 'index', 'abc')
266 self.checkequal(9, 'abcdefghiabc', 'index', 'abc', 1)
268 self.checkraises(ValueError, 'abcdefghiabc', 'index', 'hib')
269 self.checkraises(ValueError, 'abcdefghiab', 'index', 'abc', 1)
270 self.checkraises(ValueError, 'abcdefghi', 'index', 'ghi', 8)
271 self.checkraises(ValueError, 'abcdefghi', 'index', 'ghi', -1)
273 # to check the ability to pass None as defaults
274 self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a')
275 self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4)
276 self.checkraises(ValueError, 'rrarrrrrrrrra', 'index', 'a', 4, 6)
277 self.checkequal(12, 'rrarrrrrrrrra', 'index', 'a', 4, None)
278 self.checkequal( 2, 'rrarrrrrrrrra', 'index', 'a', None, 6)
280 self.checkraises(TypeError, 'hello', 'index')
281 self.checkraises(TypeError, 'hello', 'index', 42)
283 def test_rindex(self):
284 self.checkequal(12, 'abcdefghiabc', 'rindex', '')
285 self.checkequal(3, 'abcdefghiabc', 'rindex', 'def')
286 self.checkequal(9, 'abcdefghiabc', 'rindex', 'abc')
287 self.checkequal(0, 'abcdefghiabc', 'rindex', 'abc', 0, -1)
289 self.checkraises(ValueError, 'abcdefghiabc', 'rindex', 'hib')
290 self.checkraises(ValueError, 'defghiabc', 'rindex', 'def', 1)
291 self.checkraises(ValueError, 'defghiabc', 'rindex', 'abc', 0, -1)
292 self.checkraises(ValueError, 'abcdefghi', 'rindex', 'ghi', 0, 8)
293 self.checkraises(ValueError, 'abcdefghi', 'rindex', 'ghi', 0, -1)
295 # to check the ability to pass None as defaults
296 self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a')
297 self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4)
298 self.checkraises(ValueError, 'rrarrrrrrrrra', 'rindex', 'a', 4, 6)
299 self.checkequal(12, 'rrarrrrrrrrra', 'rindex', 'a', 4, None)
300 self.checkequal( 2, 'rrarrrrrrrrra', 'rindex', 'a', None, 6)
302 self.checkraises(TypeError, 'hello', 'rindex')
303 self.checkraises(TypeError, 'hello', 'rindex', 42)
305 def test_lower(self):
306 self.checkequal('hello', 'HeLLo', 'lower')
307 self.checkequal('hello', 'hello', 'lower')
308 self.checkraises(TypeError, 'hello', 'lower', 42)
310 def test_upper(self):
311 self.checkequal('HELLO', 'HeLLo', 'upper')
312 self.checkequal('HELLO', 'HELLO', 'upper')
313 self.checkraises(TypeError, 'hello', 'upper', 42)
315 def test_expandtabs(self):
316 self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs')
317 self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 8)
318 self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 4)
319 self.checkequal('abc\r\nab def\ng hi', 'abc\r\nab\tdef\ng\thi', 'expandtabs', 4)
320 self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs')
321 self.checkequal('abc\rab def\ng hi', 'abc\rab\tdef\ng\thi', 'expandtabs', 8)
322 self.checkequal('abc\r\nab\r\ndef\ng\r\nhi', 'abc\r\nab\r\ndef\ng\r\nhi', 'expandtabs', 4)
323 self.checkequal(' a\n b', ' \ta\n\tb', 'expandtabs', 1)
325 self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42)
326 # This test is only valid when sizeof(int) == sizeof(void*) == 4.
327 if sys.maxint < (1 << 32) and struct.calcsize('P') == 4:
328 self.checkraises(OverflowError,
329 '\ta\n\tb', 'expandtabs', sys.maxint)
331 def test_split(self):
332 self.checkequal(['this', 'is', 'the', 'split', 'function'],
333 'this is the split function', 'split')
335 # by whitespace
336 self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'split')
337 self.checkequal(['a', 'b c d'], 'a b c d', 'split', None, 1)
338 self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2)
339 self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 3)
340 self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 4)
341 self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None,
342 sys.maxint-1)
343 self.checkequal(['a b c d'], 'a b c d', 'split', None, 0)
344 self.checkequal(['a b c d'], ' a b c d', 'split', None, 0)
345 self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2)
347 self.checkequal([], ' ', 'split')
348 self.checkequal(['a'], ' a ', 'split')
349 self.checkequal(['a', 'b'], ' a b ', 'split')
350 self.checkequal(['a', 'b '], ' a b ', 'split', None, 1)
351 self.checkequal(['a', 'b c '], ' a b c ', 'split', None, 1)
352 self.checkequal(['a', 'b', 'c '], ' a b c ', 'split', None, 2)
353 self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'split')
354 aaa = ' a '*20
355 self.checkequal(['a']*20, aaa, 'split')
356 self.checkequal(['a'] + [aaa[4:]], aaa, 'split', None, 1)
357 self.checkequal(['a']*19 + ['a '], aaa, 'split', None, 19)
359 # by a char
360 self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|')
361 self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0)
362 self.checkequal(['a', 'b|c|d'], 'a|b|c|d', 'split', '|', 1)
363 self.checkequal(['a', 'b', 'c|d'], 'a|b|c|d', 'split', '|', 2)
364 self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 3)
365 self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 4)
366 self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|',
367 sys.maxint-2)
368 self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0)
369 self.checkequal(['a', '', 'b||c||d'], 'a||b||c||d', 'split', '|', 2)
370 self.checkequal(['endcase ', ''], 'endcase |', 'split', '|')
371 self.checkequal(['', ' startcase'], '| startcase', 'split', '|')
372 self.checkequal(['', 'bothcase', ''], '|bothcase|', 'split', '|')
373 self.checkequal(['a', '', 'b\x00c\x00d'], 'a\x00\x00b\x00c\x00d', 'split', '\x00', 2)
375 self.checkequal(['a']*20, ('a|'*20)[:-1], 'split', '|')
376 self.checkequal(['a']*15 +['a|a|a|a|a'],
377 ('a|'*20)[:-1], 'split', '|', 15)
379 # by string
380 self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
381 self.checkequal(['a', 'b//c//d'], 'a//b//c//d', 'split', '//', 1)
382 self.checkequal(['a', 'b', 'c//d'], 'a//b//c//d', 'split', '//', 2)
383 self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 3)
384 self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 4)
385 self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//',
386 sys.maxint-10)
387 self.checkequal(['a//b//c//d'], 'a//b//c//d', 'split', '//', 0)
388 self.checkequal(['a', '', 'b////c////d'], 'a////b////c////d', 'split', '//', 2)
389 self.checkequal(['endcase ', ''], 'endcase test', 'split', 'test')
390 self.checkequal(['', ' begincase'], 'test begincase', 'split', 'test')
391 self.checkequal(['', ' bothcase ', ''], 'test bothcase test',
392 'split', 'test')
393 self.checkequal(['a', 'bc'], 'abbbc', 'split', 'bb')
394 self.checkequal(['', ''], 'aaa', 'split', 'aaa')
395 self.checkequal(['aaa'], 'aaa', 'split', 'aaa', 0)
396 self.checkequal(['ab', 'ab'], 'abbaab', 'split', 'ba')
397 self.checkequal(['aaaa'], 'aaaa', 'split', 'aab')
398 self.checkequal([''], '', 'split', 'aaa')
399 self.checkequal(['aa'], 'aa', 'split', 'aaa')
400 self.checkequal(['A', 'bobb'], 'Abbobbbobb', 'split', 'bbobb')
401 self.checkequal(['A', 'B', ''], 'AbbobbBbbobb', 'split', 'bbobb')
403 self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'split', 'BLAH')
404 self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'split', 'BLAH', 19)
405 self.checkequal(['a']*18 + ['aBLAHa'], ('aBLAH'*20)[:-4],
406 'split', 'BLAH', 18)
408 # mixed use of str and unicode
409 self.checkequal([u'a', u'b', u'c d'], 'a b c d', 'split', u' ', 2)
411 # argument type
412 self.checkraises(TypeError, 'hello', 'split', 42, 42, 42)
414 # null case
415 self.checkraises(ValueError, 'hello', 'split', '')
416 self.checkraises(ValueError, 'hello', 'split', '', 0)
418 def test_rsplit(self):
419 self.checkequal(['this', 'is', 'the', 'rsplit', 'function'],
420 'this is the rsplit function', 'rsplit')
422 # by whitespace
423 self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'rsplit')
424 self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1)
425 self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
426 self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3)
427 self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4)
428 self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None,
429 sys.maxint-20)
430 self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0)
431 self.checkequal(['a b c d'], 'a b c d ', 'rsplit', None, 0)
432 self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
434 self.checkequal([], ' ', 'rsplit')
435 self.checkequal(['a'], ' a ', 'rsplit')
436 self.checkequal(['a', 'b'], ' a b ', 'rsplit')
437 self.checkequal([' a', 'b'], ' a b ', 'rsplit', None, 1)
438 self.checkequal([' a b','c'], ' a b c ', 'rsplit',
439 None, 1)
440 self.checkequal([' a', 'b', 'c'], ' a b c ', 'rsplit',
441 None, 2)
442 self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'rsplit', None, 88)
443 aaa = ' a '*20
444 self.checkequal(['a']*20, aaa, 'rsplit')
445 self.checkequal([aaa[:-4]] + ['a'], aaa, 'rsplit', None, 1)
446 self.checkequal([' a a'] + ['a']*18, aaa, 'rsplit', None, 18)
449 # by a char
450 self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|')
451 self.checkequal(['a|b|c', 'd'], 'a|b|c|d', 'rsplit', '|', 1)
452 self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2)
453 self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 3)
454 self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 4)
455 self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|',
456 sys.maxint-100)
457 self.checkequal(['a|b|c|d'], 'a|b|c|d', 'rsplit', '|', 0)
458 self.checkequal(['a||b||c', '', 'd'], 'a||b||c||d', 'rsplit', '|', 2)
459 self.checkequal(['', ' begincase'], '| begincase', 'rsplit', '|')
460 self.checkequal(['endcase ', ''], 'endcase |', 'rsplit', '|')
461 self.checkequal(['', 'bothcase', ''], '|bothcase|', 'rsplit', '|')
463 self.checkequal(['a\x00\x00b', 'c', 'd'], 'a\x00\x00b\x00c\x00d', 'rsplit', '\x00', 2)
465 self.checkequal(['a']*20, ('a|'*20)[:-1], 'rsplit', '|')
466 self.checkequal(['a|a|a|a|a']+['a']*15,
467 ('a|'*20)[:-1], 'rsplit', '|', 15)
469 # by string
470 self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//')
471 self.checkequal(['a//b//c', 'd'], 'a//b//c//d', 'rsplit', '//', 1)
472 self.checkequal(['a//b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 2)
473 self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 3)
474 self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 4)
475 self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//',
476 sys.maxint-5)
477 self.checkequal(['a//b//c//d'], 'a//b//c//d', 'rsplit', '//', 0)
478 self.checkequal(['a////b////c', '', 'd'], 'a////b////c////d', 'rsplit', '//', 2)
479 self.checkequal(['', ' begincase'], 'test begincase', 'rsplit', 'test')
480 self.checkequal(['endcase ', ''], 'endcase test', 'rsplit', 'test')
481 self.checkequal(['', ' bothcase ', ''], 'test bothcase test',
482 'rsplit', 'test')
483 self.checkequal(['ab', 'c'], 'abbbc', 'rsplit', 'bb')
484 self.checkequal(['', ''], 'aaa', 'rsplit', 'aaa')
485 self.checkequal(['aaa'], 'aaa', 'rsplit', 'aaa', 0)
486 self.checkequal(['ab', 'ab'], 'abbaab', 'rsplit', 'ba')
487 self.checkequal(['aaaa'], 'aaaa', 'rsplit', 'aab')
488 self.checkequal([''], '', 'rsplit', 'aaa')
489 self.checkequal(['aa'], 'aa', 'rsplit', 'aaa')
490 self.checkequal(['bbob', 'A'], 'bbobbbobbA', 'rsplit', 'bbobb')
491 self.checkequal(['', 'B', 'A'], 'bbobbBbbobbA', 'rsplit', 'bbobb')
493 self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH')
494 self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH', 19)
495 self.checkequal(['aBLAHa'] + ['a']*18, ('aBLAH'*20)[:-4],
496 'rsplit', 'BLAH', 18)
498 # mixed use of str and unicode
499 self.checkequal([u'a b', u'c', u'd'], 'a b c d', 'rsplit', u' ', 2)
501 # argument type
502 self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42)
504 # null case
505 self.checkraises(ValueError, 'hello', 'rsplit', '')
506 self.checkraises(ValueError, 'hello', 'rsplit', '', 0)
508 def test_strip(self):
509 self.checkequal('hello', ' hello ', 'strip')
510 self.checkequal('hello ', ' hello ', 'lstrip')
511 self.checkequal(' hello', ' hello ', 'rstrip')
512 self.checkequal('hello', 'hello', 'strip')
514 # strip/lstrip/rstrip with None arg
515 self.checkequal('hello', ' hello ', 'strip', None)
516 self.checkequal('hello ', ' hello ', 'lstrip', None)
517 self.checkequal(' hello', ' hello ', 'rstrip', None)
518 self.checkequal('hello', 'hello', 'strip', None)
520 # strip/lstrip/rstrip with str arg
521 self.checkequal('hello', 'xyzzyhelloxyzzy', 'strip', 'xyz')
522 self.checkequal('helloxyzzy', 'xyzzyhelloxyzzy', 'lstrip', 'xyz')
523 self.checkequal('xyzzyhello', 'xyzzyhelloxyzzy', 'rstrip', 'xyz')
524 self.checkequal('hello', 'hello', 'strip', 'xyz')
526 # strip/lstrip/rstrip with unicode arg
527 if test_support.have_unicode:
528 self.checkequal(unicode('hello', 'ascii'), 'xyzzyhelloxyzzy',
529 'strip', unicode('xyz', 'ascii'))
530 self.checkequal(unicode('helloxyzzy', 'ascii'), 'xyzzyhelloxyzzy',
531 'lstrip', unicode('xyz', 'ascii'))
532 self.checkequal(unicode('xyzzyhello', 'ascii'), 'xyzzyhelloxyzzy',
533 'rstrip', unicode('xyz', 'ascii'))
534 # XXX
535 #self.checkequal(unicode('hello', 'ascii'), 'hello',
536 # 'strip', unicode('xyz', 'ascii'))
538 self.checkraises(TypeError, 'hello', 'strip', 42, 42)
539 self.checkraises(TypeError, 'hello', 'lstrip', 42, 42)
540 self.checkraises(TypeError, 'hello', 'rstrip', 42, 42)
542 def test_ljust(self):
543 self.checkequal('abc ', 'abc', 'ljust', 10)
544 self.checkequal('abc ', 'abc', 'ljust', 6)
545 self.checkequal('abc', 'abc', 'ljust', 3)
546 self.checkequal('abc', 'abc', 'ljust', 2)
547 self.checkequal('abc*******', 'abc', 'ljust', 10, '*')
548 self.checkraises(TypeError, 'abc', 'ljust')
550 def test_rjust(self):
551 self.checkequal(' abc', 'abc', 'rjust', 10)
552 self.checkequal(' abc', 'abc', 'rjust', 6)
553 self.checkequal('abc', 'abc', 'rjust', 3)
554 self.checkequal('abc', 'abc', 'rjust', 2)
555 self.checkequal('*******abc', 'abc', 'rjust', 10, '*')
556 self.checkraises(TypeError, 'abc', 'rjust')
558 def test_center(self):
559 self.checkequal(' abc ', 'abc', 'center', 10)
560 self.checkequal(' abc ', 'abc', 'center', 6)
561 self.checkequal('abc', 'abc', 'center', 3)
562 self.checkequal('abc', 'abc', 'center', 2)
563 self.checkequal('***abc****', 'abc', 'center', 10, '*')
564 self.checkraises(TypeError, 'abc', 'center')
566 def test_swapcase(self):
567 self.checkequal('hEllO CoMPuTErS', 'HeLLo cOmpUteRs', 'swapcase')
569 self.checkraises(TypeError, 'hello', 'swapcase', 42)
571 def test_replace(self):
572 EQ = self.checkequal
574 # Operations on the empty string
575 EQ("", "", "replace", "", "")
576 EQ("A", "", "replace", "", "A")
577 EQ("", "", "replace", "A", "")
578 EQ("", "", "replace", "A", "A")
579 EQ("", "", "replace", "", "", 100)
580 EQ("", "", "replace", "", "", sys.maxint)
582 # interleave (from=="", 'to' gets inserted everywhere)
583 EQ("A", "A", "replace", "", "")
584 EQ("*A*", "A", "replace", "", "*")
585 EQ("*1A*1", "A", "replace", "", "*1")
586 EQ("*-#A*-#", "A", "replace", "", "*-#")
587 EQ("*-A*-A*-", "AA", "replace", "", "*-")
588 EQ("*-A*-A*-", "AA", "replace", "", "*-", -1)
589 EQ("*-A*-A*-", "AA", "replace", "", "*-", sys.maxint)
590 EQ("*-A*-A*-", "AA", "replace", "", "*-", 4)
591 EQ("*-A*-A*-", "AA", "replace", "", "*-", 3)
592 EQ("*-A*-A", "AA", "replace", "", "*-", 2)
593 EQ("*-AA", "AA", "replace", "", "*-", 1)
594 EQ("AA", "AA", "replace", "", "*-", 0)
596 # single character deletion (from=="A", to=="")
597 EQ("", "A", "replace", "A", "")
598 EQ("", "AAA", "replace", "A", "")
599 EQ("", "AAA", "replace", "A", "", -1)
600 EQ("", "AAA", "replace", "A", "", sys.maxint)
601 EQ("", "AAA", "replace", "A", "", 4)
602 EQ("", "AAA", "replace", "A", "", 3)
603 EQ("A", "AAA", "replace", "A", "", 2)
604 EQ("AA", "AAA", "replace", "A", "", 1)
605 EQ("AAA", "AAA", "replace", "A", "", 0)
606 EQ("", "AAAAAAAAAA", "replace", "A", "")
607 EQ("BCD", "ABACADA", "replace", "A", "")
608 EQ("BCD", "ABACADA", "replace", "A", "", -1)
609 EQ("BCD", "ABACADA", "replace", "A", "", sys.maxint)
610 EQ("BCD", "ABACADA", "replace", "A", "", 5)
611 EQ("BCD", "ABACADA", "replace", "A", "", 4)
612 EQ("BCDA", "ABACADA", "replace", "A", "", 3)
613 EQ("BCADA", "ABACADA", "replace", "A", "", 2)
614 EQ("BACADA", "ABACADA", "replace", "A", "", 1)
615 EQ("ABACADA", "ABACADA", "replace", "A", "", 0)
616 EQ("BCD", "ABCAD", "replace", "A", "")
617 EQ("BCD", "ABCADAA", "replace", "A", "")
618 EQ("BCD", "BCD", "replace", "A", "")
619 EQ("*************", "*************", "replace", "A", "")
620 EQ("^A^", "^"+"A"*1000+"^", "replace", "A", "", 999)
622 # substring deletion (from=="the", to=="")
623 EQ("", "the", "replace", "the", "")
624 EQ("ater", "theater", "replace", "the", "")
625 EQ("", "thethe", "replace", "the", "")
626 EQ("", "thethethethe", "replace", "the", "")
627 EQ("aaaa", "theatheatheathea", "replace", "the", "")
628 EQ("that", "that", "replace", "the", "")
629 EQ("thaet", "thaet", "replace", "the", "")
630 EQ("here and re", "here and there", "replace", "the", "")
631 EQ("here and re and re", "here and there and there",
632 "replace", "the", "", sys.maxint)
633 EQ("here and re and re", "here and there and there",
634 "replace", "the", "", -1)
635 EQ("here and re and re", "here and there and there",
636 "replace", "the", "", 3)
637 EQ("here and re and re", "here and there and there",
638 "replace", "the", "", 2)
639 EQ("here and re and there", "here and there and there",
640 "replace", "the", "", 1)
641 EQ("here and there and there", "here and there and there",
642 "replace", "the", "", 0)
643 EQ("here and re and re", "here and there and there", "replace", "the", "")
645 EQ("abc", "abc", "replace", "the", "")
646 EQ("abcdefg", "abcdefg", "replace", "the", "")
648 # substring deletion (from=="bob", to=="")
649 EQ("bob", "bbobob", "replace", "bob", "")
650 EQ("bobXbob", "bbobobXbbobob", "replace", "bob", "")
651 EQ("aaaaaaa", "aaaaaaabob", "replace", "bob", "")
652 EQ("aaaaaaa", "aaaaaaa", "replace", "bob", "")
654 # single character replace in place (len(from)==len(to)==1)
655 EQ("Who goes there?", "Who goes there?", "replace", "o", "o")
656 EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O")
657 EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", sys.maxint)
658 EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", -1)
659 EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", 3)
660 EQ("WhO gOes there?", "Who goes there?", "replace", "o", "O", 2)
661 EQ("WhO goes there?", "Who goes there?", "replace", "o", "O", 1)
662 EQ("Who goes there?", "Who goes there?", "replace", "o", "O", 0)
664 EQ("Who goes there?", "Who goes there?", "replace", "a", "q")
665 EQ("who goes there?", "Who goes there?", "replace", "W", "w")
666 EQ("wwho goes there?ww", "WWho goes there?WW", "replace", "W", "w")
667 EQ("Who goes there!", "Who goes there?", "replace", "?", "!")
668 EQ("Who goes there!!", "Who goes there??", "replace", "?", "!")
670 EQ("Who goes there?", "Who goes there?", "replace", ".", "!")
672 # substring replace in place (len(from)==len(to) > 1)
673 EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**")
674 EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", sys.maxint)
675 EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", -1)
676 EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", 4)
677 EQ("Th** ** a t**sue", "This is a tissue", "replace", "is", "**", 3)
678 EQ("Th** ** a tissue", "This is a tissue", "replace", "is", "**", 2)
679 EQ("Th** is a tissue", "This is a tissue", "replace", "is", "**", 1)
680 EQ("This is a tissue", "This is a tissue", "replace", "is", "**", 0)
681 EQ("cobob", "bobob", "replace", "bob", "cob")
682 EQ("cobobXcobocob", "bobobXbobobob", "replace", "bob", "cob")
683 EQ("bobob", "bobob", "replace", "bot", "bot")
685 # replace single character (len(from)==1, len(to)>1)
686 EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK")
687 EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", -1)
688 EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", sys.maxint)
689 EQ("ReyKKjaviKK", "Reykjavik", "replace", "k", "KK", 2)
690 EQ("ReyKKjavik", "Reykjavik", "replace", "k", "KK", 1)
691 EQ("Reykjavik", "Reykjavik", "replace", "k", "KK", 0)
692 EQ("A----B----C----", "A.B.C.", "replace", ".", "----")
694 EQ("Reykjavik", "Reykjavik", "replace", "q", "KK")
696 # replace substring (len(from)>1, len(to)!=len(from))
697 EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
698 "replace", "spam", "ham")
699 EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
700 "replace", "spam", "ham", sys.maxint)
701 EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
702 "replace", "spam", "ham", -1)
703 EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
704 "replace", "spam", "ham", 4)
705 EQ("ham, ham, eggs and ham", "spam, spam, eggs and spam",
706 "replace", "spam", "ham", 3)
707 EQ("ham, ham, eggs and spam", "spam, spam, eggs and spam",
708 "replace", "spam", "ham", 2)
709 EQ("ham, spam, eggs and spam", "spam, spam, eggs and spam",
710 "replace", "spam", "ham", 1)
711 EQ("spam, spam, eggs and spam", "spam, spam, eggs and spam",
712 "replace", "spam", "ham", 0)
714 EQ("bobob", "bobobob", "replace", "bobob", "bob")
715 EQ("bobobXbobob", "bobobobXbobobob", "replace", "bobob", "bob")
716 EQ("BOBOBOB", "BOBOBOB", "replace", "bob", "bobby")
718 with test_support.check_py3k_warnings():
719 ba = buffer('a')
720 bb = buffer('b')
721 EQ("bbc", "abc", "replace", ba, bb)
722 EQ("aac", "abc", "replace", bb, ba)
725 self.checkequal('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
726 self.checkequal('onetwothree', 'one!two!three!', 'replace', '!', '')
727 self.checkequal('one@two@three!', 'one!two!three!', 'replace', '!', '@', 2)
728 self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@', 3)
729 self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@', 4)
730 self.checkequal('one!two!three!', 'one!two!three!', 'replace', '!', '@', 0)
731 self.checkequal('one@two@three@', 'one!two!three!', 'replace', '!', '@')
732 self.checkequal('one!two!three!', 'one!two!three!', 'replace', 'x', '@')
733 self.checkequal('one!two!three!', 'one!two!three!', 'replace', 'x', '@', 2)
734 self.checkequal('-a-b-c-', 'abc', 'replace', '', '-')
735 self.checkequal('-a-b-c', 'abc', 'replace', '', '-', 3)
736 self.checkequal('abc', 'abc', 'replace', '', '-', 0)
737 self.checkequal('', '', 'replace', '', '')
738 self.checkequal('abc', 'abc', 'replace', 'ab', '--', 0)
739 self.checkequal('abc', 'abc', 'replace', 'xy', '--')
740 # Next three for SF bug 422088: [OSF1 alpha] string.replace(); died with
741 # MemoryError due to empty result (platform malloc issue when requesting
742 # 0 bytes).
743 self.checkequal('', '123', 'replace', '123', '')
744 self.checkequal('', '123123', 'replace', '123', '')
745 self.checkequal('x', '123x123', 'replace', '123', '')
747 self.checkraises(TypeError, 'hello', 'replace')
748 self.checkraises(TypeError, 'hello', 'replace', 42)
749 self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
750 self.checkraises(TypeError, 'hello', 'replace', 'h', 42)
752 def test_replace_overflow(self):
753 # Check for overflow checking on 32 bit machines
754 if sys.maxint != 2147483647 or struct.calcsize("P") > 4:
755 return
756 A2_16 = "A" * (2**16)
757 self.checkraises(OverflowError, A2_16, "replace", "", A2_16)
758 self.checkraises(OverflowError, A2_16, "replace", "A", A2_16)
759 self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16)
761 def test_zfill(self):
762 self.checkequal('123', '123', 'zfill', 2)
763 self.checkequal('123', '123', 'zfill', 3)
764 self.checkequal('0123', '123', 'zfill', 4)
765 self.checkequal('+123', '+123', 'zfill', 3)
766 self.checkequal('+123', '+123', 'zfill', 4)
767 self.checkequal('+0123', '+123', 'zfill', 5)
768 self.checkequal('-123', '-123', 'zfill', 3)
769 self.checkequal('-123', '-123', 'zfill', 4)
770 self.checkequal('-0123', '-123', 'zfill', 5)
771 self.checkequal('000', '', 'zfill', 3)
772 self.checkequal('34', '34', 'zfill', 1)
773 self.checkequal('0034', '34', 'zfill', 4)
775 self.checkraises(TypeError, '123', 'zfill')
# XXX alias for py3k forward compatibility: BaseTest is a second name bound
# to the very same class object as CommonTest (not a subclass).
# NOTE(review): presumably other test modules refer to it by this name --
# confirm against the importing modules before removing it.
BaseTest = CommonTest
780 class MixinStrUnicodeUserStringTest:
781 # additional tests that only work for
782 # stringlike objects, i.e. str, unicode, UserString
783 # (but not the string module)
785 def test_islower(self):
786 self.checkequal(False, '', 'islower')
787 self.checkequal(True, 'a', 'islower')
788 self.checkequal(False, 'A', 'islower')
789 self.checkequal(False, '\n', 'islower')
790 self.checkequal(True, 'abc', 'islower')
791 self.checkequal(False, 'aBc', 'islower')
792 self.checkequal(True, 'abc\n', 'islower')
793 self.checkraises(TypeError, 'abc', 'islower', 42)
795 def test_isupper(self):
796 self.checkequal(False, '', 'isupper')
797 self.checkequal(False, 'a', 'isupper')
798 self.checkequal(True, 'A', 'isupper')
799 self.checkequal(False, '\n', 'isupper')
800 self.checkequal(True, 'ABC', 'isupper')
801 self.checkequal(False, 'AbC', 'isupper')
802 self.checkequal(True, 'ABC\n', 'isupper')
803 self.checkraises(TypeError, 'abc', 'isupper', 42)
805 def test_istitle(self):
806 self.checkequal(False, '', 'istitle')
807 self.checkequal(False, 'a', 'istitle')
808 self.checkequal(True, 'A', 'istitle')
809 self.checkequal(False, '\n', 'istitle')
810 self.checkequal(True, 'A Titlecased Line', 'istitle')
811 self.checkequal(True, 'A\nTitlecased Line', 'istitle')
812 self.checkequal(True, 'A Titlecased, Line', 'istitle')
813 self.checkequal(False, 'Not a capitalized String', 'istitle')
814 self.checkequal(False, 'Not\ta Titlecase String', 'istitle')
815 self.checkequal(False, 'Not--a Titlecase String', 'istitle')
816 self.checkequal(False, 'NOT', 'istitle')
817 self.checkraises(TypeError, 'abc', 'istitle', 42)
819 def test_isspace(self):
820 self.checkequal(False, '', 'isspace')
821 self.checkequal(False, 'a', 'isspace')
822 self.checkequal(True, ' ', 'isspace')
823 self.checkequal(True, '\t', 'isspace')
824 self.checkequal(True, '\r', 'isspace')
825 self.checkequal(True, '\n', 'isspace')
826 self.checkequal(True, ' \t\r\n', 'isspace')
827 self.checkequal(False, ' \t\r\na', 'isspace')
828 self.checkraises(TypeError, 'abc', 'isspace', 42)
830 def test_isalpha(self):
831 self.checkequal(False, '', 'isalpha')
832 self.checkequal(True, 'a', 'isalpha')
833 self.checkequal(True, 'A', 'isalpha')
834 self.checkequal(False, '\n', 'isalpha')
835 self.checkequal(True, 'abc', 'isalpha')
836 self.checkequal(False, 'aBc123', 'isalpha')
837 self.checkequal(False, 'abc\n', 'isalpha')
838 self.checkraises(TypeError, 'abc', 'isalpha', 42)
840 def test_isalnum(self):
841 self.checkequal(False, '', 'isalnum')
842 self.checkequal(True, 'a', 'isalnum')
843 self.checkequal(True, 'A', 'isalnum')
844 self.checkequal(False, '\n', 'isalnum')
845 self.checkequal(True, '123abc456', 'isalnum')
846 self.checkequal(True, 'a1b3c', 'isalnum')
847 self.checkequal(False, 'aBc000 ', 'isalnum')
848 self.checkequal(False, 'abc\n', 'isalnum')
849 self.checkraises(TypeError, 'abc', 'isalnum', 42)
851 def test_isdigit(self):
852 self.checkequal(False, '', 'isdigit')
853 self.checkequal(False, 'a', 'isdigit')
854 self.checkequal(True, '0', 'isdigit')
855 self.checkequal(True, '0123456789', 'isdigit')
856 self.checkequal(False, '0123456789a', 'isdigit')
858 self.checkraises(TypeError, 'abc', 'isdigit', 42)
860 def test_title(self):
861 self.checkequal(' Hello ', ' hello ', 'title')
862 self.checkequal('Hello ', 'hello ', 'title')
863 self.checkequal('Hello ', 'Hello ', 'title')
864 self.checkequal('Format This As Title String', "fOrMaT thIs aS titLe String", 'title')
865 self.checkequal('Format,This-As*Title;String', "fOrMaT,thIs-aS*titLe;String", 'title', )
866 self.checkequal('Getint', "getInt", 'title')
867 self.checkraises(TypeError, 'hello', 'title', 42)
869 def test_splitlines(self):
870 self.checkequal(['abc', 'def', '', 'ghi'], "abc\ndef\n\rghi", 'splitlines')
871 self.checkequal(['abc', 'def', '', 'ghi'], "abc\ndef\n\r\nghi", 'splitlines')
872 self.checkequal(['abc', 'def', 'ghi'], "abc\ndef\r\nghi", 'splitlines')
873 self.checkequal(['abc', 'def', 'ghi'], "abc\ndef\r\nghi\n", 'splitlines')
874 self.checkequal(['abc', 'def', 'ghi', ''], "abc\ndef\r\nghi\n\r", 'splitlines')
875 self.checkequal(['', 'abc', 'def', 'ghi', ''], "\nabc\ndef\r\nghi\n\r", 'splitlines')
876 self.checkequal(['\n', 'abc\n', 'def\r\n', 'ghi\n', '\r'], "\nabc\ndef\r\nghi\n\r", 'splitlines', 1)
878 self.checkraises(TypeError, 'abc', 'splitlines', 42, 42)
880 def test_startswith(self):
881 self.checkequal(True, 'hello', 'startswith', 'he')
882 self.checkequal(True, 'hello', 'startswith', 'hello')
883 self.checkequal(False, 'hello', 'startswith', 'hello world')
884 self.checkequal(True, 'hello', 'startswith', '')
885 self.checkequal(False, 'hello', 'startswith', 'ello')
886 self.checkequal(True, 'hello', 'startswith', 'ello', 1)
887 self.checkequal(True, 'hello', 'startswith', 'o', 4)
888 self.checkequal(False, 'hello', 'startswith', 'o', 5)
889 self.checkequal(True, 'hello', 'startswith', '', 5)
890 self.checkequal(False, 'hello', 'startswith', 'lo', 6)
891 self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3)
892 self.checkequal(True, 'helloworld', 'startswith', 'lowo', 3, 7)
893 self.checkequal(False, 'helloworld', 'startswith', 'lowo', 3, 6)
895 # test negative indices
896 self.checkequal(True, 'hello', 'startswith', 'he', 0, -1)
897 self.checkequal(True, 'hello', 'startswith', 'he', -53, -1)
898 self.checkequal(False, 'hello', 'startswith', 'hello', 0, -1)
899 self.checkequal(False, 'hello', 'startswith', 'hello world', -1, -10)
900 self.checkequal(False, 'hello', 'startswith', 'ello', -5)
901 self.checkequal(True, 'hello', 'startswith', 'ello', -4)
902 self.checkequal(False, 'hello', 'startswith', 'o', -2)
903 self.checkequal(True, 'hello', 'startswith', 'o', -1)
904 self.checkequal(True, 'hello', 'startswith', '', -3, -3)
905 self.checkequal(False, 'hello', 'startswith', 'lo', -9)
907 self.checkraises(TypeError, 'hello', 'startswith')
908 self.checkraises(TypeError, 'hello', 'startswith', 42)
910 # test tuple arguments
911 self.checkequal(True, 'hello', 'startswith', ('he', 'ha'))
912 self.checkequal(False, 'hello', 'startswith', ('lo', 'llo'))
913 self.checkequal(True, 'hello', 'startswith', ('hellox', 'hello'))
914 self.checkequal(False, 'hello', 'startswith', ())
915 self.checkequal(True, 'helloworld', 'startswith', ('hellowo',
916 'rld', 'lowo'), 3)
917 self.checkequal(False, 'helloworld', 'startswith', ('hellowo', 'ello',
918 'rld'), 3)
919 self.checkequal(True, 'hello', 'startswith', ('lo', 'he'), 0, -1)
920 self.checkequal(False, 'hello', 'startswith', ('he', 'hel'), 0, 1)
921 self.checkequal(True, 'hello', 'startswith', ('he', 'hel'), 0, 2)
923 self.checkraises(TypeError, 'hello', 'startswith', (42,))
925 def test_endswith(self):
926 self.checkequal(True, 'hello', 'endswith', 'lo')
927 self.checkequal(False, 'hello', 'endswith', 'he')
928 self.checkequal(True, 'hello', 'endswith', '')
929 self.checkequal(False, 'hello', 'endswith', 'hello world')
930 self.checkequal(False, 'helloworld', 'endswith', 'worl')
931 self.checkequal(True, 'helloworld', 'endswith', 'worl', 3, 9)
932 self.checkequal(True, 'helloworld', 'endswith', 'world', 3, 12)
933 self.checkequal(True, 'helloworld', 'endswith', 'lowo', 1, 7)
934 self.checkequal(True, 'helloworld', 'endswith', 'lowo', 2, 7)
935 self.checkequal(True, 'helloworld', 'endswith', 'lowo', 3, 7)
936 self.checkequal(False, 'helloworld', 'endswith', 'lowo', 4, 7)
937 self.checkequal(False, 'helloworld', 'endswith', 'lowo', 3, 8)
938 self.checkequal(False, 'ab', 'endswith', 'ab', 0, 1)
939 self.checkequal(False, 'ab', 'endswith', 'ab', 0, 0)
941 # test negative indices
942 self.checkequal(True, 'hello', 'endswith', 'lo', -2)
943 self.checkequal(False, 'hello', 'endswith', 'he', -2)
944 self.checkequal(True, 'hello', 'endswith', '', -3, -3)
945 self.checkequal(False, 'hello', 'endswith', 'hello world', -10, -2)
946 self.checkequal(False, 'helloworld', 'endswith', 'worl', -6)
947 self.checkequal(True, 'helloworld', 'endswith', 'worl', -5, -1)
948 self.checkequal(True, 'helloworld', 'endswith', 'worl', -5, 9)
949 self.checkequal(True, 'helloworld', 'endswith', 'world', -7, 12)
950 self.checkequal(True, 'helloworld', 'endswith', 'lowo', -99, -3)
951 self.checkequal(True, 'helloworld', 'endswith', 'lowo', -8, -3)
952 self.checkequal(True, 'helloworld', 'endswith', 'lowo', -7, -3)
953 self.checkequal(False, 'helloworld', 'endswith', 'lowo', 3, -4)
954 self.checkequal(False, 'helloworld', 'endswith', 'lowo', -8, -2)
956 self.checkraises(TypeError, 'hello', 'endswith')
957 self.checkraises(TypeError, 'hello', 'endswith', 42)
959 # test tuple arguments
960 self.checkequal(False, 'hello', 'endswith', ('he', 'ha'))
961 self.checkequal(True, 'hello', 'endswith', ('lo', 'llo'))
962 self.checkequal(True, 'hello', 'endswith', ('hellox', 'hello'))
963 self.checkequal(False, 'hello', 'endswith', ())
964 self.checkequal(True, 'helloworld', 'endswith', ('hellowo',
965 'rld', 'lowo'), 3)
966 self.checkequal(False, 'helloworld', 'endswith', ('hellowo', 'ello',
967 'rld'), 3, -1)
968 self.checkequal(True, 'hello', 'endswith', ('hell', 'ell'), 0, -1)
969 self.checkequal(False, 'hello', 'endswith', ('he', 'hel'), 0, 1)
970 self.checkequal(True, 'hello', 'endswith', ('he', 'hell'), 0, 4)
972 self.checkraises(TypeError, 'hello', 'endswith', (42,))
974 def test___contains__(self):
975 self.checkequal(True, '', '__contains__', '')
976 self.checkequal(True, 'abc', '__contains__', '')
977 self.checkequal(False, 'abc', '__contains__', '\0')
978 self.checkequal(True, '\0abc', '__contains__', '\0')
979 self.checkequal(True, 'abc\0', '__contains__', '\0')
980 self.checkequal(True, '\0abc', '__contains__', 'a')
981 self.checkequal(True, 'asdf', '__contains__', 'asdf')
982 self.checkequal(False, 'asd', '__contains__', 'asdf')
983 self.checkequal(False, '', '__contains__', 'asdf')
985 def test_subscript(self):
986 self.checkequal(u'a', 'abc', '__getitem__', 0)
987 self.checkequal(u'c', 'abc', '__getitem__', -1)
988 self.checkequal(u'a', 'abc', '__getitem__', 0L)
989 self.checkequal(u'abc', 'abc', '__getitem__', slice(0, 3))
990 self.checkequal(u'abc', 'abc', '__getitem__', slice(0, 1000))
991 self.checkequal(u'a', 'abc', '__getitem__', slice(0, 1))
992 self.checkequal(u'', 'abc', '__getitem__', slice(0, 0))
994 self.checkraises(TypeError, 'abc', '__getitem__', 'def')
996 def test_slice(self):
997 self.checkequal('abc', 'abc', '__getslice__', 0, 1000)
998 self.checkequal('abc', 'abc', '__getslice__', 0, 3)
999 self.checkequal('ab', 'abc', '__getslice__', 0, 2)
1000 self.checkequal('bc', 'abc', '__getslice__', 1, 3)
1001 self.checkequal('b', 'abc', '__getslice__', 1, 2)
1002 self.checkequal('', 'abc', '__getslice__', 2, 2)
1003 self.checkequal('', 'abc', '__getslice__', 1000, 1000)
1004 self.checkequal('', 'abc', '__getslice__', 2000, 1000)
1005 self.checkequal('', 'abc', '__getslice__', 2, 1)
1007 self.checkraises(TypeError, 'abc', '__getslice__', 'def')
1009 def test_extended_getslice(self):
1010 # Test extended slicing by comparing with list slicing.
1011 s = string.ascii_letters + string.digits
1012 indices = (0, None, 1, 3, 41, -1, -2, -37)
1013 for start in indices:
1014 for stop in indices:
1015 # Skip step 0 (invalid)
1016 for step in indices[1:]:
1017 L = list(s)[start:stop:step]
1018 self.checkequal(u"".join(L), s, '__getitem__',
1019 slice(start, stop, step))
1021 def test_mul(self):
1022 self.checkequal('', 'abc', '__mul__', -1)
1023 self.checkequal('', 'abc', '__mul__', 0)
1024 self.checkequal('abc', 'abc', '__mul__', 1)
1025 self.checkequal('abcabcabc', 'abc', '__mul__', 3)
1026 self.checkraises(TypeError, 'abc', '__mul__')
1027 self.checkraises(TypeError, 'abc', '__mul__', '')
1028 # XXX: on a 64-bit system, this doesn't raise an overflow error,
1029 # but either raises a MemoryError, or succeeds (if you have 54TiB)
1030 #self.checkraises(OverflowError, 10000*'abc', '__mul__', 2000000000)
1032 def test_join(self):
1033 # join now works with any sequence type
1034 # moved here, because the argument order is
1035 # different in string.join (see the test in
1036 # test.test_string.StringTest.test_join)
1037 self.checkequal('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
1038 self.checkequal('abcd', '', 'join', ('a', 'b', 'c', 'd'))
1039 self.checkequal('bd', '', 'join', ('', 'b', '', 'd'))
1040 self.checkequal('ac', '', 'join', ('a', '', 'c', ''))
1041 self.checkequal('w x y z', ' ', 'join', Sequence())
1042 self.checkequal('abc', 'a', 'join', ('abc',))
1043 self.checkequal('z', 'a', 'join', UserList(['z']))
1044 if test_support.have_unicode:
1045 self.checkequal(unicode('a.b.c'), unicode('.'), 'join', ['a', 'b', 'c'])
1046 self.checkequal(unicode('a.b.c'), '.', 'join', [unicode('a'), 'b', 'c'])
1047 self.checkequal(unicode('a.b.c'), '.', 'join', ['a', unicode('b'), 'c'])
1048 self.checkequal(unicode('a.b.c'), '.', 'join', ['a', 'b', unicode('c')])
1049 self.checkraises(TypeError, '.', 'join', ['a', unicode('b'), 3])
1050 for i in [5, 25, 125]:
1051 self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
1052 ['a' * i] * i)
1053 self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
1054 ('a' * i,) * i)
1056 self.checkraises(TypeError, ' ', 'join', BadSeq1())
1057 self.checkequal('a b c', ' ', 'join', BadSeq2())
1059 self.checkraises(TypeError, ' ', 'join')
1060 self.checkraises(TypeError, ' ', 'join', 7)
1061 self.checkraises(TypeError, ' ', 'join', Sequence([7, 'hello', 123L]))
1062 try:
1063 def f():
1064 yield 4 + ""
1065 self.fixtype(' ').join(f())
1066 except TypeError, e:
1067 if '+' not in str(e):
1068 self.fail('join() ate exception message')
1069 else:
1070 self.fail('exception not raised')
1072 def test_formatting(self):
1073 self.checkequal('+hello+', '+%s+', '__mod__', 'hello')
1074 self.checkequal('+10+', '+%d+', '__mod__', 10)
1075 self.checkequal('a', "%c", '__mod__', "a")
1076 self.checkequal('a', "%c", '__mod__', "a")
1077 self.checkequal('"', "%c", '__mod__', 34)
1078 self.checkequal('$', "%c", '__mod__', 36)
1079 self.checkequal('10', "%d", '__mod__', 10)
1080 self.checkequal('\x7f', "%c", '__mod__', 0x7f)
1082 for ordinal in (-100, 0x200000):
1083 # unicode raises ValueError, str raises OverflowError
1084 self.checkraises((ValueError, OverflowError), '%c', '__mod__', ordinal)
1086 longvalue = sys.maxint + 10L
1087 slongvalue = str(longvalue)
1088 if slongvalue[-1] in ("L","l"): slongvalue = slongvalue[:-1]
1089 self.checkequal(' 42', '%3ld', '__mod__', 42)
1090 self.checkequal('42', '%d', '__mod__', 42L)
1091 self.checkequal('42', '%d', '__mod__', 42.0)
1092 self.checkequal(slongvalue, '%d', '__mod__', longvalue)
1093 self.checkcall('%d', '__mod__', float(longvalue))
1094 self.checkequal('0042.00', '%07.2f', '__mod__', 42)
1095 self.checkequal('0042.00', '%07.2F', '__mod__', 42)
1097 self.checkraises(TypeError, 'abc', '__mod__')
1098 self.checkraises(TypeError, '%(foo)s', '__mod__', 42)
1099 self.checkraises(TypeError, '%s%s', '__mod__', (42,))
1100 self.checkraises(TypeError, '%c', '__mod__', (None,))
1101 self.checkraises(ValueError, '%(foo', '__mod__', {})
1102 self.checkraises(TypeError, '%(foo)s %(bar)s', '__mod__', ('foo', 42))
1103 self.checkraises(TypeError, '%d', '__mod__', "42") # not numeric
1104 self.checkraises(TypeError, '%d', '__mod__', (42+0j)) # no int/long conversion provided
1106 # argument names with properly nested brackets are supported
1107 self.checkequal('bar', '%((foo))s', '__mod__', {'(foo)': 'bar'})
1109 # 100 is a magic number in PyUnicode_Format, this forces a resize
1110 self.checkequal(103*'a'+'x', '%sx', '__mod__', 103*'a')
1112 self.checkraises(TypeError, '%*s', '__mod__', ('foo', 'bar'))
1113 self.checkraises(TypeError, '%10.*f', '__mod__', ('foo', 42.))
1114 self.checkraises(ValueError, '%10', '__mod__', (42,))
1116 def test_floatformatting(self):
1117 # float formatting
1118 for prec in xrange(100):
1119 format = '%%.%if' % prec
1120 value = 0.01
1121 for x in xrange(60):
1122 value = value * 3.141592655 / 3.0 * 10.0
1123 self.checkcall(format, "__mod__", value)
1125 def test_inplace_rewrites(self):
1126 # Check that strings don't copy and modify cached single-character strings
1127 self.checkequal('a', 'A', 'lower')
1128 self.checkequal(True, 'A', 'isupper')
1129 self.checkequal('A', 'a', 'upper')
1130 self.checkequal(True, 'a', 'islower')
1132 self.checkequal('a', 'A', 'replace', 'A', 'a')
1133 self.checkequal(True, 'A', 'isupper')
1135 self.checkequal('A', 'a', 'capitalize')
1136 self.checkequal(True, 'a', 'islower')
1138 self.checkequal('A', 'a', 'swapcase')
1139 self.checkequal(True, 'a', 'islower')
1141 self.checkequal('A', 'a', 'title')
1142 self.checkequal(True, 'a', 'islower')
1144 def test_partition(self):
1146 self.checkequal(('this is the par', 'ti', 'tion method'),
1147 'this is the partition method', 'partition', 'ti')
1149 # from raymond's original specification
1150 S = 'http://www.python.org'
1151 self.checkequal(('http', '://', 'www.python.org'), S, 'partition', '://')
1152 self.checkequal(('http://www.python.org', '', ''), S, 'partition', '?')
1153 self.checkequal(('', 'http://', 'www.python.org'), S, 'partition', 'http://')
1154 self.checkequal(('http://www.python.', 'org', ''), S, 'partition', 'org')
1156 self.checkraises(ValueError, S, 'partition', '')
1157 self.checkraises(TypeError, S, 'partition', None)
1159 # mixed use of str and unicode
1160 self.assertEqual('a/b/c'.partition(u'/'), ('a', '/', 'b/c'))
1162 def test_rpartition(self):
1164 self.checkequal(('this is the rparti', 'ti', 'on method'),
1165 'this is the rpartition method', 'rpartition', 'ti')
1167 # from raymond's original specification
1168 S = 'http://www.python.org'
1169 self.checkequal(('http', '://', 'www.python.org'), S, 'rpartition', '://')
1170 self.checkequal(('', '', 'http://www.python.org'), S, 'rpartition', '?')
1171 self.checkequal(('', 'http://', 'www.python.org'), S, 'rpartition', 'http://')
1172 self.checkequal(('http://www.python.', 'org', ''), S, 'rpartition', 'org')
1174 self.checkraises(ValueError, S, 'rpartition', '')
1175 self.checkraises(TypeError, S, 'rpartition', None)
1177 # mixed use of str and unicode
1178 self.assertEqual('a/b/c'.rpartition(u'/'), ('a/b', '/', 'c'))
class MixinStrStringUserStringTest:
    # Additional tests for 8bit strings, i.e. str, UserString and
    # the string module

    def test_maketrans(self):
        # The table must equal the 256-character identity string with
        # 'abc' replaced by 'xyz'.
        identity = ''.join([chr(code) for code in xrange(256)])
        expected = identity.replace('abc', 'xyz')
        self.assertEqual(expected, string.maketrans('abc', 'xyz'))
        # arguments of different lengths are rejected
        self.assertRaises(ValueError, string.maketrans, 'abc', 'xyzw')

    def test_translate(self):
        def translated(expected, subject, *args):
            self.checkequal(expected, subject, 'translate', *args)

        # translate through a table and delete characters in one call
        translated('xyzxyz', 'xyzabcdef', string.maketrans('abc', 'xyz'), 'def')

        upper_a = string.maketrans('a', 'A')
        translated('Abc', 'abc', upper_a)
        translated('xyz', 'xyz', upper_a)
        translated('yz', 'xyz', upper_a, 'x')
        # a table of None: only the deletion argument has an effect
        translated('yx', 'zyzzx', None, 'z')
        translated('zyzzx', 'zyzzx', None, '')
        translated('zyzzx', 'zyzzx', None)
        # a table that is not a full translation table is rejected
        self.checkraises(ValueError, 'xyz', 'translate', 'too short', 'strip')
        self.checkraises(ValueError, 'xyz', 'translate', 'too short')
class MixinStrUserStringTest:
    # Additional tests that only work with
    # 8bit compatible object, i.e. str and UserString

    # The test is only defined when test_support reports unicode support.
    if test_support.have_unicode:
        def test_encoding_decoding(self):
            plain = 'hello world'
            # (codec name, encoded form of 'hello world')
            roundtrips = [
                ('rot13', 'uryyb jbeyq'),
                ('base64', 'aGVsbG8gd29ybGQ=\n'),
                ('hex', '68656c6c6f20776f726c64'),
                ('uu', 'begin 666 <data>\n+:&5L;&\\@=V]R;&0 \n \nend\n'),
            ]
            for codec, encoded in roundtrips:
                self.checkequal(encoded, plain, 'encode', codec)
                self.checkequal(plain, encoded, 'decode', codec)
            # zlib is optional, so we make the test optional too...
            try:
                import zlib
            except ImportError:
                pass
            else:
                compressed = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
                self.checkequal(compressed, plain, 'encode', 'zlib')
                self.checkequal(plain, compressed, 'decode', 'zlib')

            # the codec name must be a string
            self.checkraises(TypeError, 'xyz', 'decode', 42)
            self.checkraises(TypeError, 'xyz', 'encode', 42)
class MixinStrUnicodeTest:
    # Additional tests that only work with str and unicode.
    # The class using this mixin must set type2test to str or unicode.

    def test_bug1001011(self):
        # Make sure join returns a NEW object for single item sequences
        # involving a subclass.
        # Make sure that it is of the appropriate type.
        # Check the optimisation still occurs for standard objects.
        #
        # assertIs()/assertIsNot() are used instead of assert_(a is b):
        # the pass/fail condition is the same, but a failure reports the
        # objects involved rather than just "False is not true".
        t = self.type2test
        class subclass(t):
            pass
        s1 = subclass("abcd")
        s2 = t().join([s1])
        self.assertIsNot(s1, s2)
        self.assertIs(type(s2), t)

        # An exact instance of the type is handed back unchanged.
        s1 = t("abcd")
        s2 = t().join([s1])
        self.assertIs(s1, s2)

        # Should also test mixed-type join.
        if t is unicode:
            # str separator joining unicode items: the result stays unicode.
            s1 = subclass("abcd")
            s2 = "".join([s1])
            self.assertIsNot(s1, s2)
            self.assertIs(type(s2), t)

            s1 = t("abcd")
            s2 = "".join([s1])
            self.assertIs(s1, s2)

        elif t is str:
            # unicode separator joining str items: a new unicode object is
            # always produced, so the single-item shortcut cannot apply.
            s1 = subclass("abcd")
            s2 = u"".join([s1])
            self.assertIsNot(s1, s2)
            self.assertIs(type(s2), unicode) # promotes!

            s1 = t("abcd")
            s2 = u"".join([s1])
            self.assertIsNot(s1, s2)
            self.assertIs(type(s2), unicode) # promotes!

        else:
            self.fail("unexpected type for MixinStrUnicodeTest %r" % t)