Remove unnecessary use of context for long getters.
[python.git] / Lib / test / test_re.py
blobfeb71603e0a1a9a1e8b22a3e3e1c3b698de07627
1 import sys
2 sys.path = ['.'] + sys.path
4 from test.test_support import verbose, run_unittest
5 import re
6 from re import Scanner
7 import sys, os, traceback
8 from weakref import proxy
10 # Misc tests from Tim Peters' re.doc
12 # WARNING: Don't change details in these tests if you don't know
13 # what you're doing. Some of these tests were carefully modeled to
14 # cover most of the code.
16 import unittest
18 class ReTests(unittest.TestCase):
20 def test_weakref(self):
21 s = 'QabbbcR'
22 x = re.compile('ab+c')
23 y = proxy(x)
24 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
26 def test_search_star_plus(self):
27 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
28 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
29 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
30 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
31 self.assertEqual(re.search('x', 'aaa'), None)
32 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
33 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
34 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
35 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
36 self.assertEqual(re.match('a+', 'xxx'), None)
38 def bump_num(self, matchobj):
39 int_value = int(matchobj.group(0))
40 return str(int_value + 1)
42 def test_basic_re_sub(self):
43 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
44 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
45 '9.3 -3 24x100y')
46 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
47 '9.3 -3 23x99y')
49 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
50 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
52 s = r"\1\1"
53 self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
54 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
55 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
57 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
58 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
59 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
60 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
62 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
63 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
64 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
65 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
66 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
68 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
70 def test_bug_449964(self):
71 # fails for group followed by other escape
72 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
73 'xx\bxx\b')
75 def test_bug_449000(self):
76 # Test for sub() on escaped characters
77 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
78 'abc\ndef\n')
79 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
86 def test_bug_1140(self):
87 # re.sub(x, y, u'') should return u'', not '', and
88 # re.sub(x, y, '') should return '', not u''.
89 # Also:
90 # re.sub(x, y, unicode(x)) should return unicode(y), and
91 # re.sub(x, y, str(x)) should return
92 # str(y) if isinstance(y, str) else unicode(y).
93 for x in 'x', u'x':
94 for y in 'y', u'y':
95 z = re.sub(x, y, u'')
96 self.assertEqual(z, u'')
97 self.assertEqual(type(z), unicode)
99 z = re.sub(x, y, '')
100 self.assertEqual(z, '')
101 self.assertEqual(type(z), str)
103 z = re.sub(x, y, unicode(x))
104 self.assertEqual(z, y)
105 self.assertEqual(type(z), unicode)
107 z = re.sub(x, y, str(x))
108 self.assertEqual(z, y)
109 self.assertEqual(type(z), type(y))
111 def test_bug_1661(self):
112 # Verify that flags do not get silently ignored with compiled patterns
113 pattern = re.compile('.')
114 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
115 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
116 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
117 self.assertRaises(ValueError, re.compile, pattern, re.I)
119 def test_bug_3629(self):
120 # A regex that triggered a bug in the sre-code validator
121 re.compile("(?P<quote>)(?(quote))")
123 def test_sub_template_numeric_escape(self):
124 # bug 776311 and friends
125 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
126 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
127 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
128 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
129 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
130 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
131 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
133 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
134 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
136 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
137 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
138 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
139 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
140 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
142 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
143 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
145 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
146 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
147 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
153 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
154 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
155 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
156 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
158 # in python2.3 (etc), these loop endlessly in sre_parser.py
159 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
160 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
161 'xz8')
162 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
163 'xza')
165 def test_qualified_re_sub(self):
166 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
167 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
169 def test_bug_114660(self):
170 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
171 'hello there')
173 def test_bug_462270(self):
174 # Test for empty sub() behaviour, see SF bug #462270
175 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
176 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
178 def test_symbolic_refs(self):
179 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
180 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
181 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
182 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
183 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
184 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
185 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
186 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
187 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
189 def test_re_subn(self):
190 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
191 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
192 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
193 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
194 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
196 def test_re_split(self):
197 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
198 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
199 self.assertEqual(re.split("(:*)", ":a:b::c"),
200 ['', ':', 'a', ':', 'b', '::', 'c'])
201 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
202 self.assertEqual(re.split("(:)*", ":a:b::c"),
203 ['', ':', 'a', ':', 'b', ':', 'c'])
204 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
205 ['', ':', 'a', ':b::', 'c'])
206 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
207 ['', None, ':', 'a', None, ':', '', 'b', None, '',
208 None, '::', 'c'])
209 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
210 ['', 'a', '', '', 'c'])
212 def test_qualified_re_split(self):
213 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
214 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
215 self.assertEqual(re.split("(:)", ":a:b::c", 2),
216 ['', ':', 'a', ':', 'b::c'])
217 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
218 ['', ':', 'a', ':', 'b::c'])
220 def test_re_findall(self):
221 self.assertEqual(re.findall(":+", "abc"), [])
222 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
223 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
224 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
225 (":", ":"),
226 (":", "::")])
228 def test_bug_117612(self):
229 self.assertEqual(re.findall(r"(a|(b))", "aba"),
230 [("a", ""),("b", "b"),("a", "")])
232 def test_re_match(self):
233 self.assertEqual(re.match('a', 'a').groups(), ())
234 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
235 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
236 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
237 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
239 pat = re.compile('((a)|(b))(c)?')
240 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
241 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
242 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
243 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
244 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
246 # A single group
247 m = re.match('(a)', 'a')
248 self.assertEqual(m.group(0), 'a')
249 self.assertEqual(m.group(0), 'a')
250 self.assertEqual(m.group(1), 'a')
251 self.assertEqual(m.group(1, 1), ('a', 'a'))
253 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
254 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
255 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
256 (None, 'b', None))
257 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
259 def test_re_groupref_exists(self):
260 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
261 ('(', 'a'))
262 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
263 (None, 'a'))
264 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
265 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
266 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
267 ('a', 'b'))
268 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
269 (None, 'd'))
270 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
271 (None, 'd'))
272 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
273 ('a', ''))
275 # Tests for bug #1177831: exercise groups other than the first group
276 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
277 self.assertEqual(p.match('abc').groups(),
278 ('a', 'b', 'c'))
279 self.assertEqual(p.match('ad').groups(),
280 ('a', None, 'd'))
281 self.assertEqual(p.match('abd'), None)
282 self.assertEqual(p.match('ac'), None)
285 def test_re_groupref(self):
286 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
287 ('|', 'a'))
288 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
289 (None, 'a'))
290 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
291 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
292 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
293 ('a', 'a'))
294 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
295 (None, None))
297 def test_groupdict(self):
298 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
299 'first second').groupdict(),
300 {'first':'first', 'second':'second'})
302 def test_expand(self):
303 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
304 "first second")
305 .expand(r"\2 \1 \g<second> \g<first>"),
306 "second first second first")
308 def test_repeat_minmax(self):
309 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
310 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
311 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
312 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
314 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
315 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
316 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
317 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
318 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
319 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
320 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
321 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
323 self.assertEqual(re.match("^x{1}$", "xxx"), None)
324 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
325 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
326 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
328 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
329 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
330 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
331 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
332 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
333 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
334 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
335 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
337 self.assertEqual(re.match("^x{}$", "xxx"), None)
338 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
340 def test_getattr(self):
341 self.assertEqual(re.match("(a)", "a").pos, 0)
342 self.assertEqual(re.match("(a)", "a").endpos, 1)
343 self.assertEqual(re.match("(a)", "a").string, "a")
344 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
345 self.assertNotEqual(re.match("(a)", "a").re, None)
347 def test_special_escapes(self):
348 self.assertEqual(re.search(r"\b(b.)\b",
349 "abcd abc bcd bx").group(1), "bx")
350 self.assertEqual(re.search(r"\B(b.)\B",
351 "abc bcd bc abxd").group(1), "bx")
352 self.assertEqual(re.search(r"\b(b.)\b",
353 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
354 self.assertEqual(re.search(r"\B(b.)\B",
355 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
356 self.assertEqual(re.search(r"\b(b.)\b",
357 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
358 self.assertEqual(re.search(r"\B(b.)\B",
359 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
360 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
361 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
362 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
363 self.assertEqual(re.search(r"\b(b.)\b",
364 u"abcd abc bcd bx").group(1), "bx")
365 self.assertEqual(re.search(r"\B(b.)\B",
366 u"abc bcd bc abxd").group(1), "bx")
367 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
368 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
369 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
370 self.assertEqual(re.search(r"\d\D\w\W\s\S",
371 "1aa! a").group(0), "1aa! a")
372 self.assertEqual(re.search(r"\d\D\w\W\s\S",
373 "1aa! a", re.LOCALE).group(0), "1aa! a")
374 self.assertEqual(re.search(r"\d\D\w\W\s\S",
375 "1aa! a", re.UNICODE).group(0), "1aa! a")
377 def test_bigcharset(self):
378 self.assertEqual(re.match(u"([\u2222\u2223])",
379 u"\u2222").group(1), u"\u2222")
380 self.assertEqual(re.match(u"([\u2222\u2223])",
381 u"\u2222", re.UNICODE).group(1), u"\u2222")
383 def test_anyall(self):
384 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
385 "a\nb")
386 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
387 "a\n\nb")
389 def test_non_consuming(self):
390 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
391 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
392 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
393 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
394 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
395 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
396 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
398 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
399 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
400 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
401 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
403 def test_ignore_case(self):
404 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
405 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
406 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
407 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
408 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
409 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
410 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
411 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
412 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
413 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
415 def test_category(self):
416 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
418 def test_getlower(self):
419 import _sre
420 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
421 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
422 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
424 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
425 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
427 def test_not_literal(self):
428 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
429 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
431 def test_search_coverage(self):
432 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
433 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
435 def test_re_escape(self):
436 p=""
437 for i in range(0, 256):
438 p = p + chr(i)
439 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
440 True)
441 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
443 pat=re.compile(re.escape(p))
444 self.assertEqual(pat.match(p) is not None, True)
445 self.assertEqual(pat.match(p).span(), (0,256))
447 def test_pickling(self):
448 import pickle
449 self.pickle_test(pickle)
450 import cPickle
451 self.pickle_test(cPickle)
452 # old pickles expect the _compile() reconstructor in sre module
453 import warnings
454 with warnings.catch_warnings():
455 warnings.filterwarnings("ignore", "The sre module is deprecated",
456 DeprecationWarning)
457 from sre import _compile
459 def pickle_test(self, pickle):
460 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
461 s = pickle.dumps(oldpat)
462 newpat = pickle.loads(s)
463 self.assertEqual(oldpat, newpat)
465 def test_constants(self):
466 self.assertEqual(re.I, re.IGNORECASE)
467 self.assertEqual(re.L, re.LOCALE)
468 self.assertEqual(re.M, re.MULTILINE)
469 self.assertEqual(re.S, re.DOTALL)
470 self.assertEqual(re.X, re.VERBOSE)
472 def test_flags(self):
473 for flag in [re.I, re.M, re.X, re.S, re.L]:
474 self.assertNotEqual(re.compile('^pattern$', flag), None)
476 def test_sre_character_literals(self):
477 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
478 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
479 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
480 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
481 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
482 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
483 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
484 self.assertRaises(re.error, re.match, "\911", "")
486 def test_sre_character_class_literals(self):
487 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
488 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
489 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
490 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
491 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
492 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
493 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
494 self.assertRaises(re.error, re.match, "[\911]", "")
496 def test_bug_113254(self):
497 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
498 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
499 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
501 def test_bug_527371(self):
502 # bug described in patches 527371/672491
503 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
504 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
505 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
506 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
507 self.assertEqual(re.match("((a))", "a").lastindex, 1)
509 def test_bug_545855(self):
510 # bug 545855 -- This pattern failed to cause a compile error as it
511 # should, instead provoking a TypeError.
512 self.assertRaises(re.error, re.compile, 'foo[a-')
514 def test_bug_418626(self):
515 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
516 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
517 # pattern '*?' on a long string.
518 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
519 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
520 20003)
521 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
522 # non-simple '*?' still used to hit the recursion limit, before the
523 # non-recursive scheme was implemented.
524 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
526 def test_bug_612074(self):
527 pat=u"["+re.escape(u"\u2039")+u"]"
528 self.assertEqual(re.compile(pat) and 1, 1)
530 def test_stack_overflow(self):
531 # nasty cases that used to overflow the straightforward recursive
532 # implementation of repeated groups.
533 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
534 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
535 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
537 def test_scanner(self):
538 def s_ident(scanner, token): return token
539 def s_operator(scanner, token): return "op%s" % token
540 def s_float(scanner, token): return float(token)
541 def s_int(scanner, token): return int(token)
543 scanner = Scanner([
544 (r"[a-zA-Z_]\w*", s_ident),
545 (r"\d+\.\d*", s_float),
546 (r"\d+", s_int),
547 (r"=|\+|-|\*|/", s_operator),
548 (r"\s+", None),
551 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
553 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
554 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
555 'op+', 'bar'], ''))
557 def test_bug_448951(self):
558 # bug 448951 (similar to 429357, but with single char match)
559 # (Also test greedy matches.)
560 for op in '','?','*':
561 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
562 (None, None))
563 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
564 ('a:', 'a'))
566 def test_bug_725106(self):
567 # capturing groups in alternatives in repeats
568 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
569 ('b', 'a'))
570 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
571 ('c', 'b'))
572 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
573 ('b', None))
574 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
575 ('b', None))
576 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
577 ('b', 'a'))
578 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
579 ('c', 'b'))
580 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
581 ('b', None))
582 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
583 ('b', None))
585 def test_bug_725149(self):
586 # mark_stack_base restoring before restoring marks
587 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
588 ('a', None))
589 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
590 ('a', None, None))
592 def test_bug_764548(self):
593 # bug 764548, re.compile() barfs on str/unicode subclasses
594 try:
595 unicode
596 except NameError:
597 return # no problem if we have no unicode
598 class my_unicode(unicode): pass
599 pat = re.compile(my_unicode("abc"))
600 self.assertEqual(pat.match("xyz"), None)
602 def test_finditer(self):
603 iter = re.finditer(r":+", "a:b::c:::d")
604 self.assertEqual([item.group(0) for item in iter],
605 [":", "::", ":::"])
607 def test_bug_926075(self):
608 try:
609 unicode
610 except NameError:
611 return # no problem if we have no unicode
612 self.assert_(re.compile('bug_926075') is not
613 re.compile(eval("u'bug_926075'")))
615 def test_bug_931848(self):
616 try:
617 unicode
618 except NameError:
619 pass
620 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
621 self.assertEqual(re.compile(pattern).split("a.b.c"),
622 ['a','b','c'])
624 def test_bug_581080(self):
625 iter = re.finditer(r"\s", "a b")
626 self.assertEqual(iter.next().span(), (1,2))
627 self.assertRaises(StopIteration, iter.next)
629 scanner = re.compile(r"\s").scanner("a b")
630 self.assertEqual(scanner.search().span(), (1, 2))
631 self.assertEqual(scanner.search(), None)
633 def test_bug_817234(self):
634 iter = re.finditer(r".*", "asdf")
635 self.assertEqual(iter.next().span(), (0, 4))
636 self.assertEqual(iter.next().span(), (4, 4))
637 self.assertRaises(StopIteration, iter.next)
639 def test_empty_array(self):
640 # SF buf 1647541
641 import array
642 for typecode in 'cbBuhHiIlLfd':
643 a = array.array(typecode)
644 self.assertEqual(re.compile("bla").match(a), None)
645 self.assertEqual(re.compile("").match(a).groups(), ())
647 def test_inline_flags(self):
648 # Bug #1700
649 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
650 lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
652 p = re.compile(upper_char, re.I | re.U)
653 q = p.match(lower_char)
654 self.assertNotEqual(q, None)
656 p = re.compile(lower_char, re.I | re.U)
657 q = p.match(upper_char)
658 self.assertNotEqual(q, None)
660 p = re.compile('(?i)' + upper_char, re.U)
661 q = p.match(lower_char)
662 self.assertNotEqual(q, None)
664 p = re.compile('(?i)' + lower_char, re.U)
665 q = p.match(upper_char)
666 self.assertNotEqual(q, None)
668 p = re.compile('(?iu)' + upper_char)
669 q = p.match(lower_char)
670 self.assertNotEqual(q, None)
672 p = re.compile('(?iu)' + lower_char)
673 q = p.match(upper_char)
674 self.assertNotEqual(q, None)
676 def test_dollar_matches_twice(self):
677 "$ matches the end of string, and just before the terminating \n"
678 pattern = re.compile('$')
679 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
680 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
681 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
683 pattern = re.compile('$', re.MULTILINE)
684 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
685 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
686 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
689 def run_re_tests():
690 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
691 if verbose:
692 print 'Running re_tests test suite'
693 else:
694 # To save time, only run the first and last 10 tests
695 #tests = tests[:10] + tests[-10:]
696 pass
698 for t in tests:
699 sys.stdout.flush()
700 pattern = s = outcome = repl = expected = None
701 if len(t) == 5:
702 pattern, s, outcome, repl, expected = t
703 elif len(t) == 3:
704 pattern, s, outcome = t
705 else:
706 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
708 try:
709 obj = re.compile(pattern)
710 except re.error:
711 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
712 else:
713 print '=== Syntax error:', t
714 except KeyboardInterrupt: raise KeyboardInterrupt
715 except:
716 print '*** Unexpected error ***', t
717 if verbose:
718 traceback.print_exc(file=sys.stdout)
719 else:
720 try:
721 result = obj.search(s)
722 except re.error, msg:
723 print '=== Unexpected exception', t, repr(msg)
724 if outcome == SYNTAX_ERROR:
725 # This should have been a syntax error; forget it.
726 pass
727 elif outcome == FAIL:
728 if result is None: pass # No match, as expected
729 else: print '=== Succeeded incorrectly', t
730 elif outcome == SUCCEED:
731 if result is not None:
732 # Matched, as expected, so now we compute the
733 # result string and compare it to our expected result.
734 start, end = result.span(0)
735 vardict={'found': result.group(0),
736 'groups': result.group(),
737 'flags': result.re.flags}
738 for i in range(1, 100):
739 try:
740 gi = result.group(i)
741 # Special hack because else the string concat fails:
742 if gi is None:
743 gi = "None"
744 except IndexError:
745 gi = "Error"
746 vardict['g%d' % i] = gi
747 for i in result.re.groupindex.keys():
748 try:
749 gi = result.group(i)
750 if gi is None:
751 gi = "None"
752 except IndexError:
753 gi = "Error"
754 vardict[i] = gi
755 repl = eval(repl, vardict)
756 if repl != expected:
757 print '=== grouping error', t,
758 print repr(repl) + ' should be ' + repr(expected)
759 else:
760 print '=== Failed incorrectly', t
762 # Try the match on a unicode string, and check that it
763 # still succeeds.
764 try:
765 result = obj.search(unicode(s, "latin-1"))
766 if result is None:
767 print '=== Fails on unicode match', t
768 except NameError:
769 continue # 1.5.2
770 except TypeError:
771 continue # unicode test case
773 # Try the match on a unicode pattern, and check that it
774 # still succeeds.
775 obj=re.compile(unicode(pattern, "latin-1"))
776 result = obj.search(s)
777 if result is None:
778 print '=== Fails on unicode pattern match', t
780 # Try the match with the search area limited to the extent
781 # of the match and see if it still succeeds. \B will
782 # break (because it won't match at the end or start of a
783 # string), so we'll ignore patterns that feature it.
785 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
786 and result is not None:
787 obj = re.compile(pattern)
788 result = obj.search(s, result.start(0), result.end(0) + 1)
789 if result is None:
790 print '=== Failed on range-limited match', t
792 # Try the match with IGNORECASE enabled, and check that it
793 # still succeeds.
794 obj = re.compile(pattern, re.IGNORECASE)
795 result = obj.search(s)
796 if result is None:
797 print '=== Fails on case-insensitive match', t
799 # Try the match with LOCALE enabled, and check that it
800 # still succeeds.
801 obj = re.compile(pattern, re.LOCALE)
802 result = obj.search(s)
803 if result is None:
804 print '=== Fails on locale-sensitive match', t
806 # Try the match with UNICODE locale enabled, and check
807 # that it still succeeds.
808 obj = re.compile(pattern, re.UNICODE)
809 result = obj.search(s)
810 if result is None:
811 print '=== Fails on unicode-sensitive match', t
def test_main():
    """Run every test in this module.

    The ``ReTests`` unittest cases are run first through ``run_unittest``,
    followed by the table-driven checks of ``run_re_tests()``.
    """
    run_unittest(ReTests)
    run_re_tests()
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()