3 ; this file contains a script of tests to run through regress.exe
5 ; comments start with a semicolon and proceed to the end of the line
7 ; changes to regular expression compile flags start with a "-" as the first
8 ; non-whitespace character and consist of a list of the printable names
9 ; of the flags, for example "match_default"
11 ; Other lines contain a test to perform using the current flag status
12 ; the first token contains the expression to compile, the second the string
13 ; to match it against. If the second string is "!" then the expression should
14 ; not compile; that is, the first string is an invalid regular expression.
15 ; This is then followed by a list of integers that specify what should match,
16 ; each pair represents the starting and ending positions of a subexpression
17 ; starting with the zeroth subexpression (the whole match).
18 ; A value of -1 indicates that the subexpression should not take part in the
19 ; match at all; if the first value is -1 then no part of the expression should match.
22 ; Tests taken from BOOST testsuite and adapted to glibc regex.
24 ; Boost Software License - Version 1.0 - August 17th, 2003
26 ; Permission is hereby granted, free of charge, to any person or organization
27 ; obtaining a copy of the software and accompanying documentation covered by
28 ; this license (the "Software") to use, reproduce, display, distribute,
29 ; execute, and transmit the Software, and to prepare derivative works of the
30 ; Software, and to permit third-parties to whom the Software is furnished to
31 ; do so, all subject to the following:
33 ; The copyright notices in the Software and this entire statement, including
34 ; the above license grant, this restriction and the following disclaimer,
35 ; must be included in all copies of the Software, in whole or in part, and
36 ; all derivative works of the Software, unless such copies or derivative
37 ; works are solely in the form of machine-executable object code generated by
38 ; a source language processor.
40 ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
41 ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
42 ; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
43 ; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
44 ; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
45 ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
46 ; DEALINGS IN THE SOFTWARE.
49 - match_default normal REG_EXTENDED
52 ; try some really simple literals:
58 ; and some simple brackets:
71 p(a)rameter ABCparameterXYZ 3 12 4 5
72 [pq](a)rameter ABCparameterXYZ 3 12 4 5
74 ; now try escaped brackets:
75 - match_default bk_parens REG_BASIC
88 ; now move on to "." wildcards
89 - match_default normal REG_EXTENDED REG_STARTEND
96 ; now move on to the repetition ops,
97 ; starting with operator *
98 - match_default normal REG_EXTENDED
102 ab* sssabbbbbbsss 3 10
117 ab+ sssabbbbbbsss 3 10
131 - match_default normal REG_EXTENDED
135 ab? sssabbbbbbsss 3 5
148 ; now try operator {}
149 - match_default normal REG_EXTENDED
166 - match_default normal REG_BASIC
180 ; now test the alternation operator |
181 - match_default normal REG_EXTENDED
186 a(b|c) ad -1 -1 -1 -1
189 ; now test the set operator []
190 - match_default normal REG_EXTENDED
191 ; try some literals first
227 a[[:alpha:]]c abc 0 3
237 [[:alnum:]]+ -%@a0X_- 3 6
238 [[:alpha:]]+ -%@aX_0- 3 5
239 [[:blank:]]+ "a \tb" 1 4
240 [[:cntrl:]]+ a\n\tb 1 3
241 [[:digit:]]+ a019b 1 4
242 [[:graph:]]+ " a%b " 1 4
243 [[:lower:]]+ AabC 1 3
244 ; This test fails with STLPort; disabled for now as this is a corner case anyway...
245 ;[[:print:]]+ "\na b\n" 1 4
246 [[:punct:]]+ " %-&\t" 1 4
247 [[:space:]]+ "a \n\t\rb" 1 5
248 [[:upper:]]+ aBCd 1 3
249 [[:xdigit:]]+ p0f3Cx 1 5
251 ; now test flag settings:
252 - escape_in_lists REG_NO_POSIX_TEST
257 - match_default normal REG_EXTENDED
262 - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
269 - match_default normal REG_PERL
272 a(b*)c\1d abbcbbd 0 7 1 3
273 a(b*)c\1d abbcbd -1 -1
274 a(b*)c\1d abbcbbbd -1 -1
276 a([bc])\1d abcdabbd 4 8 5 6
277 ; strictly speaking this is at best ambiguous and at worst wrong, but it is what most
278 ; regex implementations will match.
279 a(([bc])\2)*d abbccd 0 6 3 5 3 4
281 a(([bc])\2)*d abbcbd -1 -1
282 a((b)*\2)*d abbbd 0 5 1 4 2 3
284 (ab*)[ab]*\1 ababaaa 0 7 0 1
285 (a)\1bcd aabcd 0 5 0 1
286 (a)\1bc*d aabcd 0 5 0 1
287 (a)\1bc*d aabd 0 4 0 1
288 (a)\1bc*d aabcccd 0 7 0 1
289 (a)\1bc*[ce]d aabcccd 0 7 0 1
290 ^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5
293 - match_default extended REG_EXTENDED
294 (ab*)[ab]*\1 ababaaa 0 7 0 1
353 ; now follows various complex expressions designed to try and bust the matcher:
354 a(((b)))c abc 0 3 1 2 1 2 1 2
355 a(b|(c))d abd 0 3 1 2 -1 -1
356 a(b|(c))d acd 0 3 1 2 1 2
357 a(b*|c)d abbd 0 4 1 3
358 ; just gotta have one DFA-buster, of course
359 a[ab]{20} aaaaabaaaabaaaabaaaab 0 21
360 ; and an inline expansion in case somebody gets tricky
361 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21
362 ; and in case somebody just slips in an NFA...
363 a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31
365 1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71
366 ; fish for problems as brackets go past 8
367 [ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8
368 [ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9
369 [ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10
370 [ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10
371 ; and as parentheses go past 9:
372 (a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9
373 (a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10
374 (a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11
375 (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12
376 (a)d|(b)c abc 1 3 -1 -1 1 2
377 _+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19
379 ; subtleties of matching
380 ;a(b)?c\1d acd 0 3 -1 -1
381 ; the POSIX variant of the test above expects no match:
382 a(b)?c\1d acd -1 -1 -1 -1
383 a(b?c)+d accd 0 4 2 3
384 (wee|week)(knights|night) weeknights 0 10 0 3 3 10
386 a(b|(c))d abd 0 3 1 2 -1 -1
387 a(b|(c))d acd 0 3 1 2 1 2
388 a(b*|c|e)d abbd 0 4 1 3
389 a(b*|c|e)d acd 0 3 1 2
390 a(b*|c|e)d ad 0 2 1 1
396 (a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5
397 a([bc]?)c abc 0 3 1 2
399 a([bc]+)c abc 0 3 1 2
400 a([bc]+)c abcc 0 4 1 3
401 a([bc]+)bc abcbc 0 5 1 3
402 a(bb+|b)b abb 0 3 1 2
403 a(bbb+|bb+|b)b abb 0 3 1 2
404 a(bbb+|bb+|b)b abbb 0 4 1 3
405 a(bbb+|bb+|b)bb abbb 0 4 1 2
406 (.*).* abcdef 0 6 0 6
408 xyx*xz xyxxxxyxxxz 5 11
410 ; do we get the right subexpression when it is used more than once?
411 a(b|c)*d ad 0 2 -1 -1
412 a(b|c)*d abcd 0 4 2 3
414 a(b|c)+d abcd 0 4 2 3
416 a(b|c){0,0}d ad 0 2 -1 -1
417 a(b|c){0,1}d ad 0 2 -1 -1
418 a(b|c){0,1}d abd 0 3 1 2
419 a(b|c){0,2}d ad 0 2 -1 -1
420 a(b|c){0,2}d abcd 0 4 2 3
421 a(b|c){0,}d ad 0 2 -1 -1
422 a(b|c){0,}d abcd 0 4 2 3
423 a(b|c){1,1}d abd 0 3 1 2
424 a(b|c){1,2}d abd 0 3 1 2
425 a(b|c){1,2}d abcd 0 4 2 3
426 a(b|c){1,}d abd 0 3 1 2
427 a(b|c){1,}d abcd 0 4 2 3
428 a(b|c){2,2}d acbd 0 4 2 3
429 a(b|c){2,2}d abcd 0 4 2 3
430 a(b|c){2,4}d abcd 0 4 2 3
431 a(b|c){2,4}d abcbd 0 5 3 4
432 a(b|c){2,4}d abcbcd 0 6 4 5
433 a(b|c){2,}d abcd 0 4 2 3
434 a(b|c){2,}d abcbd 0 5 3 4
435 ; perl only: these conflict with the POSIX test below
436 ;a(b|c?)+d abcd 0 4 3 3
437 ;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
438 ;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
441 - match_default extended REG_EXTENDED REG_STARTEND
443 a(b|c?)+d abcd 0 4 2 3
444 a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
445 a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
446 a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
447 a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1
448 a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3
449 a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1
450 a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3
451 a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1
453 - match_default normal REG_PERL
454 ; try to match C++ syntax elements:
456 //[^\n]* "++i //here is a line comment\n" 4 28
458 /\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27
459 /\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1
460 /\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1
461 /\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1
462 /\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1
463 /\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1
464 ; preprocessor directives:
465 ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1
466 ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1
468 ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42
470 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
471 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1
472 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
473 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1
474 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24
476 '([^\\']|\\.)*' '\\x3A' 0 6 4 5
477 '([^\\']|\\.)*' '\\'' 0 4 1 3
478 '([^\\']|\\.)*' '\\n' 0 4 1 3
480 ; finally try some case insensitive matches:
481 - match_default normal REG_EXTENDED REG_ICASE
482 ; upper and lower have no meaning here so they fail, however these
483 ; may compile with other libraries...
486 0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72
488 ; known and suspected bugs:
489 - match_default normal REG_EXTENDED
530 (\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5
532 - match_default normal REG_EXTENDED REG_ICASE
541 [[:lower:]]+ abyzABYZ 0 8
542 [[:upper:]]+ abzABZ 0 6
543 [[:alpha:]]+ abyzABYZ 0 8
544 [[:alnum:]]+ 09abyzABYZ 0 10
557 ; collating elements and rewritten set code:
558 - match_default normal REG_EXTENDED REG_STARTEND
564 ;[[.right-curly-bracket.]] } 0 1
569 ;[[=right-curly-bracket=]] } 0 1
570 - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
577 - match_default normal REG_EXTENDED REG_STARTEND
583 ; try multi-character ligatures:
594 - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
604 - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST
608 - match_default normal REG_EXTENDED REG_STARTEND
616 ; extended repeat checking to exercise new algorithms:
624 ab.{2,5}xy ab__xy_ 0 6
625 ab.{2,5}xy ab____xy_ 0 8
626 ab.{2,5}xy ab_____xy_ 0 9
627 ab.{2,5}xy ab__xy 0 6
628 ab.{2,5}xy ab_____xy 0 9
630 ab.{2,5} ab_______ 0 7
631 ab.{2,5}xy ab______xy -1 -1
632 ab.{2,5}xy ab_xy -1 -1
641 ab.{2,5}?xy ab__xy_ 0 6
642 ab.{2,5}?xy ab____xy_ 0 8
643 ab.{2,5}?xy ab_____xy_ 0 9
644 ab.{2,5}?xy ab__xy 0 6
645 ab.{2,5}?xy ab_____xy 0 9
647 ab.{2,5}? ab_______ 0 7
648 ab.{2,5}?xy ab______xy -1 -1
649 ab.{2,5}xy ab_xy -1 -1
651 ; again but with slower algorithm variant:
652 - match_default REG_EXTENDED
653 ; now again for single character repeats:
662 ab_{2,5}xy ab__xy_ 0 6
663 ab_{2,5}xy ab____xy_ 0 8
664 ab_{2,5}xy ab_____xy_ 0 9
665 ab_{2,5}xy ab__xy 0 6
666 ab_{2,5}xy ab_____xy 0 9
668 ab_{2,5} ab_______ 0 7
669 ab_{2,5}xy ab______xy -1 -1
670 ab_{2,5}xy ab_xy -1 -1
679 ab_{2,5}?xy ab__xy_ 0 6
680 ab_{2,5}?xy ab____xy_ 0 8
681 ab_{2,5}?xy ab_____xy_ 0 9
682 ab_{2,5}?xy ab__xy 0 6
683 ab_{2,5}?xy ab_____xy 0 9
685 ab_{2,5}? ab_______ 0 7
686 ab_{2,5}?xy ab______xy -1 -1
687 ab_{2,5}xy ab_xy -1 -1
689 ; and again for sets:
691 ab[_,;]*xy ab_xy_ 0 5
697 ab[_,;]{2,5}xy ab__xy_ 0 6
698 ab[_,;]{2,5}xy ab____xy_ 0 8
699 ab[_,;]{2,5}xy ab_____xy_ 0 9
700 ab[_,;]{2,5}xy ab__xy 0 6
701 ab[_,;]{2,5}xy ab_____xy 0 9
702 ab[_,;]{2,5} ab__ 0 4
703 ab[_,;]{2,5} ab_______ 0 7
704 ab[_,;]{2,5}xy ab______xy -1 -1
705 ab[_,;]{2,5}xy ab_xy -1 -1
707 ab[_,;]*?xy abxy_ 0 4
708 ab[_,;]*?xy ab_xy_ 0 5
710 ab[_,;]*?xy ab_xy 0 5
714 ab[_,;]{2,5}?xy ab__xy_ 0 6
715 ab[_,;]{2,5}?xy ab____xy_ 0 8
716 ab[_,;]{2,5}?xy ab_____xy_ 0 9
717 ab[_,;]{2,5}?xy ab__xy 0 6
718 ab[_,;]{2,5}?xy ab_____xy 0 9
719 ab[_,;]{2,5}? ab__ 0 4
720 ab[_,;]{2,5}? ab_______ 0 7
721 ab[_,;]{2,5}?xy ab______xy -1 -1
722 ab[_,;]{2,5}xy ab_xy -1 -1
724 ; and again for tricky sets with digraphs:
725 ;ab[_[.ae.]]*xy abxy_ 0 4
726 ;ab[_[.ae.]]*xy ab_xy_ 0 5
727 ;ab[_[.ae.]]*xy abxy 0 4
728 ;ab[_[.ae.]]*xy ab_xy 0 5
730 ;ab[_[.ae.]]* ab__ 0 4
732 ;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6
733 ;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8
734 ;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9
735 ;ab[_[.ae.]]{2,5}xy ab__xy 0 6
736 ;ab[_[.ae.]]{2,5}xy ab_____xy 0 9
737 ;ab[_[.ae.]]{2,5} ab__ 0 4
738 ;ab[_[.ae.]]{2,5} ab_______ 0 7
739 ;ab[_[.ae.]]{2,5}xy ab______xy -1 -1
740 ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
742 ;ab[_[.ae.]]*?xy abxy_ 0 4
743 ;ab[_[.ae.]]*?xy ab_xy_ 0 5
744 ;ab[_[.ae.]]*?xy abxy 0 4
745 ;ab[_[.ae.]]*?xy ab_xy 0 5
746 ;ab[_[.ae.]]*? ab 0 2
747 ;ab[_[.ae.]]*? ab__ 0 2
749 ;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6
750 ;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8
751 ;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9
752 ;ab[_[.ae.]]{2,5}?xy ab__xy 0 6
753 ;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9
754 ;ab[_[.ae.]]{2,5}? ab__ 0 4
755 ;ab[_[.ae.]]{2,5}? ab_______ 0 4
756 ;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1
757 ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
759 ; new bugs detected in spring 2003:
760 - normal match_continuous REG_NO_POSIX_TEST
768 ^((){2}) abc 0 0 0 0 0 0
775 ; subtleties of matching with no sub-expressions marked
776 - normal match_nosubs REG_NO_POSIX_TEST
778 (wee|week)(knights|night) weeknights 0 10
790 (a|ab)(bc([de]+)f|cde) abcdef 0 6
797 a(bbb+|bb+|b)b abb 0 3
798 a(bbb+|bb+|b)b abbb 0 4
799 a(bbb+|bb+|b)bb abbb 0 4
803 - normal nosubs REG_NO_POSIX_TEST
805 (wee|week)(knights|night) weeknights 0 10
817 (a|ab)(bc([de]+)f|cde) abcdef 0 6
824 a(bbb+|bb+|b)b abb 0 3
825 a(bbb+|bb+|b)b abbb 0 4
826 a(bbb+|bb+|b)bb abbb 0 4