// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package regexp_test

import (
	. "regexp"

	"bufio"
	"compress/bzip2"
	"fmt"
	"io"
	"math/rand"
	"os"
	"path/filepath"
	"regexp/syntax"
	"strconv"
	"strings"
	"testing"
	"unicode/utf8"
)

// TestRE2 tests this package's regexp API against test cases
// considered during RE2's exhaustive tests, which run all possible
// regexps over a given set of atoms and operators, up to a given
// complexity, over all possible strings over a given alphabet,
// up to a given size. Rather than try to link with RE2, we read a
// log file containing the test cases and the expected matches.
// The log file, re2.txt, is generated by running 'make exhaustive-log'
// in the open source RE2 distribution. http://code.google.com/p/re2/
//
// The test file format is a sequence of stanzas like:
//
//	strings
//	"abc"
//	"123x"
//	regexps
//	"[a-z]+"
//	0-3;0-3
//	-;-
//	"([0-9])([0-9])([0-9])"
//	-;-
//	-;0-3 0-1 1-2 2-3
//
// The stanza begins by defining a set of strings, quoted
// using Go double-quote syntax, one per line. Then the
// regexps section gives a sequence of regexps to run on
// the strings. In the block that follows a regexp, each line
// gives the semicolon-separated match results of running
// the regexp on the corresponding string.
// Each match result is either a single -, meaning no match, or a
// space-separated sequence of pairs giving the match and
// submatch indices. An unmatched subexpression formats
// its pair as a single - (not illustrated above). For now
// each regexp run produces two match results, one for a
// ``full match'' that restricts the regexp to matching the entire
// string or nothing, and one for a ``partial match'' that gives
// the leftmost first match found in the string.
//
// Lines beginning with # are comments. Lines beginning with
// a capital letter are test names printed during RE2's test suite
// and are echoed into t but otherwise ignored.
//
// At time of writing, re2.txt is 32 MB but compresses to 760 kB, so we
// store a bzip2-compressed copy (testdata/re2-exhaustive.txt.bz2) in the
// repository and decompress it on the fly.
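//
// For instance, in the stanza above, the result "0-3 0-1 1-2 2-3" for
// "([0-9])([0-9])([0-9])" run on "123x" corresponds to the index slice
// [0 3 0 1 1 2 2 3] that FindStringSubmatchIndex would return: the
// overall match followed by the three submatches.
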
func TestRE2Search(t *testing.T) {
	testRE2(t, "testdata/re2-search.txt")
}

func TestRE2Exhaustive(t *testing.T) {
	if testing.Short() {
		t.Log("skipping TestRE2Exhaustive during short test")
		return
	}
	testRE2(t, "testdata/re2-exhaustive.txt.bz2")
}

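// testRE2 reads the RE2 test log in file, decompressing it on the fly if
// the name ends in .bz2, and checks this package's match results against
// the expected results recorded in the log.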
func testRE2(t *testing.T, file string) {
	f, err := os.Open(file)
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()
	var txt io.Reader
	if strings.HasSuffix(file, ".bz2") {
		z := bzip2.NewReader(f)
		txt = z
		file = file[:len(file)-len(".bz2")] // for error messages
	} else {
		txt = f
	}
	lineno := 0
	r := bufio.NewReader(txt)
	var (
		str       []string
		input     []string
		inStrings bool
		re        *Regexp
		refull    *Regexp
		nfail     int
		ncase     int
	)
	for {
		line, err := r.ReadString('\n')
		if err != nil {
			if err == io.EOF {
				break
			}
			t.Fatalf("%s:%d: %v", file, lineno, err)
		}
		line = line[:len(line)-1] // chop \n
		lineno++
		switch {
		case line == "":
			t.Fatalf("%s:%d: unexpected blank line", file, lineno)
		case line[0] == '#':
			continue
		case 'A' <= line[0] && line[0] <= 'Z':
			// Test name.
			t.Logf("%s\n", line)
			continue
		case line == "strings":
			str = str[:0]
			inStrings = true
		case line == "regexps":
			inStrings = false
		case line[0] == '"':
			q, err := strconv.Unquote(line)
			if err != nil {
				// Fatal because we'll get out of sync.
				t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
			}
			if inStrings {
				str = append(str, q)
				continue
			}
			// Is a regexp.
			if len(input) != 0 {
				t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
			}
			re, err = tryCompile(q)
			if err != nil {
				if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
					// We don't and likely never will support \C; keep going.
					continue
				}
				t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
				if nfail++; nfail >= 100 {
					t.Fatalf("stopping after %d errors", nfail)
				}
				continue
			}
			full := `\A(?:` + q + `)\z`
			refull, err = tryCompile(full)
			if err != nil {
				// Fatal because q worked, so this should always work.
				t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
			}
			input = str
		case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
			// A sequence of match results.
			ncase++
			if re == nil {
				// Failed to compile: skip results.
				continue
			}
			if len(input) == 0 {
				t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
			}
			var text string
			text, input = input[0], input[1:]
			if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
				// RE2's \B considers every byte position,
				// so it sees 'not word boundary' in the
				// middle of UTF-8 sequences. This package
				// only considers the positions between runes,
				// so it disagrees. Skip those cases.
				continue
			}
			res := strings.Split(line, ";")
			if len(res) != len(run) {
				t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
			}
			for i := range res {
				have, suffix := run[i](re, refull, text)
				want := parseResult(t, file, lineno, res[i])
				if !same(have, want) {
					t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
					if nfail++; nfail >= 100 {
						t.Fatalf("stopping after %d errors", nfail)
					}
					continue
				}
				b, suffix := match[i](re, refull, text)
				if b != (want != nil) {
					t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
					if nfail++; nfail >= 100 {
						t.Fatalf("stopping after %d errors", nfail)
					}
					continue
				}
			}

		default:
			t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
		}
	}
	if len(input) != 0 {
		t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
	}
	t.Logf("%d cases tested", ncase)
}

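// run lists the four ways each regexp is executed: a full and a partial
// match, each in leftmost-first and leftmost-longest mode. Each function
// returns the submatch indices and a suffix used to label error messages.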
var run = []func(*Regexp, *Regexp, string) ([]int, string){
	runFull,
	runPartial,
	runFullLongest,
	runPartialLongest,
}

func runFull(re, refull *Regexp, text string) ([]int, string) {
	refull.SetLongest(false)
	return refull.FindStringSubmatchIndex(text), "[full]"
}

func runPartial(re, refull *Regexp, text string) ([]int, string) {
	re.SetLongest(false)
	return re.FindStringSubmatchIndex(text), ""
}

func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
	refull.SetLongest(true)
	return refull.FindStringSubmatchIndex(text), "[full,longest]"
}

func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
	re.SetLongest(true)
	return re.FindStringSubmatchIndex(text), "[longest]"
}

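// match mirrors run but reports only whether the regexp matched, using
// MatchString in the same four modes.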
var match = []func(*Regexp, *Regexp, string) (bool, string){
	matchFull,
	matchPartial,
	matchFullLongest,
	matchPartialLongest,
}

func matchFull(re, refull *Regexp, text string) (bool, string) {
	refull.SetLongest(false)
	return refull.MatchString(text), "[full]"
}

func matchPartial(re, refull *Regexp, text string) (bool, string) {
	re.SetLongest(false)
	return re.MatchString(text), ""
}

func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
	refull.SetLongest(true)
	return refull.MatchString(text), "[full,longest]"
}

func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
	re.SetLongest(true)
	return re.MatchString(text), "[longest]"
}

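// isSingleBytes reports whether s contains only single-byte (ASCII) runes.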
func isSingleBytes(s string) bool {
	for _, c := range s {
		if c >= utf8.RuneSelf {
			return false
		}
	}
	return true
}

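// tryCompile is Compile with a recover, converting any panic during
// compilation into an error so the test can keep going.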
func tryCompile(s string) (re *Regexp, err error) {
	// Protect against panic during Compile.
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("panic: %v", r)
		}
	}()
	return Compile(s)
}

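// parseResult parses one match result from the log: "-" for no match, or
// space-separated lo-hi pairs converted into the flat index slice form used
// by FindSubmatchIndex, with -1, -1 recorded for an unmatched subexpression.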
func parseResult(t *testing.T, file string, lineno int, res string) []int {
	// A single - indicates no match.
	if res == "-" {
		return nil
	}
	// Otherwise, a space-separated list of pairs.
	n := 1
	for j := 0; j < len(res); j++ {
		if res[j] == ' ' {
			n++
		}
	}
	out := make([]int, 2*n)
	i := 0
	n = 0
	for j := 0; j <= len(res); j++ {
		if j == len(res) || res[j] == ' ' {
			// Process a single pair. - means no submatch.
			pair := res[i:j]
			if pair == "-" {
				out[n] = -1
				out[n+1] = -1
			} else {
				k := strings.Index(pair, "-")
				if k < 0 {
					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
				}
				lo, err1 := strconv.Atoi(pair[:k])
				hi, err2 := strconv.Atoi(pair[k+1:])
				if err1 != nil || err2 != nil || lo > hi {
					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
				}
				out[n] = lo
				out[n+1] = hi
			}
			n += 2
			i = j + 1
		}
	}
	return out
}

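// same reports whether x and y are identical index slices.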
func same(x, y []int) bool {
	if len(x) != len(y) {
		return false
	}
	for i, xi := range x {
		if xi != y[i] {
			return false
		}
	}
	return true
}

// TestFowler runs this package's regexp API against the
// POSIX regular expression tests collected by Glenn Fowler
// at http://www2.research.att.com/~gsf/testregex/.
func TestFowler(t *testing.T) {
	files, err := filepath.Glob("testdata/*.dat")
	if err != nil {
		t.Fatal(err)
	}
	for _, file := range files {
		t.Log(file)
		testFowler(t, file)
	}
}

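// notab matches a maximal run of non-tab characters; testFowler uses it to
// split a test specification into its tab-separated fields.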
var notab = MustCompilePOSIX(`[^\t]+`)

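// testFowler runs the test specifications in a single testregex .dat file
// against this package, skipping the modes and flags it does not support.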
func testFowler(t *testing.T, file string) {
	f, err := os.Open(file)
	if err != nil {
		t.Error(err)
		return
	}
	defer f.Close()
	b := bufio.NewReader(f)
	lineno := 0
	lastRegexp := ""
Reading:
	for {
		lineno++
		line, err := b.ReadString('\n')
		if err != nil {
			if err != io.EOF {
				t.Errorf("%s:%d: %v", file, lineno, err)
			}
			break Reading
		}

		// http://www2.research.att.com/~gsf/man/man1/testregex.html
		//
		// INPUT FORMAT
		//   Input lines may be blank, a comment beginning with #, or a test
		//   specification. A specification is five fields separated by one
		//   or more tabs. NULL denotes the empty string and NIL denotes the
		//   0 pointer.
		if line[0] == '#' || line[0] == '\n' {
			continue Reading
		}
		line = line[:len(line)-1]
		field := notab.FindAllString(line, -1)
		for i, f := range field {
			if f == "NULL" {
				field[i] = ""
			}
			if f == "NIL" {
				t.Logf("%s:%d: skip: %s", file, lineno, line)
				continue Reading
			}
		}
		if len(field) == 0 {
			continue Reading
		}

		//   Field 1: the regex(3) flags to apply, one character per REG_feature
		//   flag. The test is skipped if REG_feature is not supported by the
		//   implementation. If the first character is not [BEASKLP] then the
		//   specification is a global control line. One or more of [BEASKLP] may be
		//   specified; the test will be repeated for each mode.
		//
		//     B  basic                    BRE  (grep, ed, sed)
		//     E  REG_EXTENDED             ERE  (egrep)
		//     A  REG_AUGMENTED            ARE  (egrep with negation)
		//     S  REG_SHELL                SRE  (sh glob)
		//     K  REG_SHELL|REG_AUGMENTED  KRE  (ksh glob)
		//     L  REG_LITERAL              LRE  (fgrep)
		//
		//     a  REG_LEFT|REG_RIGHT       implicit ^...$
		//     b  REG_NOTBOL               lhs does not match ^
		//     c  REG_COMMENT              ignore space and #...\n
		//     d  REG_SHELL_DOT            explicit leading . match
		//     e  REG_NOTEOL               rhs does not match $
		//     f  REG_MULTIPLE             multiple \n separated patterns
		//     g  FNM_LEADING_DIR          testfnmatch only -- match until /
		//     h  REG_MULTIREF             multiple digit backref
		//     i  REG_ICASE                ignore case
		//     j  REG_SPAN                 . matches \n
		//     k  REG_ESCAPE               \ to escape [...] delimiter
		//     l  REG_LEFT                 implicit ^...
		//     m  REG_MINIMAL              minimal match
		//     n  REG_NEWLINE              explicit \n match
		//     o  REG_ENCLOSED             (|&) magic inside [@|&](...)
		//     p  REG_SHELL_PATH           explicit / match
		//     q  REG_DELIMITED            delimited pattern
		//     r  REG_RIGHT                implicit ...$
		//     s  REG_SHELL_ESCAPED        \ not special
		//     t  REG_MUSTDELIM            all delimiters must be specified
		//     u  standard unspecified behavior -- errors not counted
		//     v  REG_CLASS_ESCAPE         \ special inside [...]
		//     w  REG_NOSUB                no subexpression match array
		//     x  REG_LENIENT              let some errors slide
		//     y  REG_LEFT                 regexec() implicit ^...
		//     z  REG_NULL                 NULL subexpressions ok
		//     $                           expand C \c escapes in fields 2 and 3
		//     /                           field 2 is a regsubcomp() expression
		//     =                           field 3 is a regdecomp() expression
		//
		//   Field 1 control lines:
		//
		//     C              set LC_COLLATE and LC_CTYPE to locale in field 2
		//
		//     ?test ...      output field 5 if passed and != EXPECTED, silent otherwise
		//     &test ...      output field 5 if current and previous passed
		//     |test ...      output field 5 if current passed and previous failed
		//     ; ...          output field 2 if previous failed
		//     {test ...      skip if failed until }
		//     }              end of skip
		//
		//     : comment      comment copied as output NOTE
		//     :comment:test  :comment: ignored
		//     N[OTE] comment comment copied as output NOTE
		//     T[EST] comment comment
		//
		//     number         use number for nmatch (20 by default)
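		//
		// As a constructed example (not taken from the testregex page), a
		// typical specification line looks like:
		//
		//	E	a(b)c	abc	(0,3)(1,2)
		//
		// meaning: compile a(b)c as an ERE, match it against "abc", and
		// expect a match at (0,3) with the first subexpression at (1,2).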
		flag := field[0]
		switch flag[0] {
		case '?', '&', '|', ';', '{', '}':
			// Ignore all the control operators.
			// Just run everything.
			flag = flag[1:]
			if flag == "" {
				continue Reading
			}
		case ':':
			i := strings.Index(flag[1:], ":")
			if i < 0 {
				t.Logf("skip: %s", line)
				continue Reading
			}
			flag = flag[1+i+1:]
		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			t.Logf("skip: %s", line)
			continue Reading
		}

		// Can check field count now that we've handled the myriad comment formats.
		if len(field) < 4 {
			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
			continue Reading
		}

		// Expand C escapes (a.k.a. Go escapes).
		if strings.Contains(flag, "$") {
			f := `"` + field[1] + `"`
			if field[1], err = strconv.Unquote(f); err != nil {
				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
			}
			f = `"` + field[2] + `"`
			if field[2], err = strconv.Unquote(f); err != nil {
				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
			}
		}

		//   Field 2: the regular expression pattern; SAME uses the pattern from
		//   the previous specification.
		if field[1] == "SAME" {
			field[1] = lastRegexp
		}
		lastRegexp = field[1]

		//   Field 3: the string to match.
		text := field[2]

		//   Field 4: the test outcome...
		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
		if !ok {
			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
			continue Reading
		}

		//   Field 5: optional comment appended to the report.

	Testing:
		// Run test once for each specified capital letter mode that we support.
		for _, c := range flag {
			pattern := field[1]
			syn := syntax.POSIX | syntax.ClassNL
			switch c {
			default:
				continue Testing
			case 'E':
				// extended regexp (what we support)
			case 'L':
				// literal
				pattern = QuoteMeta(pattern)
			}

			for _, c := range flag {
				switch c {
				case 'i':
					syn |= syntax.FoldCase
				}
			}

			re, err := CompileInternal(pattern, syn, true)
			if err != nil {
				if shouldCompile {
					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
				}
				continue Testing
			}
			if !shouldCompile {
				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
				continue Testing
			}
			match := re.MatchString(text)
			if match != shouldMatch {
				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
				continue Testing
			}
			have := re.FindStringSubmatchIndex(text)
			if (len(have) > 0) != match {
				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
				continue Testing
			}
			if len(have) > len(pos) {
				have = have[:len(pos)]
			}
			if !same(have, pos) {
				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
			}
		}
	}
}

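// parseFowlerResult parses field 4 of a test specification. It reports
// whether the field was understood (ok), whether the pattern is expected
// to compile and to match, and the expected match positions, if any.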
func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
	// Field 4: the test outcome. This is either one of the posix error
	// codes (with REG_ omitted) or the match array, a list of (m,n)
	// entries with m and n being first and last+1 positions in the
	// field 3 string, or NULL if REG_NOSUB is in effect and success
	// is expected. BADPAT is acceptable in place of any regcomp(3)
	// error code. The match[] array is initialized to (-2,-2) before
	// each test. All array elements from 0 to nmatch-1 must be specified
	// in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
	// Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
	// matched (?{...}) expression, where x is the text enclosed by {...},
	// o is the expression ordinal counting from 1, and n is the length of
	// the unmatched portion of the subject string. If x starts with a
	// number then that is the return value of re_execf(), otherwise 0 is
	// returned.
	switch {
	case s == "":
		// Match with no position information.
		ok = true
		compiled = true
		matched = true
		return
	case s == "NOMATCH":
		// Match failure.
		ok = true
		compiled = true
		matched = false
		return
	case 'A' <= s[0] && s[0] <= 'Z':
		// All the other error codes are compile errors.
		ok = true
		compiled = false
		return
	}
	compiled = true

	var x []int
	for s != "" {
		var end byte = ')'
		if len(x)%2 == 0 {
			if s[0] != '(' {
				ok = false
				return
			}
			s = s[1:]
			end = ','
		}
		i := 0
		for i < len(s) && s[i] != end {
			i++
		}
		if i == 0 || i == len(s) {
			ok = false
			return
		}
		var v = -1
		var err error
		if s[:i] != "?" {
			v, err = strconv.Atoi(s[:i])
			if err != nil {
				ok = false
				return
			}
		}
		x = append(x, v)
		s = s[i+1:]
	}
	if len(x)%2 != 0 {
		ok = false
		return
	}
	ok = true
	matched = true
	pos = x
	return
}

var text []byte

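// makeText returns n bytes of randomly generated text, cached in text and
// reused across benchmarks when it is already long enough; roughly 1 in 30
// bytes is a newline and the rest are printable ASCII.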
func makeText(n int) []byte {
	if len(text) >= n {
		return text[:n]
	}
	text = make([]byte, n)
	for i := range text {
		if rand.Intn(30) == 0 {
			text[i] = '\n'
		} else {
			text[i] = byte(rand.Intn(0x7E+1-0x20) + 0x20)
		}
	}
	return text
}

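// benchmark compiles re and times matching it against n bytes of generated
// text. The patterns used below are not expected to occur in that text, so
// a successful match indicates a bug and aborts the benchmark.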
func benchmark(b *testing.B, re string, n int) {
	r := MustCompile(re)
	t := makeText(n)
	b.ResetTimer()
	b.SetBytes(int64(n))
	for i := 0; i < b.N; i++ {
		if r.Match(t) {
			b.Fatal("match!")
		}
	}
}

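// Benchmark patterns, roughly in increasing order of matching difficulty:
// an anchored literal (easy0), alternating character classes (easy1), a
// small leading class (medium), a leading [ -~]* that can start anywhere
// (hard), and the same with many capturing groups (parens).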
const (
	easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
	easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
	medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
	hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
	parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
		"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
)

func BenchmarkMatchEasy0_32(b *testing.B)   { benchmark(b, easy0, 32<<0) }
func BenchmarkMatchEasy0_1K(b *testing.B)   { benchmark(b, easy0, 1<<10) }
func BenchmarkMatchEasy0_32K(b *testing.B)  { benchmark(b, easy0, 32<<10) }
func BenchmarkMatchEasy0_1M(b *testing.B)   { benchmark(b, easy0, 1<<20) }
func BenchmarkMatchEasy0_32M(b *testing.B)  { benchmark(b, easy0, 32<<20) }
func BenchmarkMatchEasy1_32(b *testing.B)   { benchmark(b, easy1, 32<<0) }
func BenchmarkMatchEasy1_1K(b *testing.B)   { benchmark(b, easy1, 1<<10) }
func BenchmarkMatchEasy1_32K(b *testing.B)  { benchmark(b, easy1, 32<<10) }
func BenchmarkMatchEasy1_1M(b *testing.B)   { benchmark(b, easy1, 1<<20) }
func BenchmarkMatchEasy1_32M(b *testing.B)  { benchmark(b, easy1, 32<<20) }
func BenchmarkMatchMedium_32(b *testing.B)  { benchmark(b, medium, 32<<0) }
func BenchmarkMatchMedium_1K(b *testing.B)  { benchmark(b, medium, 1<<10) }
func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
func BenchmarkMatchMedium_1M(b *testing.B)  { benchmark(b, medium, 1<<20) }
func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
func BenchmarkMatchHard_32(b *testing.B)    { benchmark(b, hard, 32<<0) }
func BenchmarkMatchHard_1K(b *testing.B)    { benchmark(b, hard, 1<<10) }
func BenchmarkMatchHard_32K(b *testing.B)   { benchmark(b, hard, 32<<10) }
func BenchmarkMatchHard_1M(b *testing.B)    { benchmark(b, hard, 1<<20) }
func BenchmarkMatchHard_32M(b *testing.B)   { benchmark(b, hard, 32<<20) }