libgo/go/regexp/exec_test.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package regexp
   6
   7 import (
   8         "bufio"
   9         "compress/bzip2"
  10         "fmt"
  11         "internal/testenv"
  12         "io"
  13         "os"
  14         "path/filepath"
  15         "regexp/syntax"
  16         "strconv"
  17         "strings"
  18         "testing"
  19         "unicode/utf8"
  20 )
  21
  22 // TestRE2 tests this package's regexp API against test cases
  23 // considered during RE2's exhaustive tests, which run all possible
  24 // regexps over a given set of atoms and operators, up to a given
  25 // complexity, over all possible strings over a given alphabet,
  26 // up to a given size. Rather than try to link with RE2, we read a
  27 // log file containing the test cases and the expected matches.
  28 // The log file, re2-exhaustive.txt, is generated by running 'make log'
  29 // in the open source RE2 distribution https://github.com/google/re2/.
  30 //
  31 // The test file format is a sequence of stanzas like:
  32 //
  33 //      strings
  34 //      "abc"
  35 //      "123x"
  36 //      regexps
  37 //      "[a-z]+"
  38 //      0-3;0-3
  39 //      -;-
  40 //      "([0-9])([0-9])([0-9])"
  41 //      -;-
  42 //      -;0-3 0-1 1-2 2-3
  43 //
  44 // The stanza begins by defining a set of strings, quoted
  45 // using Go double-quote syntax, one per line. Then the
  46 // regexps section gives a sequence of regexps to run on
  47 // the strings. In the block that follows a regexp, each line
  48 // gives the semicolon-separated match results of running
  49 // the regexp on the corresponding string.
  50 // Each match result is either a single -, meaning no match, or a
  51 // space-separated sequence of pairs giving the match and
  52 // submatch indices. An unmatched subexpression formats
  53 // its pair as a single - (not illustrated above).  For now
  54 // each regexp run produces two match results, one for a
  55 // ``full match'' that restricts the regexp to matching the entire
  56 // string or nothing, and one for a ``partial match'' that gives
  57 // the leftmost first match found in the string.
  58 //
  59 // Lines beginning with # are comments. Lines beginning with
  60 // a capital letter are test names printed during RE2's test suite
  61 // and are echoed into t but otherwise ignored.
  62 //
  63 // At time of writing, re2-exhaustive.txt is 59 MB but compresses to 385 kB,
  64 // so we store re2-exhaustive.txt.bz2 in the repository and decompress it on the fly.
  65 //
  66 func TestRE2Search(t *testing.T) {
  67         testRE2(t, "testdata/re2-search.txt")
  68 }
  69
  70 func testRE2(t *testing.T, file string) {
  71         f, err := os.Open(file)
  72         if err != nil {
  73                 t.Fatal(err)
  74         }
  75         defer f.Close()
  76         var txt io.Reader
  77         if strings.HasSuffix(file, ".bz2") {
  78                 z := bzip2.NewReader(f)
  79                 txt = z
  80                 file = file[:len(file)-len(".bz2")] // for error messages
  81         } else {
  82                 txt = f
  83         }
  84         lineno := 0
  85         scanner := bufio.NewScanner(txt)
  86         var (
  87                 str       []string
  88                 input     []string
  89                 inStrings bool
  90                 re        *Regexp
  91                 refull    *Regexp
  92                 nfail     int
  93                 ncase     int
  94         )
  95         for lineno := 1; scanner.Scan(); lineno++ {
  96                 line := scanner.Text()
  97                 switch {
  98                 case line == "":
  99                         t.Fatalf("%s:%d: unexpected blank line", file, lineno)
 100                 case line[0] == '#':
 101                         continue
 102                 case 'A' <= line[0] && line[0] <= 'Z':
 103                         // Test name.
 104                         t.Logf("%s\n", line)
 105                         continue
 106                 case line == "strings":
 107                         str = str[:0]
 108                         inStrings = true
 109                 case line == "regexps":
 110                         inStrings = false
 111                 case line[0] == '"':
 112                         q, err := strconv.Unquote(line)
 113                         if err != nil {
 114                                 // Fatal because we'll get out of sync.
 115                                 t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
 116                         }
 117                         if inStrings {
 118                                 str = append(str, q)
 119                                 continue
 120                         }
 121                         // Is a regexp.
 122                         if len(input) != 0 {
 123                                 t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
 124                         }
 125                         re, err = tryCompile(q)
 126                         if err != nil {
 127                                 if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
 128                                         // We don't and likely never will support \C; keep going.
 129                                         continue
 130                                 }
 131                                 t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
 132                                 if nfail++; nfail >= 100 {
 133                                         t.Fatalf("stopping after %d errors", nfail)
 134                                 }
 135                                 continue
 136                         }
 137                         full := `\A(?:` + q + `)\z`
 138                         refull, err = tryCompile(full)
 139                         if err != nil {
 140                                 // Fatal because q worked, so this should always work.
 141                                 t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
 142                         }
 143                         input = str
 144                 case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
 145                         // A sequence of match results.
 146                         ncase++
 147                         if re == nil {
 148                                 // Failed to compile: skip results.
 149                                 continue
 150                         }
 151                         if len(input) == 0 {
 152                                 t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
 153                         }
 154                         var text string
 155                         text, input = input[0], input[1:]
 156                         if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
 157                                 // RE2's \B considers every byte position,
 158                                 // so it sees 'not word boundary' in the
 159                                 // middle of UTF-8 sequences. This package
 160                                 // only considers the positions between runes,
 161                                 // so it disagrees. Skip those cases.
 162                                 continue
 163                         }
 164                         res := strings.Split(line, ";")
 165                         if len(res) != len(run) {
 166                                 t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
 167                         }
 168                         for i := range res {
 169                                 have, suffix := run[i](re, refull, text)
 170                                 want := parseResult(t, file, lineno, res[i])
 171                                 if !same(have, want) {
 172                                         t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
 173                                         if nfail++; nfail >= 100 {
 174                                                 t.Fatalf("stopping after %d errors", nfail)
 175                                         }
 176                                         continue
 177                                 }
 178                                 b, suffix := match[i](re, refull, text)
 179                                 if b != (want != nil) {
 180                                         t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
 181                                         if nfail++; nfail >= 100 {
 182                                                 t.Fatalf("stopping after %d errors", nfail)
 183                                         }
 184                                         continue
 185                                 }
 186                         }
 187
 188                 default:
 189                         t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
 190                 }
 191         }
 192         if err := scanner.Err(); err != nil {
 193                 t.Fatalf("%s:%d: %v", file, lineno, err)
 194         }
 195         if len(input) != 0 {
 196                 t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
 197         }
 198         t.Logf("%d cases tested", ncase)
 199 }
 200
 201 var run = []func(*Regexp, *Regexp, string) ([]int, string){
 202         runFull,
 203         runPartial,
 204         runFullLongest,
 205         runPartialLongest,
 206 }
 207
 208 func runFull(re, refull *Regexp, text string) ([]int, string) {
 209         refull.longest = false
 210         return refull.FindStringSubmatchIndex(text), "[full]"
 211 }
 212
 213 func runPartial(re, refull *Regexp, text string) ([]int, string) {
 214         re.longest = false
 215         return re.FindStringSubmatchIndex(text), ""
 216 }
 217
 218 func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
 219         refull.longest = true
 220         return refull.FindStringSubmatchIndex(text), "[full,longest]"
 221 }
 222
 223 func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
 224         re.longest = true
 225         return re.FindStringSubmatchIndex(text), "[longest]"
 226 }
 227
 228 var match = []func(*Regexp, *Regexp, string) (bool, string){
 229         matchFull,
 230         matchPartial,
 231         matchFullLongest,
 232         matchPartialLongest,
 233 }
 234
 235 func matchFull(re, refull *Regexp, text string) (bool, string) {
 236         refull.longest = false
 237         return refull.MatchString(text), "[full]"
 238 }
 239
 240 func matchPartial(re, refull *Regexp, text string) (bool, string) {
 241         re.longest = false
 242         return re.MatchString(text), ""
 243 }
 244
 245 func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
 246         refull.longest = true
 247         return refull.MatchString(text), "[full,longest]"
 248 }
 249
 250 func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
 251         re.longest = true
 252         return re.MatchString(text), "[longest]"
 253 }
 254
 255 func isSingleBytes(s string) bool {
 256         for _, c := range s {
 257                 if c >= utf8.RuneSelf {
 258                         return false
 259                 }
 260         }
 261         return true
 262 }
 263
 264 func tryCompile(s string) (re *Regexp, err error) {
 265         // Protect against panic during Compile.
 266         defer func() {
 267                 if r := recover(); r != nil {
 268                         err = fmt.Errorf("panic: %v", r)
 269                 }
 270         }()
 271         return Compile(s)
 272 }
 273
 274 func parseResult(t *testing.T, file string, lineno int, res string) []int {
 275         // A single - indicates no match.
 276         if res == "-" {
 277                 return nil
 278         }
 279         // Otherwise, a space-separated list of pairs.
 280         n := 1
 281         for j := 0; j < len(res); j++ {
 282                 if res[j] == ' ' {
 283                         n++
 284                 }
 285         }
 286         out := make([]int, 2*n)
 287         i := 0
 288         n = 0
 289         for j := 0; j <= len(res); j++ {
 290                 if j == len(res) || res[j] == ' ' {
 291                         // Process a single pair.  - means no submatch.
 292                         pair := res[i:j]
 293                         if pair == "-" {
 294                                 out[n] = -1
 295                                 out[n+1] = -1
 296                         } else {
 297                                 k := strings.Index(pair, "-")
 298                                 if k < 0 {
 299                                         t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
 300                                 }
 301                                 lo, err1 := strconv.Atoi(pair[:k])
 302                                 hi, err2 := strconv.Atoi(pair[k+1:])
 303                                 if err1 != nil || err2 != nil || lo > hi {
 304                                         t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
 305                                 }
 306                                 out[n] = lo
 307                                 out[n+1] = hi
 308                         }
 309                         n += 2
 310                         i = j + 1
 311                 }
 312         }
 313         return out
 314 }
 315
 316 func same(x, y []int) bool {
 317         if len(x) != len(y) {
 318                 return false
 319         }
 320         for i, xi := range x {
 321                 if xi != y[i] {
 322                         return false
 323                 }
 324         }
 325         return true
 326 }
 327
 328 // TestFowler runs this package's regexp API against the
 329 // POSIX regular expression tests collected by Glenn Fowler
 330 // at http://www2.research.att.com/~astopen/testregex/testregex.html.
 331 func TestFowler(t *testing.T) {
 332         files, err := filepath.Glob("testdata/*.dat")
 333         if err != nil {
 334                 t.Fatal(err)
 335         }
 336         for _, file := range files {
 337                 t.Log(file)
 338                 testFowler(t, file)
 339         }
 340 }
 341
 342 var notab = MustCompilePOSIX(`[^\t]+`)
 343
 344 func testFowler(t *testing.T, file string) {
 345         f, err := os.Open(file)
 346         if err != nil {
 347                 t.Error(err)
 348                 return
 349         }
 350         defer f.Close()
 351         b := bufio.NewReader(f)
 352         lineno := 0
 353         lastRegexp := ""
 354 Reading:
 355         for {
 356                 lineno++
 357                 line, err := b.ReadString('\n')
 358                 if err != nil {
 359                         if err != io.EOF {
 360                                 t.Errorf("%s:%d: %v", file, lineno, err)
 361                         }
 362                         break Reading
 363                 }
 364
 365                 // http://www2.research.att.com/~astopen/man/man1/testregex.html
 366                 //
 367                 // INPUT FORMAT
 368                 //   Input lines may be blank, a comment beginning with #, or a test
 369                 //   specification. A specification is five fields separated by one
 370                 //   or more tabs. NULL denotes the empty string and NIL denotes the
 371                 //   0 pointer.
 372                 if line[0] == '#' || line[0] == '\n' {
 373                         continue Reading
 374                 }
 375                 line = line[:len(line)-1]
 376                 field := notab.FindAllString(line, -1)
 377                 for i, f := range field {
 378                         if f == "NULL" {
 379                                 field[i] = ""
 380                         }
 381                         if f == "NIL" {
 382                                 t.Logf("%s:%d: skip: %s", file, lineno, line)
 383                                 continue Reading
 384                         }
 385                 }
 386                 if len(field) == 0 {
 387                         continue Reading
 388                 }
 389
 390                 //   Field 1: the regex(3) flags to apply, one character per REG_feature
 391                 //   flag. The test is skipped if REG_feature is not supported by the
 392                 //   implementation. If the first character is not [BEASKLP] then the
 393                 //   specification is a global control line. One or more of [BEASKLP] may be
 394                 //   specified; the test will be repeated for each mode.
 395                 //
 396                 //     B        basic                   BRE     (grep, ed, sed)
 397                 //     E        REG_EXTENDED            ERE     (egrep)
 398                 //     A        REG_AUGMENTED           ARE     (egrep with negation)
 399                 //     S        REG_SHELL               SRE     (sh glob)
 400                 //     K        REG_SHELL|REG_AUGMENTED KRE     (ksh glob)
 401                 //     L        REG_LITERAL             LRE     (fgrep)
 402                 //
 403                 //     a        REG_LEFT|REG_RIGHT      implicit ^...$
 404                 //     b        REG_NOTBOL              lhs does not match ^
 405                 //     c        REG_COMMENT             ignore space and #...\n
 406                 //     d        REG_SHELL_DOT           explicit leading . match
 407                 //     e        REG_NOTEOL              rhs does not match $
 408                 //     f        REG_MULTIPLE            multiple \n separated patterns
 409                 //     g        FNM_LEADING_DIR         testfnmatch only -- match until /
 410                 //     h        REG_MULTIREF            multiple digit backref
 411                 //     i        REG_ICASE               ignore case
 412                 //     j        REG_SPAN                . matches \n
 413                 //     k        REG_ESCAPE              \ to escape [...] delimiter
 414                 //     l        REG_LEFT                implicit ^...
 415                 //     m        REG_MINIMAL             minimal match
 416                 //     n        REG_NEWLINE             explicit \n match
 417                 //     o        REG_ENCLOSED            (|&) magic inside [@|&](...)
 418                 //     p        REG_SHELL_PATH          explicit / match
 419                 //     q        REG_DELIMITED           delimited pattern
 420                 //     r        REG_RIGHT               implicit ...$
 421                 //     s        REG_SHELL_ESCAPED       \ not special
 422                 //     t        REG_MUSTDELIM           all delimiters must be specified
 423                 //     u        standard unspecified behavior -- errors not counted
 424                 //     v        REG_CLASS_ESCAPE        \ special inside [...]
 425                 //     w        REG_NOSUB               no subexpression match array
 426                 //     x        REG_LENIENT             let some errors slide
 427                 //     y        REG_LEFT                regexec() implicit ^...
 428                 //     z        REG_NULL                NULL subexpressions ok
 429                 //     $                                expand C \c escapes in fields 2 and 3
 430                 //     /                                field 2 is a regsubcomp() expression
 431                 //     =                                field 3 is a regdecomp() expression
 432                 //
 433                 //   Field 1 control lines:
 434                 //
 435                 //     C                set LC_COLLATE and LC_CTYPE to locale in field 2
 436                 //
 437                 //     ?test ...        output field 5 if passed and != EXPECTED, silent otherwise
 438                 //     &test ...        output field 5 if current and previous passed
 439                 //     |test ...        output field 5 if current passed and previous failed
 440                 //     ; ...    output field 2 if previous failed
 441                 //     {test ...        skip if failed until }
 442                 //     }                end of skip
 443                 //
 444                 //     : comment                comment copied as output NOTE
 445                 //     :comment:test    :comment: ignored
 446                 //     N[OTE] comment   comment copied as output NOTE
 447                 //     T[EST] comment   comment
 448                 //
 449                 //     number           use number for nmatch (20 by default)
 450                 flag := field[0]
 451                 switch flag[0] {
 452                 case '?', '&', '|', ';', '{', '}':
 453                         // Ignore all the control operators.
 454                         // Just run everything.
 455                         flag = flag[1:]
 456                         if flag == "" {
 457                                 continue Reading
 458                         }
 459                 case ':':
 460                         i := strings.Index(flag[1:], ":")
 461                         if i < 0 {
 462                                 t.Logf("skip: %s", line)
 463                                 continue Reading
 464                         }
 465                         flag = flag[1+i+1:]
 466                 case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 467                         t.Logf("skip: %s", line)
 468                         continue Reading
 469                 }
 470
 471                 // Can check field count now that we've handled the myriad comment formats.
 472                 if len(field) < 4 {
 473                         t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
 474                         continue Reading
 475                 }
 476
 477                 // Expand C escapes (a.k.a. Go escapes).
 478                 if strings.Contains(flag, "$") {
 479                         f := `"` + field[1] + `"`
 480                         if field[1], err = strconv.Unquote(f); err != nil {
 481                                 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
 482                         }
 483                         f = `"` + field[2] + `"`
 484                         if field[2], err = strconv.Unquote(f); err != nil {
 485                                 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
 486                         }
 487                 }
 488
 489                 //   Field 2: the regular expression pattern; SAME uses the pattern from
 490                 //     the previous specification.
 491                 //
 492                 if field[1] == "SAME" {
 493                         field[1] = lastRegexp
 494                 }
 495                 lastRegexp = field[1]
 496
 497                 //   Field 3: the string to match.
 498                 text := field[2]
 499
 500                 //   Field 4: the test outcome...
 501                 ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
 502                 if !ok {
 503                         t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
 504                         continue Reading
 505                 }
 506
 507                 //   Field 5: optional comment appended to the report.
 508
 509         Testing:
 510                 // Run test once for each specified capital letter mode that we support.
 511                 for _, c := range flag {
 512                         pattern := field[1]
 513                         syn := syntax.POSIX | syntax.ClassNL
 514                         switch c {
 515                         default:
 516                                 continue Testing
 517                         case 'E':
 518                                 // extended regexp (what we support)
 519                         case 'L':
 520                                 // literal
 521                                 pattern = QuoteMeta(pattern)
 522                         }
 523
 524                         for _, c := range flag {
 525                                 switch c {
 526                                 case 'i':
 527                                         syn |= syntax.FoldCase
 528                                 }
 529                         }
 530
 531                         re, err := compile(pattern, syn, true)
 532                         if err != nil {
 533                                 if shouldCompile {
 534                                         t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
 535                                 }
 536                                 continue Testing
 537                         }
 538                         if !shouldCompile {
 539                                 t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
 540                                 continue Testing
 541                         }
 542                         match := re.MatchString(text)
 543                         if match != shouldMatch {
 544                                 t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
 545                                 continue Testing
 546                         }
 547                         have := re.FindStringSubmatchIndex(text)
 548                         if (len(have) > 0) != match {
 549                                 t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
 550                                 continue Testing
 551                         }
 552                         if len(have) > len(pos) {
 553                                 have = have[:len(pos)]
 554                         }
 555                         if !same(have, pos) {
 556                                 t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
 557                         }
 558                 }
 559         }
 560 }
 561
 562 func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
 563         //   Field 4: the test outcome. This is either one of the posix error
 564         //     codes (with REG_ omitted) or the match array, a list of (m,n)
 565         //     entries with m and n being first and last+1 positions in the
 566         //     field 3 string, or NULL if REG_NOSUB is in effect and success
 567         //     is expected. BADPAT is acceptable in place of any regcomp(3)
 568         //     error code. The match[] array is initialized to (-2,-2) before
 569         //     each test. All array elements from 0 to nmatch-1 must be specified
 570         //     in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
 571         //     Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
 572         //     matched (?{...}) expression, where x is the text enclosed by {...},
 573         //     o is the expression ordinal counting from 1, and n is the length of
 574         //     the unmatched portion of the subject string. If x starts with a
 575         //     number then that is the return value of re_execf(), otherwise 0 is
 576         //     returned.
 577         switch {
 578         case s == "":
 579                 // Match with no position information.
 580                 ok = true
 581                 compiled = true
 582                 matched = true
 583                 return
 584         case s == "NOMATCH":
 585                 // Match failure.
 586                 ok = true
 587                 compiled = true
 588                 matched = false
 589                 return
 590         case 'A' <= s[0] && s[0] <= 'Z':
 591                 // All the other error codes are compile errors.
 592                 ok = true
 593                 compiled = false
 594                 return
 595         }
 596         compiled = true
 597
 598         var x []int
 599         for s != "" {
 600                 var end byte = ')'
 601                 if len(x)%2 == 0 {
 602                         if s[0] != '(' {
 603                                 ok = false
 604                                 return
 605                         }
 606                         s = s[1:]
 607                         end = ','
 608                 }
 609                 i := 0
 610                 for i < len(s) && s[i] != end {
 611                         i++
 612                 }
 613                 if i == 0 || i == len(s) {
 614                         ok = false
 615                         return
 616                 }
 617                 var v = -1
 618                 var err error
 619                 if s[:i] != "?" {
 620                         v, err = strconv.Atoi(s[:i])
 621                         if err != nil {
 622                                 ok = false
 623                                 return
 624                         }
 625                 }
 626                 x = append(x, v)
 627                 s = s[i+1:]
 628         }
 629         if len(x)%2 != 0 {
 630                 ok = false
 631                 return
 632         }
 633         ok = true
 634         matched = true
 635         pos = x
 636         return
 637 }
 638
 639 var text []byte
 640
 641 func makeText(n int) []byte {
 642         if len(text) >= n {
 643                 return text[:n]
 644         }
 645         text = make([]byte, n)
 646         x := ^uint32(0)
 647         for i := range text {
 648                 x += x
 649                 x ^= 1
 650                 if int32(x) < 0 {
 651                         x ^= 0x88888eef
 652                 }
 653                 if x%31 == 0 {
 654                         text[i] = '\n'
 655                 } else {
 656                         text[i] = byte(x%(0x7E+1-0x20) + 0x20)
 657                 }
 658         }
 659         return text
 660 }
 661
 662 func BenchmarkMatch(b *testing.B) {
 663         isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
 664
 665         for _, data := range benchData {
 666                 r := MustCompile(data.re)
 667                 for _, size := range benchSizes {
 668                         if isRaceBuilder && size.n > 1<<10 {
 669                                 continue
 670                         }
 671                         t := makeText(size.n)
 672                         b.Run(data.name+"/"+size.name, func(b *testing.B) {
 673                                 b.SetBytes(int64(size.n))
 674                                 for i := 0; i < b.N; i++ {
 675                                         if r.Match(t) {
 676                                                 b.Fatal("match!")
 677                                         }
 678                                 }
 679                         })
 680                 }
 681         }
 682 }
 683
 684 func BenchmarkMatch_onepass_regex(b *testing.B) {
 685         isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
 686         r := MustCompile(`(?s)\A.*\z`)
 687         if r.get().op == notOnePass {
 688                 b.Fatalf("want onepass regex, but %q is not onepass", r)
 689         }
 690         for _, size := range benchSizes {
 691                 if isRaceBuilder && size.n > 1<<10 {
 692                         continue
 693                 }
 694                 t := makeText(size.n)
 695                 bs := make([][]byte, len(t))
 696                 for i, s := range t {
 697                         bs[i] = []byte{s}
 698                 }
 699                 b.Run(size.name, func(b *testing.B) {
 700                         b.SetBytes(int64(size.n))
 701                         b.ReportAllocs()
 702                         for i := 0; i < b.N; i++ {
 703                                 for _, byts := range bs {
 704                                         if !r.Match(byts) {
 705                                                 b.Fatal("not match!")
 706                                         }
 707                                 }
 708                         }
 709                 })
 710         }
 711 }
 712
 713 var benchData = []struct{ name, re string }{
 714         {"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
 715         {"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
 716         {"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
 717         {"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
 718         {"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
 719         {"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
 720 }
 721
 722 var benchSizes = []struct {
 723         name string
 724         n    int
 725 }{
 726         {"32", 32},
 727         {"1K", 1 << 10},
 728         {"32K", 32 << 10},
 729         {"1M", 1 << 20},
 730         {"32M", 32 << 20},
 731 }
 732
 733 func TestLongest(t *testing.T) {
 734         re, err := Compile(`a(|b)`)
 735         if err != nil {
 736                 t.Fatal(err)
 737         }
 738         if g, w := re.FindString("ab"), "a"; g != w {
 739                 t.Errorf("first match was %q, want %q", g, w)
 740         }
 741         re.Longest()
 742         if g, w := re.FindString("ab"), "ab"; g != w {
 743                 t.Errorf("longest match was %q, want %q", g, w)
 744         }
 745 }
 746
 747 // TestProgramTooLongForBacktrack tests that a regex which is too long
 748 // for the backtracker still executes properly.
 749 func TestProgramTooLongForBacktrack(t *testing.T) {
 750         longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
 751         if !longRegex.MatchString("two") {
 752                 t.Errorf("longRegex.MatchString(\"two\") was false, want true")
 753         }
 754         if longRegex.MatchString("xxx") {
 755                 t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
 756         }
 757 }