libgo/go/regexp/exec_test.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package regexp_test
   6
   7 import (
   8         . "regexp"
   9
  10         "bufio"
  11         "compress/bzip2"
  12         "fmt"
  13         "io"
  14         "math/rand"
  15         "os"
  16         "path/filepath"
  17         "regexp/syntax"
  18         "strconv"
  19         "strings"
  20         "testing"
  21         "unicode/utf8"
  22 )
  23
  24 // TestRE2 tests this package's regexp API against test cases
  25 // considered during RE2's exhaustive tests, which run all possible
  26 // regexps over a given set of atoms and operators, up to a given
  27 // complexity, over all possible strings over a given alphabet,
  28 // up to a given size.  Rather than try to link with RE2, we read a
  29 // log file containing the test cases and the expected matches.
  30 // The log file, re2.txt, is generated by running 'make exhaustive-log'
  31 // in the open source RE2 distribution.  http://code.google.com/p/re2/
  32 //
  33 // The test file format is a sequence of stanzas like:
  34 //
  35 //      strings
  36 //      "abc"
  37 //      "123x"
  38 //      regexps
  39 //      "[a-z]+"
  40 //      0-3;0-3
  41 //      -;-
  42 //      "([0-9])([0-9])([0-9])"
  43 //      -;-
  44 //      -;0-3 0-1 1-2 2-3
  45 //
  46 // The stanza begins by defining a set of strings, quoted
  47 // using Go double-quote syntax, one per line.  Then the
  48 // regexps section gives a sequence of regexps to run on
  49 // the strings.  In the block that follows a regexp, each line
  50 // gives the semicolon-separated match results of running
  51 // the regexp on the corresponding string.
  52 // Each match result is either a single -, meaning no match, or a
  53 // space-separated sequence of pairs giving the match and
  54 // submatch indices.  An unmatched subexpression formats
  55 // its pair as a single - (not illustrated above).  For now
  56 // each regexp run produces two match results, one for a
  57 // ``full match'' that restricts the regexp to matching the entire
  58 // string or nothing, and one for a ``partial match'' that gives
  59 // the leftmost first match found in the string.
  60 //
  61 // Lines beginning with # are comments.  Lines beginning with
  62 // a capital letter are test names printed during RE2's test suite
  63 // and are echoed into t but otherwise ignored.
  64 //
  65 // At time of writing, re2.txt is 32 MB but compresses to 760 kB,
  66 // so we store re2.txt.gz in the repository and decompress it on the fly.
  67 //
  68 func TestRE2Search(t *testing.T) {
  69         testRE2(t, "testdata/re2-search.txt")
  70 }
  71
  72 func TestRE2Exhaustive(t *testing.T) {
  73         if testing.Short() {
  74                 t.Log("skipping TestRE2Exhaustive during short test")
  75                 return
  76         }
  77         testRE2(t, "testdata/re2-exhaustive.txt.bz2")
  78 }
  79
  80 func testRE2(t *testing.T, file string) {
  81         f, err := os.Open(file)
  82         if err != nil {
  83                 t.Fatal(err)
  84         }
  85         defer f.Close()
  86         var txt io.Reader
  87         if strings.HasSuffix(file, ".bz2") {
  88                 z := bzip2.NewReader(f)
  89                 txt = z
  90                 file = file[:len(file)-len(".bz2")] // for error messages
  91         } else {
  92                 txt = f
  93         }
  94         lineno := 0
  95         r := bufio.NewReader(txt)
  96         var (
  97                 str       []string
  98                 input     []string
  99                 inStrings bool
 100                 re        *Regexp
 101                 refull    *Regexp
 102                 nfail     int
 103                 ncase     int
 104         )
 105         for {
 106                 line, err := r.ReadString('\n')
 107                 if err != nil {
 108                         if err == io.EOF {
 109                                 break
 110                         }
 111                         t.Fatalf("%s:%d: %v", file, lineno, err)
 112                 }
 113                 line = line[:len(line)-1] // chop \n
 114                 lineno++
 115                 switch {
 116                 case line == "":
 117                         t.Fatalf("%s:%d: unexpected blank line", file, lineno)
 118                 case line[0] == '#':
 119                         continue
 120                 case 'A' <= line[0] && line[0] <= 'Z':
 121                         // Test name.
 122                         t.Logf("%s\n", line)
 123                         continue
 124                 case line == "strings":
 125                         str = str[:0]
 126                         inStrings = true
 127                 case line == "regexps":
 128                         inStrings = false
 129                 case line[0] == '"':
 130                         q, err := strconv.Unquote(line)
 131                         if err != nil {
 132                                 // Fatal because we'll get out of sync.
 133                                 t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
 134                         }
 135                         if inStrings {
 136                                 str = append(str, q)
 137                                 continue
 138                         }
 139                         // Is a regexp.
 140                         if len(input) != 0 {
 141                                 t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
 142                         }
 143                         re, err = tryCompile(q)
 144                         if err != nil {
 145                                 if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
 146                                         // We don't and likely never will support \C; keep going.
 147                                         continue
 148                                 }
 149                                 t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
 150                                 if nfail++; nfail >= 100 {
 151                                         t.Fatalf("stopping after %d errors", nfail)
 152                                 }
 153                                 continue
 154                         }
 155                         full := `\A(?:` + q + `)\z`
 156                         refull, err = tryCompile(full)
 157                         if err != nil {
 158                                 // Fatal because q worked, so this should always work.
 159                                 t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
 160                         }
 161                         input = str
 162                 case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
 163                         // A sequence of match results.
 164                         ncase++
 165                         if re == nil {
 166                                 // Failed to compile: skip results.
 167                                 continue
 168                         }
 169                         if len(input) == 0 {
 170                                 t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
 171                         }
 172                         var text string
 173                         text, input = input[0], input[1:]
 174                         if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
 175                                 // RE2's \B considers every byte position,
 176                                 // so it sees 'not word boundary' in the
 177                                 // middle of UTF-8 sequences.  This package
 178                                 // only considers the positions between runes,
 179                                 // so it disagrees.  Skip those cases.
 180                                 continue
 181                         }
 182                         res := strings.Split(line, ";")
 183                         if len(res) != len(run) {
 184                                 t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
 185                         }
 186                         for i := range res {
 187                                 have, suffix := run[i](re, refull, text)
 188                                 want := parseResult(t, file, lineno, res[i])
 189                                 if !same(have, want) {
 190                                         t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
 191                                         if nfail++; nfail >= 100 {
 192                                                 t.Fatalf("stopping after %d errors", nfail)
 193                                         }
 194                                         continue
 195                                 }
 196                                 b, suffix := match[i](re, refull, text)
 197                                 if b != (want != nil) {
 198                                         t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
 199                                         if nfail++; nfail >= 100 {
 200                                                 t.Fatalf("stopping after %d errors", nfail)
 201                                         }
 202                                         continue
 203                                 }
 204                         }
 205
 206                 default:
 207                         t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
 208                 }
 209         }
 210         if len(input) != 0 {
 211                 t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
 212         }
 213         t.Logf("%d cases tested", ncase)
 214 }
 215
 216 var run = []func(*Regexp, *Regexp, string) ([]int, string){
 217         runFull,
 218         runPartial,
 219         runFullLongest,
 220         runPartialLongest,
 221 }
 222
 223 func runFull(re, refull *Regexp, text string) ([]int, string) {
 224         refull.SetLongest(false)
 225         return refull.FindStringSubmatchIndex(text), "[full]"
 226 }
 227
 228 func runPartial(re, refull *Regexp, text string) ([]int, string) {
 229         re.SetLongest(false)
 230         return re.FindStringSubmatchIndex(text), ""
 231 }
 232
 233 func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
 234         refull.SetLongest(true)
 235         return refull.FindStringSubmatchIndex(text), "[full,longest]"
 236 }
 237
 238 func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
 239         re.SetLongest(true)
 240         return re.FindStringSubmatchIndex(text), "[longest]"
 241 }
 242
 243 var match = []func(*Regexp, *Regexp, string) (bool, string){
 244         matchFull,
 245         matchPartial,
 246         matchFullLongest,
 247         matchPartialLongest,
 248 }
 249
 250 func matchFull(re, refull *Regexp, text string) (bool, string) {
 251         refull.SetLongest(false)
 252         return refull.MatchString(text), "[full]"
 253 }
 254
 255 func matchPartial(re, refull *Regexp, text string) (bool, string) {
 256         re.SetLongest(false)
 257         return re.MatchString(text), ""
 258 }
 259
 260 func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
 261         refull.SetLongest(true)
 262         return refull.MatchString(text), "[full,longest]"
 263 }
 264
 265 func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
 266         re.SetLongest(true)
 267         return re.MatchString(text), "[longest]"
 268 }
 269
 270 func isSingleBytes(s string) bool {
 271         for _, c := range s {
 272                 if c >= utf8.RuneSelf {
 273                         return false
 274                 }
 275         }
 276         return true
 277 }
 278
 279 func tryCompile(s string) (re *Regexp, err error) {
 280         // Protect against panic during Compile.
 281         defer func() {
 282                 if r := recover(); r != nil {
 283                         err = fmt.Errorf("panic: %v", r)
 284                 }
 285         }()
 286         return Compile(s)
 287 }
 288
 289 func parseResult(t *testing.T, file string, lineno int, res string) []int {
 290         // A single - indicates no match.
 291         if res == "-" {
 292                 return nil
 293         }
 294         // Otherwise, a space-separated list of pairs.
 295         n := 1
 296         for j := 0; j < len(res); j++ {
 297                 if res[j] == ' ' {
 298                         n++
 299                 }
 300         }
 301         out := make([]int, 2*n)
 302         i := 0
 303         n = 0
 304         for j := 0; j <= len(res); j++ {
 305                 if j == len(res) || res[j] == ' ' {
 306                         // Process a single pair.  - means no submatch.
 307                         pair := res[i:j]
 308                         if pair == "-" {
 309                                 out[n] = -1
 310                                 out[n+1] = -1
 311                         } else {
 312                                 k := strings.Index(pair, "-")
 313                                 if k < 0 {
 314                                         t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
 315                                 }
 316                                 lo, err1 := strconv.Atoi(pair[:k])
 317                                 hi, err2 := strconv.Atoi(pair[k+1:])
 318                                 if err1 != nil || err2 != nil || lo > hi {
 319                                         t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
 320                                 }
 321                                 out[n] = lo
 322                                 out[n+1] = hi
 323                         }
 324                         n += 2
 325                         i = j + 1
 326                 }
 327         }
 328         return out
 329 }
 330
 331 func same(x, y []int) bool {
 332         if len(x) != len(y) {
 333                 return false
 334         }
 335         for i, xi := range x {
 336                 if xi != y[i] {
 337                         return false
 338                 }
 339         }
 340         return true
 341 }
 342
 343 // TestFowler runs this package's regexp API against the
 344 // POSIX regular expression tests collected by Glenn Fowler
 345 // at http://www2.research.att.com/~gsf/testregex/.
 346 func TestFowler(t *testing.T) {
 347         files, err := filepath.Glob("testdata/*.dat")
 348         if err != nil {
 349                 t.Fatal(err)
 350         }
 351         for _, file := range files {
 352                 t.Log(file)
 353                 testFowler(t, file)
 354         }
 355 }
 356
 357 var notab = MustCompilePOSIX(`[^\t]+`)
 358
 359 func testFowler(t *testing.T, file string) {
 360         f, err := os.Open(file)
 361         if err != nil {
 362                 t.Error(err)
 363                 return
 364         }
 365         defer f.Close()
 366         b := bufio.NewReader(f)
 367         lineno := 0
 368         lastRegexp := ""
 369 Reading:
 370         for {
 371                 lineno++
 372                 line, err := b.ReadString('\n')
 373                 if err != nil {
 374                         if err != io.EOF {
 375                                 t.Errorf("%s:%d: %v", file, lineno, err)
 376                         }
 377                         break Reading
 378                 }
 379
 380                 // http://www2.research.att.com/~gsf/man/man1/testregex.html
 381                 //
 382                 // INPUT FORMAT
 383                 //   Input lines may be blank, a comment beginning with #, or a test
 384                 //   specification. A specification is five fields separated by one
 385                 //   or more tabs. NULL denotes the empty string and NIL denotes the
 386                 //   0 pointer.
 387                 if line[0] == '#' || line[0] == '\n' {
 388                         continue Reading
 389                 }
 390                 line = line[:len(line)-1]
 391                 field := notab.FindAllString(line, -1)
 392                 for i, f := range field {
 393                         if f == "NULL" {
 394                                 field[i] = ""
 395                         }
 396                         if f == "NIL" {
 397                                 t.Logf("%s:%d: skip: %s", file, lineno, line)
 398                                 continue Reading
 399                         }
 400                 }
 401                 if len(field) == 0 {
 402                         continue Reading
 403                 }
 404
 405                 //   Field 1: the regex(3) flags to apply, one character per REG_feature
 406                 //   flag. The test is skipped if REG_feature is not supported by the
 407                 //   implementation. If the first character is not [BEASKLP] then the
 408                 //   specification is a global control line. One or more of [BEASKLP] may be
 409                 //   specified; the test will be repeated for each mode.
 410                 //
 411                 //     B        basic                   BRE     (grep, ed, sed)
 412                 //     E        REG_EXTENDED            ERE     (egrep)
 413                 //     A        REG_AUGMENTED           ARE     (egrep with negation)
 414                 //     S        REG_SHELL               SRE     (sh glob)
 415                 //     K        REG_SHELL|REG_AUGMENTED KRE     (ksh glob)
 416                 //     L        REG_LITERAL             LRE     (fgrep)
 417                 //
 418                 //     a        REG_LEFT|REG_RIGHT      implicit ^...$
 419                 //     b        REG_NOTBOL              lhs does not match ^
 420                 //     c        REG_COMMENT             ignore space and #...\n
 421                 //     d        REG_SHELL_DOT           explicit leading . match
 422                 //     e        REG_NOTEOL              rhs does not match $
 423                 //     f        REG_MULTIPLE            multiple \n separated patterns
 424                 //     g        FNM_LEADING_DIR         testfnmatch only -- match until /
 425                 //     h        REG_MULTIREF            multiple digit backref
 426                 //     i        REG_ICASE               ignore case
 427                 //     j        REG_SPAN                . matches \n
 428                 //     k        REG_ESCAPE              \ to ecape [...] delimiter
 429                 //     l        REG_LEFT                implicit ^...
 430                 //     m        REG_MINIMAL             minimal match
 431                 //     n        REG_NEWLINE             explicit \n match
 432                 //     o        REG_ENCLOSED            (|&) magic inside [@|&](...)
 433                 //     p        REG_SHELL_PATH          explicit / match
 434                 //     q        REG_DELIMITED           delimited pattern
 435                 //     r        REG_RIGHT               implicit ...$
 436                 //     s        REG_SHELL_ESCAPED       \ not special
 437                 //     t        REG_MUSTDELIM           all delimiters must be specified
 438                 //     u        standard unspecified behavior -- errors not counted
 439                 //     v        REG_CLASS_ESCAPE        \ special inside [...]
 440                 //     w        REG_NOSUB               no subexpression match array
 441                 //     x        REG_LENIENT             let some errors slide
 442                 //     y        REG_LEFT                regexec() implicit ^...
 443                 //     z        REG_NULL                NULL subexpressions ok
 444                 //     $                                expand C \c escapes in fields 2 and 3
 445                 //     /                                field 2 is a regsubcomp() expression
 446                 //     =                                field 3 is a regdecomp() expression
 447                 //
 448                 //   Field 1 control lines:
 449                 //
 450                 //     C                set LC_COLLATE and LC_CTYPE to locale in field 2
 451                 //
 452                 //     ?test ...        output field 5 if passed and != EXPECTED, silent otherwise
 453                 //     &test ...        output field 5 if current and previous passed
 454                 //     |test ...        output field 5 if current passed and previous failed
 455                 //     ; ...    output field 2 if previous failed
 456                 //     {test ...        skip if failed until }
 457                 //     }                end of skip
 458                 //
 459                 //     : comment                comment copied as output NOTE
 460                 //     :comment:test    :comment: ignored
 461                 //     N[OTE] comment   comment copied as output NOTE
 462                 //     T[EST] comment   comment
 463                 //
 464                 //     number           use number for nmatch (20 by default)
 465                 flag := field[0]
 466                 switch flag[0] {
 467                 case '?', '&', '|', ';', '{', '}':
 468                         // Ignore all the control operators.
 469                         // Just run everything.
 470                         flag = flag[1:]
 471                         if flag == "" {
 472                                 continue Reading
 473                         }
 474                 case ':':
 475                         i := strings.Index(flag[1:], ":")
 476                         if i < 0 {
 477                                 t.Logf("skip: %s", line)
 478                                 continue Reading
 479                         }
 480                         flag = flag[1+i+1:]
 481                 case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 482                         t.Logf("skip: %s", line)
 483                         continue Reading
 484                 }
 485
 486                 // Can check field count now that we've handled the myriad comment formats.
 487                 if len(field) < 4 {
 488                         t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
 489                         continue Reading
 490                 }
 491
 492                 // Expand C escapes (a.k.a. Go escapes).
 493                 if strings.Contains(flag, "$") {
 494                         f := `"` + field[1] + `"`
 495                         if field[1], err = strconv.Unquote(f); err != nil {
 496                                 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
 497                         }
 498                         f = `"` + field[2] + `"`
 499                         if field[2], err = strconv.Unquote(f); err != nil {
 500                                 t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
 501                         }
 502                 }
 503
 504                 //   Field 2: the regular expression pattern; SAME uses the pattern from
 505                 //     the previous specification.
 506                 //
 507                 if field[1] == "SAME" {
 508                         field[1] = lastRegexp
 509                 }
 510                 lastRegexp = field[1]
 511
 512                 //   Field 3: the string to match.
 513                 text := field[2]
 514
 515                 //   Field 4: the test outcome...
 516                 ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
 517                 if !ok {
 518                         t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
 519                         continue Reading
 520                 }
 521
 522                 //   Field 5: optional comment appended to the report.
 523
 524         Testing:
 525                 // Run test once for each specified capital letter mode that we support.
 526                 for _, c := range flag {
 527                         pattern := field[1]
 528                         syn := syntax.POSIX | syntax.ClassNL
 529                         switch c {
 530                         default:
 531                                 continue Testing
 532                         case 'E':
 533                                 // extended regexp (what we support)
 534                         case 'L':
 535                                 // literal
 536                                 pattern = QuoteMeta(pattern)
 537                         }
 538
 539                         for _, c := range flag {
 540                                 switch c {
 541                                 case 'i':
 542                                         syn |= syntax.FoldCase
 543                                 }
 544                         }
 545
 546                         re, err := CompileInternal(pattern, syn, true)
 547                         if err != nil {
 548                                 if shouldCompile {
 549                                         t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
 550                                 }
 551                                 continue Testing
 552                         }
 553                         if !shouldCompile {
 554                                 t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
 555                                 continue Testing
 556                         }
 557                         match := re.MatchString(text)
 558                         if match != shouldMatch {
 559                                 t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
 560                                 continue Testing
 561                         }
 562                         have := re.FindStringSubmatchIndex(text)
 563                         if (len(have) > 0) != match {
 564                                 t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
 565                                 continue Testing
 566                         }
 567                         if len(have) > len(pos) {
 568                                 have = have[:len(pos)]
 569                         }
 570                         if !same(have, pos) {
 571                                 t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
 572                         }
 573                 }
 574         }
 575 }
 576
 577 func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
 578         //   Field 4: the test outcome. This is either one of the posix error
 579         //     codes (with REG_ omitted) or the match array, a list of (m,n)
 580         //     entries with m and n being first and last+1 positions in the
 581         //     field 3 string, or NULL if REG_NOSUB is in effect and success
 582         //     is expected. BADPAT is acceptable in place of any regcomp(3)
 583         //     error code. The match[] array is initialized to (-2,-2) before
 584         //     each test. All array elements from 0 to nmatch-1 must be specified
 585         //     in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
 586         //     Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
 587         //     matched (?{...}) expression, where x is the text enclosed by {...},
 588         //     o is the expression ordinal counting from 1, and n is the length of
 589         //     the unmatched portion of the subject string. If x starts with a
 590         //     number then that is the return value of re_execf(), otherwise 0 is
 591         //     returned.
 592         switch {
 593         case s == "":
 594                 // Match with no position information.
 595                 ok = true
 596                 compiled = true
 597                 matched = true
 598                 return
 599         case s == "NOMATCH":
 600                 // Match failure.
 601                 ok = true
 602                 compiled = true
 603                 matched = false
 604                 return
 605         case 'A' <= s[0] && s[0] <= 'Z':
 606                 // All the other error codes are compile errors.
 607                 ok = true
 608                 compiled = false
 609                 return
 610         }
 611         compiled = true
 612
 613         var x []int
 614         for s != "" {
 615                 var end byte = ')'
 616                 if len(x)%2 == 0 {
 617                         if s[0] != '(' {
 618                                 ok = false
 619                                 return
 620                         }
 621                         s = s[1:]
 622                         end = ','
 623                 }
 624                 i := 0
 625                 for i < len(s) && s[i] != end {
 626                         i++
 627                 }
 628                 if i == 0 || i == len(s) {
 629                         ok = false
 630                         return
 631                 }
 632                 var v = -1
 633                 var err error
 634                 if s[:i] != "?" {
 635                         v, err = strconv.Atoi(s[:i])
 636                         if err != nil {
 637                                 ok = false
 638                                 return
 639                         }
 640                 }
 641                 x = append(x, v)
 642                 s = s[i+1:]
 643         }
 644         if len(x)%2 != 0 {
 645                 ok = false
 646                 return
 647         }
 648         ok = true
 649         matched = true
 650         pos = x
 651         return
 652 }
 653
 654 var text []byte
 655
 656 func makeText(n int) []byte {
 657         if len(text) >= n {
 658                 return text[:n]
 659         }
 660         text = make([]byte, n)
 661         for i := range text {
 662                 if rand.Intn(30) == 0 {
 663                         text[i] = '\n'
 664                 } else {
 665                         text[i] = byte(rand.Intn(0x7E+1-0x20) + 0x20)
 666                 }
 667         }
 668         return text
 669 }
 670
 671 func benchmark(b *testing.B, re string, n int) {
 672         r := MustCompile(re)
 673         t := makeText(n)
 674         b.ResetTimer()
 675         b.SetBytes(int64(n))
 676         for i := 0; i < b.N; i++ {
 677                 if r.Match(t) {
 678                         b.Fatal("match!")
 679                 }
 680         }
 681 }
 682
 683 const (
 684         easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
 685         easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
 686         medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
 687         hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
 688         parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
 689                 "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
 690 )
 691
 692 func BenchmarkMatchEasy0_32(b *testing.B)   { benchmark(b, easy0, 32<<0) }
 693 func BenchmarkMatchEasy0_1K(b *testing.B)   { benchmark(b, easy0, 1<<10) }
 694 func BenchmarkMatchEasy0_32K(b *testing.B)  { benchmark(b, easy0, 32<<10) }
 695 func BenchmarkMatchEasy0_1M(b *testing.B)   { benchmark(b, easy0, 1<<20) }
 696 func BenchmarkMatchEasy0_32M(b *testing.B)  { benchmark(b, easy0, 32<<20) }
 697 func BenchmarkMatchEasy1_32(b *testing.B)   { benchmark(b, easy1, 32<<0) }
 698 func BenchmarkMatchEasy1_1K(b *testing.B)   { benchmark(b, easy1, 1<<10) }
 699 func BenchmarkMatchEasy1_32K(b *testing.B)  { benchmark(b, easy1, 32<<10) }
 700 func BenchmarkMatchEasy1_1M(b *testing.B)   { benchmark(b, easy1, 1<<20) }
 701 func BenchmarkMatchEasy1_32M(b *testing.B)  { benchmark(b, easy1, 32<<20) }
 702 func BenchmarkMatchMedium_32(b *testing.B)  { benchmark(b, medium, 1<<0) }
 703 func BenchmarkMatchMedium_1K(b *testing.B)  { benchmark(b, medium, 1<<10) }
 704 func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
 705 func BenchmarkMatchMedium_1M(b *testing.B)  { benchmark(b, medium, 1<<20) }
 706 func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
 707 func BenchmarkMatchHard_32(b *testing.B)    { benchmark(b, hard, 32<<0) }
 708 func BenchmarkMatchHard_1K(b *testing.B)    { benchmark(b, hard, 1<<10) }
 709 func BenchmarkMatchHard_32K(b *testing.B)   { benchmark(b, hard, 32<<10) }
 710 func BenchmarkMatchHard_1M(b *testing.B)    { benchmark(b, hard, 1<<20) }
 711 func BenchmarkMatchHard_32M(b *testing.B)   { benchmark(b, hard, 32<<20) }