libgo/go/regexp/syntax/parse_test.go

   1 // Copyright 2011 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package syntax
   6
   7 import (
   8         "fmt"
   9         "strings"
  10         "testing"
  11         "unicode"
  12 )
  13
  14 type parseTest struct {
  15         Regexp string
  16         Dump   string
  17 }
  18
  19 var parseTests = []parseTest{
  20         // Base cases
  21         {`a`, `lit{a}`},
  22         {`a.`, `cat{lit{a}dot{}}`},
  23         {`a.b`, `cat{lit{a}dot{}lit{b}}`},
  24         {`ab`, `str{ab}`},
  25         {`a.b.c`, `cat{lit{a}dot{}lit{b}dot{}lit{c}}`},
  26         {`abc`, `str{abc}`},
  27         {`a|^`, `alt{lit{a}bol{}}`},
  28         {`a|b`, `cc{0x61-0x62}`},
  29         {`(a)`, `cap{lit{a}}`},
  30         {`(a)|b`, `alt{cap{lit{a}}lit{b}}`},
  31         {`a*`, `star{lit{a}}`},
  32         {`a+`, `plus{lit{a}}`},
  33         {`a?`, `que{lit{a}}`},
  34         {`a{2}`, `rep{2,2 lit{a}}`},
  35         {`a{2,3}`, `rep{2,3 lit{a}}`},
  36         {`a{2,}`, `rep{2,-1 lit{a}}`},
  37         {`a*?`, `nstar{lit{a}}`},
  38         {`a+?`, `nplus{lit{a}}`},
  39         {`a??`, `nque{lit{a}}`},
  40         {`a{2}?`, `nrep{2,2 lit{a}}`},
  41         {`a{2,3}?`, `nrep{2,3 lit{a}}`},
  42         {`a{2,}?`, `nrep{2,-1 lit{a}}`},
  43         // Malformed { } are treated as literals.
  44         {`x{1001`, `str{x{1001}`},
  45         {`x{9876543210`, `str{x{9876543210}`},
  46         {`x{9876543210,`, `str{x{9876543210,}`},
  47         {`x{2,1`, `str{x{2,1}`},
  48         {`x{1,9876543210`, `str{x{1,9876543210}`},
  49         {``, `emp{}`},
  50         {`|`, `emp{}`}, // alt{emp{}emp{}} but got factored
  51         {`|x|`, `alt{emp{}lit{x}emp{}}`},
  52         {`.`, `dot{}`},
  53         {`^`, `bol{}`},
  54         {`$`, `eol{}`},
  55         {`\|`, `lit{|}`},
  56         {`\(`, `lit{(}`},
  57         {`\)`, `lit{)}`},
  58         {`\*`, `lit{*}`},
  59         {`\+`, `lit{+}`},
  60         {`\?`, `lit{?}`},
  61         {`{`, `lit{{}`},
  62         {`}`, `lit{}}`},
  63         {`\.`, `lit{.}`},
  64         {`\^`, `lit{^}`},
  65         {`\$`, `lit{$}`},
  66         {`\\`, `lit{\}`},
  67         {`[ace]`, `cc{0x61 0x63 0x65}`},
  68         {`[abc]`, `cc{0x61-0x63}`},
  69         {`[a-z]`, `cc{0x61-0x7a}`},
  70         {`[a]`, `lit{a}`},
  71         {`\-`, `lit{-}`},
  72         {`-`, `lit{-}`},
  73         {`\_`, `lit{_}`},
  74         {`abc`, `str{abc}`},
  75         {`abc|def`, `alt{str{abc}str{def}}`},
  76         {`abc|def|ghi`, `alt{str{abc}str{def}str{ghi}}`},
  77
  78         // Posix and Perl extensions
  79         {`[[:lower:]]`, `cc{0x61-0x7a}`},
  80         {`[a-z]`, `cc{0x61-0x7a}`},
  81         {`[^[:lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
  82         {`[[:^lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
  83         {`(?i)[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
  84         {`(?i)[a-z]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
  85         {`(?i)[^[:lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
  86         {`(?i)[[:^lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
  87         {`\d`, `cc{0x30-0x39}`},
  88         {`\D`, `cc{0x0-0x2f 0x3a-0x10ffff}`},
  89         {`\s`, `cc{0x9-0xa 0xc-0xd 0x20}`},
  90         {`\S`, `cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}`},
  91         {`\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}`},
  92         {`\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}`},
  93         {`(?i)\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}`},
  94         {`(?i)\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
  95         {`[^\\]`, `cc{0x0-0x5b 0x5d-0x10ffff}`},
  96         //      { `\C`, `byte{}` },  // probably never
  97
  98         // Unicode, negatives, and a double negative.
  99         {`\p{Braille}`, `cc{0x2800-0x28ff}`},
 100         {`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
 101         {`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
 102         {`\P{^Braille}`, `cc{0x2800-0x28ff}`},
 103         {`\pZ`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
 104         {`[\p{Braille}]`, `cc{0x2800-0x28ff}`},
 105         {`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
 106         {`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
 107         {`[\P{^Braille}]`, `cc{0x2800-0x28ff}`},
 108         {`[\pZ]`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
 109         {`\p{Lu}`, mkCharClass(unicode.IsUpper)},
 110         {`[\p{Lu}]`, mkCharClass(unicode.IsUpper)},
 111         {`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)},
 112         {`\p{Any}`, `dot{}`},
 113         {`\p{^Any}`, `cc{}`},
 114
 115         // Hex, octal.
 116         {`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`},
 117         {`[\x{41}-\x7a]\x61`, `cat{cc{0x41-0x7a}lit{a}}`},
 118
 119         // More interesting regular expressions.
 120         {`a{,2}`, `str{a{,2}}`},
 121         {`\.\^\$\\`, `str{.^$\}`},
 122         {`[a-zABC]`, `cc{0x41-0x43 0x61-0x7a}`},
 123         {`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
 124         {`[α-ε☺]`, `cc{0x3b1-0x3b5 0x263a}`}, // utf-8
 125         {`a*{`, `cat{star{lit{a}}lit{{}}`},
 126
 127         // Test precedences
 128         {`(?:ab)*`, `star{str{ab}}`},
 129         {`(ab)*`, `star{cap{str{ab}}}`},
 130         {`ab|cd`, `alt{str{ab}str{cd}}`},
 131         {`a(b|c)d`, `cat{lit{a}cap{cc{0x62-0x63}}lit{d}}`},
 132
 133         // Test flattening.
 134         {`(?:a)`, `lit{a}`},
 135         {`(?:ab)(?:cd)`, `str{abcd}`},
 136         {`(?:a+b+)(?:c+d+)`, `cat{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
 137         {`(?:a+|b+)|(?:c+|d+)`, `alt{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
 138         {`(?:a|b)|(?:c|d)`, `cc{0x61-0x64}`},
 139         {`a|.`, `dot{}`},
 140         {`.|a`, `dot{}`},
 141         {`(?:[abc]|A|Z|hello|world)`, `alt{cc{0x41 0x5a 0x61-0x63}str{hello}str{world}}`},
 142         {`(?:[abc]|A|Z)`, `cc{0x41 0x5a 0x61-0x63}`},
 143
 144         // Test Perl quoted literals
 145         {`\Q+|*?{[\E`, `str{+|*?{[}`},
 146         {`\Q+\E+`, `plus{lit{+}}`},
 147         {`\Qab\E+`, `cat{lit{a}plus{lit{b}}}`},
 148         {`\Q\\E`, `lit{\}`},
 149         {`\Q\\\E`, `str{\\}`},
 150
 151         // Test Perl \A and \z
 152         {`(?m)^`, `bol{}`},
 153         {`(?m)$`, `eol{}`},
 154         {`(?-m)^`, `bot{}`},
 155         {`(?-m)$`, `eot{}`},
 156         {`(?m)\A`, `bot{}`},
 157         {`(?m)\z`, `eot{\z}`},
 158         {`(?-m)\A`, `bot{}`},
 159         {`(?-m)\z`, `eot{\z}`},
 160
 161         // Test named captures
 162         {`(?P<name>a)`, `cap{name:lit{a}}`},
 163
 164         // Case-folded literals
 165         {`[Aa]`, `litfold{A}`},
 166         {`[\x{100}\x{101}]`, `litfold{Ā}`},
 167         {`[Δδ]`, `litfold{Δ}`},
 168
 169         // Strings
 170         {`abcde`, `str{abcde}`},
 171         {`[Aa][Bb]cd`, `cat{strfold{AB}str{cd}}`},
 172
 173         // Factoring.
 174         {`abc|abd|aef|bcx|bcy`, `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}cat{str{bc}cc{0x78-0x79}}}`},
 175         {`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}lit{y}}cat{plus{lit{x}}lit{z}}cat{plus{lit{y}}lit{w}}}}`},
 176
 177         // Bug fixes.
 178         {`(?:.)`, `dot{}`},
 179         {`(?:x|(?:xa))`, `cat{lit{x}alt{emp{}lit{a}}}`},
 180         {`(?:.|(?:.a))`, `cat{dot{}alt{emp{}lit{a}}}`},
 181         {`(?:A(?:A|a))`, `cat{lit{A}litfold{A}}`},
 182         {`(?:A|a)`, `litfold{A}`},
 183         {`A|(?:A|a)`, `litfold{A}`},
 184         {`(?s).`, `dot{}`},
 185         {`(?-s).`, `dnl{}`},
 186         {`(?:(?:^).)`, `cat{bol{}dot{}}`},
 187         {`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`},
 188
 189         // RE2 prefix_tests
 190         {`abc|abd`, `cat{str{ab}cc{0x63-0x64}}`},
 191         {`a(?:b)c|abd`, `cat{str{ab}cc{0x63-0x64}}`},
 192         {`abc|abd|aef|bcx|bcy`,
 193                 `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}` +
 194                         `cat{str{bc}cc{0x78-0x79}}}`},
 195         {`abc|x|abd`, `alt{str{abc}lit{x}str{abd}}`},
 196         {`(?i)abc|ABD`, `cat{strfold{AB}cc{0x43-0x44 0x63-0x64}}`},
 197         {`[ab]c|[ab]d`, `cat{cc{0x61-0x62}cc{0x63-0x64}}`},
 198         {`.c|.d`, `cat{dot{}cc{0x63-0x64}}`},
 199         {`x{2}|x{2}[0-9]`,
 200                 `cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
 201         {`x{2}y|x{2}[0-9]y`,
 202                 `cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
 203         {`a.*?c|a.*?b`,
 204                 `cat{lit{a}alt{cat{nstar{dot{}}lit{c}}cat{nstar{dot{}}lit{b}}}}`},
 205
 206         // Valid repetitions.
 207         {`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
 208         {`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
 209 }
 210
 211 const testFlags = MatchNL | PerlX | UnicodeGroups
 212
 213 func TestParseSimple(t *testing.T) {
 214         testParseDump(t, parseTests, testFlags)
 215 }
 216
 217 var foldcaseTests = []parseTest{
 218         {`AbCdE`, `strfold{ABCDE}`},
 219         {`[Aa]`, `litfold{A}`},
 220         {`a`, `litfold{A}`},
 221
 222         // 0x17F is an old English long s (looks like an f) and folds to s.
 223         // 0x212A is the Kelvin symbol and folds to k.
 224         {`A[F-g]`, `cat{litfold{A}cc{0x41-0x7a 0x17f 0x212a}}`}, // [Aa][A-z...]
 225         {`[[:upper:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
 226         {`[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
 227 }
 228
 229 func TestParseFoldCase(t *testing.T) {
 230         testParseDump(t, foldcaseTests, FoldCase)
 231 }
 232
 233 var literalTests = []parseTest{
 234         {"(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}"},
 235 }
 236
 237 func TestParseLiteral(t *testing.T) {
 238         testParseDump(t, literalTests, Literal)
 239 }
 240
 241 var matchnlTests = []parseTest{
 242         {`.`, `dot{}`},
 243         {"\n", "lit{\n}"},
 244         {`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
 245         {`[a\n]`, `cc{0xa 0x61}`},
 246 }
 247
 248 func TestParseMatchNL(t *testing.T) {
 249         testParseDump(t, matchnlTests, MatchNL)
 250 }
 251
 252 var nomatchnlTests = []parseTest{
 253         {`.`, `dnl{}`},
 254         {"\n", "lit{\n}"},
 255         {`[^a]`, `cc{0x0-0x9 0xb-0x60 0x62-0x10ffff}`},
 256         {`[a\n]`, `cc{0xa 0x61}`},
 257 }
 258
 259 func TestParseNoMatchNL(t *testing.T) {
 260         testParseDump(t, nomatchnlTests, 0)
 261 }
 262
 263 // Test Parse -> Dump.
 264 func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
 265         for _, tt := range tests {
 266                 re, err := Parse(tt.Regexp, flags)
 267                 if err != nil {
 268                         t.Errorf("Parse(%#q): %v", tt.Regexp, err)
 269                         continue
 270                 }
 271                 if tt.Dump == "" {
 272                         // It parsed. That's all we care about.
 273                         continue
 274                 }
 275                 d := dump(re)
 276                 if d != tt.Dump {
 277                         t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
 278                 }
 279         }
 280 }
 281
 282 // dump prints a string representation of the regexp showing
 283 // the structure explicitly.
 284 func dump(re *Regexp) string {
 285         var b strings.Builder
 286         dumpRegexp(&b, re)
 287         return b.String()
 288 }
 289
 290 var opNames = []string{
 291         OpNoMatch:        "no",
 292         OpEmptyMatch:     "emp",
 293         OpLiteral:        "lit",
 294         OpCharClass:      "cc",
 295         OpAnyCharNotNL:   "dnl",
 296         OpAnyChar:        "dot",
 297         OpBeginLine:      "bol",
 298         OpEndLine:        "eol",
 299         OpBeginText:      "bot",
 300         OpEndText:        "eot",
 301         OpWordBoundary:   "wb",
 302         OpNoWordBoundary: "nwb",
 303         OpCapture:        "cap",
 304         OpStar:           "star",
 305         OpPlus:           "plus",
 306         OpQuest:          "que",
 307         OpRepeat:         "rep",
 308         OpConcat:         "cat",
 309         OpAlternate:      "alt",
 310 }
 311
 312 // dumpRegexp writes an encoding of the syntax tree for the regexp re to b.
 313 // It is used during testing to distinguish between parses that might print
 314 // the same using re's String method.
 315 func dumpRegexp(b *strings.Builder, re *Regexp) {
 316         if int(re.Op) >= len(opNames) || opNames[re.Op] == "" {
 317                 fmt.Fprintf(b, "op%d", re.Op)
 318         } else {
 319                 switch re.Op {
 320                 default:
 321                         b.WriteString(opNames[re.Op])
 322                 case OpStar, OpPlus, OpQuest, OpRepeat:
 323                         if re.Flags&NonGreedy != 0 {
 324                                 b.WriteByte('n')
 325                         }
 326                         b.WriteString(opNames[re.Op])
 327                 case OpLiteral:
 328                         if len(re.Rune) > 1 {
 329                                 b.WriteString("str")
 330                         } else {
 331                                 b.WriteString("lit")
 332                         }
 333                         if re.Flags&FoldCase != 0 {
 334                                 for _, r := range re.Rune {
 335                                         if unicode.SimpleFold(r) != r {
 336                                                 b.WriteString("fold")
 337                                                 break
 338                                         }
 339                                 }
 340                         }
 341                 }
 342         }
 343         b.WriteByte('{')
 344         switch re.Op {
 345         case OpEndText:
 346                 if re.Flags&WasDollar == 0 {
 347                         b.WriteString(`\z`)
 348                 }
 349         case OpLiteral:
 350                 for _, r := range re.Rune {
 351                         b.WriteRune(r)
 352                 }
 353         case OpConcat, OpAlternate:
 354                 for _, sub := range re.Sub {
 355                         dumpRegexp(b, sub)
 356                 }
 357         case OpStar, OpPlus, OpQuest:
 358                 dumpRegexp(b, re.Sub[0])
 359         case OpRepeat:
 360                 fmt.Fprintf(b, "%d,%d ", re.Min, re.Max)
 361                 dumpRegexp(b, re.Sub[0])
 362         case OpCapture:
 363                 if re.Name != "" {
 364                         b.WriteString(re.Name)
 365                         b.WriteByte(':')
 366                 }
 367                 dumpRegexp(b, re.Sub[0])
 368         case OpCharClass:
 369                 sep := ""
 370                 for i := 0; i < len(re.Rune); i += 2 {
 371                         b.WriteString(sep)
 372                         sep = " "
 373                         lo, hi := re.Rune[i], re.Rune[i+1]
 374                         if lo == hi {
 375                                 fmt.Fprintf(b, "%#x", lo)
 376                         } else {
 377                                 fmt.Fprintf(b, "%#x-%#x", lo, hi)
 378                         }
 379                 }
 380         }
 381         b.WriteByte('}')
 382 }
 383
 384 func mkCharClass(f func(rune) bool) string {
 385         re := &Regexp{Op: OpCharClass}
 386         lo := rune(-1)
 387         for i := rune(0); i <= unicode.MaxRune; i++ {
 388                 if f(i) {
 389                         if lo < 0 {
 390                                 lo = i
 391                         }
 392                 } else {
 393                         if lo >= 0 {
 394                                 re.Rune = append(re.Rune, lo, i-1)
 395                                 lo = -1
 396                         }
 397                 }
 398         }
 399         if lo >= 0 {
 400                 re.Rune = append(re.Rune, lo, unicode.MaxRune)
 401         }
 402         return dump(re)
 403 }
 404
 405 func isUpperFold(r rune) bool {
 406         if unicode.IsUpper(r) {
 407                 return true
 408         }
 409         c := unicode.SimpleFold(r)
 410         for c != r {
 411                 if unicode.IsUpper(c) {
 412                         return true
 413                 }
 414                 c = unicode.SimpleFold(c)
 415         }
 416         return false
 417 }
 418
 419 func TestFoldConstants(t *testing.T) {
 420         last := rune(-1)
 421         for i := rune(0); i <= unicode.MaxRune; i++ {
 422                 if unicode.SimpleFold(i) == i {
 423                         continue
 424                 }
 425                 if last == -1 && minFold != i {
 426                         t.Errorf("minFold=%#U should be %#U", minFold, i)
 427                 }
 428                 last = i
 429         }
 430         if maxFold != last {
 431                 t.Errorf("maxFold=%#U should be %#U", maxFold, last)
 432         }
 433 }
 434
 435 func TestAppendRangeCollapse(t *testing.T) {
 436         // AppendRange should collapse each of the new ranges
 437         // into the earlier ones (it looks back two ranges), so that
 438         // the slice never grows very large.
 439         // Note that we are not calling cleanClass.
 440         var r []rune
 441         for i := rune('A'); i <= 'Z'; i++ {
 442                 r = appendRange(r, i, i)
 443                 r = appendRange(r, i+'a'-'A', i+'a'-'A')
 444         }
 445         if string(r) != "AZaz" {
 446                 t.Errorf("appendRange interlaced A-Z a-z = %s, want AZaz", string(r))
 447         }
 448 }
 449
 450 var invalidRegexps = []string{
 451         `(`,
 452         `)`,
 453         `(a`,
 454         `a)`,
 455         `(a))`,
 456         `(a|b|`,
 457         `a|b|)`,
 458         `(a|b|))`,
 459         `(a|b`,
 460         `a|b)`,
 461         `(a|b))`,
 462         `[a-z`,
 463         `([a-z)`,
 464         `[a-z)`,
 465         `([a-z]))`,
 466         `x{1001}`,
 467         `x{9876543210}`,
 468         `x{2,1}`,
 469         `x{1,9876543210}`,
 470         "\xff", // Invalid UTF-8
 471         "[\xff]",
 472         "[\\\xff]",
 473         "\\\xff",
 474         `(?P<name>a`,
 475         `(?P<name>`,
 476         `(?P<name`,
 477         `(?P<x y>a)`,
 478         `(?P<>a)`,
 479         `[a-Z]`,
 480         `(?i)[a-Z]`,
 481         `a{100000}`,
 482         `a{100000,}`,
 483         "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
 484         `\Q\E*`,
 485 }
 486
 487 var onlyPerl = []string{
 488         `[a-b-c]`,
 489         `\Qabc\E`,
 490         `\Q*+?{[\E`,
 491         `\Q\\E`,
 492         `\Q\\\E`,
 493         `\Q\\\\E`,
 494         `\Q\\\\\E`,
 495         `(?:a)`,
 496         `(?P<name>a)`,
 497 }
 498
 499 var onlyPOSIX = []string{
 500         "a++",
 501         "a**",
 502         "a?*",
 503         "a+*",
 504         "a{1}*",
 505         ".{1}{2}.{3}",
 506 }
 507
 508 func TestParseInvalidRegexps(t *testing.T) {
 509         for _, regexp := range invalidRegexps {
 510                 if re, err := Parse(regexp, Perl); err == nil {
 511                         t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
 512                 }
 513                 if re, err := Parse(regexp, POSIX); err == nil {
 514                         t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
 515                 }
 516         }
 517         for _, regexp := range onlyPerl {
 518                 if _, err := Parse(regexp, Perl); err != nil {
 519                         t.Errorf("Parse(%#q, Perl): %v", regexp, err)
 520                 }
 521                 if re, err := Parse(regexp, POSIX); err == nil {
 522                         t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
 523                 }
 524         }
 525         for _, regexp := range onlyPOSIX {
 526                 if re, err := Parse(regexp, Perl); err == nil {
 527                         t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
 528                 }
 529                 if _, err := Parse(regexp, POSIX); err != nil {
 530                         t.Errorf("Parse(%#q, POSIX): %v", regexp, err)
 531                 }
 532         }
 533 }
 534
 535 func TestToStringEquivalentParse(t *testing.T) {
 536         for _, tt := range parseTests {
 537                 re, err := Parse(tt.Regexp, testFlags)
 538                 if err != nil {
 539                         t.Errorf("Parse(%#q): %v", tt.Regexp, err)
 540                         continue
 541                 }
 542                 if tt.Dump == "" {
 543                         // It parsed. That's all we care about.
 544                         continue
 545                 }
 546                 d := dump(re)
 547                 if d != tt.Dump {
 548                         t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
 549                         continue
 550                 }
 551
 552                 s := re.String()
 553                 if s != tt.Regexp {
 554                         // If ToString didn't return the original regexp,
 555                         // it must have found one with fewer parens.
 556                         // Unfortunately we can't check the length here, because
 557                         // ToString produces "\\{" for a literal brace,
 558                         // but "{" is a shorter equivalent in some contexts.
 559                         nre, err := Parse(s, testFlags)
 560                         if err != nil {
 561                                 t.Errorf("Parse(%#q.String() = %#q): %v", tt.Regexp, s, err)
 562                                 continue
 563                         }
 564                         nd := dump(nre)
 565                         if d != nd {
 566                                 t.Errorf("Parse(%#q) -> %#q; %#q vs %#q", tt.Regexp, s, d, nd)
 567                         }
 568
 569                         ns := nre.String()
 570                         if s != ns {
 571                                 t.Errorf("Parse(%#q) -> %#q -> %#q", tt.Regexp, s, ns)
 572                         }
 573                 }
 574         }
 575 }