libgo: update to go1.9
[official-gcc.git] / libgo / go / regexp / regexp.go
blobb1af23e85040e3c2a64361b16958ad9ef1ca8ad7
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package regexp implements regular expression search.
6 //
7 // The syntax of the regular expressions accepted is the same
8 // general syntax used by Perl, Python, and other languages.
9 // More precisely, it is the syntax accepted by RE2 and described at
10 // https://golang.org/s/re2syntax, except for \C.
11 // For an overview of the syntax, run
12 // go doc regexp/syntax
14 // The regexp implementation provided by this package is
15 // guaranteed to run in time linear in the size of the input.
16 // (This is a property not guaranteed by most open source
17 // implementations of regular expressions.) For more information
18 // about this property, see
19 // http://swtch.com/~rsc/regexp/regexp1.html
20 // or any book about automata theory.
22 // All characters are UTF-8-encoded code points.
24 // There are 16 methods of Regexp that match a regular expression and identify
25 // the matched text. Their names are matched by this regular expression:
27 // Find(All)?(String)?(Submatch)?(Index)?
29 // If 'All' is present, the routine matches successive non-overlapping
30 // matches of the entire expression. Empty matches abutting a preceding
31 // match are ignored. The return value is a slice containing the successive
32 // return values of the corresponding non-'All' routine. These routines take
33 // an extra integer argument, n; if n >= 0, the function returns at most n
34 // matches/submatches.
36 // If 'String' is present, the argument is a string; otherwise it is a slice
37 // of bytes; return values are adjusted as appropriate.
39 // If 'Submatch' is present, the return value is a slice identifying the
40 // successive submatches of the expression. Submatches are matches of
41 // parenthesized subexpressions (also known as capturing groups) within the
42 // regular expression, numbered from left to right in order of opening
43 // parenthesis. Submatch 0 is the match of the entire expression, submatch 1
44 // the match of the first parenthesized subexpression, and so on.
46 // If 'Index' is present, matches and submatches are identified by byte index
47 // pairs within the input string: result[2*n:2*n+1] identifies the indexes of
48 // the nth submatch. The pair for n==0 identifies the match of the entire
49 // expression. If 'Index' is not present, the match is identified by the
50 // text of the match/submatch. If an index is negative, it means that
51 // subexpression did not match any string in the input.
53 // There is also a subset of the methods that can be applied to text read
54 // from a RuneReader:
56 // MatchReader, FindReaderIndex, FindReaderSubmatchIndex
58 // This set may grow. Note that regular expression matches may need to
59 // examine text beyond the text returned by a match, so the methods that
60 // match text from a RuneReader may read arbitrarily far into the input
61 // before returning.
63 // (There are a few other methods that do not match this pattern.)
65 package regexp
67 import (
68 "bytes"
69 "io"
70 "regexp/syntax"
71 "strconv"
72 "strings"
73 "sync"
74 "unicode"
75 "unicode/utf8"
78 // Regexp is the representation of a compiled regular expression.
79 // A Regexp is safe for concurrent use by multiple goroutines,
80 // except for configuration methods, such as Longest.
81 type Regexp struct {
82 // read-only after Compile
83 regexpRO
85 // cache of machines for running regexp
86 mu sync.Mutex
87 machine []*machine
90 type regexpRO struct {
91 expr string // as passed to Compile
92 prog *syntax.Prog // compiled program
93 onepass *onePassProg // onepass program or nil
94 prefix string // required prefix in unanchored matches
95 prefixBytes []byte // prefix, as a []byte
96 prefixComplete bool // prefix is the entire regexp
97 prefixRune rune // first rune in prefix
98 prefixEnd uint32 // pc for last rune in prefix
99 cond syntax.EmptyOp // empty-width conditions required at start of match
100 numSubexp int
101 subexpNames []string
102 longest bool
105 // String returns the source text used to compile the regular expression.
106 func (re *Regexp) String() string {
107 return re.expr
110 // Copy returns a new Regexp object copied from re.
112 // When using a Regexp in multiple goroutines, giving each goroutine
113 // its own copy helps to avoid lock contention.
114 func (re *Regexp) Copy() *Regexp {
115 // It is not safe to copy Regexp by value
116 // since it contains a sync.Mutex.
117 return &Regexp{
118 regexpRO: re.regexpRO,
122 // Compile parses a regular expression and returns, if successful,
123 // a Regexp object that can be used to match against text.
125 // When matching against text, the regexp returns a match that
126 // begins as early as possible in the input (leftmost), and among those
127 // it chooses the one that a backtracking search would have found first.
128 // This so-called leftmost-first matching is the same semantics
129 // that Perl, Python, and other implementations use, although this
130 // package implements it without the expense of backtracking.
131 // For POSIX leftmost-longest matching, see CompilePOSIX.
132 func Compile(expr string) (*Regexp, error) {
133 return compile(expr, syntax.Perl, false)
136 // CompilePOSIX is like Compile but restricts the regular expression
137 // to POSIX ERE (egrep) syntax and changes the match semantics to
138 // leftmost-longest.
140 // That is, when matching against text, the regexp returns a match that
141 // begins as early as possible in the input (leftmost), and among those
142 // it chooses a match that is as long as possible.
143 // This so-called leftmost-longest matching is the same semantics
144 // that early regular expression implementations used and that POSIX
145 // specifies.
147 // However, there can be multiple leftmost-longest matches, with different
148 // submatch choices, and here this package diverges from POSIX.
149 // Among the possible leftmost-longest matches, this package chooses
150 // the one that a backtracking search would have found first, while POSIX
151 // specifies that the match be chosen to maximize the length of the first
152 // subexpression, then the second, and so on from left to right.
153 // The POSIX rule is computationally prohibitive and not even well-defined.
154 // See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
155 func CompilePOSIX(expr string) (*Regexp, error) {
156 return compile(expr, syntax.POSIX, true)
159 // Longest makes future searches prefer the leftmost-longest match.
160 // That is, when matching against text, the regexp returns a match that
161 // begins as early as possible in the input (leftmost), and among those
162 // it chooses a match that is as long as possible.
163 // This method modifies the Regexp and may not be called concurrently
164 // with any other methods.
165 func (re *Regexp) Longest() {
166 re.longest = true
169 func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
170 re, err := syntax.Parse(expr, mode)
171 if err != nil {
172 return nil, err
174 maxCap := re.MaxCap()
175 capNames := re.CapNames()
177 re = re.Simplify()
178 prog, err := syntax.Compile(re)
179 if err != nil {
180 return nil, err
182 regexp := &Regexp{
183 regexpRO: regexpRO{
184 expr: expr,
185 prog: prog,
186 onepass: compileOnePass(prog),
187 numSubexp: maxCap,
188 subexpNames: capNames,
189 cond: prog.StartCond(),
190 longest: longest,
193 if regexp.onepass == notOnePass {
194 regexp.prefix, regexp.prefixComplete = prog.Prefix()
195 } else {
196 regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog)
198 if regexp.prefix != "" {
199 // TODO(rsc): Remove this allocation by adding
200 // IndexString to package bytes.
201 regexp.prefixBytes = []byte(regexp.prefix)
202 regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
204 return regexp, nil
207 // get returns a machine to use for matching re.
208 // It uses the re's machine cache if possible, to avoid
209 // unnecessary allocation.
210 func (re *Regexp) get() *machine {
211 re.mu.Lock()
212 if n := len(re.machine); n > 0 {
213 z := re.machine[n-1]
214 re.machine = re.machine[:n-1]
215 re.mu.Unlock()
216 return z
218 re.mu.Unlock()
219 z := progMachine(re.prog, re.onepass)
220 z.re = re
221 return z
224 // put returns a machine to the re's machine cache.
225 // There is no attempt to limit the size of the cache, so it will
226 // grow to the maximum number of simultaneous matches
227 // run using re. (The cache empties when re gets garbage collected.)
228 func (re *Regexp) put(z *machine) {
229 re.mu.Lock()
230 re.machine = append(re.machine, z)
231 re.mu.Unlock()
234 // MustCompile is like Compile but panics if the expression cannot be parsed.
235 // It simplifies safe initialization of global variables holding compiled regular
236 // expressions.
237 func MustCompile(str string) *Regexp {
238 regexp, error := Compile(str)
239 if error != nil {
240 panic(`regexp: Compile(` + quote(str) + `): ` + error.Error())
242 return regexp
245 // MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed.
246 // It simplifies safe initialization of global variables holding compiled regular
247 // expressions.
248 func MustCompilePOSIX(str string) *Regexp {
249 regexp, error := CompilePOSIX(str)
250 if error != nil {
251 panic(`regexp: CompilePOSIX(` + quote(str) + `): ` + error.Error())
253 return regexp
256 func quote(s string) string {
257 if strconv.CanBackquote(s) {
258 return "`" + s + "`"
260 return strconv.Quote(s)
263 // NumSubexp returns the number of parenthesized subexpressions in this Regexp.
264 func (re *Regexp) NumSubexp() int {
265 return re.numSubexp
268 // SubexpNames returns the names of the parenthesized subexpressions
269 // in this Regexp. The name for the first sub-expression is names[1],
270 // so that if m is a match slice, the name for m[i] is SubexpNames()[i].
271 // Since the Regexp as a whole cannot be named, names[0] is always
272 // the empty string. The slice should not be modified.
273 func (re *Regexp) SubexpNames() []string {
274 return re.subexpNames
277 const endOfText rune = -1
279 // input abstracts different representations of the input text. It provides
280 // one-character lookahead.
281 type input interface {
282 step(pos int) (r rune, width int) // advance one rune
283 canCheckPrefix() bool // can we look ahead without losing info?
284 hasPrefix(re *Regexp) bool
285 index(re *Regexp, pos int) int
286 context(pos int) syntax.EmptyOp
289 // inputString scans a string.
290 type inputString struct {
291 str string
294 func (i *inputString) step(pos int) (rune, int) {
295 if pos < len(i.str) {
296 c := i.str[pos]
297 if c < utf8.RuneSelf {
298 return rune(c), 1
300 return utf8.DecodeRuneInString(i.str[pos:])
302 return endOfText, 0
305 func (i *inputString) canCheckPrefix() bool {
306 return true
309 func (i *inputString) hasPrefix(re *Regexp) bool {
310 return strings.HasPrefix(i.str, re.prefix)
313 func (i *inputString) index(re *Regexp, pos int) int {
314 return strings.Index(i.str[pos:], re.prefix)
317 func (i *inputString) context(pos int) syntax.EmptyOp {
318 r1, r2 := endOfText, endOfText
319 // 0 < pos && pos <= len(i.str)
320 if uint(pos-1) < uint(len(i.str)) {
321 r1 = rune(i.str[pos-1])
322 if r1 >= utf8.RuneSelf {
323 r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
326 // 0 <= pos && pos < len(i.str)
327 if uint(pos) < uint(len(i.str)) {
328 r2 = rune(i.str[pos])
329 if r2 >= utf8.RuneSelf {
330 r2, _ = utf8.DecodeRuneInString(i.str[pos:])
333 return syntax.EmptyOpContext(r1, r2)
336 // inputBytes scans a byte slice.
337 type inputBytes struct {
338 str []byte
341 func (i *inputBytes) step(pos int) (rune, int) {
342 if pos < len(i.str) {
343 c := i.str[pos]
344 if c < utf8.RuneSelf {
345 return rune(c), 1
347 return utf8.DecodeRune(i.str[pos:])
349 return endOfText, 0
352 func (i *inputBytes) canCheckPrefix() bool {
353 return true
356 func (i *inputBytes) hasPrefix(re *Regexp) bool {
357 return bytes.HasPrefix(i.str, re.prefixBytes)
360 func (i *inputBytes) index(re *Regexp, pos int) int {
361 return bytes.Index(i.str[pos:], re.prefixBytes)
364 func (i *inputBytes) context(pos int) syntax.EmptyOp {
365 r1, r2 := endOfText, endOfText
366 // 0 < pos && pos <= len(i.str)
367 if uint(pos-1) < uint(len(i.str)) {
368 r1 = rune(i.str[pos-1])
369 if r1 >= utf8.RuneSelf {
370 r1, _ = utf8.DecodeLastRune(i.str[:pos])
373 // 0 <= pos && pos < len(i.str)
374 if uint(pos) < uint(len(i.str)) {
375 r2 = rune(i.str[pos])
376 if r2 >= utf8.RuneSelf {
377 r2, _ = utf8.DecodeRune(i.str[pos:])
380 return syntax.EmptyOpContext(r1, r2)
383 // inputReader scans a RuneReader.
384 type inputReader struct {
385 r io.RuneReader
386 atEOT bool
387 pos int
390 func (i *inputReader) step(pos int) (rune, int) {
391 if !i.atEOT && pos != i.pos {
392 return endOfText, 0
395 r, w, err := i.r.ReadRune()
396 if err != nil {
397 i.atEOT = true
398 return endOfText, 0
400 i.pos += w
401 return r, w
404 func (i *inputReader) canCheckPrefix() bool {
405 return false
408 func (i *inputReader) hasPrefix(re *Regexp) bool {
409 return false
412 func (i *inputReader) index(re *Regexp, pos int) int {
413 return -1
416 func (i *inputReader) context(pos int) syntax.EmptyOp {
417 return 0
420 // LiteralPrefix returns a literal string that must begin any match
421 // of the regular expression re. It returns the boolean true if the
422 // literal string comprises the entire regular expression.
423 func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
424 return re.prefix, re.prefixComplete
427 // MatchReader reports whether the Regexp matches the text read by the
428 // RuneReader.
429 func (re *Regexp) MatchReader(r io.RuneReader) bool {
430 return re.doMatch(r, nil, "")
433 // MatchString reports whether the Regexp matches the string s.
434 func (re *Regexp) MatchString(s string) bool {
435 return re.doMatch(nil, nil, s)
438 // Match reports whether the Regexp matches the byte slice b.
439 func (re *Regexp) Match(b []byte) bool {
440 return re.doMatch(nil, b, "")
443 // MatchReader checks whether a textual regular expression matches the text
444 // read by the RuneReader. More complicated queries need to use Compile and
445 // the full Regexp interface.
446 func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) {
447 re, err := Compile(pattern)
448 if err != nil {
449 return false, err
451 return re.MatchReader(r), nil
454 // MatchString checks whether a textual regular expression
455 // matches a string. More complicated queries need
456 // to use Compile and the full Regexp interface.
457 func MatchString(pattern string, s string) (matched bool, err error) {
458 re, err := Compile(pattern)
459 if err != nil {
460 return false, err
462 return re.MatchString(s), nil
465 // Match checks whether a textual regular expression
466 // matches a byte slice. More complicated queries need
467 // to use Compile and the full Regexp interface.
468 func Match(pattern string, b []byte) (matched bool, err error) {
469 re, err := Compile(pattern)
470 if err != nil {
471 return false, err
473 return re.Match(b), nil
476 // ReplaceAllString returns a copy of src, replacing matches of the Regexp
477 // with the replacement string repl. Inside repl, $ signs are interpreted as
478 // in Expand, so for instance $1 represents the text of the first submatch.
479 func (re *Regexp) ReplaceAllString(src, repl string) string {
480 n := 2
481 if strings.Contains(repl, "$") {
482 n = 2 * (re.numSubexp + 1)
484 b := re.replaceAll(nil, src, n, func(dst []byte, match []int) []byte {
485 return re.expand(dst, repl, nil, src, match)
487 return string(b)
490 // ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
491 // with the replacement string repl. The replacement repl is substituted directly,
492 // without using Expand.
493 func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
494 return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
495 return append(dst, repl...)
499 // ReplaceAllStringFunc returns a copy of src in which all matches of the
500 // Regexp have been replaced by the return value of function repl applied
501 // to the matched substring. The replacement returned by repl is substituted
502 // directly, without using Expand.
503 func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
504 b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte {
505 return append(dst, repl(src[match[0]:match[1]])...)
507 return string(b)
510 func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst []byte, m []int) []byte) []byte {
511 lastMatchEnd := 0 // end position of the most recent match
512 searchPos := 0 // position where we next look for a match
513 var buf []byte
514 var endPos int
515 if bsrc != nil {
516 endPos = len(bsrc)
517 } else {
518 endPos = len(src)
520 if nmatch > re.prog.NumCap {
521 nmatch = re.prog.NumCap
524 var dstCap [2]int
525 for searchPos <= endPos {
526 a := re.doExecute(nil, bsrc, src, searchPos, nmatch, dstCap[:0])
527 if len(a) == 0 {
528 break // no more matches
531 // Copy the unmatched characters before this match.
532 if bsrc != nil {
533 buf = append(buf, bsrc[lastMatchEnd:a[0]]...)
534 } else {
535 buf = append(buf, src[lastMatchEnd:a[0]]...)
538 // Now insert a copy of the replacement string, but not for a
539 // match of the empty string immediately after another match.
540 // (Otherwise, we get double replacement for patterns that
541 // match both empty and nonempty strings.)
542 if a[1] > lastMatchEnd || a[0] == 0 {
543 buf = repl(buf, a)
545 lastMatchEnd = a[1]
547 // Advance past this match; always advance at least one character.
548 var width int
549 if bsrc != nil {
550 _, width = utf8.DecodeRune(bsrc[searchPos:])
551 } else {
552 _, width = utf8.DecodeRuneInString(src[searchPos:])
554 if searchPos+width > a[1] {
555 searchPos += width
556 } else if searchPos+1 > a[1] {
557 // This clause is only needed at the end of the input
558 // string. In that case, DecodeRuneInString returns width=0.
559 searchPos++
560 } else {
561 searchPos = a[1]
565 // Copy the unmatched characters after the last match.
566 if bsrc != nil {
567 buf = append(buf, bsrc[lastMatchEnd:]...)
568 } else {
569 buf = append(buf, src[lastMatchEnd:]...)
572 return buf
575 // ReplaceAll returns a copy of src, replacing matches of the Regexp
576 // with the replacement text repl. Inside repl, $ signs are interpreted as
577 // in Expand, so for instance $1 represents the text of the first submatch.
578 func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
579 n := 2
580 if bytes.IndexByte(repl, '$') >= 0 {
581 n = 2 * (re.numSubexp + 1)
583 srepl := ""
584 b := re.replaceAll(src, "", n, func(dst []byte, match []int) []byte {
585 if len(srepl) != len(repl) {
586 srepl = string(repl)
588 return re.expand(dst, srepl, src, "", match)
590 return b
593 // ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
594 // with the replacement bytes repl. The replacement repl is substituted directly,
595 // without using Expand.
596 func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
597 return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
598 return append(dst, repl...)
602 // ReplaceAllFunc returns a copy of src in which all matches of the
603 // Regexp have been replaced by the return value of function repl applied
604 // to the matched byte slice. The replacement returned by repl is substituted
605 // directly, without using Expand.
606 func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
607 return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte {
608 return append(dst, repl(src[match[0]:match[1]])...)
612 // Bitmap used by func special to check whether a character needs to be escaped.
613 var specialBytes [16]byte
615 // special reports whether byte b needs to be escaped by QuoteMeta.
616 func special(b byte) bool {
617 return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0
620 func init() {
621 for _, b := range []byte(`\.+*?()|[]{}^$`) {
622 specialBytes[b%16] |= 1 << (b / 16)
626 // QuoteMeta returns a string that quotes all regular expression metacharacters
627 // inside the argument text; the returned string is a regular expression matching
628 // the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
629 func QuoteMeta(s string) string {
630 // A byte loop is correct because all metacharacters are ASCII.
631 var i int
632 for i = 0; i < len(s); i++ {
633 if special(s[i]) {
634 break
637 // No meta characters found, so return original string.
638 if i >= len(s) {
639 return s
642 b := make([]byte, 2*len(s)-i)
643 copy(b, s[:i])
644 j := i
645 for ; i < len(s); i++ {
646 if special(s[i]) {
647 b[j] = '\\'
650 b[j] = s[i]
653 return string(b[:j])
656 // The number of capture values in the program may correspond
657 // to fewer capturing expressions than are in the regexp.
658 // For example, "(a){0}" turns into an empty program, so the
659 // maximum capture in the program is 0 but we need to return
660 // an expression for \1. Pad appends -1s to the slice a as needed.
661 func (re *Regexp) pad(a []int) []int {
662 if a == nil {
663 // No match.
664 return nil
666 n := (1 + re.numSubexp) * 2
667 for len(a) < n {
668 a = append(a, -1)
670 return a
673 // Find matches in slice b if b is non-nil, otherwise find matches in string s.
674 func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
675 var end int
676 if b == nil {
677 end = len(s)
678 } else {
679 end = len(b)
682 for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
683 matches := re.doExecute(nil, b, s, pos, re.prog.NumCap, nil)
684 if len(matches) == 0 {
685 break
688 accept := true
689 if matches[1] == pos {
690 // We've found an empty match.
691 if matches[0] == prevMatchEnd {
692 // We don't allow an empty match right
693 // after a previous match, so ignore it.
694 accept = false
696 var width int
697 // TODO: use step()
698 if b == nil {
699 _, width = utf8.DecodeRuneInString(s[pos:end])
700 } else {
701 _, width = utf8.DecodeRune(b[pos:end])
703 if width > 0 {
704 pos += width
705 } else {
706 pos = end + 1
708 } else {
709 pos = matches[1]
711 prevMatchEnd = matches[1]
713 if accept {
714 deliver(re.pad(matches))
720 // Find returns a slice holding the text of the leftmost match in b of the regular expression.
721 // A return value of nil indicates no match.
722 func (re *Regexp) Find(b []byte) []byte {
723 var dstCap [2]int
724 a := re.doExecute(nil, b, "", 0, 2, dstCap[:0])
725 if a == nil {
726 return nil
728 return b[a[0]:a[1]]
731 // FindIndex returns a two-element slice of integers defining the location of
732 // the leftmost match in b of the regular expression. The match itself is at
733 // b[loc[0]:loc[1]].
734 // A return value of nil indicates no match.
735 func (re *Regexp) FindIndex(b []byte) (loc []int) {
736 a := re.doExecute(nil, b, "", 0, 2, nil)
737 if a == nil {
738 return nil
740 return a[0:2]
743 // FindString returns a string holding the text of the leftmost match in s of the regular
744 // expression. If there is no match, the return value is an empty string,
745 // but it will also be empty if the regular expression successfully matches
746 // an empty string. Use FindStringIndex or FindStringSubmatch if it is
747 // necessary to distinguish these cases.
748 func (re *Regexp) FindString(s string) string {
749 var dstCap [2]int
750 a := re.doExecute(nil, nil, s, 0, 2, dstCap[:0])
751 if a == nil {
752 return ""
754 return s[a[0]:a[1]]
757 // FindStringIndex returns a two-element slice of integers defining the
758 // location of the leftmost match in s of the regular expression. The match
759 // itself is at s[loc[0]:loc[1]].
760 // A return value of nil indicates no match.
761 func (re *Regexp) FindStringIndex(s string) (loc []int) {
762 a := re.doExecute(nil, nil, s, 0, 2, nil)
763 if a == nil {
764 return nil
766 return a[0:2]
769 // FindReaderIndex returns a two-element slice of integers defining the
770 // location of the leftmost match of the regular expression in text read from
771 // the RuneReader. The match text was found in the input stream at
772 // byte offset loc[0] through loc[1]-1.
773 // A return value of nil indicates no match.
774 func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
775 a := re.doExecute(r, nil, "", 0, 2, nil)
776 if a == nil {
777 return nil
779 return a[0:2]
782 // FindSubmatch returns a slice of slices holding the text of the leftmost
783 // match of the regular expression in b and the matches, if any, of its
784 // subexpressions, as defined by the 'Submatch' descriptions in the package
785 // comment.
786 // A return value of nil indicates no match.
787 func (re *Regexp) FindSubmatch(b []byte) [][]byte {
788 var dstCap [4]int
789 a := re.doExecute(nil, b, "", 0, re.prog.NumCap, dstCap[:0])
790 if a == nil {
791 return nil
793 ret := make([][]byte, 1+re.numSubexp)
794 for i := range ret {
795 if 2*i < len(a) && a[2*i] >= 0 {
796 ret[i] = b[a[2*i]:a[2*i+1]]
799 return ret
802 // Expand appends template to dst and returns the result; during the
803 // append, Expand replaces variables in the template with corresponding
804 // matches drawn from src. The match slice should have been returned by
805 // FindSubmatchIndex.
807 // In the template, a variable is denoted by a substring of the form
808 // $name or ${name}, where name is a non-empty sequence of letters,
809 // digits, and underscores. A purely numeric name like $1 refers to
810 // the submatch with the corresponding index; other names refer to
811 // capturing parentheses named with the (?P<name>...) syntax. A
812 // reference to an out of range or unmatched index or a name that is not
813 // present in the regular expression is replaced with an empty slice.
815 // In the $name form, name is taken to be as long as possible: $1x is
816 // equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
818 // To insert a literal $ in the output, use $$ in the template.
819 func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
820 return re.expand(dst, string(template), src, "", match)
823 // ExpandString is like Expand but the template and source are strings.
824 // It appends to and returns a byte slice in order to give the calling
825 // code control over allocation.
826 func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte {
827 return re.expand(dst, template, nil, src, match)
830 func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, match []int) []byte {
831 for len(template) > 0 {
832 i := strings.Index(template, "$")
833 if i < 0 {
834 break
836 dst = append(dst, template[:i]...)
837 template = template[i:]
838 if len(template) > 1 && template[1] == '$' {
839 // Treat $$ as $.
840 dst = append(dst, '$')
841 template = template[2:]
842 continue
844 name, num, rest, ok := extract(template)
845 if !ok {
846 // Malformed; treat $ as raw text.
847 dst = append(dst, '$')
848 template = template[1:]
849 continue
851 template = rest
852 if num >= 0 {
853 if 2*num+1 < len(match) && match[2*num] >= 0 {
854 if bsrc != nil {
855 dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
856 } else {
857 dst = append(dst, src[match[2*num]:match[2*num+1]]...)
860 } else {
861 for i, namei := range re.subexpNames {
862 if name == namei && 2*i+1 < len(match) && match[2*i] >= 0 {
863 if bsrc != nil {
864 dst = append(dst, bsrc[match[2*i]:match[2*i+1]]...)
865 } else {
866 dst = append(dst, src[match[2*i]:match[2*i+1]]...)
868 break
873 dst = append(dst, template...)
874 return dst
877 // extract returns the name from a leading "$name" or "${name}" in str.
878 // If it is a number, extract returns num set to that number; otherwise num = -1.
879 func extract(str string) (name string, num int, rest string, ok bool) {
880 if len(str) < 2 || str[0] != '$' {
881 return
883 brace := false
884 if str[1] == '{' {
885 brace = true
886 str = str[2:]
887 } else {
888 str = str[1:]
890 i := 0
891 for i < len(str) {
892 rune, size := utf8.DecodeRuneInString(str[i:])
893 if !unicode.IsLetter(rune) && !unicode.IsDigit(rune) && rune != '_' {
894 break
896 i += size
898 if i == 0 {
899 // empty name is not okay
900 return
902 name = str[:i]
903 if brace {
904 if i >= len(str) || str[i] != '}' {
905 // missing closing brace
906 return
911 // Parse number.
912 num = 0
913 for i := 0; i < len(name); i++ {
914 if name[i] < '0' || '9' < name[i] || num >= 1e8 {
915 num = -1
916 break
918 num = num*10 + int(name[i]) - '0'
920 // Disallow leading zeros.
921 if name[0] == '0' && len(name) > 1 {
922 num = -1
925 rest = str[i:]
926 ok = true
927 return
930 // FindSubmatchIndex returns a slice holding the index pairs identifying the
931 // leftmost match of the regular expression in b and the matches, if any, of
932 // its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
933 // in the package comment.
934 // A return value of nil indicates no match.
935 func (re *Regexp) FindSubmatchIndex(b []byte) []int {
936 return re.pad(re.doExecute(nil, b, "", 0, re.prog.NumCap, nil))
939 // FindStringSubmatch returns a slice of strings holding the text of the
940 // leftmost match of the regular expression in s and the matches, if any, of
941 // its subexpressions, as defined by the 'Submatch' description in the
942 // package comment.
943 // A return value of nil indicates no match.
944 func (re *Regexp) FindStringSubmatch(s string) []string {
945 var dstCap [4]int
946 a := re.doExecute(nil, nil, s, 0, re.prog.NumCap, dstCap[:0])
947 if a == nil {
948 return nil
950 ret := make([]string, 1+re.numSubexp)
951 for i := range ret {
952 if 2*i < len(a) && a[2*i] >= 0 {
953 ret[i] = s[a[2*i]:a[2*i+1]]
956 return ret
959 // FindStringSubmatchIndex returns a slice holding the index pairs
960 // identifying the leftmost match of the regular expression in s and the
961 // matches, if any, of its subexpressions, as defined by the 'Submatch' and
962 // 'Index' descriptions in the package comment.
963 // A return value of nil indicates no match.
964 func (re *Regexp) FindStringSubmatchIndex(s string) []int {
965 return re.pad(re.doExecute(nil, nil, s, 0, re.prog.NumCap, nil))
968 // FindReaderSubmatchIndex returns a slice holding the index pairs
969 // identifying the leftmost match of the regular expression of text read by
970 // the RuneReader, and the matches, if any, of its subexpressions, as defined
971 // by the 'Submatch' and 'Index' descriptions in the package comment. A
972 // return value of nil indicates no match.
973 func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
974 return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap, nil))
977 const startSize = 10 // The size at which to start a slice in the 'All' routines.
979 // FindAll is the 'All' version of Find; it returns a slice of all successive
980 // matches of the expression, as defined by the 'All' description in the
981 // package comment.
982 // A return value of nil indicates no match.
983 func (re *Regexp) FindAll(b []byte, n int) [][]byte {
984 if n < 0 {
985 n = len(b) + 1
987 result := make([][]byte, 0, startSize)
988 re.allMatches("", b, n, func(match []int) {
989 result = append(result, b[match[0]:match[1]])
991 if len(result) == 0 {
992 return nil
994 return result
997 // FindAllIndex is the 'All' version of FindIndex; it returns a slice of all
998 // successive matches of the expression, as defined by the 'All' description
999 // in the package comment.
1000 // A return value of nil indicates no match.
1001 func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
1002 if n < 0 {
1003 n = len(b) + 1
1005 result := make([][]int, 0, startSize)
1006 re.allMatches("", b, n, func(match []int) {
1007 result = append(result, match[0:2])
1009 if len(result) == 0 {
1010 return nil
1012 return result
1015 // FindAllString is the 'All' version of FindString; it returns a slice of all
1016 // successive matches of the expression, as defined by the 'All' description
1017 // in the package comment.
1018 // A return value of nil indicates no match.
1019 func (re *Regexp) FindAllString(s string, n int) []string {
1020 if n < 0 {
1021 n = len(s) + 1
1023 result := make([]string, 0, startSize)
1024 re.allMatches(s, nil, n, func(match []int) {
1025 result = append(result, s[match[0]:match[1]])
1027 if len(result) == 0 {
1028 return nil
1030 return result
1033 // FindAllStringIndex is the 'All' version of FindStringIndex; it returns a
1034 // slice of all successive matches of the expression, as defined by the 'All'
1035 // description in the package comment.
1036 // A return value of nil indicates no match.
1037 func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
1038 if n < 0 {
1039 n = len(s) + 1
1041 result := make([][]int, 0, startSize)
1042 re.allMatches(s, nil, n, func(match []int) {
1043 result = append(result, match[0:2])
1045 if len(result) == 0 {
1046 return nil
1048 return result
1051 // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice
1052 // of all successive matches of the expression, as defined by the 'All'
1053 // description in the package comment.
1054 // A return value of nil indicates no match.
1055 func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
1056 if n < 0 {
1057 n = len(b) + 1
1059 result := make([][][]byte, 0, startSize)
1060 re.allMatches("", b, n, func(match []int) {
1061 slice := make([][]byte, len(match)/2)
1062 for j := range slice {
1063 if match[2*j] >= 0 {
1064 slice[j] = b[match[2*j]:match[2*j+1]]
1067 result = append(result, slice)
1069 if len(result) == 0 {
1070 return nil
1072 return result
1075 // FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns
1076 // a slice of all successive matches of the expression, as defined by the
1077 // 'All' description in the package comment.
1078 // A return value of nil indicates no match.
1079 func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
1080 if n < 0 {
1081 n = len(b) + 1
1083 result := make([][]int, 0, startSize)
1084 re.allMatches("", b, n, func(match []int) {
1085 result = append(result, match)
1087 if len(result) == 0 {
1088 return nil
1090 return result
1093 // FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it
1094 // returns a slice of all successive matches of the expression, as defined by
1095 // the 'All' description in the package comment.
1096 // A return value of nil indicates no match.
1097 func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
1098 if n < 0 {
1099 n = len(s) + 1
1101 result := make([][]string, 0, startSize)
1102 re.allMatches(s, nil, n, func(match []int) {
1103 slice := make([]string, len(match)/2)
1104 for j := range slice {
1105 if match[2*j] >= 0 {
1106 slice[j] = s[match[2*j]:match[2*j+1]]
1109 result = append(result, slice)
1111 if len(result) == 0 {
1112 return nil
1114 return result
1117 // FindAllStringSubmatchIndex is the 'All' version of
1118 // FindStringSubmatchIndex; it returns a slice of all successive matches of
1119 // the expression, as defined by the 'All' description in the package
1120 // comment.
1121 // A return value of nil indicates no match.
1122 func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
1123 if n < 0 {
1124 n = len(s) + 1
1126 result := make([][]int, 0, startSize)
1127 re.allMatches(s, nil, n, func(match []int) {
1128 result = append(result, match)
1130 if len(result) == 0 {
1131 return nil
1133 return result
1136 // Split slices s into substrings separated by the expression and returns a slice of
1137 // the substrings between those expression matches.
1139 // The slice returned by this method consists of all the substrings of s
1140 // not contained in the slice returned by FindAllString. When called on an expression
1141 // that contains no metacharacters, it is equivalent to strings.SplitN.
1143 // Example:
1144 // s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
1145 // // s: ["", "b", "b", "c", "cadaaae"]
1147 // The count determines the number of substrings to return:
1148 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
1149 // n == 0: the result is nil (zero substrings)
1150 // n < 0: all substrings
1151 func (re *Regexp) Split(s string, n int) []string {
1153 if n == 0 {
1154 return nil
1157 if len(re.expr) > 0 && len(s) == 0 {
1158 return []string{""}
1161 matches := re.FindAllStringIndex(s, n)
1162 strings := make([]string, 0, len(matches))
1164 beg := 0
1165 end := 0
1166 for _, match := range matches {
1167 if n > 0 && len(strings) >= n-1 {
1168 break
1171 end = match[0]
1172 if match[1] != 0 {
1173 strings = append(strings, s[beg:end])
1175 beg = match[1]
1178 if end != len(s) {
1179 strings = append(strings, s[beg:])
1182 return strings