1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package regexp implements regular expression search.
7 // The syntax of the regular expressions accepted is the same
8 // general syntax used by Perl, Python, and other languages.
9 // More precisely, it is the syntax accepted by RE2 and described at
10 // https://golang.org/s/re2syntax, except for \C.
11 // For an overview of the syntax, run
12 // go doc regexp/syntax
14 // The regexp implementation provided by this package is
15 // guaranteed to run in time linear in the size of the input.
16 // (This is a property not guaranteed by most open source
17 // implementations of regular expressions.) For more information
18 // about this property, see
19 // http://swtch.com/~rsc/regexp/regexp1.html
20 // or any book about automata theory.
22 // All characters are UTF-8-encoded code points.
24 // There are 16 methods of Regexp that match a regular expression and identify
25 // the matched text. Their names are matched by this regular expression:
27 // Find(All)?(String)?(Submatch)?(Index)?
29 // If 'All' is present, the routine matches successive non-overlapping
30 // matches of the entire expression. Empty matches abutting a preceding
31 // match are ignored. The return value is a slice containing the successive
32 // return values of the corresponding non-'All' routine. These routines take
33 // an extra integer argument, n; if n >= 0, the function returns at most n
34 // matches/submatches.
36 // If 'String' is present, the argument is a string; otherwise it is a slice
37 // of bytes; return values are adjusted as appropriate.
39 // If 'Submatch' is present, the return value is a slice identifying the
40 // successive submatches of the expression. Submatches are matches of
41 // parenthesized subexpressions (also known as capturing groups) within the
42 // regular expression, numbered from left to right in order of opening
43 // parenthesis. Submatch 0 is the match of the entire expression, submatch 1
44 // the match of the first parenthesized subexpression, and so on.
46 // If 'Index' is present, matches and submatches are identified by byte index
47 // pairs within the input string: result[2*n:2*n+1] identifies the indexes of
48 // the nth submatch. The pair for n==0 identifies the match of the entire
49 // expression. If 'Index' is not present, the match is identified by the
50 // text of the match/submatch. If an index is negative, it means that
51 // subexpression did not match any string in the input.
53 // There is also a subset of the methods that can be applied to text read
56 // MatchReader, FindReaderIndex, FindReaderSubmatchIndex
58 // This set may grow. Note that regular expression matches may need to
59 // examine text beyond the text returned by a match, so the methods that
60 // match text from a RuneReader may read arbitrarily far into the input
63 // (There are a few other methods that do not match this pattern.)
78 // Regexp is the representation of a compiled regular expression.
79 // A Regexp is safe for concurrent use by multiple goroutines.
81 // read-only after Compile
84 // cache of machines for running regexp
89 type regexpRO
struct {
90 expr
string // as passed to Compile
91 prog
*syntax
.Prog
// compiled program
92 onepass
*onePassProg
// onepass program or nil
93 prefix
string // required prefix in unanchored matches
94 prefixBytes
[]byte // prefix, as a []byte
95 prefixComplete
bool // prefix is the entire regexp
96 prefixRune rune
// first rune in prefix
97 prefixEnd
uint32 // pc for last rune in prefix
98 cond syntax
.EmptyOp
// empty-width conditions required at start of match
104 // String returns the source text used to compile the regular expression.
105 func (re
*Regexp
) String() string {
109 // Copy returns a new Regexp object copied from re.
111 // When using a Regexp in multiple goroutines, giving each goroutine
112 // its own copy helps to avoid lock contention.
113 func (re
*Regexp
) Copy() *Regexp
{
114 // It is not safe to copy Regexp by value
115 // since it contains a sync.Mutex.
117 regexpRO
: re
.regexpRO
,
121 // Compile parses a regular expression and returns, if successful,
122 // a Regexp object that can be used to match against text.
124 // When matching against text, the regexp returns a match that
125 // begins as early as possible in the input (leftmost), and among those
126 // it chooses the one that a backtracking search would have found first.
127 // This so-called leftmost-first matching is the same semantics
128 // that Perl, Python, and other implementations use, although this
129 // package implements it without the expense of backtracking.
130 // For POSIX leftmost-longest matching, see CompilePOSIX.
131 func Compile(expr
string) (*Regexp
, error
) {
132 return compile(expr
, syntax
.Perl
, false)
135 // CompilePOSIX is like Compile but restricts the regular expression
136 // to POSIX ERE (egrep) syntax and changes the match semantics to
139 // That is, when matching against text, the regexp returns a match that
140 // begins as early as possible in the input (leftmost), and among those
141 // it chooses a match that is as long as possible.
142 // This so-called leftmost-longest matching is the same semantics
143 // that early regular expression implementations used and that POSIX
146 // However, there can be multiple leftmost-longest matches, with different
147 // submatch choices, and here this package diverges from POSIX.
148 // Among the possible leftmost-longest matches, this package chooses
149 // the one that a backtracking search would have found first, while POSIX
150 // specifies that the match be chosen to maximize the length of the first
151 // subexpression, then the second, and so on from left to right.
152 // The POSIX rule is computationally prohibitive and not even well-defined.
153 // See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
154 func CompilePOSIX(expr
string) (*Regexp
, error
) {
155 return compile(expr
, syntax
.POSIX
, true)
158 // Longest makes future searches prefer the leftmost-longest match.
159 // That is, when matching against text, the regexp returns a match that
160 // begins as early as possible in the input (leftmost), and among those
161 // it chooses a match that is as long as possible.
162 func (re
*Regexp
) Longest() {
166 func compile(expr
string, mode syntax
.Flags
, longest
bool) (*Regexp
, error
) {
167 re
, err
:= syntax
.Parse(expr
, mode
)
171 maxCap
:= re
.MaxCap()
172 capNames
:= re
.CapNames()
175 prog
, err
:= syntax
.Compile(re
)
183 onepass
: compileOnePass(prog
),
185 subexpNames
: capNames
,
186 cond
: prog
.StartCond(),
190 if regexp
.onepass
== notOnePass
{
191 regexp
.prefix
, regexp
.prefixComplete
= prog
.Prefix()
193 regexp
.prefix
, regexp
.prefixComplete
, regexp
.prefixEnd
= onePassPrefix(prog
)
195 if regexp
.prefix
!= "" {
196 // TODO(rsc): Remove this allocation by adding
197 // IndexString to package bytes.
198 regexp
.prefixBytes
= []byte(regexp
.prefix
)
199 regexp
.prefixRune
, _
= utf8
.DecodeRuneInString(regexp
.prefix
)
204 // get returns a machine to use for matching re.
205 // It uses the re's machine cache if possible, to avoid
206 // unnecessary allocation.
207 func (re
*Regexp
) get() *machine
{
209 if n
:= len(re
.machine
); n
> 0 {
211 re
.machine
= re
.machine
[:n
-1]
216 z
:= progMachine(re
.prog
, re
.onepass
)
221 // put returns a machine to the re's machine cache.
222 // There is no attempt to limit the size of the cache, so it will
223 // grow to the maximum number of simultaneous matches
224 // run using re. (The cache empties when re gets garbage collected.)
225 func (re
*Regexp
) put(z
*machine
) {
227 re
.machine
= append(re
.machine
, z
)
231 // MustCompile is like Compile but panics if the expression cannot be parsed.
232 // It simplifies safe initialization of global variables holding compiled regular
234 func MustCompile(str
string) *Regexp
{
235 regexp
, error
:= Compile(str
)
237 panic(`regexp: Compile(` + quote(str
) + `): ` + error
.Error())
242 // MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed.
243 // It simplifies safe initialization of global variables holding compiled regular
245 func MustCompilePOSIX(str
string) *Regexp
{
246 regexp
, error
:= CompilePOSIX(str
)
248 panic(`regexp: CompilePOSIX(` + quote(str
) + `): ` + error
.Error())
253 func quote(s
string) string {
254 if strconv
.CanBackquote(s
) {
257 return strconv
.Quote(s
)
260 // NumSubexp returns the number of parenthesized subexpressions in this Regexp.
261 func (re
*Regexp
) NumSubexp() int {
265 // SubexpNames returns the names of the parenthesized subexpressions
266 // in this Regexp. The name for the first sub-expression is names[1],
267 // so that if m is a match slice, the name for m[i] is SubexpNames()[i].
268 // Since the Regexp as a whole cannot be named, names[0] is always
269 // the empty string. The slice should not be modified.
270 func (re
*Regexp
) SubexpNames() []string {
271 return re
.subexpNames
274 const endOfText rune
= -1
276 // input abstracts different representations of the input text. It provides
277 // one-character lookahead.
278 type input
interface {
279 step(pos
int) (r rune
, width
int) // advance one rune
280 canCheckPrefix() bool // can we look ahead without losing info?
281 hasPrefix(re
*Regexp
) bool
282 index(re
*Regexp
, pos
int) int
283 context(pos
int) syntax
.EmptyOp
286 // inputString scans a string.
287 type inputString
struct {
291 func (i
*inputString
) step(pos
int) (rune
, int) {
292 if pos
< len(i
.str
) {
294 if c
< utf8
.RuneSelf
{
297 return utf8
.DecodeRuneInString(i
.str
[pos
:])
302 func (i
*inputString
) canCheckPrefix() bool {
306 func (i
*inputString
) hasPrefix(re
*Regexp
) bool {
307 return strings
.HasPrefix(i
.str
, re
.prefix
)
310 func (i
*inputString
) index(re
*Regexp
, pos
int) int {
311 return strings
.Index(i
.str
[pos
:], re
.prefix
)
314 func (i
*inputString
) context(pos
int) syntax
.EmptyOp
{
315 r1
, r2
:= endOfText
, endOfText
316 if pos
> 0 && pos
<= len(i
.str
) {
317 r1
, _
= utf8
.DecodeLastRuneInString(i
.str
[:pos
])
319 if pos
< len(i
.str
) {
320 r2
, _
= utf8
.DecodeRuneInString(i
.str
[pos
:])
322 return syntax
.EmptyOpContext(r1
, r2
)
325 // inputBytes scans a byte slice.
326 type inputBytes
struct {
330 func (i
*inputBytes
) step(pos
int) (rune
, int) {
331 if pos
< len(i
.str
) {
333 if c
< utf8
.RuneSelf
{
336 return utf8
.DecodeRune(i
.str
[pos
:])
341 func (i
*inputBytes
) canCheckPrefix() bool {
345 func (i
*inputBytes
) hasPrefix(re
*Regexp
) bool {
346 return bytes
.HasPrefix(i
.str
, re
.prefixBytes
)
349 func (i
*inputBytes
) index(re
*Regexp
, pos
int) int {
350 return bytes
.Index(i
.str
[pos
:], re
.prefixBytes
)
353 func (i
*inputBytes
) context(pos
int) syntax
.EmptyOp
{
354 r1
, r2
:= endOfText
, endOfText
355 if pos
> 0 && pos
<= len(i
.str
) {
356 r1
, _
= utf8
.DecodeLastRune(i
.str
[:pos
])
358 if pos
< len(i
.str
) {
359 r2
, _
= utf8
.DecodeRune(i
.str
[pos
:])
361 return syntax
.EmptyOpContext(r1
, r2
)
364 // inputReader scans a RuneReader.
365 type inputReader
struct {
371 func (i
*inputReader
) step(pos
int) (rune
, int) {
372 if !i
.atEOT
&& pos
!= i
.pos
{
376 r
, w
, err
:= i
.r
.ReadRune()
385 func (i
*inputReader
) canCheckPrefix() bool {
389 func (i
*inputReader
) hasPrefix(re
*Regexp
) bool {
393 func (i
*inputReader
) index(re
*Regexp
, pos
int) int {
397 func (i
*inputReader
) context(pos
int) syntax
.EmptyOp
{
401 // LiteralPrefix returns a literal string that must begin any match
402 // of the regular expression re. It returns the boolean true if the
403 // literal string comprises the entire regular expression.
404 func (re
*Regexp
) LiteralPrefix() (prefix
string, complete
bool) {
405 return re
.prefix
, re
.prefixComplete
408 // MatchReader reports whether the Regexp matches the text read by the
410 func (re
*Regexp
) MatchReader(r io
.RuneReader
) bool {
411 return re
.doExecute(r
, nil, "", 0, 0) != nil
414 // MatchString reports whether the Regexp matches the string s.
415 func (re
*Regexp
) MatchString(s
string) bool {
416 return re
.doExecute(nil, nil, s
, 0, 0) != nil
419 // Match reports whether the Regexp matches the byte slice b.
420 func (re
*Regexp
) Match(b
[]byte) bool {
421 return re
.doExecute(nil, b
, "", 0, 0) != nil
424 // MatchReader checks whether a textual regular expression matches the text
425 // read by the RuneReader. More complicated queries need to use Compile and
426 // the full Regexp interface.
427 func MatchReader(pattern
string, r io
.RuneReader
) (matched
bool, err error
) {
428 re
, err
:= Compile(pattern
)
432 return re
.MatchReader(r
), nil
435 // MatchString checks whether a textual regular expression
436 // matches a string. More complicated queries need
437 // to use Compile and the full Regexp interface.
438 func MatchString(pattern
string, s
string) (matched
bool, err error
) {
439 re
, err
:= Compile(pattern
)
443 return re
.MatchString(s
), nil
446 // Match checks whether a textual regular expression
447 // matches a byte slice. More complicated queries need
448 // to use Compile and the full Regexp interface.
449 func Match(pattern
string, b
[]byte) (matched
bool, err error
) {
450 re
, err
:= Compile(pattern
)
454 return re
.Match(b
), nil
457 // ReplaceAllString returns a copy of src, replacing matches of the Regexp
458 // with the replacement string repl. Inside repl, $ signs are interpreted as
459 // in Expand, so for instance $1 represents the text of the first submatch.
460 func (re
*Regexp
) ReplaceAllString(src
, repl
string) string {
462 if strings
.Contains(repl
, "$") {
463 n
= 2 * (re
.numSubexp
+ 1)
465 b
:= re
.replaceAll(nil, src
, n
, func(dst
[]byte, match
[]int) []byte {
466 return re
.expand(dst
, repl
, nil, src
, match
)
471 // ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
472 // with the replacement string repl. The replacement repl is substituted directly,
473 // without using Expand.
474 func (re
*Regexp
) ReplaceAllLiteralString(src
, repl
string) string {
475 return string(re
.replaceAll(nil, src
, 2, func(dst
[]byte, match
[]int) []byte {
476 return append(dst
, repl
...)
480 // ReplaceAllStringFunc returns a copy of src in which all matches of the
481 // Regexp have been replaced by the return value of function repl applied
482 // to the matched substring. The replacement returned by repl is substituted
483 // directly, without using Expand.
484 func (re
*Regexp
) ReplaceAllStringFunc(src
string, repl
func(string) string) string {
485 b
:= re
.replaceAll(nil, src
, 2, func(dst
[]byte, match
[]int) []byte {
486 return append(dst
, repl(src
[match
[0]:match
[1]])...)
491 func (re
*Regexp
) replaceAll(bsrc
[]byte, src
string, nmatch
int, repl
func(dst
[]byte, m
[]int) []byte) []byte {
492 lastMatchEnd
:= 0 // end position of the most recent match
493 searchPos
:= 0 // position where we next look for a match
501 if nmatch
> re
.prog
.NumCap
{
502 nmatch
= re
.prog
.NumCap
505 for searchPos
<= endPos
{
506 a
:= re
.doExecute(nil, bsrc
, src
, searchPos
, nmatch
)
508 break // no more matches
511 // Copy the unmatched characters before this match.
513 buf
= append(buf
, bsrc
[lastMatchEnd
:a
[0]]...)
515 buf
= append(buf
, src
[lastMatchEnd
:a
[0]]...)
518 // Now insert a copy of the replacement string, but not for a
519 // match of the empty string immediately after another match.
520 // (Otherwise, we get double replacement for patterns that
521 // match both empty and nonempty strings.)
522 if a
[1] > lastMatchEnd || a
[0] == 0 {
527 // Advance past this match; always advance at least one character.
530 _
, width
= utf8
.DecodeRune(bsrc
[searchPos
:])
532 _
, width
= utf8
.DecodeRuneInString(src
[searchPos
:])
534 if searchPos
+width
> a
[1] {
536 } else if searchPos
+1 > a
[1] {
537 // This clause is only needed at the end of the input
538 // string. In that case, DecodeRuneInString returns width=0.
545 // Copy the unmatched characters after the last match.
547 buf
= append(buf
, bsrc
[lastMatchEnd
:]...)
549 buf
= append(buf
, src
[lastMatchEnd
:]...)
555 // ReplaceAll returns a copy of src, replacing matches of the Regexp
556 // with the replacement text repl. Inside repl, $ signs are interpreted as
557 // in Expand, so for instance $1 represents the text of the first submatch.
558 func (re
*Regexp
) ReplaceAll(src
, repl
[]byte) []byte {
560 if bytes
.IndexByte(repl
, '$') >= 0 {
561 n
= 2 * (re
.numSubexp
+ 1)
564 b
:= re
.replaceAll(src
, "", n
, func(dst
[]byte, match
[]int) []byte {
565 if len(srepl
) != len(repl
) {
568 return re
.expand(dst
, srepl
, src
, "", match
)
573 // ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
574 // with the replacement bytes repl. The replacement repl is substituted directly,
575 // without using Expand.
576 func (re
*Regexp
) ReplaceAllLiteral(src
, repl
[]byte) []byte {
577 return re
.replaceAll(src
, "", 2, func(dst
[]byte, match
[]int) []byte {
578 return append(dst
, repl
...)
582 // ReplaceAllFunc returns a copy of src in which all matches of the
583 // Regexp have been replaced by the return value of function repl applied
584 // to the matched byte slice. The replacement returned by repl is substituted
585 // directly, without using Expand.
586 func (re
*Regexp
) ReplaceAllFunc(src
[]byte, repl
func([]byte) []byte) []byte {
587 return re
.replaceAll(src
, "", 2, func(dst
[]byte, match
[]int) []byte {
588 return append(dst
, repl(src
[match
[0]:match
[1]])...)
592 var specialBytes
= []byte(`\.+*?()|[]{}^$`)
594 func special(b
byte) bool {
595 return bytes
.IndexByte(specialBytes
, b
) >= 0
598 // QuoteMeta returns a string that quotes all regular expression metacharacters
599 // inside the argument text; the returned string is a regular expression matching
600 // the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
601 func QuoteMeta(s
string) string {
602 b
:= make([]byte, 2*len(s
))
604 // A byte loop is correct because all metacharacters are ASCII.
606 for i
:= 0; i
< len(s
); i
++ {
614 return string(b
[0:j
])
617 // The number of capture values in the program may correspond
618 // to fewer capturing expressions than are in the regexp.
619 // For example, "(a){0}" turns into an empty program, so the
620 // maximum capture in the program is 0 but we need to return
621 // an expression for \1. Pad appends -1s to the slice a as needed.
622 func (re
*Regexp
) pad(a
[]int) []int {
627 n
:= (1 + re
.numSubexp
) * 2
634 // Find matches in slice b if b is non-nil, otherwise find matches in string s.
635 func (re
*Regexp
) allMatches(s
string, b
[]byte, n
int, deliver
func([]int)) {
643 for pos
, i
, prevMatchEnd
:= 0, 0, -1; i
< n
&& pos
<= end
; {
644 matches
:= re
.doExecute(nil, b
, s
, pos
, re
.prog
.NumCap
)
645 if len(matches
) == 0 {
650 if matches
[1] == pos
{
651 // We've found an empty match.
652 if matches
[0] == prevMatchEnd
{
653 // We don't allow an empty match right
654 // after a previous match, so ignore it.
660 _
, width
= utf8
.DecodeRuneInString(s
[pos
:end
])
662 _
, width
= utf8
.DecodeRune(b
[pos
:end
])
672 prevMatchEnd
= matches
[1]
675 deliver(re
.pad(matches
))
681 // Find returns a slice holding the text of the leftmost match in b of the regular expression.
682 // A return value of nil indicates no match.
683 func (re
*Regexp
) Find(b
[]byte) []byte {
684 a
:= re
.doExecute(nil, b
, "", 0, 2)
691 // FindIndex returns a two-element slice of integers defining the location of
692 // the leftmost match in b of the regular expression. The match itself is at
694 // A return value of nil indicates no match.
695 func (re
*Regexp
) FindIndex(b
[]byte) (loc
[]int) {
696 a
:= re
.doExecute(nil, b
, "", 0, 2)
703 // FindString returns a string holding the text of the leftmost match in s of the regular
704 // expression. If there is no match, the return value is an empty string,
705 // but it will also be empty if the regular expression successfully matches
706 // an empty string. Use FindStringIndex or FindStringSubmatch if it is
707 // necessary to distinguish these cases.
708 func (re
*Regexp
) FindString(s
string) string {
709 a
:= re
.doExecute(nil, nil, s
, 0, 2)
716 // FindStringIndex returns a two-element slice of integers defining the
717 // location of the leftmost match in s of the regular expression. The match
718 // itself is at s[loc[0]:loc[1]].
719 // A return value of nil indicates no match.
720 func (re
*Regexp
) FindStringIndex(s
string) (loc
[]int) {
721 a
:= re
.doExecute(nil, nil, s
, 0, 2)
728 // FindReaderIndex returns a two-element slice of integers defining the
729 // location of the leftmost match of the regular expression in text read from
730 // the RuneReader. The match text was found in the input stream at
731 // byte offset loc[0] through loc[1]-1.
732 // A return value of nil indicates no match.
733 func (re
*Regexp
) FindReaderIndex(r io
.RuneReader
) (loc
[]int) {
734 a
:= re
.doExecute(r
, nil, "", 0, 2)
741 // FindSubmatch returns a slice of slices holding the text of the leftmost
742 // match of the regular expression in b and the matches, if any, of its
743 // subexpressions, as defined by the 'Submatch' descriptions in the package
745 // A return value of nil indicates no match.
746 func (re
*Regexp
) FindSubmatch(b
[]byte) [][]byte {
747 a
:= re
.doExecute(nil, b
, "", 0, re
.prog
.NumCap
)
751 ret
:= make([][]byte, 1+re
.numSubexp
)
753 if 2*i
< len(a
) && a
[2*i
] >= 0 {
754 ret
[i
] = b
[a
[2*i
]:a
[2*i
+1]]
760 // Expand appends template to dst and returns the result; during the
761 // append, Expand replaces variables in the template with corresponding
762 // matches drawn from src. The match slice should have been returned by
763 // FindSubmatchIndex.
765 // In the template, a variable is denoted by a substring of the form
766 // $name or ${name}, where name is a non-empty sequence of letters,
767 // digits, and underscores. A purely numeric name like $1 refers to
768 // the submatch with the corresponding index; other names refer to
769 // capturing parentheses named with the (?P<name>...) syntax. A
770 // reference to an out of range or unmatched index or a name that is not
771 // present in the regular expression is replaced with an empty slice.
773 // In the $name form, name is taken to be as long as possible: $1x is
774 // equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
776 // To insert a literal $ in the output, use $$ in the template.
777 func (re
*Regexp
) Expand(dst
[]byte, template
[]byte, src
[]byte, match
[]int) []byte {
778 return re
.expand(dst
, string(template
), src
, "", match
)
781 // ExpandString is like Expand but the template and source are strings.
782 // It appends to and returns a byte slice in order to give the calling
783 // code control over allocation.
784 func (re
*Regexp
) ExpandString(dst
[]byte, template
string, src
string, match
[]int) []byte {
785 return re
.expand(dst
, template
, nil, src
, match
)
788 func (re
*Regexp
) expand(dst
[]byte, template
string, bsrc
[]byte, src
string, match
[]int) []byte {
789 for len(template
) > 0 {
790 i
:= strings
.Index(template
, "$")
794 dst
= append(dst
, template
[:i
]...)
795 template
= template
[i
:]
796 if len(template
) > 1 && template
[1] == '$' {
798 dst
= append(dst
, '$')
799 template
= template
[2:]
802 name
, num
, rest
, ok
:= extract(template
)
804 // Malformed; treat $ as raw text.
805 dst
= append(dst
, '$')
806 template
= template
[1:]
811 if 2*num
+1 < len(match
) && match
[2*num
] >= 0 {
813 dst
= append(dst
, bsrc
[match
[2*num
]:match
[2*num
+1]]...)
815 dst
= append(dst
, src
[match
[2*num
]:match
[2*num
+1]]...)
819 for i
, namei
:= range re
.subexpNames
{
820 if name
== namei
&& 2*i
+1 < len(match
) && match
[2*i
] >= 0 {
822 dst
= append(dst
, bsrc
[match
[2*i
]:match
[2*i
+1]]...)
824 dst
= append(dst
, src
[match
[2*i
]:match
[2*i
+1]]...)
831 dst
= append(dst
, template
...)
835 // extract returns the name from a leading "$name" or "${name}" in str.
836 // If it is a number, extract returns num set to that number; otherwise num = -1.
837 func extract(str
string) (name
string, num
int, rest
string, ok
bool) {
838 if len(str
) < 2 || str
[0] != '$' {
850 rune
, size
:= utf8
.DecodeRuneInString(str
[i
:])
851 if !unicode
.IsLetter(rune
) && !unicode
.IsDigit(rune
) && rune
!= '_' {
857 // empty name is not okay
862 if i
>= len(str
) || str
[i
] != '}' {
863 // missing closing brace
871 for i
:= 0; i
< len(name
); i
++ {
872 if name
[i
] < '0' ||
'9' < name
[i
] || num
>= 1e8
{
876 num
= num
*10 + int(name
[i
]) - '0'
878 // Disallow leading zeros.
879 if name
[0] == '0' && len(name
) > 1 {
888 // FindSubmatchIndex returns a slice holding the index pairs identifying the
889 // leftmost match of the regular expression in b and the matches, if any, of
890 // its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
891 // in the package comment.
892 // A return value of nil indicates no match.
893 func (re
*Regexp
) FindSubmatchIndex(b
[]byte) []int {
894 return re
.pad(re
.doExecute(nil, b
, "", 0, re
.prog
.NumCap
))
897 // FindStringSubmatch returns a slice of strings holding the text of the
898 // leftmost match of the regular expression in s and the matches, if any, of
899 // its subexpressions, as defined by the 'Submatch' description in the
901 // A return value of nil indicates no match.
902 func (re
*Regexp
) FindStringSubmatch(s
string) []string {
903 a
:= re
.doExecute(nil, nil, s
, 0, re
.prog
.NumCap
)
907 ret
:= make([]string, 1+re
.numSubexp
)
909 if 2*i
< len(a
) && a
[2*i
] >= 0 {
910 ret
[i
] = s
[a
[2*i
]:a
[2*i
+1]]
916 // FindStringSubmatchIndex returns a slice holding the index pairs
917 // identifying the leftmost match of the regular expression in s and the
918 // matches, if any, of its subexpressions, as defined by the 'Submatch' and
919 // 'Index' descriptions in the package comment.
920 // A return value of nil indicates no match.
921 func (re
*Regexp
) FindStringSubmatchIndex(s
string) []int {
922 return re
.pad(re
.doExecute(nil, nil, s
, 0, re
.prog
.NumCap
))
925 // FindReaderSubmatchIndex returns a slice holding the index pairs
926 // identifying the leftmost match of the regular expression of text read by
927 // the RuneReader, and the matches, if any, of its subexpressions, as defined
928 // by the 'Submatch' and 'Index' descriptions in the package comment. A
929 // return value of nil indicates no match.
930 func (re
*Regexp
) FindReaderSubmatchIndex(r io
.RuneReader
) []int {
931 return re
.pad(re
.doExecute(r
, nil, "", 0, re
.prog
.NumCap
))
934 const startSize
= 10 // The size at which to start a slice in the 'All' routines.
936 // FindAll is the 'All' version of Find; it returns a slice of all successive
937 // matches of the expression, as defined by the 'All' description in the
939 // A return value of nil indicates no match.
940 func (re
*Regexp
) FindAll(b
[]byte, n
int) [][]byte {
944 result
:= make([][]byte, 0, startSize
)
945 re
.allMatches("", b
, n
, func(match
[]int) {
946 result
= append(result
, b
[match
[0]:match
[1]])
948 if len(result
) == 0 {
954 // FindAllIndex is the 'All' version of FindIndex; it returns a slice of all
955 // successive matches of the expression, as defined by the 'All' description
956 // in the package comment.
957 // A return value of nil indicates no match.
958 func (re
*Regexp
) FindAllIndex(b
[]byte, n
int) [][]int {
962 result
:= make([][]int, 0, startSize
)
963 re
.allMatches("", b
, n
, func(match
[]int) {
964 result
= append(result
, match
[0:2])
966 if len(result
) == 0 {
972 // FindAllString is the 'All' version of FindString; it returns a slice of all
973 // successive matches of the expression, as defined by the 'All' description
974 // in the package comment.
975 // A return value of nil indicates no match.
976 func (re
*Regexp
) FindAllString(s
string, n
int) []string {
980 result
:= make([]string, 0, startSize
)
981 re
.allMatches(s
, nil, n
, func(match
[]int) {
982 result
= append(result
, s
[match
[0]:match
[1]])
984 if len(result
) == 0 {
990 // FindAllStringIndex is the 'All' version of FindStringIndex; it returns a
991 // slice of all successive matches of the expression, as defined by the 'All'
992 // description in the package comment.
993 // A return value of nil indicates no match.
994 func (re
*Regexp
) FindAllStringIndex(s
string, n
int) [][]int {
998 result
:= make([][]int, 0, startSize
)
999 re
.allMatches(s
, nil, n
, func(match
[]int) {
1000 result
= append(result
, match
[0:2])
1002 if len(result
) == 0 {
1008 // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice
1009 // of all successive matches of the expression, as defined by the 'All'
1010 // description in the package comment.
1011 // A return value of nil indicates no match.
1012 func (re
*Regexp
) FindAllSubmatch(b
[]byte, n
int) [][][]byte {
1016 result
:= make([][][]byte, 0, startSize
)
1017 re
.allMatches("", b
, n
, func(match
[]int) {
1018 slice
:= make([][]byte, len(match
)/2)
1019 for j
:= range slice
{
1020 if match
[2*j
] >= 0 {
1021 slice
[j
] = b
[match
[2*j
]:match
[2*j
+1]]
1024 result
= append(result
, slice
)
1026 if len(result
) == 0 {
1032 // FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns
1033 // a slice of all successive matches of the expression, as defined by the
1034 // 'All' description in the package comment.
1035 // A return value of nil indicates no match.
1036 func (re
*Regexp
) FindAllSubmatchIndex(b
[]byte, n
int) [][]int {
1040 result
:= make([][]int, 0, startSize
)
1041 re
.allMatches("", b
, n
, func(match
[]int) {
1042 result
= append(result
, match
)
1044 if len(result
) == 0 {
1050 // FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it
1051 // returns a slice of all successive matches of the expression, as defined by
1052 // the 'All' description in the package comment.
1053 // A return value of nil indicates no match.
1054 func (re
*Regexp
) FindAllStringSubmatch(s
string, n
int) [][]string {
1058 result
:= make([][]string, 0, startSize
)
1059 re
.allMatches(s
, nil, n
, func(match
[]int) {
1060 slice
:= make([]string, len(match
)/2)
1061 for j
:= range slice
{
1062 if match
[2*j
] >= 0 {
1063 slice
[j
] = s
[match
[2*j
]:match
[2*j
+1]]
1066 result
= append(result
, slice
)
1068 if len(result
) == 0 {
1074 // FindAllStringSubmatchIndex is the 'All' version of
1075 // FindStringSubmatchIndex; it returns a slice of all successive matches of
1076 // the expression, as defined by the 'All' description in the package
1078 // A return value of nil indicates no match.
1079 func (re
*Regexp
) FindAllStringSubmatchIndex(s
string, n
int) [][]int {
1083 result
:= make([][]int, 0, startSize
)
1084 re
.allMatches(s
, nil, n
, func(match
[]int) {
1085 result
= append(result
, match
)
1087 if len(result
) == 0 {
1093 // Split slices s into substrings separated by the expression and returns a slice of
1094 // the substrings between those expression matches.
1096 // The slice returned by this method consists of all the substrings of s
1097 // not contained in the slice returned by FindAllString. When called on an expression
1098 // that contains no metacharacters, it is equivalent to strings.SplitN.
1101 // s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
1102 // // s: ["", "b", "b", "c", "cadaaae"]
1104 // The count determines the number of substrings to return:
1105 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
1106 // n == 0: the result is nil (zero substrings)
1107 // n < 0: all substrings
1108 func (re
*Regexp
) Split(s
string, n
int) []string {
1114 if len(re
.expr
) > 0 && len(s
) == 0 {
1118 matches
:= re
.FindAllStringIndex(s
, n
)
1119 strings
:= make([]string, 0, len(matches
))
1123 for _
, match
:= range matches
{
1124 if n
> 0 && len(strings
) >= n
-1 {
1130 strings
= append(strings
, s
[beg
:end
])
1136 strings
= append(strings
, s
[beg
:])