1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package regexp implements regular expression search.
7 // The syntax of the regular expressions accepted is the same
8 // general syntax used by Perl, Python, and other languages.
9 // More precisely, it is the syntax accepted by RE2 and described at
10 // https://golang.org/s/re2syntax, except for \C.
11 // For an overview of the syntax, run
12 // go doc regexp/syntax
14 // The regexp implementation provided by this package is
15 // guaranteed to run in time linear in the size of the input.
16 // (This is a property not guaranteed by most open source
17 // implementations of regular expressions.) For more information
18 // about this property, see
19 // http://swtch.com/~rsc/regexp/regexp1.html
20 // or any book about automata theory.
22 // All characters are UTF-8-encoded code points.
24 // There are 16 methods of Regexp that match a regular expression and identify
25 // the matched text. Their names are matched by this regular expression:
27 // Find(All)?(String)?(Submatch)?(Index)?
29 // If 'All' is present, the routine matches successive non-overlapping
30 // matches of the entire expression. Empty matches abutting a preceding
31 // match are ignored. The return value is a slice containing the successive
32 // return values of the corresponding non-'All' routine. These routines take
33 // an extra integer argument, n; if n >= 0, the function returns at most n
34 // matches/submatches.
36 // If 'String' is present, the argument is a string; otherwise it is a slice
37 // of bytes; return values are adjusted as appropriate.
39 // If 'Submatch' is present, the return value is a slice identifying the
40 // successive submatches of the expression. Submatches are matches of
41 // parenthesized subexpressions (also known as capturing groups) within the
42 // regular expression, numbered from left to right in order of opening
43 // parenthesis. Submatch 0 is the match of the entire expression, submatch 1
44 // the match of the first parenthesized subexpression, and so on.
46 // If 'Index' is present, matches and submatches are identified by byte index
47 // pairs within the input string: result[2*n:2*n+1] identifies the indexes of
48 // the nth submatch. The pair for n==0 identifies the match of the entire
49 // expression. If 'Index' is not present, the match is identified by the
50 // text of the match/submatch. If an index is negative, it means that
51 // subexpression did not match any string in the input.
53 // There is also a subset of the methods that can be applied to text read
56 // MatchReader, FindReaderIndex, FindReaderSubmatchIndex
58 // This set may grow. Note that regular expression matches may need to
59 // examine text beyond the text returned by a match, so the methods that
60 // match text from a RuneReader may read arbitrarily far into the input
63 // (There are a few other methods that do not match this pattern.)
78 // Regexp is the representation of a compiled regular expression.
79 // A Regexp is safe for concurrent use by multiple goroutines,
80 // except for configuration methods, such as Longest.
82 // read-only after Compile
85 // cache of machines for running regexp
90 type regexpRO
struct {
91 expr
string // as passed to Compile
92 prog
*syntax
.Prog
// compiled program
93 onepass
*onePassProg
// onepass program or nil
94 prefix
string // required prefix in unanchored matches
95 prefixBytes
[]byte // prefix, as a []byte
96 prefixComplete
bool // prefix is the entire regexp
97 prefixRune rune
// first rune in prefix
98 prefixEnd
uint32 // pc for last rune in prefix
99 cond syntax
.EmptyOp
// empty-width conditions required at start of match
105 // String returns the source text used to compile the regular expression.
106 func (re
*Regexp
) String() string {
110 // Copy returns a new Regexp object copied from re.
112 // When using a Regexp in multiple goroutines, giving each goroutine
113 // its own copy helps to avoid lock contention.
114 func (re
*Regexp
) Copy() *Regexp
{
115 // It is not safe to copy Regexp by value
116 // since it contains a sync.Mutex.
118 regexpRO
: re
.regexpRO
,
122 // Compile parses a regular expression and returns, if successful,
123 // a Regexp object that can be used to match against text.
125 // When matching against text, the regexp returns a match that
126 // begins as early as possible in the input (leftmost), and among those
127 // it chooses the one that a backtracking search would have found first.
128 // This so-called leftmost-first matching is the same semantics
129 // that Perl, Python, and other implementations use, although this
130 // package implements it without the expense of backtracking.
131 // For POSIX leftmost-longest matching, see CompilePOSIX.
132 func Compile(expr
string) (*Regexp
, error
) {
133 return compile(expr
, syntax
.Perl
, false)
136 // CompilePOSIX is like Compile but restricts the regular expression
137 // to POSIX ERE (egrep) syntax and changes the match semantics to
140 // That is, when matching against text, the regexp returns a match that
141 // begins as early as possible in the input (leftmost), and among those
142 // it chooses a match that is as long as possible.
143 // This so-called leftmost-longest matching is the same semantics
144 // that early regular expression implementations used and that POSIX
147 // However, there can be multiple leftmost-longest matches, with different
148 // submatch choices, and here this package diverges from POSIX.
149 // Among the possible leftmost-longest matches, this package chooses
150 // the one that a backtracking search would have found first, while POSIX
151 // specifies that the match be chosen to maximize the length of the first
152 // subexpression, then the second, and so on from left to right.
153 // The POSIX rule is computationally prohibitive and not even well-defined.
154 // See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
155 func CompilePOSIX(expr
string) (*Regexp
, error
) {
156 return compile(expr
, syntax
.POSIX
, true)
159 // Longest makes future searches prefer the leftmost-longest match.
160 // That is, when matching against text, the regexp returns a match that
161 // begins as early as possible in the input (leftmost), and among those
162 // it chooses a match that is as long as possible.
163 // This method modifies the Regexp and may not be called concurrently
164 // with any other methods.
165 func (re
*Regexp
) Longest() {
169 func compile(expr
string, mode syntax
.Flags
, longest
bool) (*Regexp
, error
) {
170 re
, err
:= syntax
.Parse(expr
, mode
)
174 maxCap
:= re
.MaxCap()
175 capNames
:= re
.CapNames()
178 prog
, err
:= syntax
.Compile(re
)
186 onepass
: compileOnePass(prog
),
188 subexpNames
: capNames
,
189 cond
: prog
.StartCond(),
193 if regexp
.onepass
== notOnePass
{
194 regexp
.prefix
, regexp
.prefixComplete
= prog
.Prefix()
196 regexp
.prefix
, regexp
.prefixComplete
, regexp
.prefixEnd
= onePassPrefix(prog
)
198 if regexp
.prefix
!= "" {
199 // TODO(rsc): Remove this allocation by adding
200 // IndexString to package bytes.
201 regexp
.prefixBytes
= []byte(regexp
.prefix
)
202 regexp
.prefixRune
, _
= utf8
.DecodeRuneInString(regexp
.prefix
)
207 // get returns a machine to use for matching re.
208 // It uses the re's machine cache if possible, to avoid
209 // unnecessary allocation.
210 func (re
*Regexp
) get() *machine
{
212 if n
:= len(re
.machine
); n
> 0 {
214 re
.machine
= re
.machine
[:n
-1]
219 z
:= progMachine(re
.prog
, re
.onepass
)
224 // put returns a machine to the re's machine cache.
225 // There is no attempt to limit the size of the cache, so it will
226 // grow to the maximum number of simultaneous matches
227 // run using re. (The cache empties when re gets garbage collected.)
228 func (re
*Regexp
) put(z
*machine
) {
230 re
.machine
= append(re
.machine
, z
)
234 // MustCompile is like Compile but panics if the expression cannot be parsed.
235 // It simplifies safe initialization of global variables holding compiled regular
237 func MustCompile(str
string) *Regexp
{
238 regexp
, error
:= Compile(str
)
240 panic(`regexp: Compile(` + quote(str
) + `): ` + error
.Error())
245 // MustCompilePOSIX is like CompilePOSIX but panics if the expression cannot be parsed.
246 // It simplifies safe initialization of global variables holding compiled regular
248 func MustCompilePOSIX(str
string) *Regexp
{
249 regexp
, error
:= CompilePOSIX(str
)
251 panic(`regexp: CompilePOSIX(` + quote(str
) + `): ` + error
.Error())
256 func quote(s
string) string {
257 if strconv
.CanBackquote(s
) {
260 return strconv
.Quote(s
)
263 // NumSubexp returns the number of parenthesized subexpressions in this Regexp.
264 func (re
*Regexp
) NumSubexp() int {
268 // SubexpNames returns the names of the parenthesized subexpressions
269 // in this Regexp. The name for the first sub-expression is names[1],
270 // so that if m is a match slice, the name for m[i] is SubexpNames()[i].
271 // Since the Regexp as a whole cannot be named, names[0] is always
272 // the empty string. The slice should not be modified.
273 func (re
*Regexp
) SubexpNames() []string {
274 return re
.subexpNames
277 const endOfText rune
= -1
279 // input abstracts different representations of the input text. It provides
280 // one-character lookahead.
281 type input
interface {
282 step(pos
int) (r rune
, width
int) // advance one rune
283 canCheckPrefix() bool // can we look ahead without losing info?
284 hasPrefix(re
*Regexp
) bool
285 index(re
*Regexp
, pos
int) int
286 context(pos
int) syntax
.EmptyOp
289 // inputString scans a string.
290 type inputString
struct {
294 func (i
*inputString
) step(pos
int) (rune
, int) {
295 if pos
< len(i
.str
) {
297 if c
< utf8
.RuneSelf
{
300 return utf8
.DecodeRuneInString(i
.str
[pos
:])
305 func (i
*inputString
) canCheckPrefix() bool {
309 func (i
*inputString
) hasPrefix(re
*Regexp
) bool {
310 return strings
.HasPrefix(i
.str
, re
.prefix
)
313 func (i
*inputString
) index(re
*Regexp
, pos
int) int {
314 return strings
.Index(i
.str
[pos
:], re
.prefix
)
317 func (i
*inputString
) context(pos
int) syntax
.EmptyOp
{
318 r1
, r2
:= endOfText
, endOfText
319 // 0 < pos && pos <= len(i.str)
320 if uint(pos
-1) < uint(len(i
.str
)) {
321 r1
= rune(i
.str
[pos
-1])
322 if r1
>= utf8
.RuneSelf
{
323 r1
, _
= utf8
.DecodeLastRuneInString(i
.str
[:pos
])
326 // 0 <= pos && pos < len(i.str)
327 if uint(pos
) < uint(len(i
.str
)) {
328 r2
= rune(i
.str
[pos
])
329 if r2
>= utf8
.RuneSelf
{
330 r2
, _
= utf8
.DecodeRuneInString(i
.str
[pos
:])
333 return syntax
.EmptyOpContext(r1
, r2
)
336 // inputBytes scans a byte slice.
337 type inputBytes
struct {
341 func (i
*inputBytes
) step(pos
int) (rune
, int) {
342 if pos
< len(i
.str
) {
344 if c
< utf8
.RuneSelf
{
347 return utf8
.DecodeRune(i
.str
[pos
:])
352 func (i
*inputBytes
) canCheckPrefix() bool {
356 func (i
*inputBytes
) hasPrefix(re
*Regexp
) bool {
357 return bytes
.HasPrefix(i
.str
, re
.prefixBytes
)
360 func (i
*inputBytes
) index(re
*Regexp
, pos
int) int {
361 return bytes
.Index(i
.str
[pos
:], re
.prefixBytes
)
364 func (i
*inputBytes
) context(pos
int) syntax
.EmptyOp
{
365 r1
, r2
:= endOfText
, endOfText
366 // 0 < pos && pos <= len(i.str)
367 if uint(pos
-1) < uint(len(i
.str
)) {
368 r1
= rune(i
.str
[pos
-1])
369 if r1
>= utf8
.RuneSelf
{
370 r1
, _
= utf8
.DecodeLastRune(i
.str
[:pos
])
373 // 0 <= pos && pos < len(i.str)
374 if uint(pos
) < uint(len(i
.str
)) {
375 r2
= rune(i
.str
[pos
])
376 if r2
>= utf8
.RuneSelf
{
377 r2
, _
= utf8
.DecodeRune(i
.str
[pos
:])
380 return syntax
.EmptyOpContext(r1
, r2
)
383 // inputReader scans a RuneReader.
384 type inputReader
struct {
390 func (i
*inputReader
) step(pos
int) (rune
, int) {
391 if !i
.atEOT
&& pos
!= i
.pos
{
395 r
, w
, err
:= i
.r
.ReadRune()
404 func (i
*inputReader
) canCheckPrefix() bool {
408 func (i
*inputReader
) hasPrefix(re
*Regexp
) bool {
412 func (i
*inputReader
) index(re
*Regexp
, pos
int) int {
416 func (i
*inputReader
) context(pos
int) syntax
.EmptyOp
{
420 // LiteralPrefix returns a literal string that must begin any match
421 // of the regular expression re. It returns the boolean true if the
422 // literal string comprises the entire regular expression.
423 func (re
*Regexp
) LiteralPrefix() (prefix
string, complete
bool) {
424 return re
.prefix
, re
.prefixComplete
427 // MatchReader reports whether the Regexp matches the text read by the
429 func (re
*Regexp
) MatchReader(r io
.RuneReader
) bool {
430 return re
.doMatch(r
, nil, "")
433 // MatchString reports whether the Regexp matches the string s.
434 func (re
*Regexp
) MatchString(s
string) bool {
435 return re
.doMatch(nil, nil, s
)
438 // Match reports whether the Regexp matches the byte slice b.
439 func (re
*Regexp
) Match(b
[]byte) bool {
440 return re
.doMatch(nil, b
, "")
443 // MatchReader checks whether a textual regular expression matches the text
444 // read by the RuneReader. More complicated queries need to use Compile and
445 // the full Regexp interface.
446 func MatchReader(pattern
string, r io
.RuneReader
) (matched
bool, err error
) {
447 re
, err
:= Compile(pattern
)
451 return re
.MatchReader(r
), nil
454 // MatchString checks whether a textual regular expression
455 // matches a string. More complicated queries need
456 // to use Compile and the full Regexp interface.
457 func MatchString(pattern
string, s
string) (matched
bool, err error
) {
458 re
, err
:= Compile(pattern
)
462 return re
.MatchString(s
), nil
465 // Match checks whether a textual regular expression
466 // matches a byte slice. More complicated queries need
467 // to use Compile and the full Regexp interface.
468 func Match(pattern
string, b
[]byte) (matched
bool, err error
) {
469 re
, err
:= Compile(pattern
)
473 return re
.Match(b
), nil
476 // ReplaceAllString returns a copy of src, replacing matches of the Regexp
477 // with the replacement string repl. Inside repl, $ signs are interpreted as
478 // in Expand, so for instance $1 represents the text of the first submatch.
479 func (re
*Regexp
) ReplaceAllString(src
, repl
string) string {
481 if strings
.Contains(repl
, "$") {
482 n
= 2 * (re
.numSubexp
+ 1)
484 b
:= re
.replaceAll(nil, src
, n
, func(dst
[]byte, match
[]int) []byte {
485 return re
.expand(dst
, repl
, nil, src
, match
)
490 // ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp
491 // with the replacement string repl. The replacement repl is substituted directly,
492 // without using Expand.
493 func (re
*Regexp
) ReplaceAllLiteralString(src
, repl
string) string {
494 return string(re
.replaceAll(nil, src
, 2, func(dst
[]byte, match
[]int) []byte {
495 return append(dst
, repl
...)
499 // ReplaceAllStringFunc returns a copy of src in which all matches of the
500 // Regexp have been replaced by the return value of function repl applied
501 // to the matched substring. The replacement returned by repl is substituted
502 // directly, without using Expand.
503 func (re
*Regexp
) ReplaceAllStringFunc(src
string, repl
func(string) string) string {
504 b
:= re
.replaceAll(nil, src
, 2, func(dst
[]byte, match
[]int) []byte {
505 return append(dst
, repl(src
[match
[0]:match
[1]])...)
510 func (re
*Regexp
) replaceAll(bsrc
[]byte, src
string, nmatch
int, repl
func(dst
[]byte, m
[]int) []byte) []byte {
511 lastMatchEnd
:= 0 // end position of the most recent match
512 searchPos
:= 0 // position where we next look for a match
520 if nmatch
> re
.prog
.NumCap
{
521 nmatch
= re
.prog
.NumCap
525 for searchPos
<= endPos
{
526 a
:= re
.doExecute(nil, bsrc
, src
, searchPos
, nmatch
, dstCap
[:0])
528 break // no more matches
531 // Copy the unmatched characters before this match.
533 buf
= append(buf
, bsrc
[lastMatchEnd
:a
[0]]...)
535 buf
= append(buf
, src
[lastMatchEnd
:a
[0]]...)
538 // Now insert a copy of the replacement string, but not for a
539 // match of the empty string immediately after another match.
540 // (Otherwise, we get double replacement for patterns that
541 // match both empty and nonempty strings.)
542 if a
[1] > lastMatchEnd || a
[0] == 0 {
547 // Advance past this match; always advance at least one character.
550 _
, width
= utf8
.DecodeRune(bsrc
[searchPos
:])
552 _
, width
= utf8
.DecodeRuneInString(src
[searchPos
:])
554 if searchPos
+width
> a
[1] {
556 } else if searchPos
+1 > a
[1] {
557 // This clause is only needed at the end of the input
558 // string. In that case, DecodeRuneInString returns width=0.
565 // Copy the unmatched characters after the last match.
567 buf
= append(buf
, bsrc
[lastMatchEnd
:]...)
569 buf
= append(buf
, src
[lastMatchEnd
:]...)
575 // ReplaceAll returns a copy of src, replacing matches of the Regexp
576 // with the replacement text repl. Inside repl, $ signs are interpreted as
577 // in Expand, so for instance $1 represents the text of the first submatch.
578 func (re
*Regexp
) ReplaceAll(src
, repl
[]byte) []byte {
580 if bytes
.IndexByte(repl
, '$') >= 0 {
581 n
= 2 * (re
.numSubexp
+ 1)
584 b
:= re
.replaceAll(src
, "", n
, func(dst
[]byte, match
[]int) []byte {
585 if len(srepl
) != len(repl
) {
588 return re
.expand(dst
, srepl
, src
, "", match
)
593 // ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp
594 // with the replacement bytes repl. The replacement repl is substituted directly,
595 // without using Expand.
596 func (re
*Regexp
) ReplaceAllLiteral(src
, repl
[]byte) []byte {
597 return re
.replaceAll(src
, "", 2, func(dst
[]byte, match
[]int) []byte {
598 return append(dst
, repl
...)
602 // ReplaceAllFunc returns a copy of src in which all matches of the
603 // Regexp have been replaced by the return value of function repl applied
604 // to the matched byte slice. The replacement returned by repl is substituted
605 // directly, without using Expand.
606 func (re
*Regexp
) ReplaceAllFunc(src
[]byte, repl
func([]byte) []byte) []byte {
607 return re
.replaceAll(src
, "", 2, func(dst
[]byte, match
[]int) []byte {
608 return append(dst
, repl(src
[match
[0]:match
[1]])...)
612 // Bitmap used by func special to check whether a character needs to be escaped.
613 var specialBytes
[16]byte
615 // special reports whether byte b needs to be escaped by QuoteMeta.
616 func special(b
byte) bool {
617 return b
< utf8
.RuneSelf
&& specialBytes
[b%16
]&(1<<(b
/16)) != 0
621 for _
, b
:= range []byte(`\.+*?()|[]{}^$`) {
622 specialBytes
[b%16
] |
= 1 << (b
/ 16)
626 // QuoteMeta returns a string that quotes all regular expression metacharacters
627 // inside the argument text; the returned string is a regular expression matching
628 // the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
629 func QuoteMeta(s
string) string {
630 // A byte loop is correct because all metacharacters are ASCII.
632 for i
= 0; i
< len(s
); i
++ {
637 // No meta characters found, so return original string.
642 b
:= make([]byte, 2*len(s
)-i
)
645 for ; i
< len(s
); i
++ {
656 // The number of capture values in the program may correspond
657 // to fewer capturing expressions than are in the regexp.
658 // For example, "(a){0}" turns into an empty program, so the
659 // maximum capture in the program is 0 but we need to return
660 // an expression for \1. Pad appends -1s to the slice a as needed.
661 func (re
*Regexp
) pad(a
[]int) []int {
666 n
:= (1 + re
.numSubexp
) * 2
673 // Find matches in slice b if b is non-nil, otherwise find matches in string s.
674 func (re
*Regexp
) allMatches(s
string, b
[]byte, n
int, deliver
func([]int)) {
682 for pos
, i
, prevMatchEnd
:= 0, 0, -1; i
< n
&& pos
<= end
; {
683 matches
:= re
.doExecute(nil, b
, s
, pos
, re
.prog
.NumCap
, nil)
684 if len(matches
) == 0 {
689 if matches
[1] == pos
{
690 // We've found an empty match.
691 if matches
[0] == prevMatchEnd
{
692 // We don't allow an empty match right
693 // after a previous match, so ignore it.
699 _
, width
= utf8
.DecodeRuneInString(s
[pos
:end
])
701 _
, width
= utf8
.DecodeRune(b
[pos
:end
])
711 prevMatchEnd
= matches
[1]
714 deliver(re
.pad(matches
))
720 // Find returns a slice holding the text of the leftmost match in b of the regular expression.
721 // A return value of nil indicates no match.
722 func (re
*Regexp
) Find(b
[]byte) []byte {
724 a
:= re
.doExecute(nil, b
, "", 0, 2, dstCap
[:0])
731 // FindIndex returns a two-element slice of integers defining the location of
732 // the leftmost match in b of the regular expression. The match itself is at
734 // A return value of nil indicates no match.
735 func (re
*Regexp
) FindIndex(b
[]byte) (loc
[]int) {
736 a
:= re
.doExecute(nil, b
, "", 0, 2, nil)
743 // FindString returns a string holding the text of the leftmost match in s of the regular
744 // expression. If there is no match, the return value is an empty string,
745 // but it will also be empty if the regular expression successfully matches
746 // an empty string. Use FindStringIndex or FindStringSubmatch if it is
747 // necessary to distinguish these cases.
748 func (re
*Regexp
) FindString(s
string) string {
750 a
:= re
.doExecute(nil, nil, s
, 0, 2, dstCap
[:0])
757 // FindStringIndex returns a two-element slice of integers defining the
758 // location of the leftmost match in s of the regular expression. The match
759 // itself is at s[loc[0]:loc[1]].
760 // A return value of nil indicates no match.
761 func (re
*Regexp
) FindStringIndex(s
string) (loc
[]int) {
762 a
:= re
.doExecute(nil, nil, s
, 0, 2, nil)
769 // FindReaderIndex returns a two-element slice of integers defining the
770 // location of the leftmost match of the regular expression in text read from
771 // the RuneReader. The match text was found in the input stream at
772 // byte offset loc[0] through loc[1]-1.
773 // A return value of nil indicates no match.
774 func (re
*Regexp
) FindReaderIndex(r io
.RuneReader
) (loc
[]int) {
775 a
:= re
.doExecute(r
, nil, "", 0, 2, nil)
782 // FindSubmatch returns a slice of slices holding the text of the leftmost
783 // match of the regular expression in b and the matches, if any, of its
784 // subexpressions, as defined by the 'Submatch' descriptions in the package
786 // A return value of nil indicates no match.
787 func (re
*Regexp
) FindSubmatch(b
[]byte) [][]byte {
789 a
:= re
.doExecute(nil, b
, "", 0, re
.prog
.NumCap
, dstCap
[:0])
793 ret
:= make([][]byte, 1+re
.numSubexp
)
795 if 2*i
< len(a
) && a
[2*i
] >= 0 {
796 ret
[i
] = b
[a
[2*i
]:a
[2*i
+1]]
802 // Expand appends template to dst and returns the result; during the
803 // append, Expand replaces variables in the template with corresponding
804 // matches drawn from src. The match slice should have been returned by
805 // FindSubmatchIndex.
807 // In the template, a variable is denoted by a substring of the form
808 // $name or ${name}, where name is a non-empty sequence of letters,
809 // digits, and underscores. A purely numeric name like $1 refers to
810 // the submatch with the corresponding index; other names refer to
811 // capturing parentheses named with the (?P<name>...) syntax. A
812 // reference to an out of range or unmatched index or a name that is not
813 // present in the regular expression is replaced with an empty slice.
815 // In the $name form, name is taken to be as long as possible: $1x is
816 // equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
818 // To insert a literal $ in the output, use $$ in the template.
819 func (re
*Regexp
) Expand(dst
[]byte, template
[]byte, src
[]byte, match
[]int) []byte {
820 return re
.expand(dst
, string(template
), src
, "", match
)
823 // ExpandString is like Expand but the template and source are strings.
824 // It appends to and returns a byte slice in order to give the calling
825 // code control over allocation.
826 func (re
*Regexp
) ExpandString(dst
[]byte, template
string, src
string, match
[]int) []byte {
827 return re
.expand(dst
, template
, nil, src
, match
)
830 func (re
*Regexp
) expand(dst
[]byte, template
string, bsrc
[]byte, src
string, match
[]int) []byte {
831 for len(template
) > 0 {
832 i
:= strings
.Index(template
, "$")
836 dst
= append(dst
, template
[:i
]...)
837 template
= template
[i
:]
838 if len(template
) > 1 && template
[1] == '$' {
840 dst
= append(dst
, '$')
841 template
= template
[2:]
844 name
, num
, rest
, ok
:= extract(template
)
846 // Malformed; treat $ as raw text.
847 dst
= append(dst
, '$')
848 template
= template
[1:]
853 if 2*num
+1 < len(match
) && match
[2*num
] >= 0 {
855 dst
= append(dst
, bsrc
[match
[2*num
]:match
[2*num
+1]]...)
857 dst
= append(dst
, src
[match
[2*num
]:match
[2*num
+1]]...)
861 for i
, namei
:= range re
.subexpNames
{
862 if name
== namei
&& 2*i
+1 < len(match
) && match
[2*i
] >= 0 {
864 dst
= append(dst
, bsrc
[match
[2*i
]:match
[2*i
+1]]...)
866 dst
= append(dst
, src
[match
[2*i
]:match
[2*i
+1]]...)
873 dst
= append(dst
, template
...)
877 // extract returns the name from a leading "$name" or "${name}" in str.
878 // If it is a number, extract returns num set to that number; otherwise num = -1.
879 func extract(str
string) (name
string, num
int, rest
string, ok
bool) {
880 if len(str
) < 2 || str
[0] != '$' {
892 rune
, size
:= utf8
.DecodeRuneInString(str
[i
:])
893 if !unicode
.IsLetter(rune
) && !unicode
.IsDigit(rune
) && rune
!= '_' {
899 // empty name is not okay
904 if i
>= len(str
) || str
[i
] != '}' {
905 // missing closing brace
913 for i
:= 0; i
< len(name
); i
++ {
914 if name
[i
] < '0' ||
'9' < name
[i
] || num
>= 1e8
{
918 num
= num
*10 + int(name
[i
]) - '0'
920 // Disallow leading zeros.
921 if name
[0] == '0' && len(name
) > 1 {
930 // FindSubmatchIndex returns a slice holding the index pairs identifying the
931 // leftmost match of the regular expression in b and the matches, if any, of
932 // its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
933 // in the package comment.
934 // A return value of nil indicates no match.
935 func (re
*Regexp
) FindSubmatchIndex(b
[]byte) []int {
936 return re
.pad(re
.doExecute(nil, b
, "", 0, re
.prog
.NumCap
, nil))
939 // FindStringSubmatch returns a slice of strings holding the text of the
940 // leftmost match of the regular expression in s and the matches, if any, of
941 // its subexpressions, as defined by the 'Submatch' description in the
943 // A return value of nil indicates no match.
944 func (re
*Regexp
) FindStringSubmatch(s
string) []string {
946 a
:= re
.doExecute(nil, nil, s
, 0, re
.prog
.NumCap
, dstCap
[:0])
950 ret
:= make([]string, 1+re
.numSubexp
)
952 if 2*i
< len(a
) && a
[2*i
] >= 0 {
953 ret
[i
] = s
[a
[2*i
]:a
[2*i
+1]]
959 // FindStringSubmatchIndex returns a slice holding the index pairs
960 // identifying the leftmost match of the regular expression in s and the
961 // matches, if any, of its subexpressions, as defined by the 'Submatch' and
962 // 'Index' descriptions in the package comment.
963 // A return value of nil indicates no match.
964 func (re
*Regexp
) FindStringSubmatchIndex(s
string) []int {
965 return re
.pad(re
.doExecute(nil, nil, s
, 0, re
.prog
.NumCap
, nil))
968 // FindReaderSubmatchIndex returns a slice holding the index pairs
969 // identifying the leftmost match of the regular expression of text read by
970 // the RuneReader, and the matches, if any, of its subexpressions, as defined
971 // by the 'Submatch' and 'Index' descriptions in the package comment. A
972 // return value of nil indicates no match.
973 func (re
*Regexp
) FindReaderSubmatchIndex(r io
.RuneReader
) []int {
974 return re
.pad(re
.doExecute(r
, nil, "", 0, re
.prog
.NumCap
, nil))
977 const startSize
= 10 // The size at which to start a slice in the 'All' routines.
979 // FindAll is the 'All' version of Find; it returns a slice of all successive
980 // matches of the expression, as defined by the 'All' description in the
982 // A return value of nil indicates no match.
983 func (re
*Regexp
) FindAll(b
[]byte, n
int) [][]byte {
987 result
:= make([][]byte, 0, startSize
)
988 re
.allMatches("", b
, n
, func(match
[]int) {
989 result
= append(result
, b
[match
[0]:match
[1]])
991 if len(result
) == 0 {
997 // FindAllIndex is the 'All' version of FindIndex; it returns a slice of all
998 // successive matches of the expression, as defined by the 'All' description
999 // in the package comment.
1000 // A return value of nil indicates no match.
1001 func (re
*Regexp
) FindAllIndex(b
[]byte, n
int) [][]int {
1005 result
:= make([][]int, 0, startSize
)
1006 re
.allMatches("", b
, n
, func(match
[]int) {
1007 result
= append(result
, match
[0:2])
1009 if len(result
) == 0 {
1015 // FindAllString is the 'All' version of FindString; it returns a slice of all
1016 // successive matches of the expression, as defined by the 'All' description
1017 // in the package comment.
1018 // A return value of nil indicates no match.
1019 func (re
*Regexp
) FindAllString(s
string, n
int) []string {
1023 result
:= make([]string, 0, startSize
)
1024 re
.allMatches(s
, nil, n
, func(match
[]int) {
1025 result
= append(result
, s
[match
[0]:match
[1]])
1027 if len(result
) == 0 {
1033 // FindAllStringIndex is the 'All' version of FindStringIndex; it returns a
1034 // slice of all successive matches of the expression, as defined by the 'All'
1035 // description in the package comment.
1036 // A return value of nil indicates no match.
1037 func (re
*Regexp
) FindAllStringIndex(s
string, n
int) [][]int {
1041 result
:= make([][]int, 0, startSize
)
1042 re
.allMatches(s
, nil, n
, func(match
[]int) {
1043 result
= append(result
, match
[0:2])
1045 if len(result
) == 0 {
1051 // FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice
1052 // of all successive matches of the expression, as defined by the 'All'
1053 // description in the package comment.
1054 // A return value of nil indicates no match.
1055 func (re
*Regexp
) FindAllSubmatch(b
[]byte, n
int) [][][]byte {
1059 result
:= make([][][]byte, 0, startSize
)
1060 re
.allMatches("", b
, n
, func(match
[]int) {
1061 slice
:= make([][]byte, len(match
)/2)
1062 for j
:= range slice
{
1063 if match
[2*j
] >= 0 {
1064 slice
[j
] = b
[match
[2*j
]:match
[2*j
+1]]
1067 result
= append(result
, slice
)
1069 if len(result
) == 0 {
1075 // FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns
1076 // a slice of all successive matches of the expression, as defined by the
1077 // 'All' description in the package comment.
1078 // A return value of nil indicates no match.
1079 func (re
*Regexp
) FindAllSubmatchIndex(b
[]byte, n
int) [][]int {
1083 result
:= make([][]int, 0, startSize
)
1084 re
.allMatches("", b
, n
, func(match
[]int) {
1085 result
= append(result
, match
)
1087 if len(result
) == 0 {
1093 // FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it
1094 // returns a slice of all successive matches of the expression, as defined by
1095 // the 'All' description in the package comment.
1096 // A return value of nil indicates no match.
1097 func (re
*Regexp
) FindAllStringSubmatch(s
string, n
int) [][]string {
1101 result
:= make([][]string, 0, startSize
)
1102 re
.allMatches(s
, nil, n
, func(match
[]int) {
1103 slice
:= make([]string, len(match
)/2)
1104 for j
:= range slice
{
1105 if match
[2*j
] >= 0 {
1106 slice
[j
] = s
[match
[2*j
]:match
[2*j
+1]]
1109 result
= append(result
, slice
)
1111 if len(result
) == 0 {
1117 // FindAllStringSubmatchIndex is the 'All' version of
1118 // FindStringSubmatchIndex; it returns a slice of all successive matches of
1119 // the expression, as defined by the 'All' description in the package
1121 // A return value of nil indicates no match.
1122 func (re
*Regexp
) FindAllStringSubmatchIndex(s
string, n
int) [][]int {
1126 result
:= make([][]int, 0, startSize
)
1127 re
.allMatches(s
, nil, n
, func(match
[]int) {
1128 result
= append(result
, match
)
1130 if len(result
) == 0 {
1136 // Split slices s into substrings separated by the expression and returns a slice of
1137 // the substrings between those expression matches.
1139 // The slice returned by this method consists of all the substrings of s
1140 // not contained in the slice returned by FindAllString. When called on an expression
1141 // that contains no metacharacters, it is equivalent to strings.SplitN.
1144 // s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
1145 // // s: ["", "b", "b", "c", "cadaaae"]
1147 // The count determines the number of substrings to return:
1148 // n > 0: at most n substrings; the last substring will be the unsplit remainder.
1149 // n == 0: the result is nil (zero substrings)
1150 // n < 0: all substrings
1151 func (re
*Regexp
) Split(s
string, n
int) []string {
1157 if len(re
.expr
) > 0 && len(s
) == 0 {
1161 matches
:= re
.FindAllStringIndex(s
, n
)
1162 strings
:= make([]string, 0, len(matches
))
1166 for _
, match
:= range matches
{
1167 if n
> 0 && len(strings
) >= n
-1 {
1173 strings
= append(strings
, s
[beg
:end
])
1179 strings
= append(strings
, s
[beg
:])