Handle arithmetic on eliminated address indices [PR116413]
[official-gcc.git] / libgo / go / regexp / exec.go
blob4411e4c3e61188d1345c599c7ac2c5fad561b4ca
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package regexp
7 import (
8 "io"
9 "regexp/syntax"
10 "sync"
13 // A queue is a 'sparse array' holding pending threads of execution.
14 // See https://research.swtch.com/2008/03/using-uninitialized-memory-for-fun-and.html
15 type queue struct {
16 sparse []uint32
17 dense []entry
20 // An entry is an entry on a queue.
21 // It holds both the instruction pc and the actual thread.
22 // Some queue entries are just place holders so that the machine
23 // knows it has considered that pc. Such entries have t == nil.
24 type entry struct {
25 pc uint32
26 t *thread
29 // A thread is the state of a single path through the machine:
30 // an instruction and a corresponding capture array.
31 // See https://swtch.com/~rsc/regexp/regexp2.html
32 type thread struct {
33 inst *syntax.Inst
34 cap []int
37 // A machine holds all the state during an NFA simulation for p.
38 type machine struct {
39 re *Regexp // corresponding Regexp
40 p *syntax.Prog // compiled program
41 q0, q1 queue // two queues for runq, nextq
42 pool []*thread // pool of available threads
43 matched bool // whether a match was found
44 matchcap []int // capture information for the match
46 inputs inputs
49 type inputs struct {
50 // cached inputs, to avoid allocation
51 bytes inputBytes
52 string inputString
53 reader inputReader
56 func (i *inputs) newBytes(b []byte) input {
57 i.bytes.str = b
58 return &i.bytes
61 func (i *inputs) newString(s string) input {
62 i.string.str = s
63 return &i.string
66 func (i *inputs) newReader(r io.RuneReader) input {
67 i.reader.r = r
68 i.reader.atEOT = false
69 i.reader.pos = 0
70 return &i.reader
73 func (i *inputs) clear() {
74 // We need to clear 1 of these.
75 // Avoid the expense of clearing the others (pointer write barrier).
76 if i.bytes.str != nil {
77 i.bytes.str = nil
78 } else if i.reader.r != nil {
79 i.reader.r = nil
80 } else {
81 i.string.str = ""
85 func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) {
86 if r != nil {
87 return i.newReader(r), 0
89 if b != nil {
90 return i.newBytes(b), len(b)
92 return i.newString(s), len(s)
95 func (m *machine) init(ncap int) {
96 for _, t := range m.pool {
97 t.cap = t.cap[:ncap]
99 m.matchcap = m.matchcap[:ncap]
102 // alloc allocates a new thread with the given instruction.
103 // It uses the free pool if possible.
104 func (m *machine) alloc(i *syntax.Inst) *thread {
105 var t *thread
106 if n := len(m.pool); n > 0 {
107 t = m.pool[n-1]
108 m.pool = m.pool[:n-1]
109 } else {
110 t = new(thread)
111 t.cap = make([]int, len(m.matchcap), cap(m.matchcap))
113 t.inst = i
114 return t
117 // A lazyFlag is a lazily-evaluated syntax.EmptyOp,
118 // for checking zero-width flags like ^ $ \A \z \B \b.
119 // It records the pair of relevant runes and does not
120 // determine the implied flags until absolutely necessary
121 // (most of the time, that means never).
122 type lazyFlag uint64
124 func newLazyFlag(r1, r2 rune) lazyFlag {
125 return lazyFlag(uint64(r1)<<32 | uint64(uint32(r2)))
128 func (f lazyFlag) match(op syntax.EmptyOp) bool {
129 if op == 0 {
130 return true
132 r1 := rune(f >> 32)
133 if op&syntax.EmptyBeginLine != 0 {
134 if r1 != '\n' && r1 >= 0 {
135 return false
137 op &^= syntax.EmptyBeginLine
139 if op&syntax.EmptyBeginText != 0 {
140 if r1 >= 0 {
141 return false
143 op &^= syntax.EmptyBeginText
145 if op == 0 {
146 return true
148 r2 := rune(f)
149 if op&syntax.EmptyEndLine != 0 {
150 if r2 != '\n' && r2 >= 0 {
151 return false
153 op &^= syntax.EmptyEndLine
155 if op&syntax.EmptyEndText != 0 {
156 if r2 >= 0 {
157 return false
159 op &^= syntax.EmptyEndText
161 if op == 0 {
162 return true
164 if syntax.IsWordChar(r1) != syntax.IsWordChar(r2) {
165 op &^= syntax.EmptyWordBoundary
166 } else {
167 op &^= syntax.EmptyNoWordBoundary
169 return op == 0
172 // match runs the machine over the input starting at pos.
173 // It reports whether a match was found.
174 // If so, m.matchcap holds the submatch information.
175 func (m *machine) match(i input, pos int) bool {
176 startCond := m.re.cond
177 if startCond == ^syntax.EmptyOp(0) { // impossible
178 return false
180 m.matched = false
181 for i := range m.matchcap {
182 m.matchcap[i] = -1
184 runq, nextq := &m.q0, &m.q1
185 r, r1 := endOfText, endOfText
186 width, width1 := 0, 0
187 r, width = i.step(pos)
188 if r != endOfText {
189 r1, width1 = i.step(pos + width)
191 var flag lazyFlag
192 if pos == 0 {
193 flag = newLazyFlag(-1, r)
194 } else {
195 flag = i.context(pos)
197 for {
198 if len(runq.dense) == 0 {
199 if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
200 // Anchored match, past beginning of text.
201 break
203 if m.matched {
204 // Have match; finished exploring alternatives.
205 break
207 if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() {
208 // Match requires literal prefix; fast search for it.
209 advance := i.index(m.re, pos)
210 if advance < 0 {
211 break
213 pos += advance
214 r, width = i.step(pos)
215 r1, width1 = i.step(pos + width)
218 if !m.matched {
219 if len(m.matchcap) > 0 {
220 m.matchcap[0] = pos
222 m.add(runq, uint32(m.p.Start), pos, m.matchcap, &flag, nil)
224 flag = newLazyFlag(r, r1)
225 m.step(runq, nextq, pos, pos+width, r, &flag)
226 if width == 0 {
227 break
229 if len(m.matchcap) == 0 && m.matched {
230 // Found a match and not paying attention
231 // to where it is, so any match will do.
232 break
234 pos += width
235 r, width = r1, width1
236 if r != endOfText {
237 r1, width1 = i.step(pos + width)
239 runq, nextq = nextq, runq
241 m.clear(nextq)
242 return m.matched
245 // clear frees all threads on the thread queue.
246 func (m *machine) clear(q *queue) {
247 for _, d := range q.dense {
248 if d.t != nil {
249 m.pool = append(m.pool, d.t)
252 q.dense = q.dense[:0]
255 // step executes one step of the machine, running each of the threads
256 // on runq and appending new threads to nextq.
257 // The step processes the rune c (which may be endOfText),
258 // which starts at position pos and ends at nextPos.
259 // nextCond gives the setting for the empty-width flags after c.
260 func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond *lazyFlag) {
261 longest := m.re.longest
262 for j := 0; j < len(runq.dense); j++ {
263 d := &runq.dense[j]
264 t := d.t
265 if t == nil {
266 continue
268 if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
269 m.pool = append(m.pool, t)
270 continue
272 i := t.inst
273 add := false
274 switch i.Op {
275 default:
276 panic("bad inst")
278 case syntax.InstMatch:
279 if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) {
280 t.cap[1] = pos
281 copy(m.matchcap, t.cap)
283 if !longest {
284 // First-match mode: cut off all lower-priority threads.
285 for _, d := range runq.dense[j+1:] {
286 if d.t != nil {
287 m.pool = append(m.pool, d.t)
290 runq.dense = runq.dense[:0]
292 m.matched = true
294 case syntax.InstRune:
295 add = i.MatchRune(c)
296 case syntax.InstRune1:
297 add = c == i.Rune[0]
298 case syntax.InstRuneAny:
299 add = true
300 case syntax.InstRuneAnyNotNL:
301 add = c != '\n'
303 if add {
304 t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t)
306 if t != nil {
307 m.pool = append(m.pool, t)
310 runq.dense = runq.dense[:0]
313 // add adds an entry to q for pc, unless the q already has such an entry.
314 // It also recursively adds an entry for all instructions reachable from pc by following
315 // empty-width conditions satisfied by cond. pos gives the current position
316 // in the input.
317 func (m *machine) add(q *queue, pc uint32, pos int, cap []int, cond *lazyFlag, t *thread) *thread {
318 Again:
319 if pc == 0 {
320 return t
322 if j := q.sparse[pc]; j < uint32(len(q.dense)) && q.dense[j].pc == pc {
323 return t
326 j := len(q.dense)
327 q.dense = q.dense[:j+1]
328 d := &q.dense[j]
329 d.t = nil
330 d.pc = pc
331 q.sparse[pc] = uint32(j)
333 i := &m.p.Inst[pc]
334 switch i.Op {
335 default:
336 panic("unhandled")
337 case syntax.InstFail:
338 // nothing
339 case syntax.InstAlt, syntax.InstAltMatch:
340 t = m.add(q, i.Out, pos, cap, cond, t)
341 pc = i.Arg
342 goto Again
343 case syntax.InstEmptyWidth:
344 if cond.match(syntax.EmptyOp(i.Arg)) {
345 pc = i.Out
346 goto Again
348 case syntax.InstNop:
349 pc = i.Out
350 goto Again
351 case syntax.InstCapture:
352 if int(i.Arg) < len(cap) {
353 opos := cap[i.Arg]
354 cap[i.Arg] = pos
355 m.add(q, i.Out, pos, cap, cond, nil)
356 cap[i.Arg] = opos
357 } else {
358 pc = i.Out
359 goto Again
361 case syntax.InstMatch, syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
362 if t == nil {
363 t = m.alloc(i)
364 } else {
365 t.inst = i
367 if len(cap) > 0 && &t.cap[0] != &cap[0] {
368 copy(t.cap, cap)
370 d.t = t
371 t = nil
373 return t
376 type onePassMachine struct {
377 inputs inputs
378 matchcap []int
381 var onePassPool sync.Pool
383 func newOnePassMachine() *onePassMachine {
384 m, ok := onePassPool.Get().(*onePassMachine)
385 if !ok {
386 m = new(onePassMachine)
388 return m
391 func freeOnePassMachine(m *onePassMachine) {
392 m.inputs.clear()
393 onePassPool.Put(m)
396 // doOnePass implements r.doExecute using the one-pass execution engine.
397 func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap int, dstCap []int) []int {
398 startCond := re.cond
399 if startCond == ^syntax.EmptyOp(0) { // impossible
400 return nil
403 m := newOnePassMachine()
404 if cap(m.matchcap) < ncap {
405 m.matchcap = make([]int, ncap)
406 } else {
407 m.matchcap = m.matchcap[:ncap]
410 matched := false
411 for i := range m.matchcap {
412 m.matchcap[i] = -1
415 i, _ := m.inputs.init(ir, ib, is)
417 r, r1 := endOfText, endOfText
418 width, width1 := 0, 0
419 r, width = i.step(pos)
420 if r != endOfText {
421 r1, width1 = i.step(pos + width)
423 var flag lazyFlag
424 if pos == 0 {
425 flag = newLazyFlag(-1, r)
426 } else {
427 flag = i.context(pos)
429 pc := re.onepass.Start
430 inst := re.onepass.Inst[pc]
431 // If there is a simple literal prefix, skip over it.
432 if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) &&
433 len(re.prefix) > 0 && i.canCheckPrefix() {
434 // Match requires literal prefix; fast search for it.
435 if !i.hasPrefix(re) {
436 goto Return
438 pos += len(re.prefix)
439 r, width = i.step(pos)
440 r1, width1 = i.step(pos + width)
441 flag = i.context(pos)
442 pc = int(re.prefixEnd)
444 for {
445 inst = re.onepass.Inst[pc]
446 pc = int(inst.Out)
447 switch inst.Op {
448 default:
449 panic("bad inst")
450 case syntax.InstMatch:
451 matched = true
452 if len(m.matchcap) > 0 {
453 m.matchcap[0] = 0
454 m.matchcap[1] = pos
456 goto Return
457 case syntax.InstRune:
458 if !inst.MatchRune(r) {
459 goto Return
461 case syntax.InstRune1:
462 if r != inst.Rune[0] {
463 goto Return
465 case syntax.InstRuneAny:
466 // Nothing
467 case syntax.InstRuneAnyNotNL:
468 if r == '\n' {
469 goto Return
471 // peek at the input rune to see which branch of the Alt to take
472 case syntax.InstAlt, syntax.InstAltMatch:
473 pc = int(onePassNext(&inst, r))
474 continue
475 case syntax.InstFail:
476 goto Return
477 case syntax.InstNop:
478 continue
479 case syntax.InstEmptyWidth:
480 if !flag.match(syntax.EmptyOp(inst.Arg)) {
481 goto Return
483 continue
484 case syntax.InstCapture:
485 if int(inst.Arg) < len(m.matchcap) {
486 m.matchcap[inst.Arg] = pos
488 continue
490 if width == 0 {
491 break
493 flag = newLazyFlag(r, r1)
494 pos += width
495 r, width = r1, width1
496 if r != endOfText {
497 r1, width1 = i.step(pos + width)
501 Return:
502 if !matched {
503 freeOnePassMachine(m)
504 return nil
507 dstCap = append(dstCap, m.matchcap...)
508 freeOnePassMachine(m)
509 return dstCap
512 // doMatch reports whether either r, b or s match the regexp.
513 func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool {
514 return re.doExecute(r, b, s, 0, 0, nil) != nil
517 // doExecute finds the leftmost match in the input, appends the position
518 // of its subexpressions to dstCap and returns dstCap.
520 // nil is returned if no matches are found and non-nil if matches are found.
521 func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int {
522 if dstCap == nil {
523 // Make sure 'return dstCap' is non-nil.
524 dstCap = arrayNoInts[:0:0]
527 if r == nil && len(b)+len(s) < re.minInputLen {
528 return nil
531 if re.onepass != nil {
532 return re.doOnePass(r, b, s, pos, ncap, dstCap)
534 if r == nil && len(b)+len(s) < re.maxBitStateLen {
535 return re.backtrack(b, s, pos, ncap, dstCap)
538 m := re.get()
539 i, _ := m.inputs.init(r, b, s)
541 m.init(ncap)
542 if !m.match(i, pos) {
543 re.put(m)
544 return nil
547 dstCap = append(dstCap, m.matchcap...)
548 re.put(m)
549 return dstCap
552 // arrayNoInts is returned by doExecute match if nil dstCap is passed
553 // to it with ncap=0.
554 var arrayNoInts [0]int