store resultPointers instead of entire results, re-use capnproto buffer
[debiancodesearch.git] / regexp / regexp_test.go
blob08bde8dd6c3f6cdc5f5eb465a2ee377d5412e2a3
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package regexp
7 import (
8 "bytes"
9 "reflect"
10 "strings"
11 "testing"
// nstateTests lists the states that TestNstateEnc round-trips through
// nstate.enc and nstate.dec.
//
// q holds the ids added to the state's q set, in order; partial is the value
// stored in the state's partial field.
var nstateTests = []struct {
	q       []uint32
	partial rune
}{
	{[]uint32{1, 2, 3}, 1},
	{[]uint32{1}, 1},
	{[]uint32{}, 0},
	{[]uint32{1, 2, 8}, 0x10FFF},
}
24 func TestNstateEnc(t *testing.T) {
25 var n1, n2 nstate
26 n1.q.Init(10)
27 n2.q.Init(10)
28 for _, tt := range nstateTests {
29 n1.q.Reset()
30 n1.partial = tt.partial
31 for _, id := range tt.q {
32 n1.q.Add(id)
34 enc := n1.enc()
35 n2.dec(enc)
36 if n2.partial != n1.partial || !reflect.DeepEqual(n1.q.Dense(), n2.q.Dense()) {
37 t.Errorf("%v.enc.dec = %v", &n1, &n2)
// matchTests is the table driving TestMatch.
//
// re is the pattern (TestMatch compiles it with a "(?m)" prefix), s is the
// input text, and m is the slice of line numbers that grep is expected to
// return for s; nil means no line is expected to match.
var matchTests = []struct {
	re string
	s  string
	m  []int
}{
	// Adapted from go/src/pkg/regexp/find_test.go.
	{`a+`, "abc\ndef\nghi\n", []int{1}},
	{``, ``, []int{1}},
	{`^abcdefg`, "abcdefg", []int{1}},
	{`a+`, "baaab", []int{1}},
	{"abcd..", "abcdef", []int{1}},
	{`a`, "a", []int{1}},
	{`x`, "y", nil},
	{`b`, "abc", []int{1}},
	{`.`, "a", []int{1}},
	{`.*`, "abcdef", []int{1}},
	{`^`, "abcde", []int{1}},
	{`$`, "abcde", []int{1}},
	{`^abcd$`, "abcd", []int{1}},
	{`^bcd'`, "abcdef", nil},
	{`^abcd$`, "abcde", nil},
	{`a+`, "baaab", []int{1}},
	{`a*`, "baaab", []int{1}},
	{`[a-z]+`, "abcd", []int{1}},
	{`[^a-z]+`, "ab1234cd", []int{1}},
	{`[a\-\]z]+`, "az]-bcz", []int{1}},
	{`[^\n]+`, "abcd\n", []int{1}},
	{`[日本語]+`, "日本語日本語", []int{1}},
	{`日本語+`, "日本語", []int{1}},
	{`日本語+`, "日本語語語語", []int{1}},
	{`()`, "", []int{1}},
	{`(a)`, "a", []int{1}},
	{`(.)(.)`, "日a", []int{1}},
	{`(.*)`, "", []int{1}},
	{`(.*)`, "abcd", []int{1}},
	{`(..)(..)`, "abcd", []int{1}},
	{`(([^xyz]*)(d))`, "abcd", []int{1}},
	{`((a|b|c)*(d))`, "abcd", []int{1}},
	{`(((a|b|c)*)(d))`, "abcd", []int{1}},
	{`\a\f\r\t\v`, "\a\f\r\t\v", []int{1}},
	{`[\a\f\n\r\t\v]+`, "\a\f\r\t\v", []int{1}},

	{`a*(|(b))c*`, "aacc", []int{1}},
	{`(.*).*`, "ab", []int{1}},
	{`[.]`, ".", []int{1}},
	{`/$`, "/abc/", []int{1}},
	{`/$`, "/abc", nil},

	// multiple matches
	{`.`, "abc", []int{1}},
	{`(.)`, "abc", []int{1}},
	{`.(.)`, "abcd", []int{1}},
	{`ab*`, "abbaab", []int{1}},
	{`a(b*)`, "abbaab", []int{1}},

	// fixed bugs
	{`ab$`, "cab", []int{1}},
	{`axxb$`, "axxcb", nil},
	{`data`, "daXY data", []int{1}},
	{`da(.)a$`, "daXY data", []int{1}},
	{`zx+`, "zzx", []int{1}},
	{`ab$`, "abcab", []int{1}},
	{`(aa)*$`, "a", []int{1}},
	{`(?:.|(?:.a))`, "", nil},
	{`(?:A(?:A|a))`, "Aa", []int{1}},
	{`(?:A|(?:A|a))`, "a", []int{1}},
	{`(a){0}`, "", []int{1}},
	// {`(?-s)(?:(?:^).)`, "\n", nil},
	// {`(?s)(?:(?:^).)`, "\n", []int{1}},
	// {`(?:(?:^).)`, "\n", nil},
	{`\b`, "x", []int{1}},
	{`\b`, "xx", []int{1}},
	{`\b`, "x y", []int{1}},
	{`\b`, "xx yy", []int{1}},
	{`\B`, "x", nil},
	{`\B`, "xx", []int{1}},
	{`\B`, "x y", nil},
	{`\B`, "xx yy", []int{1}},
	{`(?im)^[abc]+$`, "abcABC", []int{1}},
	{`(?im)^[α]+$`, "αΑ", []int{1}},
	{`[Aa]BC`, "abc", nil},
	{`[Aa]bc`, "abc", []int{1}},

	// RE2 tests
	{`[^\S\s]`, "abcd", nil},
	{`[^\S[:space:]]`, "abcd", nil},
	{`[^\D\d]`, "abcd", nil},
	{`[^\D[:digit:]]`, "abcd", nil},
	{`(?i)\W`, "x", nil},
	{`(?i)\W`, "k", nil},
	{`(?i)\W`, "s", nil},

	// can backslash-escape any punctuation
	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, []int{1}},
	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, []int{1}},
	{"\\`", "`", []int{1}},
	{"[\\`]+", "`", []int{1}},

	// long set of matches (longer than startSize)
	{
		".",
		"qwertyuiopasdfghjklzxcvbnm1234567890",
		[]int{1},
	},
}
150 func TestMatch(t *testing.T) {
151 for _, tt := range matchTests {
152 re, err := Compile("(?m)" + tt.re)
153 if err != nil {
154 t.Errorf("Compile(%#q): %v", tt.re, err)
155 continue
157 b := []byte(tt.s)
158 lines := grep(re, b)
159 if !reflect.DeepEqual(lines, tt.m) {
160 t.Errorf("grep(%#q, %q) = %v, want %v", tt.re, tt.s, lines, tt.m)
165 func grep(re *Regexp, b []byte) []int {
166 var m []int
167 lineno := 1
168 for {
169 i := re.Match(b, true, true)
170 if i < 0 {
171 break
173 start := bytes.LastIndex(b[:i], nl) + 1
174 end := i + 1
175 if end > len(b) {
176 end = len(b)
178 lineno += bytes.Count(b[:start], nl)
179 m = append(m, lineno)
180 if start < end && b[end-1] == '\n' {
181 lineno++
183 b = b[end:]
184 if len(b) == 0 {
185 break
188 return m
191 var grepTests = []struct {
192 re string
193 s string
194 out string
195 err string
196 g Grep
198 {re: `a+`, s: "abc\ndef\nghalloo\n", out: "input:abc\ninput:ghalloo\n"},
199 {re: `x.*y`, s: "xay\nxa\ny\n", out: "input:xay\n"},
202 func TestGrep(t *testing.T) {
203 for i, tt := range grepTests {
204 re, err := Compile("(?m)" + tt.re)
205 if err != nil {
206 t.Errorf("Compile(%#q): %v", tt.re, err)
207 continue
209 g := tt.g
210 g.Regexp = re
211 var out, errb bytes.Buffer
212 g.Stdout = &out
213 g.Stderr = &errb
214 g.Reader(strings.NewReader(tt.s), "input")
215 if out.String() != tt.out || errb.String() != tt.err {
216 t.Errorf("#%d: grep(%#q, %q) = %q, %q, want %q, %q", i, tt.re, tt.s, out.String(), errb.String(), tt.out, tt.err)