1 # Future self: the field delimiter is an ASCII vertical bar (|). It is also a
2 # regex special character, but no test had used it yet. Nearly every
3 # other plain ASCII character had been used by a test. Characters
4 # outside the plain ASCII range have a risk of being mangled by modern
5 # editors. So, avoid using | in a test, or if needed, select a new
7 # 2.8.2 Regular Expression General Requirement
10 7|9|A#*::|A:A#:qA::qA#::qA##::q|
11 1|5|A#*::|A##::A#::qA::qA#:q|
12 # 2.8.3.1.2 BRE Special Characters
20 7|11|X\*Y\*8|Y*8X*8X*Y*8|
41 -1|-1|^*5<*9|5<9*5<*9|
45 1|6|A\(**9\)=|A***9=79|
51 ### GA113(2) GNU regex implements GA113(1)
53 ##-1|-1|\(^*ab\)|^*ab|
75 2|6|A\([34]$[34]\)B|XA4$3BY|
76 # 2.8.3.1.3 Periods in BREs
84 # 2.8.3.2 RE Bracket Expression
101 16|16|[^a-zA-Z0-9]|pqrstVWXYZ23579#|
105 3|3|[][.-.]-0]|ab0-]|
111 9|9|[^]]\{1,\}|]]]]]]]]X|
112 -1|-1|[^]]\{1,\}|]]]]]]]]|
115 2|8|[a-z]*[[.].]][A-Z]*|Abcd]DEFg|
121 3|3|[][.-.]-0]|ab0-]|
125 -2|-2|[[.ab.][.CD.][.EF.]]|yZabCDEFQ9|
132 #W the expected result for [[:alnum:]]* is 2-7, which is wrong: the pattern matches the empty string at the start
133 0|0|[[:alnum:]]*| aB28gH|
134 2|7|[[:alnum:]][[:alnum:]]*| aB28gH|
135 #W the expected result for [^[:alnum:]]* is 2-5 which is wrong
136 0|0|[^[:alnum:]]*|2 ,
\x7fa|
137 2|5|[^[:alnum:]][^[:alnum:]]*|2 ,
\x7fa|
138 #W the expected result for [[:alpha:]]* is 2-5 which is wrong
139 0|0|[[:alpha:]]*| aBgH2|
140 2|5|[[:alpha:]][[:alpha:]]*| aBgH2|
141 1|6|[^[:alpha:]]*|2 8,
\x7fa|
142 1|2|[[:blank:]]*|
\r\x7f|
143 1|8|[^[:blank:]]*|aB28gH,
\x7f |
144 1|2|[[:cntrl:]]*|
\x7f |
145 1|8|[^[:cntrl:]]*|aB2 8gh,|
146 #W the expected result for [[:digit:]]* is 2-3 which is wrong
147 0|0|[[:digit:]]*|a28|
148 2|3|[[:digit:]][[:digit:]]*|a28|
149 1|8|[^[:digit:]]*|aB gH,
\x7f|
150 1|7|[[:graph:]]*|aB28gH, |
151 1|3|[^[:graph:]]*|
\x7f,|
152 1|2|[[:lower:]]*|agB|
153 1|8|[^[:lower:]]*|B2 8H,
\x7fa|
154 1|8|[[:print:]]*|aB2 8gH, |
155 1|2|[^[:print:]]*|
\x7f |
156 #W the expected result for [[:punct:]]* is 2-2 which is wrong
157 0|0|[[:punct:]]*|a,2|
158 2|3|[[:punct:]][[:punct:]]*|a,,2|
159 1|9|[^[:punct:]]*|aB2 8gH
\x7f|
160 1|3|[[:space:]]*|
\r\x7f|
161 #W the expected result for [^[:space:]]* is 2-9 which is wrong
162 0|0|[^[:space:]]*| aB28gH,
\x7f |
163 2|9|[^[:space:]][^[:space:]]*| aB28gH,
\x7f |
164 #W the expected result for [[:upper:]]* is 2-3 which is wrong
165 0|0|[[:upper:]]*|aBH2|
166 2|3|[[:upper:]][[:upper:]]*|aBH2|
167 1|8|[^[:upper:]]*|a2 8g,
\x7fB|
168 #W the expected result for [[:xdigit:]]* is 2-5 which is wrong
169 0|0|[[:xdigit:]]*|gaB28h|
170 2|5|[[:xdigit:]][[:xdigit:]]*|gaB28h|
171 #W the expected result for [^[:xdigit:]]* is 2-7, which is wrong; the test below uses the one-or-more form instead
172 2|7|[^[:xdigit:]][^[:xdigit:]]*|a gH,
\x7f2|
184 3|3|[][.-.]-0]|ab0-]|
186 2|6|bc[d-w]xy|abchxyz|
189 -1|-1|[a-ce-f]|dBCCdE|
190 2|4|b[n-zA-M]Y|absY9Z|
191 2|4|b[n-zA-M]Y|abGY9Z|
202 2|4|a[^ac-]c|5abcde-|
206 4|6|X[^---]Y|X-YXaYXbY|
207 # 2.8.3.3 BREs Matching Multiple Characters
211 -1|-1|[a-c][e-f]|abcdef|
212 3|4|[a-c][e-f]|acbedf|
213 4|8|abc*XYZ|890abXYZ#*|
214 4|9|abc*XYZ|890abcXYZ#*|
215 4|15|abc*XYZ|890abcccccccXYZ#*|
216 -1|-1|abc*XYZ|890abc*XYZ#*|
220 1|10|\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)|abcdefghijk|
221 3|8|43\(2\(6\)*0\)AB|654320ABCD|
222 3|9|43\(2\(7\)*0\)AB|6543270ABCD|
223 3|12|43\(2\(7\)*0\)AB|6543277770ABCD|
225 1|10|\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)|abcdefghijk|
226 -1|-1|\(a\(b\(c\(d\(e\(f\(g\)h\(i\(k\)\)\)\)\)\)\)\)|abcdefghijk|
230 1|6|a\(.*b\)|ababbbc|
232 1|20|a\(.*b\)c|axcaxbbbcsxbbbbbbbbc|
234 1|7|\(a\(b\(c\(d\(e\)\)\)\)\)\4|abcdededede|
235 #W POSIX does not really specify whether a\(b\)*c\1 matches acb.
236 #W Back references are supposed to expand to the last match, but what
237 #W if there never was a match, as in this case?
238 -1|-1|a\(b\)*c\1|acb|
239 1|11|\(a\(b\(c\(d\(e\(f\(g\)h\(i\(j\)\)\)\)\)\)\)\)\9|abcdefghijjk|
241 #W These two tests have the same problem as the test in GA135: if a
242 #W subexpression never matched, why should the back reference be usable?
243 #W 1 2 a\(b\)*c\1 acb
244 #W 4 7 a\(b\(c\(d\(f\)*\)\)\)\4|xYzabcdePQRST
245 -1|-1|a\(b\)*c\1|acb|
246 -1|-1|a\(b\(c\(d\(f\)*\)\)\)\4|xYzabcdePQRST|
248 -2|-2|\(a\(b\)\)\3|foo|
249 -2|-2|\(a\(b\)\)\(a\(b\)\)\5|foo|
252 1|10|a.*b|abababvbabc|
253 2|5|b*c|abbbcdeabbbbbbcde|
254 2|5|bbb*c|abbbcdeabbbbbbcde|
255 1|5|a\(b\)*c\1|abbcbbb|
256 -1|-1|a\(b\)*c\1|abbdbd|
257 0|0|\([a-c]*\)\1|abcacdef|
258 1|6|\([a-c]*\)\1|abcabcabcd|
263 1|7|\([a-c]*\)\{0,\}|aabcaab|
264 1|2|\(a\)\1\{1,2\}|aabc|
265 1|3|\(a\)\1\{1,2\}|aaaabc|
266 #W the expression \(\(a\)\1\)\{1,2\} is ill-formed (\1 refers to a group that is not yet closed); the test below uses \2 instead
267 1|4|\(\(a\)\2\)\{1,2\}|aaaabc|
272 1|64|a\{64\}|aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa|
274 1|7|\([a-c]*\)\{0,\}|aabcaab|
275 #W the expected result for \([a-c]*\)\{2,\} is failure which isn't correct
276 1|3|\([a-c]*\)\{2,\}|abcdefg|
277 1|3|\([a-c]*\)\{1,\}|abcdefg|
278 -1|-1|a\{64,\}|aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa|
282 0|0|\([a-c]*\)\{0,0\}|foo|
283 1|63|a\{1,63\}|aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa|
284 # 2.8.3.4 BRE Precedence
286 #W There are numerous bugs in the original version.
287 2|19|\^\[[[.].]]\\(\\1\\)\*\\{1,2\\}\$|a^[]\(\1\)*\{1,2\}$b|
288 1|6|[[=*=]][[=\=]][[=]=]][[===]][[...]][[:punct:]]|*\]=.;|
289 1|6|[$\(*\)^]*|$\()*^|
292 #W the expected result for \(*\)*\1* is 2-2 which isn't correct
293 0|0|\(*\)*\1*|a*b*11|
294 2|3|\(*\)*\1*b|a*b*11|
295 #W the expected result for \(a\(b\{1,2\}\)\{1,2\}\) is 1-5 which isn't correct
296 1|3|\(a\(b\{1,2\}\)\{1,2\}\)|abbab|
297 1|5|\(a\(b\{1,2\}\)\)\{1,2\}|abbab|
298 1|1|^\(^\(^a$\)$\)$|a|
302 # 2.8.3.5 BRE Expression Anchoring
306 -1|-1|^[a-zA-Z]|99Nine|
307 1|4|^[a-zA-Z]*|Nine99|
310 -1|-1|\(^a\)\1|^a^abc|
314 -1|-1|\(^def\)|abcdef|
315 ### GA145(2) GNU regex implements GA145(1)
316 ##-1|-1|\(^a\)\1|aabc|
317 ##1|4|\(^a\)\1|^a^abc|
324 #W the expected result for [a-z]*$ is failure which isn't correct
325 10|9|[a-z]*$|99ZZxyz99|
335 -1|-1|\(abc$\)|abcdef|
336 ### GA147(2) GNU regex implements GA147(1)
337 ##-1|-1|\(a$\)\1|bcaa|
338 ##2|5|\(a$\)\1|ba$a$|
346 1|9|^[a-zA-Z0-9]*$|2aA3bB9zZ|
347 -1|-1|^[a-z0-9]*$|2aA3b#B9zZ|