<machine/stdint.h>: Check __cplusplus first.
[dragonfly.git] / contrib / nvi2 / regex / regcomp.c
blobcec7ba0796888908ce64789d220e237c63d8274a
1 /* $NetBSD: regcomp.c,v 1.7 2011/11/19 17:45:11 tnozaki Exp $ */
3 /*-
4 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
5 * Copyright (c) 1992, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
8 * This code is derived from software contributed to Berkeley by
9 * Henry Spencer of the University of Toronto.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
35 * @(#)regcomp.c 8.4 (Berkeley) 3/19/94
38 #if defined(LIBC_SCCS) && !defined(lint)
39 static char sccsid[] = "@(#)regcomp.c 8.4 (Berkeley) 3/19/94";
40 #endif /* LIBC_SCCS and not lint */
42 #include <sys/types.h>
43 #include <stdio.h>
44 #include <string.h>
45 #include <ctype.h>
46 #include <limits.h>
47 #include <stdlib.h>
48 #include <regex.h>
50 #include "utils.h"
51 #include "regex2.h"
53 #include "cclass.h"
54 #include "cname.h"
57 * parse structure, passed up and down to avoid global variables and
58 * other clumsinesses
60 struct parse {
61 RCHAR_T *next; /* next character in RE */
62 RCHAR_T *end; /* end of string (-> NUL normally) */
63 int error; /* has an error been seen? */
64 sop *strip; /* malloced strip */
65 RCHAR_T *stripdata; /* malloced stripdata */
66 sopno ssize; /* malloced strip size (allocated) */
67 sopno slen; /* malloced strip length (used) */
68 int ncsalloc; /* number of csets allocated */
69 struct re_guts *g;
70 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
71 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
72 sopno pend[NPAREN]; /* -> ) ([0] unused) */
75 /* ========= begin header generated by ./mkh ========= */
76 #ifdef __cplusplus
77 extern "C" {
78 #endif
80 /* === regcomp.c === */
81 static void p_ere(struct parse *p, int stop, size_t reclimit);
82 static void p_ere_exp(struct parse *p, size_t reclimit);
83 static void p_str(struct parse *p);
84 static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
85 static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
86 static int p_count(struct parse *p);
87 static void p_bracket(struct parse *p);
88 static void p_b_term(struct parse *p, cset *cs);
89 static void p_b_cclass(struct parse *p, cset *cs);
90 static void p_b_eclass(struct parse *p, cset *cs);
91 static char p_b_symbol(struct parse *p);
92 static char p_b_coll_elem(struct parse *p, int endc);
93 static char othercase(int ch);
94 static void bothcases(struct parse *p, int ch);
95 static void ordinary(struct parse *p, int ch);
96 static void nonnewline(struct parse *p);
97 static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
98 static int seterr(struct parse *p, int e);
99 static cset *allocset(struct parse *p);
100 static void freeset(struct parse *p, cset *cs);
101 static int freezeset(struct parse *p, cset *cs);
102 static int firstch(struct parse *p, cset *cs);
103 static int nch(struct parse *p, cset *cs);
104 static void mcadd(struct parse *p, cset *cs, const char *cp);
105 #ifdef notdef
106 static void mcsub(cset *cs, char *cp);
107 static int mcin(cset *cs, char *cp);
108 static char *mcfind(cset *cs, char *cp);
109 #endif
110 static void mcinvert(struct parse *p, cset *cs);
111 static void mccase(struct parse *p, cset *cs);
112 #ifdef notdef
113 static int isinsets(struct re_guts *g, int c);
114 static int samesets(struct re_guts *g, int c1, int c2);
115 #endif
116 static void categorize(struct parse *p, struct re_guts *g);
117 static sopno dupl(struct parse *p, sopno start, sopno finish);
118 static void doemit(struct parse *p, sop op, size_t opnd);
119 static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
120 static void dofwd(struct parse *p, sopno pos, sop value);
121 static int enlarge(struct parse *p, sopno size);
122 static void stripsnug(struct parse *p, struct re_guts *g);
123 static void findmust(struct parse *p, struct re_guts *g);
124 static sopno pluscount(struct parse *p, struct re_guts *g);
126 #ifdef __cplusplus
128 #endif
129 /* ========= end header generated by ./mkh ========= */
131 static RCHAR_T nuls[10]; /* place to point scanner in event of error */
134 * macros for use with parse structure
135 * BEWARE: these know that the parse structure is named `p' !!!
137 #define PEEK() (*p->next)
138 #define PEEK2() (*(p->next+1))
139 #define MORE() (p->next < p->end)
140 #define MORE2() (p->next+1 < p->end)
141 #define SEE(c) (MORE() && PEEK() == (c))
142 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
143 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
144 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
145 #define NEXT() (p->next++)
146 #define NEXT2() (p->next += 2)
147 #define NEXTn(n) (p->next += (n))
148 #define GETNEXT() (*p->next++)
149 #define SETERROR(e) seterr(p, (e))
150 #define REQUIRE(co, e) ((co) || SETERROR(e))
151 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
152 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
153 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
154 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
155 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
156 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
157 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
158 #define HERE() (p->slen)
159 #define THERE() (p->slen - 1)
160 #define THERETHERE() (p->slen - 2)
161 #define DROP(n) (p->slen -= (n))
163 #ifndef NDEBUG
164 static int never = 0; /* for use in asserts; shuts lint up */
165 #else
166 #define never 0 /* some <assert.h>s have bugs too */
167 #endif
169 #define MEMLIMIT 0x8000000
170 #define MEMSIZE(p) \
171 ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
172 (p)->ncsalloc * sizeof(cset) + \
173 (p)->ssize * sizeof(sop))
174 #define RECLIMIT 256
177 - regcomp - interface for parser and compilation
179 int /* 0 success, otherwise REG_something */
180 regcomp(regex_t *preg, const RCHAR_T *pattern, int cflags)
182 struct parse pa;
183 struct re_guts *g;
184 struct parse *p = &pa;
185 int i;
186 size_t len;
187 #ifdef REDEBUG
188 # define GOODFLAGS(f) (f)
189 #else
190 # define GOODFLAGS(f) ((f)&~REG_DUMP)
191 #endif
193 cflags = GOODFLAGS(cflags);
194 if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
195 return(REG_INVARG);
197 if (cflags&REG_PEND) {
198 if (preg->re_endp < pattern)
199 return(REG_INVARG);
200 len = preg->re_endp - pattern;
201 } else
202 len = STRLEN(pattern);
204 /* do the mallocs early so failure handling is easy */
205 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
206 (NC-1)*sizeof(cat_t));
207 if (g == NULL)
208 return(REG_ESPACE);
209 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
210 p->strip = (sop *)malloc(p->ssize * sizeof(sop));
211 if (p->strip == NULL) {
212 free((char *)g);
213 return(REG_ESPACE);
215 p->stripdata = (RCHAR_T *)malloc(p->ssize * sizeof(RCHAR_T));
216 if (p->stripdata == NULL) {
217 free((char *)p->strip);
218 free((char *)g);
219 return(REG_ESPACE);
221 p->slen = 0;
223 /* set things up */
224 p->g = g;
225 p->next = (RCHAR_T *)pattern; /* convenience; we do not modify it */
226 p->end = p->next + len;
227 p->error = 0;
228 p->ncsalloc = 0;
229 for (i = 0; i < NPAREN; i++) {
230 p->pbegin[i] = 0;
231 p->pend[i] = 0;
233 g->csetsize = NC;
234 g->sets = NULL;
235 g->setbits = NULL;
236 g->ncsets = 0;
237 g->cflags = cflags;
238 g->iflags = 0;
239 g->nbol = 0;
240 g->neol = 0;
241 g->must = NULL;
242 g->mlen = 0;
243 g->nsub = 0;
244 #if 0
245 g->ncategories = 1; /* category 0 is "everything else" */
246 g->categories = &g->catspace[-(CHAR_MIN)];
247 memset((char *)g->catspace, 0, NC*sizeof(cat_t));
248 #endif
249 g->backrefs = 0;
251 /* do it */
252 EMIT(OEND, 0);
253 g->firststate = THERE();
254 if (cflags&REG_EXTENDED)
255 p_ere(p, OUT, 0);
256 else if (cflags&REG_NOSPEC)
257 p_str(p);
258 else
259 p_bre(p, OUT, OUT, 0);
260 EMIT(OEND, 0);
261 g->laststate = THERE();
263 /* tidy up loose ends and fill things in */
264 categorize(p, g);
265 stripsnug(p, g);
266 findmust(p, g);
267 g->nplus = pluscount(p, g);
268 g->magic = MAGIC2;
269 preg->re_nsub = g->nsub;
270 preg->re_g = g;
271 preg->re_magic = MAGIC1;
272 #ifndef REDEBUG
273 /* not debugging, so can't rely on the assert() in regexec() */
274 if (g->iflags&BAD)
275 SETERROR(REG_ASSERT);
276 #endif
278 /* win or lose, we're done */
279 if (p->error != 0) /* lose */
280 regfree(preg);
281 return(p->error);
285 - p_ere - ERE parser top level, concatenation and alternation
287 static void
288 p_ere(struct parse *p, int stop, size_t reclimit)
289 /* character this ERE should end at */
291 char c;
292 sopno prevback = 0;
293 sopno prevfwd = 0;
294 sopno conc;
295 int first = 1; /* is this the first alternative? */
297 if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
298 p->error = REG_ESPACE;
299 return;
302 for (;;) {
303 /* do a bunch of concatenated expressions */
304 conc = HERE();
305 while (MORE() && (c = PEEK()) != '|' && c != stop)
306 p_ere_exp(p, reclimit);
307 (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
309 if (!EAT('|'))
310 break; /* NOTE BREAK OUT */
312 if (first) {
313 INSERT(OCH_, conc); /* offset is wrong */
314 prevfwd = conc;
315 prevback = conc;
316 first = 0;
318 ASTERN(OOR1, prevback);
319 prevback = THERE();
320 AHEAD(prevfwd); /* fix previous offset */
321 prevfwd = HERE();
322 EMIT(OOR2, 0); /* offset is very wrong */
325 if (!first) { /* tail-end fixups */
326 AHEAD(prevfwd);
327 ASTERN(O_CH, prevback);
330 assert(!MORE() || SEE(stop));
334 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
336 static void
337 p_ere_exp(struct parse *p, size_t reclimit)
339 char c;
340 sopno pos;
341 int count;
342 int count2;
343 sopno subno;
344 int wascaret = 0;
346 assert(MORE()); /* caller should have ensured this */
347 c = GETNEXT();
349 pos = HERE();
350 switch (c) {
351 case '(':
352 (void)REQUIRE(MORE(), REG_EPAREN);
353 p->g->nsub++;
354 subno = p->g->nsub;
355 if (subno < NPAREN)
356 p->pbegin[subno] = HERE();
357 EMIT(OLPAREN, subno);
358 if (!SEE(')'))
359 p_ere(p, ')', reclimit);
360 if (subno < NPAREN) {
361 p->pend[subno] = HERE();
362 assert(p->pend[subno] != 0);
364 EMIT(ORPAREN, subno);
365 (void)MUSTEAT(')', REG_EPAREN);
366 break;
367 #ifndef POSIX_MISTAKE
368 case ')': /* happens only if no current unmatched ( */
370 * You may ask, why the ifndef? Because I didn't notice
371 * this until slightly too late for 1003.2, and none of the
372 * other 1003.2 regular-expression reviewers noticed it at
373 * all. So an unmatched ) is legal POSIX, at least until
374 * we can get it fixed.
376 SETERROR(REG_EPAREN);
377 break;
378 #endif
379 case '^':
380 EMIT(OBOL, 0);
381 p->g->iflags |= USEBOL;
382 p->g->nbol++;
383 wascaret = 1;
384 break;
385 case '$':
386 EMIT(OEOL, 0);
387 p->g->iflags |= USEEOL;
388 p->g->neol++;
389 break;
390 case '|':
391 SETERROR(REG_EMPTY);
392 break;
393 case '*':
394 case '+':
395 case '?':
396 SETERROR(REG_BADRPT);
397 break;
398 case '.':
399 if (p->g->cflags&REG_NEWLINE)
400 nonnewline(p);
401 else
402 EMIT(OANY, 0);
403 break;
404 case '[':
405 p_bracket(p);
406 break;
407 case '\\':
408 (void)REQUIRE(MORE(), REG_EESCAPE);
409 c = GETNEXT();
410 ordinary(p, c);
411 break;
412 case '{': /* okay as ordinary except if digit follows */
413 (void)REQUIRE(!MORE() || !ISDIGIT((UCHAR_T)PEEK()), REG_BADRPT);
414 /* FALLTHROUGH */
415 default:
416 ordinary(p, c);
417 break;
420 if (!MORE())
421 return;
422 c = PEEK();
423 /* we call { a repetition if followed by a digit */
424 if (!( c == '*' || c == '+' || c == '?' ||
425 (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ))
426 return; /* no repetition, we're done */
427 NEXT();
429 (void)REQUIRE(!wascaret, REG_BADRPT);
430 switch (c) {
431 case '*': /* implemented as +? */
432 /* this case does not require the (y|) trick, noKLUDGE */
433 INSERT(OPLUS_, pos);
434 ASTERN(O_PLUS, pos);
435 INSERT(OQUEST_, pos);
436 ASTERN(O_QUEST, pos);
437 break;
438 case '+':
439 INSERT(OPLUS_, pos);
440 ASTERN(O_PLUS, pos);
441 break;
442 case '?':
443 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
444 INSERT(OCH_, pos); /* offset slightly wrong */
445 ASTERN(OOR1, pos); /* this one's right */
446 AHEAD(pos); /* fix the OCH_ */
447 EMIT(OOR2, 0); /* offset very wrong... */
448 AHEAD(THERE()); /* ...so fix it */
449 ASTERN(O_CH, THERETHERE());
450 break;
451 case '{':
452 count = p_count(p);
453 if (EAT(',')) {
454 if (ISDIGIT((UCHAR_T)PEEK())) {
455 count2 = p_count(p);
456 (void)REQUIRE(count <= count2, REG_BADBR);
457 } else /* single number with comma */
458 count2 = INFINITY;
459 } else /* just a single number */
460 count2 = count;
461 repeat(p, pos, count, count2, 0);
462 if (!EAT('}')) { /* error heuristics */
463 while (MORE() && PEEK() != '}')
464 NEXT();
465 (void)REQUIRE(MORE(), REG_EBRACE);
466 SETERROR(REG_BADBR);
468 break;
471 if (!MORE())
472 return;
473 c = PEEK();
474 if (!( c == '*' || c == '+' || c == '?' ||
475 (c == '{' && MORE2() && ISDIGIT((UCHAR_T)PEEK2())) ) )
476 return;
477 SETERROR(REG_BADRPT);
481 - p_str - string (no metacharacters) "parser"
483 static void
484 p_str(struct parse *p)
486 (void)REQUIRE(MORE(), REG_EMPTY);
487 while (MORE())
488 ordinary(p, GETNEXT());
492 - p_bre - BRE parser top level, anchoring and concatenation
493 * Giving end1 as OUT essentially eliminates the end1/end2 check.
495 * This implementation is a bit of a kludge, in that a trailing $ is first
496 * taken as an ordinary character and then revised to be an anchor. The
497 * only undesirable side effect is that '$' gets included as a character
498 * category in such cases. This is fairly harmless; not worth fixing.
499 * The amount of lookahead needed to avoid this kludge is excessive.
501 static void
502 p_bre(struct parse *p,
503 int end1, /* first terminating character */
504 int end2, /* second terminating character */
505 size_t reclimit)
507 sopno start;
508 int first = 1; /* first subexpression? */
509 int wasdollar = 0;
511 if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
512 p->error = REG_ESPACE;
513 return;
516 start = HERE();
518 if (EAT('^')) {
519 EMIT(OBOL, 0);
520 p->g->iflags |= USEBOL;
521 p->g->nbol++;
523 while (MORE() && !SEETWO(end1, end2)) {
524 wasdollar = p_simp_re(p, first, reclimit);
525 first = 0;
527 if (wasdollar) { /* oops, that was a trailing anchor */
528 DROP(1);
529 EMIT(OEOL, 0);
530 p->g->iflags |= USEEOL;
531 p->g->neol++;
534 (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
538 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
540 static int /* was the simple RE an unbackslashed $? */
541 p_simp_re(struct parse *p,
542 int starordinary, /* is a leading * an ordinary character? */
543 size_t reclimit)
545 int c;
546 int count;
547 int count2;
548 sopno pos;
549 int i;
550 sopno subno;
551 int backsl;
553 pos = HERE(); /* repetion op, if any, covers from here */
555 assert(MORE()); /* caller should have ensured this */
556 c = GETNEXT();
557 backsl = c == '\\';
558 if (backsl) {
559 (void)REQUIRE(MORE(), REG_EESCAPE);
560 c = (unsigned char)GETNEXT();
561 switch (c) {
562 case '{':
563 SETERROR(REG_BADRPT);
564 break;
565 case '(':
566 p->g->nsub++;
567 subno = p->g->nsub;
568 if (subno < NPAREN)
569 p->pbegin[subno] = HERE();
570 EMIT(OLPAREN, subno);
571 /* the MORE here is an error heuristic */
572 if (MORE() && !SEETWO('\\', ')'))
573 p_bre(p, '\\', ')', reclimit);
574 if (subno < NPAREN) {
575 p->pend[subno] = HERE();
576 assert(p->pend[subno] != 0);
578 EMIT(ORPAREN, subno);
579 (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
580 break;
581 case ')': /* should not get here -- must be user */
582 case '}':
583 SETERROR(REG_EPAREN);
584 break;
585 case '1':
586 case '2':
587 case '3':
588 case '4':
589 case '5':
590 case '6':
591 case '7':
592 case '8':
593 case '9':
594 i = c - '0';
595 assert(i < NPAREN);
596 if (p->pend[i] != 0) {
597 assert(i <= p->g->nsub);
598 EMIT(OBACK_, i);
599 assert(p->pbegin[i] != 0);
600 assert(p->strip[p->pbegin[i]] == OLPAREN);
601 assert(p->strip[p->pend[i]] == ORPAREN);
602 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
603 EMIT(O_BACK, i);
604 } else
605 SETERROR(REG_ESUBREG);
606 p->g->backrefs = 1;
607 break;
608 default:
609 ordinary(p, c);
610 break;
612 } else {
613 switch (c) {
614 case '.':
615 if (p->g->cflags&REG_NEWLINE)
616 nonnewline(p);
617 else
618 EMIT(OANY, 0);
619 break;
620 case '[':
621 p_bracket(p);
622 break;
623 case '*':
624 (void)REQUIRE(starordinary, REG_BADRPT);
625 /* FALLTHROUGH */
626 default:
627 ordinary(p, c);
628 break;
632 if (EAT('*')) { /* implemented as +? */
633 /* this case does not require the (y|) trick, noKLUDGE */
634 INSERT(OPLUS_, pos);
635 ASTERN(O_PLUS, pos);
636 INSERT(OQUEST_, pos);
637 ASTERN(O_QUEST, pos);
638 } else if (EATTWO('\\', '{')) {
639 count = p_count(p);
640 if (EAT(',')) {
641 if (MORE() && ISDIGIT((UCHAR_T)PEEK())) {
642 count2 = p_count(p);
643 (void)REQUIRE(count <= count2, REG_BADBR);
644 } else /* single number with comma */
645 count2 = INFINITY;
646 } else /* just a single number */
647 count2 = count;
648 repeat(p, pos, count, count2, reclimit);
649 if (!EATTWO('\\', '}')) { /* error heuristics */
650 while (MORE() && !SEETWO('\\', '}'))
651 NEXT();
652 (void)REQUIRE(MORE(), REG_EBRACE);
653 SETERROR(REG_BADBR);
655 } else if (!backsl && c == (unsigned char)'$') /* $ (but not \$) ends it */
656 return(1);
658 return(0);
662 - p_count - parse a repetition count
664 static int /* the value */
665 p_count(struct parse *p)
667 int count = 0;
668 int ndigits = 0;
670 while (MORE() && ISDIGIT((UCHAR_T)PEEK()) && count <= DUPMAX) {
671 count = count*10 + (GETNEXT() - '0');
672 ndigits++;
675 (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
676 return(count);
680 - p_bracket - parse a bracketed character list
682 * Note a significant property of this code: if the allocset() did SETERROR,
683 * no set operations are done.
685 static void
686 p_bracket(struct parse *p)
688 cset *cs;
689 int invert = 0;
690 static RCHAR_T bow[] = { '[', ':', '<', ':', ']', ']' };
691 static RCHAR_T eow[] = { '[', ':', '>', ':', ']', ']' };
693 cs = allocset(p);
694 if (cs == NULL)
695 return;
697 /* Dept of Truly Sickening Special-Case Kludges */
698 if (p->next + 5 < p->end && MEMCMP(p->next, bow, 6) == 0) {
699 EMIT(OBOW, 0);
700 NEXTn(6);
701 return;
703 if (p->next + 5 < p->end && MEMCMP(p->next, eow, 6) == 0) {
704 EMIT(OEOW, 0);
705 NEXTn(6);
706 return;
709 if (EAT('^'))
710 invert++; /* make note to invert set at end */
711 if (EAT(']'))
712 CHadd(cs, ']');
713 else if (EAT('-'))
714 CHadd(cs, '-');
715 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
716 p_b_term(p, cs);
717 if (EAT('-'))
718 CHadd(cs, '-');
719 (void)MUSTEAT(']', REG_EBRACK);
721 if (p->error != 0) /* don't mess things up further */
722 return;
724 if (p->g->cflags&REG_ICASE) {
725 int i;
726 int ci;
728 for (i = p->g->csetsize - 1; i >= 0; i--)
729 if (CHIN(cs, i) && isalpha(i)) {
730 ci = othercase(i);
731 if (ci != i)
732 CHadd(cs, ci);
734 if (cs->multis != NULL)
735 mccase(p, cs);
737 if (invert) {
738 int i;
740 for (i = p->g->csetsize - 1; i >= 0; i--)
741 if (CHIN(cs, i))
742 CHsub(cs, i);
743 else
744 CHadd(cs, i);
745 if (p->g->cflags&REG_NEWLINE)
746 CHsub(cs, '\n');
747 if (cs->multis != NULL)
748 mcinvert(p, cs);
751 assert(cs->multis == NULL); /* xxx */
753 if (nch(p, cs) == 1) { /* optimize singleton sets */
754 ordinary(p, firstch(p, cs));
755 freeset(p, cs);
756 } else
757 EMIT(OANYOF, freezeset(p, cs));
761 - p_b_term - parse one term of a bracketed character list
763 static void
764 p_b_term(struct parse *p, cset *cs)
766 char c;
767 char start, finish;
768 int i;
770 /* classify what we've got */
771 switch ((MORE()) ? PEEK() : '\0') {
772 case '[':
773 c = (MORE2()) ? PEEK2() : '\0';
774 break;
775 case '-':
776 SETERROR(REG_ERANGE);
777 return; /* NOTE RETURN */
778 break;
779 default:
780 c = '\0';
781 break;
784 switch (c) {
785 case ':': /* character class */
786 NEXT2();
787 (void)REQUIRE(MORE(), REG_EBRACK);
788 c = PEEK();
789 (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
790 p_b_cclass(p, cs);
791 (void)REQUIRE(MORE(), REG_EBRACK);
792 (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
793 break;
794 case '=': /* equivalence class */
795 NEXT2();
796 (void)REQUIRE(MORE(), REG_EBRACK);
797 c = PEEK();
798 (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
799 p_b_eclass(p, cs);
800 (void)REQUIRE(MORE(), REG_EBRACK);
801 (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
802 break;
803 default: /* symbol, ordinary character, or range */
804 /* xxx revision needed for multichar stuff */
805 start = p_b_symbol(p);
806 if (SEE('-') && MORE2() && PEEK2() != ']') {
807 /* range */
808 NEXT();
809 if (EAT('-'))
810 finish = '-';
811 else
812 finish = p_b_symbol(p);
813 } else
814 finish = start;
815 /* xxx what about signed chars here... */
816 (void)REQUIRE(start <= finish, REG_ERANGE);
817 for (i = start; i <= finish; i++)
818 CHadd(cs, i);
819 break;
824 - p_b_cclass - parse a character-class name and deal with it
826 static void
827 p_b_cclass(struct parse *p, cset *cs)
829 RCHAR_T *sp = p->next;
830 struct cclass *cp;
831 size_t len;
832 const char *u;
833 char c;
835 while (MORE() && isalpha(PEEK()))
836 NEXT();
837 len = p->next - sp;
838 for (cp = cclasses; cp->name != NULL; cp++)
839 if (STRLEN(cp->name) == len && !MEMCMP(cp->name, sp, len))
840 break;
841 if (cp->name == NULL) {
842 /* oops, didn't find it */
843 SETERROR(REG_ECTYPE);
844 return;
847 u = cp->chars;
848 while ((c = *u++) != '\0')
849 CHadd(cs, c);
850 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
851 MCadd(p, cs, u);
855 - p_b_eclass - parse an equivalence-class name and deal with it
857 * This implementation is incomplete. xxx
859 static void
860 p_b_eclass(struct parse *p, cset *cs)
862 char c;
864 c = p_b_coll_elem(p, '=');
865 CHadd(cs, c);
869 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
871 static char /* value of symbol */
872 p_b_symbol(struct parse *p)
874 char value;
876 (void)REQUIRE(MORE(), REG_EBRACK);
877 if (!EATTWO('[', '.'))
878 return(GETNEXT());
880 /* collating symbol */
881 value = p_b_coll_elem(p, '.');
882 (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
883 return(value);
887 - p_b_coll_elem - parse a collating-element name and look it up
889 static char /* value of collating element */
890 p_b_coll_elem(struct parse *p, int endc)
892 /* name ended by endc,']' */
894 RCHAR_T *sp = p->next;
895 struct cname *cp;
896 size_t len;
898 while (MORE() && !SEETWO(endc, ']'))
899 NEXT();
900 if (!MORE()) {
901 SETERROR(REG_EBRACK);
902 return(0);
904 len = p->next - sp;
905 for (cp = cnames; cp->name != NULL; cp++)
906 if (STRLEN(cp->name) == len && MEMCMP(cp->name, sp, len))
907 return(cp->code); /* known name */
908 if (len == 1)
909 return(*sp); /* single character */
910 SETERROR(REG_ECOLLATE); /* neither */
911 return(0);
915 - othercase - return the case counterpart of an alphabetic
917 static char /* if no counterpart, return ch */
918 othercase(int ch)
920 assert(isalpha(ch));
921 if (isupper(ch))
922 return(tolower(ch));
923 else if (islower(ch))
924 return(toupper(ch));
925 else /* peculiar, but could happen */
926 return(ch);
930 - bothcases - emit a dualcase version of a two-case character
932 * Boy, is this implementation ever a kludge...
934 static void
935 bothcases(struct parse *p, int ch)
937 RCHAR_T *oldnext = p->next;
938 RCHAR_T *oldend = p->end;
939 RCHAR_T bracket[3];
941 assert(othercase(ch) != ch); /* p_bracket() would recurse */
942 p->next = bracket;
943 p->end = bracket+2;
944 bracket[0] = ch;
945 bracket[1] = ']';
946 bracket[2] = '\0';
947 p_bracket(p);
948 assert(p->next == bracket+2);
949 p->next = oldnext;
950 p->end = oldend;
954 - ordinary - emit an ordinary character
956 static void
957 ordinary(struct parse *p, int ch)
960 cat_t *cap = p->g->categories;
963 if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
964 bothcases(p, ch);
965 else {
966 EMIT(OCHAR, (UCHAR_T)ch);
968 if (cap[ch] == 0)
969 cap[ch] = p->g->ncategories++;
975 - nonnewline - emit REG_NEWLINE version of OANY
977 * Boy, is this implementation ever a kludge...
979 static void
980 nonnewline(struct parse *p)
982 RCHAR_T *oldnext = p->next;
983 RCHAR_T *oldend = p->end;
984 RCHAR_T bracket[4];
986 p->next = bracket;
987 p->end = bracket+3;
988 bracket[0] = '^';
989 bracket[1] = '\n';
990 bracket[2] = ']';
991 bracket[3] = '\0';
992 p_bracket(p);
993 assert(p->next == bracket+3);
994 p->next = oldnext;
995 p->end = oldend;
999 - repeat - generate code for a bounded repetition, recursively if needed
1001 static void
1002 repeat(struct parse *p,
1003 sopno start, /* operand from here to end of strip */
1004 int from, /* repeated from this number */
1005 int to, /* to this number of times (maybe INFINITY) */
1006 size_t reclimit)
1008 sopno finish;
1009 # define N 2
1010 # define INF 3
1011 # define REP(f, t) ((f)*8 + (t))
1012 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1013 sopno copy;
1015 if (reclimit++ > RECLIMIT)
1016 p->error = REG_ESPACE;
1017 if (p->error)
1018 return;
1020 finish = HERE();
1022 assert(from <= to);
1024 switch (REP(MAP(from), MAP(to))) {
1025 case REP(0, 0): /* must be user doing this */
1026 DROP(finish-start); /* drop the operand */
1027 break;
1028 case REP(0, 1): /* as x{1,1}? */
1029 case REP(0, N): /* as x{1,n}? */
1030 case REP(0, INF): /* as x{1,}? */
1031 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1032 INSERT(OCH_, start); /* offset is wrong... */
1033 repeat(p, start+1, 1, to, reclimit);
1034 ASTERN(OOR1, start);
1035 AHEAD(start); /* ... fix it */
1036 EMIT(OOR2, 0);
1037 AHEAD(THERE());
1038 ASTERN(O_CH, THERETHERE());
1039 break;
1040 case REP(1, 1): /* trivial case */
1041 /* done */
1042 break;
1043 case REP(1, N): /* as x?x{1,n-1} */
1044 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1045 INSERT(OCH_, start);
1046 ASTERN(OOR1, start);
1047 AHEAD(start);
1048 EMIT(OOR2, 0); /* offset very wrong... */
1049 AHEAD(THERE()); /* ...so fix it */
1050 ASTERN(O_CH, THERETHERE());
1051 copy = dupl(p, start+1, finish+1);
1052 assert(copy == finish+4);
1053 repeat(p, copy, 1, to-1, reclimit);
1054 break;
1055 case REP(1, INF): /* as x+ */
1056 INSERT(OPLUS_, start);
1057 ASTERN(O_PLUS, start);
1058 break;
1059 case REP(N, N): /* as xx{m-1,n-1} */
1060 copy = dupl(p, start, finish);
1061 repeat(p, copy, from-1, to-1, reclimit);
1062 break;
1063 case REP(N, INF): /* as xx{n-1,INF} */
1064 copy = dupl(p, start, finish);
1065 repeat(p, copy, from-1, to, reclimit);
1066 break;
1067 default: /* "can't happen" */
1068 SETERROR(REG_ASSERT); /* just in case */
1069 break;
1074 - seterr - set an error condition
1076 static int /* useless but makes type checking happy */
1077 seterr(struct parse *p, int e)
1079 if (p->error == 0) /* keep earliest error condition */
1080 p->error = e;
1081 p->next = nuls; /* try to bring things to a halt */
1082 p->end = nuls;
1083 return(0); /* make the return value well-defined */
1087 - allocset - allocate a set of characters for []
1089 static cset *
1090 allocset(struct parse *p)
1092 int no = p->g->ncsets++;
1093 size_t nc;
1094 size_t nbytes;
1095 cset *cs;
1096 size_t css = (size_t)p->g->csetsize;
1097 int i;
1099 if (no >= p->ncsalloc) { /* need another column of space */
1100 p->ncsalloc += CHAR_BIT;
1101 nc = p->ncsalloc;
1102 assert(nc % CHAR_BIT == 0);
1103 nbytes = nc / CHAR_BIT * css;
1104 if (MEMSIZE(p) > MEMLIMIT)
1105 goto oomem;
1106 if (p->g->sets == NULL)
1107 p->g->sets = (cset *)malloc(nc * sizeof(cset));
1108 else
1109 p->g->sets = (cset *)realloc((char *)p->g->sets,
1110 nc * sizeof(cset));
1111 if (p->g->setbits == NULL)
1112 p->g->setbits = (uch *)malloc(nbytes);
1113 else {
1114 p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1115 nbytes);
1116 /* xxx this isn't right if setbits is now NULL */
1117 for (i = 0; i < no; i++)
1118 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1120 if (p->g->sets != NULL && p->g->setbits != NULL)
1121 memset((char *)p->g->setbits + (nbytes - css),
1122 0, css);
1123 else {
1124 oomem:
1125 no = 0;
1126 SETERROR(REG_ESPACE);
1127 /* caller's responsibility not to do set ops */
1128 return NULL;
1132 cs = &p->g->sets[no];
1133 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1134 cs->mask = 1 << ((no) % CHAR_BIT);
1135 cs->hash = 0;
1136 cs->smultis = 0;
1137 cs->multis = NULL;
1139 return(cs);
1143 - freeset - free a now-unused set
1145 static void
1146 freeset(struct parse *p, cset *cs)
1148 size_t i;
1149 cset *top = &p->g->sets[p->g->ncsets];
1150 size_t css = (size_t)p->g->csetsize;
1152 for (i = 0; i < css; i++)
1153 CHsub(cs, i);
1154 if (cs == top-1) /* recover only the easy case */
1155 p->g->ncsets--;
1159 - freezeset - final processing on a set of characters
1161 * The main task here is merging identical sets. This is usually a waste
1162 * of time (although the hash code minimizes the overhead), but can win
1163 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1164 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1165 * the same value!
1167 static int /* set number */
1168 freezeset(struct parse *p, cset *cs)
1170 uch h = cs->hash;
1171 size_t i;
1172 cset *top = &p->g->sets[p->g->ncsets];
1173 cset *cs2;
1174 size_t css = (size_t)p->g->csetsize;
1176 /* look for an earlier one which is the same */
1177 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1178 if (cs2->hash == h && cs2 != cs) {
1179 /* maybe */
1180 for (i = 0; i < css; i++)
1181 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1182 break; /* no */
1183 if (i == css)
1184 break; /* yes */
1187 if (cs2 < top) { /* found one */
1188 freeset(p, cs);
1189 cs = cs2;
1192 return((int)(cs - p->g->sets));
1196 - firstch - return first character in a set (which must have at least one)
1198 static int /* character; there is no "none" value */
1199 firstch(struct parse *p, cset *cs)
1201 size_t i;
1202 size_t css = (size_t)p->g->csetsize;
1204 for (i = 0; i < css; i++)
1205 if (CHIN(cs, i))
1206 return((char)i);
1207 assert(never);
1208 return(0); /* arbitrary */
1212 - nch - number of characters in a set
1214 static int
1215 nch(struct parse *p, cset *cs)
1217 size_t i;
1218 size_t css = (size_t)p->g->csetsize;
1219 int n = 0;
1221 for (i = 0; i < css; i++)
1222 if (CHIN(cs, i))
1223 n++;
1224 return(n);
1228 - mcadd - add a collating element to a cset
1230 static void
1231 mcadd(struct parse *p, cset *cs, const char *cp)
1233 size_t oldend = cs->smultis;
1234 void *np;
1236 cs->smultis += strlen(cp) + 1;
1237 np = realloc(cs->multis, cs->smultis);
1238 if (np == NULL) {
1239 if (cs->multis)
1240 free(cs->multis);
1241 cs->multis = NULL;
1242 SETERROR(REG_ESPACE);
1243 return;
1245 cs->multis = np;
1247 strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1);
1251 - mcinvert - invert the list of collating elements in a cset
1253 * This would have to know the set of possibilities. Implementation
1254 * is deferred.
1256 static void
1257 mcinvert(struct parse *p, cset *cs)
1259 assert(cs->multis == NULL); /* xxx */
1263 - mccase - add case counterparts of the list of collating elements in a cset
1265 * This would have to know the set of possibilities. Implementation
1266 * is deferred.
1268 static void
1269 mccase(struct parse *p, cset *cs)
1271 assert(cs->multis == NULL); /* xxx */
1274 #ifdef notdef
1276 - isinsets - is this character in any sets?
1278 static int /* predicate */
1279 isinsets(struct re_guts *g, int c)
1281 uch *col;
1282 int i;
1283 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1284 unsigned uc = (unsigned char)c;
1286 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1287 if (col[uc] != 0)
1288 return(1);
1289 return(0);
1293 - samesets - are these two characters in exactly the same sets?
1295 static int /* predicate */
1296 samesets(struct re_guts *g, int c1, int c2)
1298 uch *col;
1299 int i;
1300 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1301 unsigned uc1 = (unsigned char)c1;
1302 unsigned uc2 = (unsigned char)c2;
1304 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1305 if (col[uc1] != col[uc2])
1306 return(0);
1307 return(1);
1309 #endif
1312 - categorize - sort out character categories
1314 static void
1315 categorize(struct parse *p, struct re_guts *g)
1317 #ifdef notdef
1318 cat_t *cats = g->categories;
1319 int c;
1320 int c2;
1321 cat_t cat;
1323 /* avoid making error situations worse */
1324 if (p->error != 0)
1325 return;
1327 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1328 if (cats[c] == 0 && isinsets(g, c)) {
1329 cat = g->ncategories++;
1330 cats[c] = cat;
1331 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1332 if (cats[c2] == 0 && samesets(g, c, c2))
1333 cats[c2] = cat;
1335 #endif
1339 - dupl - emit a duplicate of a bunch of sops
1341 static sopno /* start of duplicate */
1342 dupl(struct parse *p,
1343 sopno start, /* from here */
1344 sopno finish) /* to this less one */
1346 sopno ret = HERE();
1347 sopno len = finish - start;
1349 assert(finish >= start);
1350 if (len == 0)
1351 return(ret);
1352 if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
1353 return ret;
1354 assert(p->ssize >= p->slen + len);
1355 (void) memcpy((char *)(p->strip + p->slen),
1356 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1357 (void) memcpy((char *)(p->stripdata + p->slen),
1358 (char *)(p->stripdata + start), (size_t)len*sizeof(RCHAR_T));
1359 p->slen += len;
1360 return(ret);
1364 - doemit - emit a strip operator
1366 * It might seem better to implement this as a macro with a function as
1367 * hard-case backup, but it's just too big and messy unless there are
1368 * some changes to the data structures. Maybe later.
1370 static void
1371 doemit(struct parse *p, sop op, size_t opnd)
1373 /* avoid making error situations worse */
1374 if (p->error != 0)
1375 return;
1377 /* deal with oversize operands ("can't happen", more or less) */
1378 assert(opnd < 1);
1380 /* deal with undersized strip */
1381 if (p->slen >= p->ssize)
1382 if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */
1383 return;
1385 /* finally, it's all reduced to the easy case */
1386 p->strip[p->slen] = op;
1387 p->stripdata[p->slen] = opnd;
1388 p->slen++;
1392 - doinsert - insert a sop into the strip
1394 static void
1395 doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
1397 sopno sn;
1398 sop s;
1399 RCHAR_T d;
1400 int i;
1402 /* avoid making error situations worse */
1403 if (p->error != 0)
1404 return;
1406 sn = HERE();
1407 EMIT(op, opnd); /* do checks, ensure space */
1408 assert(HERE() == sn+1);
1409 s = p->strip[sn];
1410 d = p->stripdata[sn];
1412 /* adjust paren pointers */
1413 assert(pos > 0);
1414 for (i = 1; i < NPAREN; i++) {
1415 if (p->pbegin[i] >= pos) {
1416 p->pbegin[i]++;
1418 if (p->pend[i] >= pos) {
1419 p->pend[i]++;
1423 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1424 (HERE()-pos-1)*sizeof(sop));
1425 memmove((char *)&p->stripdata[pos+1], (char *)&p->stripdata[pos],
1426 (HERE()-pos-1)*sizeof(RCHAR_T));
1427 p->strip[pos] = s;
1428 p->stripdata[pos] = d;
1432 - dofwd - complete a forward reference
1434 static void
1435 dofwd(struct parse *p, sopno pos, sop value)
1437 /* avoid making error situations worse */
1438 if (p->error != 0)
1439 return;
1441 assert(value < 1);
1442 p->stripdata[pos] = value;
1446 - enlarge - enlarge the strip
1448 static int
1449 enlarge(struct parse *p, sopno size)
1451 sop *sp;
1452 RCHAR_T *dp;
1453 sopno osize;
1455 if (p->ssize >= size)
1456 return 1;
1458 osize = p->ssize;
1459 p->ssize = size;
1460 if (MEMSIZE(p) > MEMLIMIT)
1461 goto oomem;
1462 sp = realloc(p->strip, p->ssize * sizeof(sop));
1463 if (sp == NULL)
1464 goto oomem;
1465 p->strip = sp;
1466 dp = realloc(p->stripdata, p->ssize * sizeof(RCHAR_T));
1467 if (dp == NULL) {
1468 oomem:
1469 p->ssize = osize;
1470 SETERROR(REG_ESPACE);
1471 return 0;
1473 p->stripdata = dp;
1474 return 1;
1478 - stripsnug - compact the strip
1480 static void
1481 stripsnug(struct parse *p, struct re_guts *g)
1483 g->nstates = p->slen;
1484 g->strip = (sop *)realloc((char *)p->strip,
1485 p->slen * sizeof(sop));
1486 if (g->strip == NULL) {
1487 SETERROR(REG_ESPACE);
1488 g->strip = p->strip;
1490 g->stripdata = (RCHAR_T *)realloc((char *)p->stripdata,
1491 p->slen * sizeof(RCHAR_T));
1492 if (g->stripdata == NULL) {
1493 SETERROR(REG_ESPACE);
1494 g->stripdata = p->stripdata;
1499 - findmust - fill in must and mlen with longest mandatory literal string
1501 * This algorithm could do fancy things like analyzing the operands of |
1502 * for common subsequences. Someday. This code is simple and finds most
1503 * of the interesting cases.
1505 * Note that must and mlen got initialized during setup.
1507 static void
1508 findmust(struct parse *p, struct re_guts *g)
1510 sop *scans;
1511 RCHAR_T *scand;
1512 sop *starts = 0;
1513 RCHAR_T *startd = NULL;
1514 sop *newstarts = 0;
1515 RCHAR_T *newstartd = NULL;
1516 sopno newlen;
1517 sop s;
1518 RCHAR_T d;
1519 RCHAR_T *cp;
1520 sopno i;
1522 /* avoid making error situations worse */
1523 if (p->error != 0)
1524 return;
1526 /* find the longest OCHAR sequence in strip */
1527 newlen = 0;
1528 scans = g->strip + 1;
1529 scand = g->stripdata + 1;
1530 do {
1531 s = *scans++;
1532 d = *scand++;
1533 switch (s) {
1534 case OCHAR: /* sequence member */
1535 if (newlen == 0) { /* new sequence */
1536 newstarts = scans - 1;
1537 newstartd = scand - 1;
1539 newlen++;
1540 break;
1541 case OPLUS_: /* things that don't break one */
1542 case OLPAREN:
1543 case ORPAREN:
1544 break;
1545 case OQUEST_: /* things that must be skipped */
1546 case OCH_:
1547 scans--;
1548 scand--;
1549 do {
1550 scans += d;
1551 scand += d;
1552 s = *scans;
1553 d = *scand;
1554 /* assert() interferes w debug printouts */
1555 if (s != O_QUEST && s != O_CH && s != OOR2) {
1556 g->iflags |= BAD;
1557 return;
1559 } while (s != O_QUEST && s != O_CH);
1560 /* fallthrough */
1561 default: /* things that break a sequence */
1562 if (newlen > g->mlen) { /* ends one */
1563 starts = newstarts;
1564 startd = newstartd;
1565 g->mlen = newlen;
1567 newlen = 0;
1568 break;
1570 } while (s != OEND);
1572 if (g->mlen == 0) /* there isn't one */
1573 return;
1575 /* turn it into a character string */
1576 g->must = malloc(((size_t)g->mlen + 1) * sizeof(RCHAR_T));
1577 if (g->must == NULL) { /* argh; just forget it */
1578 g->mlen = 0;
1579 return;
1581 cp = g->must;
1582 scans = starts;
1583 scand = startd;
1584 for (i = g->mlen; i > 0; i--) {
1585 for (;;) {
1586 s = *scans++;
1587 d = *scand++;
1588 if (s == OCHAR)
1589 break;
1591 assert(cp < g->must + g->mlen);
1592 *cp++ = d;
1594 assert(cp == g->must + g->mlen);
1595 *cp++ = '\0'; /* just on general principles */
1599 - pluscount - count + nesting
1601 static sopno /* nesting depth */
1602 pluscount(struct parse *p, struct re_guts *g)
1604 sop *scan;
1605 sop s;
1606 sopno plusnest = 0;
1607 sopno maxnest = 0;
1609 if (p->error != 0)
1610 return(0); /* there may not be an OEND */
1612 scan = g->strip + 1;
1613 do {
1614 s = *scan++;
1615 switch (s) {
1616 case OPLUS_:
1617 plusnest++;
1618 break;
1619 case O_PLUS:
1620 if (plusnest > maxnest)
1621 maxnest = plusnest;
1622 plusnest--;
1623 break;
1625 } while (s != OEND);
1626 if (plusnest != 0)
1627 g->iflags |= BAD;
1628 return(maxnest);