mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / regex / regcomp.c
blobc0fb93917a40a1c6179bae910e99a905aadf850d
1 #include <my_global.h>
2 #include <m_string.h>
3 #include <m_ctype.h>
4 #ifdef __WIN__
5 #include <limits.h>
6 #endif
8 #include "my_regex.h"
9 #include "utils.h"
10 #include "regex2.h"
12 #include "cclass.h"
13 #include "cname.h"
16 * parse structure, passed up and down to avoid global variables and
17 * other clumsinesses
19 struct parse {
20 char *next; /* next character in RE */
21 char *end; /* end of string (-> NUL normally) */
22 int error; /* has an error been seen? */
23 sop *strip; /* malloced strip */
24 sopno ssize; /* malloced strip size (allocated) */
25 sopno slen; /* malloced strip length (used) */
26 int ncsalloc; /* number of csets allocated */
27 struct re_guts *g;
28 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
29 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
30 sopno pend[NPAREN]; /* -> ) ([0] unused) */
31 CHARSET_INFO *charset; /* for ctype things */
34 /* Check if there is enough stack space for recursion. */
35 my_regex_stack_check_t my_regex_enough_mem_in_stack= NULL;
37 #include "regcomp.ih"
39 static char nuls[10]; /* place to point scanner in event of error */
41 struct cclass cclasses[CCLASS_LAST+1]= {
42 { "alnum", "","", _MY_U | _MY_L | _MY_NMR},
43 { "alpha", "","", _MY_U | _MY_L },
44 { "blank", "","", _MY_B },
45 { "cntrl", "","", _MY_CTR },
46 { "digit", "","", _MY_NMR },
47 { "graph", "","", _MY_PNT | _MY_U | _MY_L | _MY_NMR},
48 { "lower", "","", _MY_L },
49 { "print", "","", _MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B },
50 { "punct", "","", _MY_PNT },
51 { "space", "","", _MY_SPC },
52 { "upper", "","", _MY_U },
53 { "xdigit", "","", _MY_X },
54 { NULL,NULL,NULL, 0 }
/*
 * Helper macros for the parsing routines.
 * BEWARE:  every one of them assumes the parse structure pointer is a
 * variable named `p' in the calling scope.
 */

/* --- looking at / consuming pattern characters --- */
#define	PEEK()	(*p->next)
#define	PEEK2()	(*(p->next+1))
#define	MORE()	(p->next < p->end)
#define	MORE2()	(p->next+1 < p->end)
#define	SEE(c)	(MORE() && PEEK() == (c))
#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
#define	NEXT()	(p->next++)
#define	NEXT2()	(p->next += 2)
#define	NEXTn(n)	(p->next += (n))
#define	GETNEXT()	(*p->next++)

/* --- error reporting: each yields 0 after recording error `e' --- */
#define	SETERROR(e)	seterr(p, (e))
#define	REQUIRE(co, e)	((co) || SETERROR(e))
#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))

/* --- emitting into, and measuring, the operator strip --- */
#define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
#define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
#define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
#define	HERE()		(p->slen)
#define	THERE()		(p->slen - 1)
#define	THERETHERE()	(p->slen - 2)
#define	DROP(n)	(p->slen -= (n))

#ifndef NDEBUG
static int never = 0;		/* for use in asserts; shuts lint up */
#else
#define	never	0		/* some <assert.h>s have bugs too */
#endif
94 - regcomp - interface for parser and compilation
95 = extern int regcomp(regex_t *, const char *, int);
96 = #define REG_BASIC 0000
97 = #define REG_EXTENDED 0001
98 = #define REG_ICASE 0002
99 = #define REG_NOSUB 0004
100 = #define REG_NEWLINE 0010
101 = #define REG_NOSPEC 0020
102 = #define REG_PEND 0040
103 = #define REG_DUMP 0200
105 int /* 0 success, otherwise REG_something */
106 my_regcomp(preg, pattern, cflags, charset)
107 my_regex_t *preg;
108 const char *pattern;
109 int cflags;
110 CHARSET_INFO *charset;
112 struct parse pa;
113 register struct re_guts *g;
114 register struct parse *p = &pa;
115 register int i;
116 register size_t len;
117 #ifdef REDEBUG
118 # define GOODFLAGS(f) (f)
119 #else
120 # define GOODFLAGS(f) ((f)&~REG_DUMP)
121 #endif
123 my_regex_init(charset, NULL); /* Init cclass if neaded */
124 preg->charset=charset;
125 cflags = GOODFLAGS(cflags);
126 if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
127 return(REG_INVARG);
129 if (cflags&REG_PEND) {
130 if (preg->re_endp < pattern)
131 return(REG_INVARG);
132 len = preg->re_endp - pattern;
133 } else
134 len = strlen((char *)pattern);
136 /* do the mallocs early so failure handling is easy */
137 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
138 (NC-1)*sizeof(cat_t));
139 if (g == NULL)
140 return(REG_ESPACE);
141 p->ssize = (long) (len/(size_t)2*(size_t)3 + (size_t)1); /* ugh */
142 p->strip = (sop *)malloc(p->ssize * sizeof(sop));
143 p->slen = 0;
144 if (p->strip == NULL) {
145 free((char *)g);
146 return(REG_ESPACE);
149 /* set things up */
150 p->g = g;
151 p->next = (char *)pattern; /* convenience; we do not modify it */
152 p->end = p->next + len;
153 p->error = 0;
154 p->ncsalloc = 0;
155 p->charset = preg->charset;
156 for (i = 0; i < NPAREN; i++) {
157 p->pbegin[i] = 0;
158 p->pend[i] = 0;
160 g->csetsize = NC;
161 g->sets = NULL;
162 g->setbits = NULL;
163 g->ncsets = 0;
164 g->cflags = cflags;
165 g->iflags = 0;
166 g->nbol = 0;
167 g->neol = 0;
168 g->must = NULL;
169 g->mlen = 0;
170 g->nsub = 0;
171 g->ncategories = 1; /* category 0 is "everything else" */
172 g->categories = &g->catspace[-(CHAR_MIN)];
173 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
174 g->backrefs = 0;
176 /* do it */
177 EMIT(OEND, 0);
178 g->firststate = THERE();
179 if (cflags&REG_EXTENDED)
180 p_ere(p, OUT);
181 else if (cflags&REG_NOSPEC)
182 p_str(p);
183 else
184 p_bre(p, OUT, OUT);
185 EMIT(OEND, 0);
186 g->laststate = THERE();
188 /* tidy up loose ends and fill things in */
189 categorize(p, g);
190 stripsnug(p, g);
191 findmust(p, g);
192 g->nplus = pluscount(p, g);
193 g->magic = MAGIC2;
194 preg->re_nsub = g->nsub;
195 preg->re_g = g;
196 preg->re_magic = MAGIC1;
197 #ifndef REDEBUG
198 /* not debugging, so can't rely on the assert() in regexec() */
199 if (g->iflags&BAD)
200 SETERROR(REG_ASSERT);
201 #endif
203 /* win or lose, we're done */
204 if (p->error != 0) /* lose */
205 my_regfree(preg);
206 return(p->error);
210 - p_ere - ERE parser top level, concatenation and alternation
211 == static void p_ere(register struct parse *p, int stop);
213 static void
214 p_ere(p, stop)
215 register struct parse *p;
216 int stop; /* character this ERE should end at */
218 register char c;
219 register sopno UNINIT_VAR(prevback);
220 register sopno UNINIT_VAR(prevfwd);
221 register sopno conc;
222 register int first = 1; /* is this the first alternative? */
224 for (;;) {
225 /* do a bunch of concatenated expressions */
226 conc = HERE();
227 while (MORE() && (c = PEEK()) != '|' && c != stop)
229 if (my_regex_enough_mem_in_stack &&
230 my_regex_enough_mem_in_stack(0))
232 SETERROR(REG_ESPACE);
233 return;
235 p_ere_exp(p);
237 if(REQUIRE(HERE() != conc, REG_EMPTY)) {}/* require nonempty */
239 if (!EAT('|'))
240 break; /* NOTE BREAK OUT */
242 if (first) {
243 INSERT(OCH_, conc); /* offset is wrong */
244 prevfwd = conc;
245 prevback = conc;
246 first = 0;
248 ASTERN(OOR1, prevback);
249 prevback = THERE();
250 AHEAD(prevfwd); /* fix previous offset */
251 prevfwd = HERE();
252 EMIT(OOR2, 0); /* offset is very wrong */
255 if (!first) { /* tail-end fixups */
256 AHEAD(prevfwd);
257 ASTERN(O_CH, prevback);
260 assert(!MORE() || SEE(stop));
264 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
265 == static void p_ere_exp(register struct parse *p);
267 static void
268 p_ere_exp(p)
269 register struct parse *p;
271 register char c;
272 register sopno pos;
273 register int count;
274 register int count2;
275 register sopno subno;
276 int wascaret = 0;
278 assert(MORE()); /* caller should have ensured this */
279 c = GETNEXT();
281 pos = HERE();
282 switch (c) {
283 case '(':
284 if(REQUIRE(MORE(), REG_EPAREN)) {}
285 p->g->nsub++;
286 subno = (sopno) p->g->nsub;
287 if (subno < NPAREN)
288 p->pbegin[subno] = HERE();
289 EMIT(OLPAREN, subno);
290 if (!SEE(')'))
291 p_ere(p, ')');
292 if (subno < NPAREN) {
293 p->pend[subno] = HERE();
294 assert(p->pend[subno] != 0);
296 EMIT(ORPAREN, subno);
297 if(MUSTEAT(')', REG_EPAREN)) {}
298 break;
299 #ifndef POSIX_MISTAKE
300 case ')': /* happens only if no current unmatched ( */
302 * You may ask, why the ifndef? Because I didn't notice
303 * this until slightly too late for 1003.2, and none of the
304 * other 1003.2 regular-expression reviewers noticed it at
305 * all. So an unmatched ) is legal POSIX, at least until
306 * we can get it fixed.
308 SETERROR(REG_EPAREN);
309 break;
310 #endif
311 case '^':
312 EMIT(OBOL, 0);
313 p->g->iflags |= USEBOL;
314 p->g->nbol++;
315 wascaret = 1;
316 break;
317 case '$':
318 EMIT(OEOL, 0);
319 p->g->iflags |= USEEOL;
320 p->g->neol++;
321 break;
322 case '|':
323 SETERROR(REG_EMPTY);
324 break;
325 case '*':
326 case '+':
327 case '?':
328 SETERROR(REG_BADRPT);
329 break;
330 case '.':
331 if (p->g->cflags&REG_NEWLINE)
332 nonnewline(p);
333 else
334 EMIT(OANY, 0);
335 break;
336 case '[':
337 p_bracket(p);
338 break;
339 case '\\':
340 if(REQUIRE(MORE(), REG_EESCAPE)) {}
341 c = GETNEXT();
342 ordinary(p, c);
343 break;
344 case '{': /* okay as ordinary except if digit follows */
345 if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), REG_BADRPT)) {}
346 /* FALLTHROUGH */
347 default:
348 ordinary(p, c);
349 break;
352 if (!MORE())
353 return;
354 c = PEEK();
355 /* we call { a repetition if followed by a digit */
356 if (!( c == '*' || c == '+' || c == '?' ||
357 (c == '{' && MORE2() &&
358 my_isdigit(p->charset,PEEK2())) ))
359 return; /* no repetition, we're done */
360 NEXT();
362 if(REQUIRE(!wascaret, REG_BADRPT)) {}
363 switch (c) {
364 case '*': /* implemented as +? */
365 /* this case does not require the (y|) trick, noKLUDGE */
366 INSERT(OPLUS_, pos);
367 ASTERN(O_PLUS, pos);
368 INSERT(OQUEST_, pos);
369 ASTERN(O_QUEST, pos);
370 break;
371 case '+':
372 INSERT(OPLUS_, pos);
373 ASTERN(O_PLUS, pos);
374 break;
375 case '?':
376 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
377 INSERT(OCH_, pos); /* offset slightly wrong */
378 ASTERN(OOR1, pos); /* this one's right */
379 AHEAD(pos); /* fix the OCH_ */
380 EMIT(OOR2, 0); /* offset very wrong... */
381 AHEAD(THERE()); /* ...so fix it */
382 ASTERN(O_CH, THERETHERE());
383 break;
384 case '{':
385 count = p_count(p);
386 if (EAT(',')) {
387 if (my_isdigit(p->charset,PEEK())) {
388 count2 = p_count(p);
389 if(REQUIRE(count <= count2, REG_BADBR)) {}
390 } else /* single number with comma */
391 count2 = RE_INFINITY;
392 } else /* just a single number */
393 count2 = count;
394 repeat(p, pos, count, count2);
395 if (!EAT('}')) { /* error heuristics */
396 while (MORE() && PEEK() != '}')
397 NEXT();
398 if(REQUIRE(MORE(), REG_EBRACE)) {}
399 SETERROR(REG_BADBR);
401 break;
404 if (!MORE())
405 return;
406 c = PEEK();
407 if (!( c == '*' || c == '+' || c == '?' ||
408 (c == '{' && MORE2() &&
409 my_isdigit(p->charset,PEEK2())) ) )
410 return;
411 SETERROR(REG_BADRPT);
415 - p_str - string (no metacharacters) "parser"
416 == static void p_str(register struct parse *p);
418 static void
419 p_str(p)
420 register struct parse *p;
422 if(REQUIRE(MORE(), REG_EMPTY)) {}
423 while (MORE())
424 ordinary(p, GETNEXT());
428 - p_bre - BRE parser top level, anchoring and concatenation
429 == static void p_bre(register struct parse *p, register int end1, \
430 == register int end2);
431 * Giving end1 as OUT essentially eliminates the end1/end2 check.
433 * This implementation is a bit of a kludge, in that a trailing $ is first
434 * taken as an ordinary character and then revised to be an anchor. The
435 * only undesirable side effect is that '$' gets included as a character
436 * category in such cases. This is fairly harmless; not worth fixing.
437 * The amount of lookahead needed to avoid this kludge is excessive.
439 static void
440 p_bre(p, end1, end2)
441 register struct parse *p;
442 register int end1; /* first terminating character */
443 register int end2; /* second terminating character */
445 register sopno start = HERE();
446 register int first = 1; /* first subexpression? */
447 register int wasdollar = 0;
449 if (EAT('^')) {
450 EMIT(OBOL, 0);
451 p->g->iflags |= USEBOL;
452 p->g->nbol++;
454 while (MORE() && !SEETWO(end1, end2)) {
455 wasdollar = p_simp_re(p, first);
456 first = 0;
458 if (wasdollar) { /* oops, that was a trailing anchor */
459 DROP(1);
460 EMIT(OEOL, 0);
461 p->g->iflags |= USEEOL;
462 p->g->neol++;
465 if(REQUIRE(HERE() != start, REG_EMPTY)) {} /* require nonempty */
469 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
470 == static int p_simp_re(register struct parse *p, int starordinary);
472 static int /* was the simple RE an unbackslashed $? */
473 p_simp_re(p, starordinary)
474 register struct parse *p;
475 int starordinary; /* is a leading * an ordinary character? */
477 register int c;
478 register int count;
479 register int count2;
480 register sopno pos;
481 register int i;
482 register sopno subno;
483 # define BACKSL (1<<CHAR_BIT)
485 pos = HERE(); /* repetion op, if any, covers from here */
487 assert(MORE()); /* caller should have ensured this */
488 c = GETNEXT();
489 if (c == '\\') {
490 if(REQUIRE(MORE(), REG_EESCAPE)) {}
491 c = BACKSL | (unsigned char)GETNEXT();
493 switch (c) {
494 case '.':
495 if (p->g->cflags&REG_NEWLINE)
496 nonnewline(p);
497 else
498 EMIT(OANY, 0);
499 break;
500 case '[':
501 p_bracket(p);
502 break;
503 case BACKSL|'{':
504 SETERROR(REG_BADRPT);
505 break;
506 case BACKSL|'(':
507 p->g->nsub++;
508 subno = (sopno) p->g->nsub;
509 if (subno < NPAREN)
510 p->pbegin[subno] = HERE();
511 EMIT(OLPAREN, subno);
512 /* the MORE here is an error heuristic */
513 if (MORE() && !SEETWO('\\', ')'))
514 p_bre(p, '\\', ')');
515 if (subno < NPAREN) {
516 p->pend[subno] = HERE();
517 assert(p->pend[subno] != 0);
519 EMIT(ORPAREN, subno);
520 if(REQUIRE(EATTWO('\\', ')'), REG_EPAREN)) {}
521 break;
522 case BACKSL|')': /* should not get here -- must be user */
523 case BACKSL|'}':
524 SETERROR(REG_EPAREN);
525 break;
526 case BACKSL|'1':
527 case BACKSL|'2':
528 case BACKSL|'3':
529 case BACKSL|'4':
530 case BACKSL|'5':
531 case BACKSL|'6':
532 case BACKSL|'7':
533 case BACKSL|'8':
534 case BACKSL|'9':
535 i = (c&~BACKSL) - '0';
536 assert(i < NPAREN);
537 if (p->pend[i] != 0) {
538 assert((uint) i <= p->g->nsub);
539 EMIT(OBACK_, i);
540 assert(p->pbegin[i] != 0);
541 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
542 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
543 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
544 EMIT(O_BACK, i);
545 } else
546 SETERROR(REG_ESUBREG);
547 p->g->backrefs = 1;
548 break;
549 case '*':
550 if(REQUIRE(starordinary, REG_BADRPT)) {}
551 /* FALLTHROUGH */
552 default:
553 ordinary(p, c &~ BACKSL);
554 break;
557 if (EAT('*')) { /* implemented as +? */
558 /* this case does not require the (y|) trick, noKLUDGE */
559 INSERT(OPLUS_, pos);
560 ASTERN(O_PLUS, pos);
561 INSERT(OQUEST_, pos);
562 ASTERN(O_QUEST, pos);
563 } else if (EATTWO('\\', '{')) {
564 count = p_count(p);
565 if (EAT(',')) {
566 if (MORE() && my_isdigit(p->charset,PEEK())) {
567 count2 = p_count(p);
568 if(REQUIRE(count <= count2, REG_BADBR)) {}
569 } else /* single number with comma */
570 count2 = RE_INFINITY;
571 } else /* just a single number */
572 count2 = count;
573 repeat(p, pos, count, count2);
574 if (!EATTWO('\\', '}')) { /* error heuristics */
575 while (MORE() && !SEETWO('\\', '}'))
576 NEXT();
577 if(REQUIRE(MORE(), REG_EBRACE)) {}
578 SETERROR(REG_BADBR);
580 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
581 return(1);
583 return(0);
587 - p_count - parse a repetition count
588 == static int p_count(register struct parse *p);
590 static int /* the value */
591 p_count(p)
592 register struct parse *p;
594 register int count = 0;
595 register int ndigits = 0;
597 while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) {
598 count = count*10 + (GETNEXT() - '0');
599 ndigits++;
602 if(REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR)) {}
603 return(count);
607 - p_bracket - parse a bracketed character list
608 == static void p_bracket(register struct parse *p);
610 * Note a significant property of this code: if the allocset() did SETERROR,
611 * no set operations are done.
613 static void
614 p_bracket(p)
615 register struct parse *p;
617 register cset *cs = allocset(p);
618 register int invert = 0;
620 /* Dept of Truly Sickening Special-Case Kludges */
621 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
622 EMIT(OBOW, 0);
623 NEXTn(6);
624 return;
626 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
627 EMIT(OEOW, 0);
628 NEXTn(6);
629 return;
632 if (EAT('^'))
633 invert++; /* make note to invert set at end */
634 if (EAT(']'))
635 CHadd(cs, ']');
636 else if (EAT('-'))
637 CHadd(cs, '-');
638 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
639 p_b_term(p, cs);
640 if (EAT('-'))
641 CHadd(cs, '-');
642 if(MUSTEAT(']', REG_EBRACK)) {}
644 if (p->error != 0) /* don't mess things up further */
645 return;
647 if (p->g->cflags&REG_ICASE) {
648 register int i;
649 register int ci;
651 for (i = p->g->csetsize - 1; i >= 0; i--)
652 if (CHIN(cs, i) && my_isalpha(p->charset,i)) {
653 ci = othercase(p->charset,i);
654 if (ci != i)
655 CHadd(cs, ci);
657 if (cs->multis != NULL)
658 mccase(p, cs);
660 if (invert) {
661 register int i;
663 for (i = p->g->csetsize - 1; i >= 0; i--)
664 if (CHIN(cs, i))
665 CHsub(cs, i);
666 else
667 CHadd(cs, i);
668 if (p->g->cflags&REG_NEWLINE)
669 CHsub(cs, '\n');
670 if (cs->multis != NULL)
671 mcinvert(p, cs);
674 assert(cs->multis == NULL); /* xxx */
676 if (nch(p, cs) == 1) { /* optimize singleton sets */
677 ordinary(p, firstch(p, cs));
678 freeset(p, cs);
679 } else
680 EMIT(OANYOF, freezeset(p, cs));
684 - p_b_term - parse one term of a bracketed character list
685 == static void p_b_term(register struct parse *p, register cset *cs);
687 static void
688 p_b_term(p, cs)
689 register struct parse *p;
690 register cset *cs;
692 register char c;
693 register char start, finish;
694 register int i;
696 /* classify what we've got */
697 switch ((MORE()) ? PEEK() : '\0') {
698 case '[':
699 c = (MORE2()) ? PEEK2() : '\0';
700 break;
701 case '-':
702 SETERROR(REG_ERANGE);
703 return; /* NOTE RETURN */
704 default:
705 c = '\0';
706 break;
709 switch (c) {
710 case ':': /* character class */
711 NEXT2();
712 if(REQUIRE(MORE(), REG_EBRACK)) {}
713 c = PEEK();
714 if(REQUIRE(c != '-' && c != ']', REG_ECTYPE)) {}
715 p_b_cclass(p, cs);
716 if(REQUIRE(MORE(), REG_EBRACK)) {}
717 if(REQUIRE(EATTWO(':', ']'), REG_ECTYPE)) {}
718 break;
719 case '=': /* equivalence class */
720 NEXT2();
721 if(REQUIRE(MORE(), REG_EBRACK)) {}
722 c = PEEK();
723 if(REQUIRE(c != '-' && c != ']', REG_ECOLLATE)) {}
724 p_b_eclass(p, cs);
725 if(REQUIRE(MORE(), REG_EBRACK)) {}
726 if(REQUIRE(EATTWO('=', ']'), REG_ECOLLATE)) {}
727 break;
728 default: /* symbol, ordinary character, or range */
729 /* xxx revision needed for multichar stuff */
730 start = p_b_symbol(p);
731 if (SEE('-') && MORE2() && PEEK2() != ']') {
732 /* range */
733 NEXT();
734 if (EAT('-'))
735 finish = '-';
736 else
737 finish = p_b_symbol(p);
738 } else
739 finish = start;
740 /* xxx what about signed chars here... */
741 if(REQUIRE(start <= finish, REG_ERANGE)) {}
742 for (i = start; i <= finish; i++)
743 CHadd(cs, i);
744 break;
749 - p_b_cclass - parse a character-class name and deal with it
750 == static void p_b_cclass(register struct parse *p, register cset *cs);
752 static void
753 p_b_cclass(p, cs)
754 register struct parse *p;
755 register cset *cs;
757 register char *sp = p->next;
758 register struct cclass *cp;
759 register size_t len;
761 while (MORE() && my_isalpha(p->charset,PEEK()))
762 NEXT();
763 len = p->next - sp;
764 for (cp = cclasses; cp->name != NULL; cp++)
765 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
766 break;
767 if (cp->name == NULL) {
768 /* oops, didn't find it */
769 SETERROR(REG_ECTYPE);
770 return;
773 #ifndef USE_ORIG_REGEX_CODE
775 register size_t i;
776 for (i=1 ; i<256 ; i++)
777 if (p->charset->ctype[i+1] & cp->mask)
778 CHadd(cs, i);
780 #else
782 register char *u = (char*) cp->chars;
783 register char c;
785 while ((c = *u++) != '\0')
786 CHadd(cs, c);
788 for (u = (char*) cp->multis; *u != '\0'; u += strlen(u) + 1)
789 MCadd(p, cs, u);
791 #endif
796 - p_b_eclass - parse an equivalence-class name and deal with it
797 == static void p_b_eclass(register struct parse *p, register cset *cs);
799 * This implementation is incomplete. xxx
801 static void
802 p_b_eclass(p, cs)
803 register struct parse *p;
804 register cset *cs;
806 register char c;
808 c = p_b_coll_elem(p, '=');
809 CHadd(cs, c);
813 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
814 == static char p_b_symbol(register struct parse *p);
816 static char /* value of symbol */
817 p_b_symbol(p)
818 register struct parse *p;
820 register char value;
822 if(REQUIRE(MORE(), REG_EBRACK)) {}
823 if (!EATTWO('[', '.'))
824 return(GETNEXT());
826 /* collating symbol */
827 value = p_b_coll_elem(p, '.');
828 if(REQUIRE(EATTWO('.', ']'), REG_ECOLLATE)) {}
829 return(value);
833 - p_b_coll_elem - parse a collating-element name and look it up
834 == static char p_b_coll_elem(register struct parse *p, int endc);
836 static char /* value of collating element */
837 p_b_coll_elem(p, endc)
838 register struct parse *p;
839 int endc; /* name ended by endc,']' */
841 register char *sp = p->next;
842 register struct cname *cp;
843 #ifdef _WIN64
844 register __int64 len;
845 #else
846 register int len;
847 #endif
848 while (MORE() && !SEETWO(endc, ']'))
849 NEXT();
850 if (!MORE()) {
851 SETERROR(REG_EBRACK);
852 return(0);
854 len = p->next - sp;
855 for (cp = cnames; cp->name != NULL; cp++)
856 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
857 return(cp->code); /* known name */
858 if (len == 1)
859 return(*sp); /* single character */
860 SETERROR(REG_ECOLLATE); /* neither */
861 return(0);
865 - othercase - return the case counterpart of an alphabetic
866 == static char othercase(int ch);
868 static char /* if no counterpart, return ch */
869 othercase(charset,ch)
870 CHARSET_INFO *charset;
871 int ch;
874 In MySQL some multi-byte character sets
875 have 'ctype' array but don't have 'to_lower'
876 and 'to_upper' arrays. In this case we handle
877 only basic latin letters a..z and A..Z.
879 If 'to_lower' and 'to_upper' arrays are empty in a character set,
880 then my_isalpha(cs, ch) should never return TRUE for characters
881 other than basic latin letters. Otherwise it should be
882 considered as a mistake in character set definition.
884 assert(my_isalpha(charset,ch));
885 if (my_isupper(charset,ch))
887 return(charset->to_lower ? my_tolower(charset,ch) :
888 ch - 'A' + 'a');
890 else if (my_islower(charset,ch))
892 return(charset->to_upper ? my_toupper(charset,ch) :
893 ch - 'a' + 'A');
895 else /* peculiar, but could happen */
896 return(ch);
900 - bothcases - emit a dualcase version of a two-case character
901 == static void bothcases(register struct parse *p, int ch);
903 * Boy, is this implementation ever a kludge...
905 static void
906 bothcases(p, ch)
907 register struct parse *p;
908 int ch;
910 register char *oldnext = p->next;
911 register char *oldend = p->end;
912 char bracket[3];
914 assert(othercase(p->charset, ch) != ch); /* p_bracket() would recurse */
915 p->next = bracket;
916 p->end = bracket+2;
917 bracket[0] = ch;
918 bracket[1] = ']';
919 bracket[2] = '\0';
920 p_bracket(p);
921 assert(p->next == bracket+2);
922 p->next = oldnext;
923 p->end = oldend;
927 - ordinary - emit an ordinary character
928 == static void ordinary(register struct parse *p, register int ch);
930 static void
931 ordinary(p, ch)
932 register struct parse *p;
933 register int ch;
935 register cat_t *cap = p->g->categories;
937 if ((p->g->cflags&REG_ICASE) && my_isalpha(p->charset,ch) &&
938 othercase(p->charset,ch) != ch)
939 bothcases(p, ch);
940 else {
941 EMIT(OCHAR, (unsigned char)ch);
942 if (cap[ch] == 0)
943 cap[ch] = p->g->ncategories++;
948 - nonnewline - emit REG_NEWLINE version of OANY
949 == static void nonnewline(register struct parse *p);
951 * Boy, is this implementation ever a kludge...
953 static void
954 nonnewline(p)
955 register struct parse *p;
957 register char *oldnext = p->next;
958 register char *oldend = p->end;
959 char bracket[4];
961 p->next = bracket;
962 p->end = bracket+3;
963 bracket[0] = '^';
964 bracket[1] = '\n';
965 bracket[2] = ']';
966 bracket[3] = '\0';
967 p_bracket(p);
968 assert(p->next == bracket+3);
969 p->next = oldnext;
970 p->end = oldend;
974 - repeat - generate code for a bounded repetition, recursively if needed
975 == static void repeat(register struct parse *p, sopno start, int from, int to);
977 static void
978 repeat(p, start, from, to)
979 register struct parse *p;
980 sopno start; /* operand from here to end of strip */
981 int from; /* repeated from this number */
982 int to; /* to this number of times (maybe RE_INFINITY) */
984 register sopno finish = HERE();
985 # define N 2
986 # define INF 3
987 # define REP(f, t) ((f)*8 + (t))
988 # define MAP(n) (((n) <= 1) ? (n) : ((n) == RE_INFINITY) ? INF : N)
989 register sopno copy;
991 if (p->error != 0) /* head off possible runaway recursion */
992 return;
994 assert(from <= to);
996 switch (REP(MAP(from), MAP(to))) {
997 case REP(0, 0): /* must be user doing this */
998 DROP(finish-start); /* drop the operand */
999 break;
1000 case REP(0, 1): /* as x{1,1}? */
1001 case REP(0, N): /* as x{1,n}? */
1002 case REP(0, INF): /* as x{1,}? */
1003 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1004 INSERT(OCH_, start); /* offset is wrong... */
1005 repeat(p, start+1, 1, to);
1006 ASTERN(OOR1, start);
1007 AHEAD(start); /* ... fix it */
1008 EMIT(OOR2, 0);
1009 AHEAD(THERE());
1010 ASTERN(O_CH, THERETHERE());
1011 break;
1012 case REP(1, 1): /* trivial case */
1013 /* done */
1014 break;
1015 case REP(1, N): /* as x?x{1,n-1} */
1016 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1017 INSERT(OCH_, start);
1018 ASTERN(OOR1, start);
1019 AHEAD(start);
1020 EMIT(OOR2, 0); /* offset very wrong... */
1021 AHEAD(THERE()); /* ...so fix it */
1022 ASTERN(O_CH, THERETHERE());
1023 copy = dupl(p, start+1, finish+1);
1024 assert(copy == finish+4);
1025 repeat(p, copy, 1, to-1);
1026 break;
1027 case REP(1, INF): /* as x+ */
1028 INSERT(OPLUS_, start);
1029 ASTERN(O_PLUS, start);
1030 break;
1031 case REP(N, N): /* as xx{m-1,n-1} */
1032 copy = dupl(p, start, finish);
1033 repeat(p, copy, from-1, to-1);
1034 break;
1035 case REP(N, INF): /* as xx{n-1,INF} */
1036 copy = dupl(p, start, finish);
1037 repeat(p, copy, from-1, to);
1038 break;
1039 default: /* "can't happen" */
1040 SETERROR(REG_ASSERT); /* just in case */
1041 break;
1046 - seterr - set an error condition
1047 == static int seterr(register struct parse *p, int e);
1049 static int /* useless but makes type checking happy */
1050 seterr(p, e)
1051 register struct parse *p;
1052 int e;
1054 if (p->error == 0) /* keep earliest error condition */
1055 p->error = e;
1056 p->next = nuls; /* try to bring things to a halt */
1057 p->end = nuls;
1058 return(0); /* make the return value well-defined */
1062 - allocset - allocate a set of characters for []
1063 == static cset *allocset(register struct parse *p);
1065 static cset *
1066 allocset(p)
1067 register struct parse *p;
1069 register int no = p->g->ncsets++;
1070 register size_t nc;
1071 register size_t nbytes;
1072 register cset *cs;
1073 register size_t css = (size_t)p->g->csetsize;
1074 register int i;
1076 if (no >= p->ncsalloc) { /* need another column of space */
1077 p->ncsalloc += CHAR_BIT;
1078 nc = p->ncsalloc;
1079 assert(nc % CHAR_BIT == 0);
1080 nbytes = nc / CHAR_BIT * css;
1081 if (p->g->sets == NULL)
1082 p->g->sets = (cset *)malloc(nc * sizeof(cset));
1083 else
1084 p->g->sets = (cset *)realloc((char *)p->g->sets,
1085 nc * sizeof(cset));
1086 if (p->g->setbits == NULL)
1087 p->g->setbits = (uch *)malloc(nbytes);
1088 else {
1089 p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1090 nbytes);
1091 /* xxx this isn't right if setbits is now NULL */
1092 for (i = 0; i < no; i++)
1093 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1095 if (p->g->sets != NULL && p->g->setbits != NULL)
1096 (void) memset((char *)p->g->setbits + (nbytes - css),
1097 0, css);
1098 else {
1099 no = 0;
1100 SETERROR(REG_ESPACE);
1101 /* caller's responsibility not to do set ops */
1105 assert(p->g->sets != NULL); /* xxx */
1106 cs = &p->g->sets[no];
1107 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1108 cs->mask = 1 << ((no) % CHAR_BIT);
1109 cs->hash = 0;
1110 cs->smultis = 0;
1111 cs->multis = NULL;
1113 return(cs);
1117 - freeset - free a now-unused set
1118 == static void freeset(register struct parse *p, register cset *cs);
1120 static void
1121 freeset(p, cs)
1122 register struct parse *p;
1123 register cset *cs;
1125 register size_t i;
1126 register cset *top = &p->g->sets[p->g->ncsets];
1127 register size_t css = (size_t)p->g->csetsize;
1129 for (i = 0; i < css; i++)
1130 CHsub(cs, i);
1131 if (cs == top-1) /* recover only the easy case */
1132 p->g->ncsets--;
1136 - freezeset - final processing on a set of characters
1137 == static int freezeset(register struct parse *p, register cset *cs);
1139 * The main task here is merging identical sets. This is usually a waste
1140 * of time (although the hash code minimizes the overhead), but can win
1141 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1142 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1143 * the same value!
1145 static int /* set number */
1146 freezeset(p, cs)
1147 register struct parse *p;
1148 register cset *cs;
1150 register uch h = cs->hash;
1151 register size_t i;
1152 register cset *top = &p->g->sets[p->g->ncsets];
1153 register cset *cs2;
1154 register size_t css = (size_t)p->g->csetsize;
1156 /* look for an earlier one which is the same */
1157 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1158 if (cs2->hash == h && cs2 != cs) {
1159 /* maybe */
1160 for (i = 0; i < css; i++)
1161 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1162 break; /* no */
1163 if (i == css)
1164 break; /* yes */
1167 if (cs2 < top) { /* found one */
1168 freeset(p, cs);
1169 cs = cs2;
1172 return((int)(cs - p->g->sets));
1176 - firstch - return first character in a set (which must have at least one)
1177 == static int firstch(register struct parse *p, register cset *cs);
1179 static int /* character; there is no "none" value */
1180 firstch(p, cs)
1181 register struct parse *p;
1182 register cset *cs;
1184 register size_t i;
1185 register size_t css = (size_t)p->g->csetsize;
1187 for (i = 0; i < css; i++)
1188 if (CHIN(cs, i))
1189 return((char)i);
1190 assert(never);
1191 return(0); /* arbitrary */
1195 - nch - number of characters in a set
1196 == static int nch(register struct parse *p, register cset *cs);
1198 static int
1199 nch(p, cs)
1200 register struct parse *p;
1201 register cset *cs;
1203 register size_t i;
1204 register size_t css = (size_t)p->g->csetsize;
1205 register int n = 0;
1207 for (i = 0; i < css; i++)
1208 if (CHIN(cs, i))
1209 n++;
1210 return(n);
#ifdef USE_ORIG_REGEX_CODE
/*
 - mcadd - add a collating element to a cset
 == static void mcadd(register struct parse *p, register cset *cs, \
 ==	register char *cp);
 *
 * cs->multis holds the elements as consecutive NUL-terminated strings
 * followed by one extra NUL (an empty string ends the list, which is
 * how the list walkers in this file stop); cs->smultis is the total
 * byte count including that final NUL.
 *
 * On allocation failure REG_ESPACE is recorded and the existing list is
 * left unchanged.
 */
static void
mcadd(register struct parse *p, register cset *cs, register char *cp)
{
	register size_t oldsize = cs->smultis;
	register size_t cplen = strlen(cp);
	register size_t newsize;
	register size_t at;		/* offset at which cp is stored */
	register char *grown;

	if (oldsize == 0) {
		/*
		 * First element: there is no list terminator to overwrite
		 * yet, so store at offset 0 and reserve room for one.  (The
		 * old code copied to multis + 0 - 1, one byte before the
		 * buffer.)
		 */
		at = 0;
		newsize = cplen + 2;
	} else {
		/* overwrite the old list terminator, then append a new one */
		at = oldsize - 1;
		newsize = oldsize + cplen + 1;
	}

	if (cs->multis == NULL)
		grown = malloc(newsize);
	else
		grown = realloc(cs->multis, newsize);
	if (grown == NULL) {
		/* cs->multis / cs->smultis still describe the old list */
		SETERROR(REG_ESPACE);
		return;
	}
	cs->multis = grown;
	cs->smultis = newsize;

	(void) strcpy(cs->multis + at, cp);
	cs->multis[cs->smultis - 1] = '\0';
}
#endif
1242 #ifdef NOT_USED
1244 - mcsub - subtract a collating element from a cset
1245 == static void mcsub(register cset *cs, register char *cp);
1247 static void
1248 mcsub(cs, cp)
1249 register cset *cs;
1250 register char *cp;
1252 register char *fp = mcfind(cs, cp);
1253 register size_t len = strlen(fp);
1255 assert(fp != NULL);
1256 (void) memmove(fp, fp + len + 1,
1257 cs->smultis - (fp + len + 1 - cs->multis));
1258 cs->smultis -= len;
1260 if (cs->smultis == 0) {
1261 free(cs->multis);
1262 cs->multis = NULL;
1263 return;
1266 cs->multis = realloc(cs->multis, cs->smultis);
1267 assert(cs->multis != NULL);
1271 - mcin - is a collating element in a cset?
1272 == static int mcin(register cset *cs, register char *cp);
1274 static int
1275 mcin(cs, cp)
1276 register cset *cs;
1277 register char *cp;
1279 return(mcfind(cs, cp) != NULL);
1283 - mcfind - find a collating element in a cset
1284 == static char *mcfind(register cset *cs, register char *cp);
1286 static char *
1287 mcfind(cs, cp)
1288 register cset *cs;
1289 register char *cp;
1291 register char *p;
1293 if (cs->multis == NULL)
1294 return(NULL);
1295 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1296 if (strcmp(cp, p) == 0)
1297 return(p);
1298 return(NULL);
1300 #endif
1303 - mcinvert - invert the list of collating elements in a cset
1304 == static void mcinvert(register struct parse *p, register cset *cs);
1306 * This would have to know the set of possibilities. Implementation
1307 * is deferred.
1309 static void
1310 mcinvert(p, cs)
1311 register struct parse *p __attribute__((unused));
1312 register cset *cs __attribute__((unused));
1314 assert(cs->multis == NULL); /* xxx */
1318 - mccase - add case counterparts of the list of collating elements in a cset
1319 == static void mccase(register struct parse *p, register cset *cs);
1321 * This would have to know the set of possibilities. Implementation
1322 * is deferred.
1324 static void
1325 mccase(p, cs)
1326 register struct parse *p __attribute__((unused));
1327 register cset *cs __attribute__((unused));
1329 assert(cs->multis == NULL); /* xxx */
1333 - isinsets - is this character in any sets?
1334 == static int isinsets(register struct re_guts *g, int c);
1336 static int /* predicate */
1337 isinsets(g, c)
1338 register struct re_guts *g;
1339 int c;
1341 register uch *col;
1342 register int i;
1343 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1344 register unsigned uc = (unsigned char)c;
1346 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1347 if (col[uc] != 0)
1348 return(1);
1349 return(0);
1353 - samesets - are these two characters in exactly the same sets?
1354 == static int samesets(register struct re_guts *g, int c1, int c2);
1356 static int /* predicate */
1357 samesets(g, c1, c2)
1358 register struct re_guts *g;
1359 int c1;
1360 int c2;
1362 register uch *col;
1363 register int i;
1364 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1365 register unsigned uc1 = (unsigned char)c1;
1366 register unsigned uc2 = (unsigned char)c2;
1368 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1369 if (col[uc1] != col[uc2])
1370 return(0);
1371 return(1);
1375 - categorize - sort out character categories
1376 == static void categorize(struct parse *p, register struct re_guts *g);
1378 static void
1379 categorize(p, g)
1380 struct parse *p;
1381 register struct re_guts *g;
1383 register cat_t *cats = g->categories;
1384 register int c;
1385 register int c2;
1386 register cat_t cat;
1388 /* avoid making error situations worse */
1389 if (p->error != 0)
1390 return;
1392 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1393 if (cats[c] == 0 && isinsets(g, c)) {
1394 cat = g->ncategories++;
1395 cats[c] = cat;
1396 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1397 if (cats[c2] == 0 && samesets(g, c, c2))
1398 cats[c2] = cat;
1403 - dupl - emit a duplicate of a bunch of sops
1404 == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1406 static sopno /* start of duplicate */
1407 dupl(p, start, finish)
1408 register struct parse *p;
1409 sopno start; /* from here */
1410 sopno finish; /* to this less one */
1412 register sopno ret = HERE();
1413 register sopno len = finish - start;
1415 assert(finish >= start);
1416 if (len == 0)
1417 return(ret);
1418 enlarge(p, p->ssize + len); /* this many unexpected additions */
1419 assert(p->ssize >= p->slen + len);
1420 (void) memcpy((char *)(p->strip + p->slen),
1421 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1422 p->slen += len;
1423 return(ret);
1427 - doemit - emit a strip operator
1428 == static void doemit(register struct parse *p, sop op, size_t opnd);
1430 * It might seem better to implement this as a macro with a function as
1431 * hard-case backup, but it's just too big and messy unless there are
1432 * some changes to the data structures. Maybe later.
1434 static void
1435 doemit(p, op, opnd)
1436 register struct parse *p;
1437 sop op;
1438 size_t opnd;
1440 /* avoid making error situations worse */
1441 if (p->error != 0)
1442 return;
1444 /* deal with oversize operands ("can't happen", more or less) */
1445 assert(opnd < 1<<OPSHIFT);
1447 /* deal with undersized strip */
1448 if (p->slen >= p->ssize)
1449 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
1450 assert(p->slen < p->ssize);
1452 /* finally, it's all reduced to the easy case */
1453 p->strip[p->slen++] = SOP(op, opnd);
1457 - doinsert - insert a sop into the strip
1458 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1460 static void
1461 doinsert(p, op, opnd, pos)
1462 register struct parse *p;
1463 sop op;
1464 size_t opnd;
1465 sopno pos;
1467 register sopno sn;
1468 register sop s;
1469 register int i;
1471 /* avoid making error situations worse */
1472 if (p->error != 0)
1473 return;
1475 sn = HERE();
1476 EMIT(op, opnd); /* do checks, ensure space */
1477 assert(HERE() == sn+1);
1478 s = p->strip[sn];
1480 /* adjust paren pointers */
1481 assert(pos > 0);
1482 for (i = 1; i < NPAREN; i++) {
1483 if (p->pbegin[i] >= pos) {
1484 p->pbegin[i]++;
1486 if (p->pend[i] >= pos) {
1487 p->pend[i]++;
1491 int length=(HERE()-pos-1)*sizeof(sop);
1492 bmove_upp((uchar *) &p->strip[pos+1]+length,
1493 (uchar *) &p->strip[pos]+length,
1494 length);
1496 #ifdef OLD_CODE
1497 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1498 (HERE()-pos-1)*sizeof(sop));
1499 #endif
1500 p->strip[pos] = s;
1504 - dofwd - complete a forward reference
1505 == static void dofwd(register struct parse *p, sopno pos, sop value);
1507 static void
1508 dofwd(p, pos, value)
1509 register struct parse *p;
1510 register sopno pos;
1511 sop value;
1513 /* avoid making error situations worse */
1514 if (p->error != 0)
1515 return;
1517 assert(value < 1<<OPSHIFT);
1518 p->strip[pos] = OP(p->strip[pos]) | value;
1522 - enlarge - enlarge the strip
1523 == static void enlarge(register struct parse *p, sopno size);
1525 static void
1526 enlarge(p, size)
1527 register struct parse *p;
1528 register sopno size;
1530 register sop *sp;
1532 if (p->ssize >= size)
1533 return;
1535 sp = (sop *)realloc(p->strip, size*sizeof(sop));
1536 if (sp == NULL) {
1537 SETERROR(REG_ESPACE);
1538 return;
1540 p->strip = sp;
1541 p->ssize = size;
1545 - stripsnug - compact the strip
1546 == static void stripsnug(register struct parse *p, register struct re_guts *g);
1548 static void
1549 stripsnug(p, g)
1550 register struct parse *p;
1551 register struct re_guts *g;
1553 g->nstates = p->slen;
1554 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1555 if (g->strip == NULL) {
1556 SETERROR(REG_ESPACE);
1557 g->strip = p->strip;
1562 - findmust - fill in must and mlen with longest mandatory literal string
1563 == static void findmust(register struct parse *p, register struct re_guts *g);
1565 * This algorithm could do fancy things like analyzing the operands of |
1566 * for common subsequences. Someday. This code is simple and finds most
1567 * of the interesting cases.
1569 * Note that must and mlen got initialized during setup.
1571 static void
1572 findmust(p, g)
1573 struct parse *p;
1574 register struct re_guts *g;
1576 register sop *scan;
1577 sop *UNINIT_VAR(start);
1578 register sop *UNINIT_VAR(newstart);
1579 register sopno newlen;
1580 register sop s;
1581 register char *cp;
1582 register sopno i;
1584 /* avoid making error situations worse */
1585 if (p->error != 0)
1586 return;
1588 /* find the longest OCHAR sequence in strip */
1589 newlen = 0;
1590 scan = g->strip + 1;
1591 do {
1592 s = *scan++;
1593 switch (OP(s)) {
1594 case OCHAR: /* sequence member */
1595 if (newlen == 0) /* new sequence */
1596 newstart = scan - 1;
1597 newlen++;
1598 break;
1599 case OPLUS_: /* things that don't break one */
1600 case OLPAREN:
1601 case ORPAREN:
1602 break;
1603 case OQUEST_: /* things that must be skipped */
1604 case OCH_:
1605 scan--;
1606 do {
1607 scan += OPND(s);
1608 s = *scan;
1609 /* assert() interferes w debug printouts */
1610 if (OP(s) != O_QUEST && OP(s) != O_CH &&
1611 OP(s) != OOR2) {
1612 g->iflags |= BAD;
1613 return;
1615 } while (OP(s) != O_QUEST && OP(s) != O_CH);
1616 /* fallthrough */
1617 default: /* things that break a sequence */
1618 if (newlen > g->mlen) { /* ends one */
1619 start = newstart;
1620 g->mlen = newlen;
1622 newlen = 0;
1623 break;
1625 } while (OP(s) != OEND);
1627 if (g->mlen == 0) /* there isn't one */
1628 return;
1630 /* turn it into a character string */
1631 g->must = malloc((size_t)g->mlen + 1);
1632 if (g->must == NULL) { /* argh; just forget it */
1633 g->mlen = 0;
1634 return;
1636 cp = g->must;
1637 scan = start;
1638 for (i = g->mlen; i > 0; i--) {
1639 while (OP(s = *scan++) != OCHAR)
1640 continue;
1641 assert(cp < g->must + g->mlen);
1642 *cp++ = (char)OPND(s);
1644 assert(cp == g->must + g->mlen);
1645 *cp++ = '\0'; /* just on general principles */
1649 - pluscount - count + nesting
1650 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1652 static sopno /* nesting depth */
1653 pluscount(p, g)
1654 struct parse *p;
1655 register struct re_guts *g;
1657 register sop *scan;
1658 register sop s;
1659 register sopno plusnest = 0;
1660 register sopno maxnest = 0;
1662 if (p->error != 0)
1663 return(0); /* there may not be an OEND */
1665 scan = g->strip + 1;
1666 do {
1667 s = *scan++;
1668 switch (OP(s)) {
1669 case OPLUS_:
1670 plusnest++;
1671 break;
1672 case O_PLUS:
1673 if (plusnest > maxnest)
1674 maxnest = plusnest;
1675 plusnest--;
1676 break;
1678 } while (OP(s) != OEND);
1679 if (plusnest != 0)
1680 g->iflags |= BAD;
1681 return(maxnest);