This commit sets new users to see the DragonFly-tips fortunes instead
[dragonfly.git] / contrib / awk20040207 / b.c
blobe6e4cc9cb43ad5f6cf4018287eae39282e6ff198
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 /* lasciate ogne speranza, voi ch'entrate. */
27 #define DEBUG
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include "awk.h"
34 #include "ytab.h"
36 #define HAT (NCHARS+2) /* matches ^ in regular expr */
37 /* NCHARS is 2**n */
38 #define MAXLIN 22
40 #define type(v) (v)->nobj /* badly overloaded here */
41 #define info(v) (v)->ntype /* badly overloaded here */
42 #define left(v) (v)->narg[0]
43 #define right(v) (v)->narg[1]
44 #define parent(v) (v)->nnext
46 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47 #define UNARY case STAR: case PLUS: case QUEST:
49 /* encoding in tree Nodes:
50 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
51 left is index, right contains value or pointer to value
52 unary (STAR, PLUS, QUEST): left is child, right is null
53 binary (CAT, OR): left and right are children
54 parent contains pointer to parent
58 int *setvec;
59 int *tmpset;
60 int maxsetvec = 0;
62 int rtok; /* next token in current re */
63 int rlxval;
64 static uschar *rlxstr;
65 static uschar *prestr; /* current position in current re */
66 static uschar *lastre; /* origin of last re */
68 static int setcnt;
69 static int poscnt;
71 char *patbeg;
72 int patlen;
74 #define NFA 20 /* cache this many dynamic fa's */
75 fa *fatab[NFA];
76 int nfatab = 0; /* entries in fatab */
78 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
80 int i, use, nuse;
81 fa *pfa;
82 static int now = 1;
84 if (setvec == 0) { /* first time through any RE */
85 maxsetvec = MAXLIN;
86 setvec = (int *) malloc(maxsetvec * sizeof(int));
87 tmpset = (int *) malloc(maxsetvec * sizeof(int));
88 if (setvec == 0 || tmpset == 0)
89 overflo("out of space initializing makedfa");
92 if (compile_time) /* a constant for sure */
93 return mkdfa(s, anchor);
94 for (i = 0; i < nfatab; i++) /* is it there already? */
95 if (fatab[i]->anchor == anchor
96 && strcmp((const char *) fatab[i]->restr, s) == 0) {
97 fatab[i]->use = now++;
98 return fatab[i];
100 pfa = mkdfa(s, anchor);
101 if (nfatab < NFA) { /* room for another */
102 fatab[nfatab] = pfa;
103 fatab[nfatab]->use = now++;
104 nfatab++;
105 return pfa;
107 use = fatab[0]->use; /* replace least-recently used */
108 nuse = 0;
109 for (i = 1; i < nfatab; i++)
110 if (fatab[i]->use < use) {
111 use = fatab[i]->use;
112 nuse = i;
114 freefa(fatab[nuse]);
115 fatab[nuse] = pfa;
116 pfa->use = now++;
117 return pfa;
120 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
121 /* anchor = 1 for anchored matches, else 0 */
123 Node *p, *p1;
124 fa *f;
126 p = reparse(s);
127 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
128 /* put ALL STAR in front of reg. exp. */
129 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
130 /* put FINAL after reg. exp. */
132 poscnt = 0;
133 penter(p1); /* enter parent pointers and leaf indices */
134 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
135 overflo("out of space for fa");
136 f->accept = poscnt-1; /* penter has computed number of positions in re */
137 cfoll(f, p1); /* set up follow sets */
138 freetr(p1);
139 if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL)
140 overflo("out of space in makedfa");
141 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
142 overflo("out of space in makedfa");
143 *f->posns[1] = 0;
144 f->initstat = makeinit(f, anchor);
145 f->anchor = anchor;
146 f->restr = (uschar *) tostring(s);
147 return f;
150 int makeinit(fa *f, int anchor)
152 int i, k;
154 f->curstat = 2;
155 f->out[2] = 0;
156 f->reset = 0;
157 k = *(f->re[0].lfollow);
158 xfree(f->posns[2]);
159 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
160 overflo("out of space in makeinit");
161 for (i=0; i <= k; i++) {
162 (f->posns[2])[i] = (f->re[0].lfollow)[i];
164 if ((f->posns[2])[1] == f->accept)
165 f->out[2] = 1;
166 for (i=0; i < NCHARS; i++)
167 f->gototab[2][i] = 0;
168 f->curstat = cgoto(f, 2, HAT);
169 if (anchor) {
170 *f->posns[2] = k-1; /* leave out position 0 */
171 for (i=0; i < k; i++) {
172 (f->posns[0])[i] = (f->posns[2])[i];
175 f->out[0] = f->out[2];
176 if (f->curstat != 2)
177 --(*f->posns[f->curstat]);
179 return f->curstat;
182 void penter(Node *p) /* set up parent pointers and leaf indices */
184 switch (type(p)) {
185 LEAF
186 info(p) = poscnt;
187 poscnt++;
188 break;
189 UNARY
190 penter(left(p));
191 parent(left(p)) = p;
192 break;
193 case CAT:
194 case OR:
195 penter(left(p));
196 penter(right(p));
197 parent(left(p)) = p;
198 parent(right(p)) = p;
199 break;
200 default: /* can't happen */
201 FATAL("can't happen: unknown type %d in penter", type(p));
202 break;
206 void freetr(Node *p) /* free parse tree */
208 switch (type(p)) {
209 LEAF
210 xfree(p);
211 break;
212 UNARY
213 freetr(left(p));
214 xfree(p);
215 break;
216 case CAT:
217 case OR:
218 freetr(left(p));
219 freetr(right(p));
220 xfree(p);
221 break;
222 default: /* can't happen */
223 FATAL("can't happen: unknown type %d in freetr", type(p));
224 break;
228 /* in the parsing of regular expressions, metacharacters like . have */
229 /* to be seen literally; \056 is not a metacharacter. */
231 int hexstr(char **pp) /* find and eval hex string at pp, return new p */
232 { /* only pick up one 8-bit byte (2 chars) */
233 uschar *p;
234 int n = 0;
235 int i;
237 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
238 if (isdigit(*p))
239 n = 16 * n + *p - '0';
240 else if (*p >= 'a' && *p <= 'f')
241 n = 16 * n + *p - 'a' + 10;
242 else if (*p >= 'A' && *p <= 'F')
243 n = 16 * n + *p - 'A' + 10;
245 *pp = (char *) p;
246 return n;
249 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
251 int quoted(char **pp) /* pick up next thing after a \\ */
252 /* and increment *pp */
254 char *p = *pp;
255 int c;
257 if ((c = *p++) == 't')
258 c = '\t';
259 else if (c == 'n')
260 c = '\n';
261 else if (c == 'f')
262 c = '\f';
263 else if (c == 'r')
264 c = '\r';
265 else if (c == 'b')
266 c = '\b';
267 else if (c == '\\')
268 c = '\\';
269 else if (c == 'x') { /* hexadecimal goo follows */
270 c = hexstr(&p); /* this adds a null if number is invalid */
271 } else if (isoctdigit(c)) { /* \d \dd \ddd */
272 int n = c - '0';
273 if (isoctdigit(*p)) {
274 n = 8 * n + *p++ - '0';
275 if (isoctdigit(*p))
276 n = 8 * n + *p++ - '0';
278 c = n;
279 } /* else */
280 /* c = c; */
281 *pp = p;
282 return c;
285 char *cclenter(const char *argp) /* add a character class */
287 int i, c, c2;
288 uschar *p = (uschar *) argp;
289 uschar *op, *bp;
290 static uschar *buf = 0;
291 static int bufsz = 100;
293 op = p;
294 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
295 FATAL("out of space for character class [%.10s...] 1", p);
296 bp = buf;
297 for (i = 0; (c = *p++) != 0; ) {
298 if (c == '\\') {
299 c = quoted((char **) &p);
300 } else if (c == '-' && i > 0 && bp[-1] != 0) {
301 if (*p != 0) {
302 c = bp[-1];
303 c2 = *p++;
304 if (c2 == '\\')
305 c2 = quoted((char **) &p);
306 if (c > c2) { /* empty; ignore */
307 bp--;
308 i--;
309 continue;
311 while (c < c2) {
312 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
313 FATAL("out of space for character class [%.10s...] 2", p);
314 *bp++ = ++c;
315 i++;
317 continue;
320 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, 0))
321 FATAL("out of space for character class [%.10s...] 3", p);
322 *bp++ = c;
323 i++;
325 *bp = 0;
326 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
327 xfree(op);
328 return (char *) tostring((char *) buf);
331 void overflo(const char *s)
333 FATAL("regular expression too big: %.30s...", s);
336 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
338 int i;
339 int *p;
341 switch (type(v)) {
342 LEAF
343 f->re[info(v)].ltype = type(v);
344 f->re[info(v)].lval.np = right(v);
345 while (f->accept >= maxsetvec) { /* guessing here! */
346 maxsetvec *= 4;
347 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
348 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
349 if (setvec == 0 || tmpset == 0)
350 overflo("out of space in cfoll()");
352 for (i = 0; i <= f->accept; i++)
353 setvec[i] = 0;
354 setcnt = 0;
355 follow(v); /* computes setvec and setcnt */
356 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
357 overflo("out of space building follow set");
358 f->re[info(v)].lfollow = p;
359 *p = setcnt;
360 for (i = f->accept; i >= 0; i--)
361 if (setvec[i] == 1)
362 *++p = i;
363 break;
364 UNARY
365 cfoll(f,left(v));
366 break;
367 case CAT:
368 case OR:
369 cfoll(f,left(v));
370 cfoll(f,right(v));
371 break;
372 default: /* can't happen */
373 FATAL("can't happen: unknown type %d in cfoll", type(v));
377 int first(Node *p) /* collects initially active leaves of p into setvec */
378 /* returns 1 if p matches empty string */
380 int b, lp;
382 switch (type(p)) {
383 LEAF
384 lp = info(p); /* look for high-water mark of subscripts */
385 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
386 maxsetvec *= 4;
387 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
388 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
389 if (setvec == 0 || tmpset == 0)
390 overflo("out of space in first()");
392 if (setvec[lp] != 1) {
393 setvec[lp] = 1;
394 setcnt++;
396 if (type(p) == CCL && (*(char *) right(p)) == '\0')
397 return(0); /* empty CCL */
398 else return(1);
399 case PLUS:
400 if (first(left(p)) == 0) return(0);
401 return(1);
402 case STAR:
403 case QUEST:
404 first(left(p));
405 return(0);
406 case CAT:
407 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
408 return(1);
409 case OR:
410 b = first(right(p));
411 if (first(left(p)) == 0 || b == 0) return(0);
412 return(1);
414 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
415 return(-1);
418 void follow(Node *v) /* collects leaves that can follow v into setvec */
420 Node *p;
422 if (type(v) == FINAL)
423 return;
424 p = parent(v);
425 switch (type(p)) {
426 case STAR:
427 case PLUS:
428 first(v);
429 follow(p);
430 return;
432 case OR:
433 case QUEST:
434 follow(p);
435 return;
437 case CAT:
438 if (v == left(p)) { /* v is left child of p */
439 if (first(right(p)) == 0) {
440 follow(p);
441 return;
443 } else /* v is right child */
444 follow(p);
445 return;
449 int member(int c, const char *sarg) /* is c in s? */
451 uschar *s = (uschar *) sarg;
453 while (*s)
454 if (c == *s++)
455 return(1);
456 return(0);
459 int match(fa *f, const char *p0) /* shortest match ? */
461 int s, ns;
462 uschar *p = (uschar *) p0;
464 s = f->reset ? makeinit(f,0) : f->initstat;
465 if (f->out[s])
466 return(1);
467 do {
468 if ((ns = f->gototab[s][*p]) != 0)
469 s = ns;
470 else
471 s = cgoto(f, s, *p);
472 if (f->out[s])
473 return(1);
474 } while (*p++ != 0);
475 return(0);
478 int pmatch(fa *f, const char *p0) /* longest match, for sub */
480 int s, ns;
481 uschar *p = (uschar *) p0;
482 uschar *q;
483 int i, k;
485 /* s = f->reset ? makeinit(f,1) : f->initstat; */
486 if (f->reset) {
487 f->initstat = s = makeinit(f,1);
488 } else {
489 s = f->initstat;
491 patbeg = (char *) p;
492 patlen = -1;
493 do {
494 q = p;
495 do {
496 if (f->out[s]) /* final state */
497 patlen = q-p;
498 if ((ns = f->gototab[s][*q]) != 0)
499 s = ns;
500 else
501 s = cgoto(f, s, *q);
502 if (s == 1) { /* no transition */
503 if (patlen >= 0) {
504 patbeg = (char *) p;
505 return(1);
507 else
508 goto nextin; /* no match */
510 } while (*q++ != 0);
511 if (f->out[s])
512 patlen = q-p-1; /* don't count $ */
513 if (patlen >= 0) {
514 patbeg = (char *) p;
515 return(1);
517 nextin:
518 s = 2;
519 if (f->reset) {
520 for (i = 2; i <= f->curstat; i++)
521 xfree(f->posns[i]);
522 k = *f->posns[0];
523 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
524 overflo("out of space in pmatch");
525 for (i = 0; i <= k; i++)
526 (f->posns[2])[i] = (f->posns[0])[i];
527 f->initstat = f->curstat = 2;
528 f->out[2] = f->out[0];
529 for (i = 0; i < NCHARS; i++)
530 f->gototab[2][i] = 0;
532 } while (*p++ != 0);
533 return (0);
536 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
538 int s, ns;
539 uschar *p = (uschar *) p0;
540 uschar *q;
541 int i, k;
543 /* s = f->reset ? makeinit(f,1) : f->initstat; */
544 if (f->reset) {
545 f->initstat = s = makeinit(f,1);
546 } else {
547 s = f->initstat;
549 patlen = -1;
550 while (*p) {
551 q = p;
552 do {
553 if (f->out[s]) /* final state */
554 patlen = q-p;
555 if ((ns = f->gototab[s][*q]) != 0)
556 s = ns;
557 else
558 s = cgoto(f, s, *q);
559 if (s == 1) { /* no transition */
560 if (patlen > 0) {
561 patbeg = (char *) p;
562 return(1);
563 } else
564 goto nnextin; /* no nonempty match */
566 } while (*q++ != 0);
567 if (f->out[s])
568 patlen = q-p-1; /* don't count $ */
569 if (patlen > 0 ) {
570 patbeg = (char *) p;
571 return(1);
573 nnextin:
574 s = 2;
575 if (f->reset) {
576 for (i = 2; i <= f->curstat; i++)
577 xfree(f->posns[i]);
578 k = *f->posns[0];
579 if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL)
580 overflo("out of state space");
581 for (i = 0; i <= k; i++)
582 (f->posns[2])[i] = (f->posns[0])[i];
583 f->initstat = f->curstat = 2;
584 f->out[2] = f->out[0];
585 for (i = 0; i < NCHARS; i++)
586 f->gototab[2][i] = 0;
588 p++;
590 return (0);
593 Node *reparse(const char *p) /* parses regular expression pointed to by p */
594 { /* uses relex() to scan regular expression */
595 Node *np;
597 dprintf( ("reparse <%s>\n", p) );
598 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
599 rtok = relex();
600 /* GNU compatibility: an empty regexp matches anything */
601 if (rtok == '\0')
602 /* FATAL("empty regular expression"); previous */
603 return(op2(ALL, NIL, NIL));
604 np = regexp();
605 if (rtok != '\0')
606 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
607 return(np);
610 Node *regexp(void) /* top-level parse of reg expr */
612 return (alt(concat(primary())));
615 Node *primary(void)
617 Node *np;
619 switch (rtok) {
620 case CHAR:
621 np = op2(CHAR, NIL, itonp(rlxval));
622 rtok = relex();
623 return (unary(np));
624 case ALL:
625 rtok = relex();
626 return (unary(op2(ALL, NIL, NIL)));
627 case DOT:
628 rtok = relex();
629 return (unary(op2(DOT, NIL, NIL)));
630 case CCL:
631 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
632 rtok = relex();
633 return (unary(np));
634 case NCCL:
635 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
636 rtok = relex();
637 return (unary(np));
638 case '^':
639 rtok = relex();
640 return (unary(op2(CHAR, NIL, itonp(HAT))));
641 case '$':
642 rtok = relex();
643 return (unary(op2(CHAR, NIL, NIL)));
644 case '(':
645 rtok = relex();
646 if (rtok == ')') { /* special pleading for () */
647 rtok = relex();
648 return unary(op2(CCL, NIL, (Node *) tostring("")));
650 np = regexp();
651 if (rtok == ')') {
652 rtok = relex();
653 return (unary(np));
655 else
656 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
657 default:
658 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
660 return 0; /*NOTREACHED*/
663 Node *concat(Node *np)
665 switch (rtok) {
666 case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
667 return (concat(op2(CAT, np, primary())));
669 return (np);
672 Node *alt(Node *np)
674 if (rtok == OR) {
675 rtok = relex();
676 return (alt(op2(OR, np, concat(primary()))));
678 return (np);
681 Node *unary(Node *np)
683 switch (rtok) {
684 case STAR:
685 rtok = relex();
686 return (unary(op2(STAR, np, NIL)));
687 case PLUS:
688 rtok = relex();
689 return (unary(op2(PLUS, np, NIL)));
690 case QUEST:
691 rtok = relex();
692 return (unary(op2(QUEST, np, NIL)));
693 default:
694 return (np);
699 * Character class definitions conformant to the POSIX locale as
700 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
701 * and operating character sets are both ASCII (ISO646) or supersets
702 * thereof.
704 * Note that to avoid overflowing the temporary buffer used in
705 * relex(), the expanded character class (prior to range expansion)
706 * must be less than twice the size of their full name.
709 /* Because isblank doesn't show up in any of the header files on any
710 * system i use, it's defined here. if some other locale has a richer
711 * definition of "blank", define HAS_ISBLANK and provide your own
712 * version.
713 * the parentheses here are an attempt to find a path through the maze
714 * of macro definition and/or function and/or version provided. thanks
715 * to nelson beebe for the suggestion; let's see if it works everywhere.
718 #ifndef HAS_ISBLANK
720 int (isblank)(int c)
722 return c==' ' || c=='\t';
725 #endif
727 struct charclass {
728 const char *cc_name;
729 int cc_namelen;
730 int (*cc_func)(int);
731 } charclasses[] = {
732 { "alnum", 5, isalnum },
733 { "alpha", 5, isalpha },
734 { "blank", 5, isblank },
735 { "cntrl", 5, iscntrl },
736 { "digit", 5, isdigit },
737 { "graph", 5, isgraph },
738 { "lower", 5, islower },
739 { "print", 5, isprint },
740 { "punct", 5, ispunct },
741 { "space", 5, isspace },
742 { "upper", 5, isupper },
743 { "xdigit", 6, isxdigit },
744 { NULL, 0, NULL },
748 int relex(void) /* lexical analyzer for reparse */
750 int c, n;
751 int cflag;
752 static uschar *buf = 0;
753 static int bufsz = 100;
754 uschar *bp;
755 struct charclass *cc;
756 int i;
758 switch (c = *prestr++) {
759 case '|': return OR;
760 case '*': return STAR;
761 case '+': return PLUS;
762 case '?': return QUEST;
763 case '.': return DOT;
764 case '\0': prestr--; return '\0';
765 case '^':
766 case '$':
767 case '(':
768 case ')':
769 return c;
770 case '\\':
771 rlxval = quoted((char **) &prestr);
772 return CHAR;
773 default:
774 rlxval = c;
775 return CHAR;
776 case '[':
777 if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL)
778 FATAL("out of space in reg expr %.10s..", lastre);
779 bp = buf;
780 if (*prestr == '^') {
781 cflag = 1;
782 prestr++;
784 else
785 cflag = 0;
786 n = 2 * strlen((const char *) prestr)+1;
787 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, 0))
788 FATAL("out of space for reg expr %.10s...", lastre);
789 for (; ; ) {
790 if ((c = *prestr++) == '\\') {
791 *bp++ = '\\';
792 if ((c = *prestr++) == '\0')
793 FATAL("nonterminated character class %.20s...", lastre);
794 *bp++ = c;
795 /* } else if (c == '\n') { */
796 /* FATAL("newline in character class %.20s...", lastre); */
797 } else if (c == '[' && *prestr == ':') {
798 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
799 for (cc = charclasses; cc->cc_name; cc++)
800 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
801 break;
802 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
803 prestr[2 + cc->cc_namelen] == ']') {
804 prestr += cc->cc_namelen + 3;
805 for (i = 0; i < NCHARS; i++) {
806 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, 0))
807 FATAL("out of space for reg expr %.10s...", lastre);
808 if (cc->cc_func(i)) {
809 *bp++ = i;
810 n++;
813 } else
814 *bp++ = c;
815 } else if (c == '\0') {
816 FATAL("nonterminated character class %.20s", lastre);
817 } else if (bp == buf) { /* 1st char is special */
818 *bp++ = c;
819 } else if (c == ']') {
820 *bp++ = 0;
821 rlxstr = (uschar *) tostring((char *) buf);
822 if (cflag == 0)
823 return CCL;
824 else
825 return NCCL;
826 } else
827 *bp++ = c;
832 int cgoto(fa *f, int s, int c)
834 int i, j, k;
835 int *p, *q;
837 while (f->accept >= maxsetvec) { /* guessing here! */
838 maxsetvec *= 4;
839 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
840 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
841 if (setvec == 0 || tmpset == 0)
842 overflo("out of space in cgoto()");
844 for (i = 0; i <= f->accept; i++)
845 setvec[i] = 0;
846 setcnt = 0;
847 /* compute positions of gototab[s,c] into setvec */
848 p = f->posns[s];
849 for (i = 1; i <= *p; i++) {
850 if ((k = f->re[p[i]].ltype) != FINAL) {
851 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
852 || (k == DOT && c != 0 && c != HAT)
853 || (k == ALL && c != 0)
854 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
855 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
856 q = f->re[p[i]].lfollow;
857 for (j = 1; j <= *q; j++) {
858 if (q[j] >= maxsetvec) {
859 maxsetvec *= 4;
860 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
861 tmpset = (int *) realloc(setvec, maxsetvec * sizeof(int));
862 if (setvec == 0 || tmpset == 0)
863 overflo("cgoto overflow");
865 if (setvec[q[j]] == 0) {
866 setcnt++;
867 setvec[q[j]] = 1;
873 /* determine if setvec is a previous state */
874 tmpset[0] = setcnt;
875 j = 1;
876 for (i = f->accept; i >= 0; i--)
877 if (setvec[i]) {
878 tmpset[j++] = i;
880 /* tmpset == previous state? */
881 for (i = 1; i <= f->curstat; i++) {
882 p = f->posns[i];
883 if ((k = tmpset[0]) != p[0])
884 goto different;
885 for (j = 1; j <= k; j++)
886 if (tmpset[j] != p[j])
887 goto different;
888 /* setvec is state i */
889 f->gototab[s][c] = i;
890 return i;
891 different:;
894 /* add tmpset to current set of states */
895 if (f->curstat >= NSTATES-1) {
896 f->curstat = 2;
897 f->reset = 1;
898 for (i = 2; i < NSTATES; i++)
899 xfree(f->posns[i]);
900 } else
901 ++(f->curstat);
902 for (i = 0; i < NCHARS; i++)
903 f->gototab[f->curstat][i] = 0;
904 xfree(f->posns[f->curstat]);
905 if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL)
906 overflo("out of space in cgoto");
908 f->posns[f->curstat] = p;
909 f->gototab[s][c] = f->curstat;
910 for (i = 0; i <= setcnt; i++)
911 p[i] = tmpset[i];
912 if (setvec[f->accept])
913 f->out[f->curstat] = 1;
914 else
915 f->out[f->curstat] = 0;
916 return f->curstat;
920 void freefa(fa *f) /* free a finite automaton */
922 int i;
924 if (f == NULL)
925 return;
926 for (i = 0; i <= f->curstat; i++)
927 xfree(f->posns[i]);
928 for (i = 0; i <= f->accept; i++) {
929 xfree(f->re[i].lfollow);
930 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
931 xfree((f->re[i].lval.np));
933 xfree(f->restr);
934 xfree(f);