Merge commit 'b31320a79e2054c6739b5229259dbf98f3afc547' into merges
[unleashed.git] / contrib / awk / b.c
blob33ecddda94c3487dbfc2ac26dd4d0c6761e39483
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
25 /* lasciate ogne speranza, voi ch'intrate. */
27 #define DEBUG
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include "awk.h"
34 #include "ytab.h"
36 #define HAT (NCHARS+2) /* matches ^ in regular expr */
37 /* NCHARS is 2**n */
38 #define MAXLIN 22
40 #define type(v) (v)->nobj /* badly overloaded here */
41 #define info(v) (v)->ntype /* badly overloaded here */
42 #define left(v) (v)->narg[0]
43 #define right(v) (v)->narg[1]
44 #define parent(v) (v)->nnext
46 #define LEAF case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
47 #define ELEAF case EMPTYRE: /* empty string in regexp */
48 #define UNARY case STAR: case PLUS: case QUEST:
50 /* encoding in tree Nodes:
51 leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL, EMPTYRE):
52 left is index, right contains value or pointer to value
53 unary (STAR, PLUS, QUEST): left is child, right is null
54 binary (CAT, OR): left and right are children
55 parent contains pointer to parent
59 int *setvec;
60 int *tmpset;
61 int maxsetvec = 0;
63 int rtok; /* next token in current re */
64 int rlxval;
65 static uschar *rlxstr;
66 static uschar *prestr; /* current position in current re */
67 static uschar *lastre; /* origin of last re */
69 static int setcnt;
70 static int poscnt;
72 char *patbeg;
73 int patlen;
75 #define NFA 20 /* cache this many dynamic fa's */
76 fa *fatab[NFA];
77 int nfatab = 0; /* entries in fatab */
79 fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */
81 int i, use, nuse;
82 fa *pfa;
83 static int now = 1;
85 if (setvec == NULL) { /* first time through any RE */
86 maxsetvec = MAXLIN;
87 setvec = (int *) malloc(maxsetvec * sizeof(int));
88 tmpset = (int *) malloc(maxsetvec * sizeof(int));
89 if (setvec == NULL || tmpset == NULL)
90 overflo("out of space initializing makedfa");
93 if (compile_time) /* a constant for sure */
94 return mkdfa(s, anchor);
95 for (i = 0; i < nfatab; i++) /* is it there already? */
96 if (fatab[i]->anchor == anchor
97 && strcmp((const char *) fatab[i]->restr, s) == 0) {
98 fatab[i]->use = now++;
99 return fatab[i];
101 pfa = mkdfa(s, anchor);
102 if (nfatab < NFA) { /* room for another */
103 fatab[nfatab] = pfa;
104 fatab[nfatab]->use = now++;
105 nfatab++;
106 return pfa;
108 use = fatab[0]->use; /* replace least-recently used */
109 nuse = 0;
110 for (i = 1; i < nfatab; i++)
111 if (fatab[i]->use < use) {
112 use = fatab[i]->use;
113 nuse = i;
115 freefa(fatab[nuse]);
116 fatab[nuse] = pfa;
117 pfa->use = now++;
118 return pfa;
121 fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */
122 /* anchor = 1 for anchored matches, else 0 */
124 Node *p, *p1;
125 fa *f;
127 p = reparse(s);
128 p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
129 /* put ALL STAR in front of reg. exp. */
130 p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
131 /* put FINAL after reg. exp. */
133 poscnt = 0;
134 penter(p1); /* enter parent pointers and leaf indices */
135 if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL)
136 overflo("out of space for fa");
137 f->accept = poscnt-1; /* penter has computed number of positions in re */
138 cfoll(f, p1); /* set up follow sets */
139 freetr(p1);
140 if ((f->posns[0] = (int *) calloc(*(f->re[0].lfollow), sizeof(int))) == NULL)
141 overflo("out of space in makedfa");
142 if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL)
143 overflo("out of space in makedfa");
144 *f->posns[1] = 0;
145 f->initstat = makeinit(f, anchor);
146 f->anchor = anchor;
147 f->restr = (uschar *) tostring(s);
148 return f;
151 int makeinit(fa *f, int anchor)
153 int i, k;
155 f->curstat = 2;
156 f->out[2] = 0;
157 f->reset = 0;
158 k = *(f->re[0].lfollow);
159 xfree(f->posns[2]);
160 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
161 overflo("out of space in makeinit");
162 for (i=0; i <= k; i++) {
163 (f->posns[2])[i] = (f->re[0].lfollow)[i];
165 if ((f->posns[2])[1] == f->accept)
166 f->out[2] = 1;
167 for (i=0; i < NCHARS; i++)
168 f->gototab[2][i] = 0;
169 f->curstat = cgoto(f, 2, HAT);
170 if (anchor) {
171 *f->posns[2] = k-1; /* leave out position 0 */
172 for (i=0; i < k; i++) {
173 (f->posns[0])[i] = (f->posns[2])[i];
176 f->out[0] = f->out[2];
177 if (f->curstat != 2)
178 --(*f->posns[f->curstat]);
180 return f->curstat;
183 void penter(Node *p) /* set up parent pointers and leaf indices */
185 switch (type(p)) {
186 ELEAF
187 LEAF
188 info(p) = poscnt;
189 poscnt++;
190 break;
191 UNARY
192 penter(left(p));
193 parent(left(p)) = p;
194 break;
195 case CAT:
196 case OR:
197 penter(left(p));
198 penter(right(p));
199 parent(left(p)) = p;
200 parent(right(p)) = p;
201 break;
202 default: /* can't happen */
203 FATAL("can't happen: unknown type %d in penter", type(p));
204 break;
208 void freetr(Node *p) /* free parse tree */
210 switch (type(p)) {
211 ELEAF
212 LEAF
213 xfree(p);
214 break;
215 UNARY
216 freetr(left(p));
217 xfree(p);
218 break;
219 case CAT:
220 case OR:
221 freetr(left(p));
222 freetr(right(p));
223 xfree(p);
224 break;
225 default: /* can't happen */
226 FATAL("can't happen: unknown type %d in freetr", type(p));
227 break;
231 /* in the parsing of regular expressions, metacharacters like . have */
232 /* to be seen literally; \056 is not a metacharacter. */
234 int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */
235 { /* only pick up one 8-bit byte (2 chars) */
236 uschar *p;
237 int n = 0;
238 int i;
240 for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) {
241 if (isdigit(*p))
242 n = 16 * n + *p - '0';
243 else if (*p >= 'a' && *p <= 'f')
244 n = 16 * n + *p - 'a' + 10;
245 else if (*p >= 'A' && *p <= 'F')
246 n = 16 * n + *p - 'A' + 10;
248 *pp = (uschar *) p;
249 return n;
252 #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */
254 int quoted(uschar **pp) /* pick up next thing after a \\ */
255 /* and increment *pp */
257 uschar *p = *pp;
258 int c;
260 if ((c = *p++) == 't')
261 c = '\t';
262 else if (c == 'n')
263 c = '\n';
264 else if (c == 'f')
265 c = '\f';
266 else if (c == 'r')
267 c = '\r';
268 else if (c == 'b')
269 c = '\b';
270 else if (c == '\\')
271 c = '\\';
272 else if (c == 'x') { /* hexadecimal goo follows */
273 c = hexstr(&p); /* this adds a null if number is invalid */
274 } else if (isoctdigit(c)) { /* \d \dd \ddd */
275 int n = c - '0';
276 if (isoctdigit(*p)) {
277 n = 8 * n + *p++ - '0';
278 if (isoctdigit(*p))
279 n = 8 * n + *p++ - '0';
281 c = n;
282 } /* else */
283 /* c = c; */
284 *pp = p;
285 return c;
288 static int collate_range_cmp(int a, int b)
290 static char s[2][2];
292 if ((uschar)a == (uschar)b)
293 return 0;
294 s[0][0] = a;
295 s[1][0] = b;
296 return (strcoll(s[0], s[1]));
299 char *cclenter(const char *argp) /* add a character class */
301 int i, c, c2;
302 int j;
303 uschar *p = (uschar *) argp;
304 uschar *op, *bp;
305 static uschar *buf = NULL;
306 static int bufsz = 100;
308 op = p;
309 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
310 FATAL("out of space for character class [%.10s...] 1", p);
311 bp = buf;
312 for (i = 0; (c = *p++) != 0; ) {
313 if (c == '\\') {
314 c = quoted(&p);
315 } else if (c == '-' && i > 0 && bp[-1] != 0) {
316 if (*p != 0) {
317 c = bp[-1];
318 c2 = *p++;
319 if (c2 == '\\')
320 c2 = quoted(&p);
321 if (collate_range_cmp(c, c2) > 0) {
322 bp--;
323 i--;
324 continue;
326 for (j = 0; j < NCHARS; j++) {
327 if ((collate_range_cmp(c, j) > 0) ||
328 collate_range_cmp(j, c2) > 0)
329 continue;
330 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
331 FATAL("out of space for character class [%.10s...] 2", p);
332 *bp++ = j;
333 i++;
335 continue;
338 if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
339 FATAL("out of space for character class [%.10s...] 3", p);
340 *bp++ = c;
341 i++;
343 *bp = 0;
344 dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) );
345 xfree(op);
346 return (char *) tostring((char *) buf);
349 void overflo(const char *s)
351 FATAL("regular expression too big: %.30s...", s);
354 void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */
356 int i;
357 int *p;
359 switch (type(v)) {
360 ELEAF
361 LEAF
362 f->re[info(v)].ltype = type(v);
363 f->re[info(v)].lval.np = right(v);
364 while (f->accept >= maxsetvec) { /* guessing here! */
365 maxsetvec *= 4;
366 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
367 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
368 if (setvec == NULL || tmpset == NULL)
369 overflo("out of space in cfoll()");
371 for (i = 0; i <= f->accept; i++)
372 setvec[i] = 0;
373 setcnt = 0;
374 follow(v); /* computes setvec and setcnt */
375 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
376 overflo("out of space building follow set");
377 f->re[info(v)].lfollow = p;
378 *p = setcnt;
379 for (i = f->accept; i >= 0; i--)
380 if (setvec[i] == 1)
381 *++p = i;
382 break;
383 UNARY
384 cfoll(f,left(v));
385 break;
386 case CAT:
387 case OR:
388 cfoll(f,left(v));
389 cfoll(f,right(v));
390 break;
391 default: /* can't happen */
392 FATAL("can't happen: unknown type %d in cfoll", type(v));
396 int first(Node *p) /* collects initially active leaves of p into setvec */
397 /* returns 0 if p matches empty string */
399 int b, lp;
401 switch (type(p)) {
402 ELEAF
403 LEAF
404 lp = info(p); /* look for high-water mark of subscripts */
405 while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! */
406 maxsetvec *= 4;
407 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
408 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
409 if (setvec == NULL || tmpset == NULL)
410 overflo("out of space in first()");
412 if (type(p) == EMPTYRE) {
413 setvec[lp] = 0;
414 return(0);
416 if (setvec[lp] != 1) {
417 setvec[lp] = 1;
418 setcnt++;
420 if (type(p) == CCL && (*(char *) right(p)) == '\0')
421 return(0); /* empty CCL */
422 else return(1);
423 case PLUS:
424 if (first(left(p)) == 0) return(0);
425 return(1);
426 case STAR:
427 case QUEST:
428 first(left(p));
429 return(0);
430 case CAT:
431 if (first(left(p)) == 0 && first(right(p)) == 0) return(0);
432 return(1);
433 case OR:
434 b = first(right(p));
435 if (first(left(p)) == 0 || b == 0) return(0);
436 return(1);
438 FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */
439 return(-1);
442 void follow(Node *v) /* collects leaves that can follow v into setvec */
444 Node *p;
446 if (type(v) == FINAL)
447 return;
448 p = parent(v);
449 switch (type(p)) {
450 case STAR:
451 case PLUS:
452 first(v);
453 follow(p);
454 return;
456 case OR:
457 case QUEST:
458 follow(p);
459 return;
461 case CAT:
462 if (v == left(p)) { /* v is left child of p */
463 if (first(right(p)) == 0) {
464 follow(p);
465 return;
467 } else /* v is right child */
468 follow(p);
469 return;
473 int member(int c, const char *sarg) /* is c in s? */
475 uschar *s = (uschar *) sarg;
477 while (*s)
478 if (c == *s++)
479 return(1);
480 return(0);
483 int match(fa *f, const char *p0) /* shortest match ? */
485 int s, ns;
486 uschar *p = (uschar *) p0;
488 s = f->reset ? makeinit(f,0) : f->initstat;
489 if (f->out[s])
490 return(1);
491 do {
492 /* assert(*p < NCHARS); */
493 if ((ns = f->gototab[s][*p]) != 0)
494 s = ns;
495 else
496 s = cgoto(f, s, *p);
497 if (f->out[s])
498 return(1);
499 } while (*p++ != 0);
500 return(0);
503 int pmatch(fa *f, const char *p0) /* longest match, for sub */
505 int s, ns;
506 uschar *p = (uschar *) p0;
507 uschar *q;
508 int i, k;
510 /* s = f->reset ? makeinit(f,1) : f->initstat; */
511 if (f->reset) {
512 f->initstat = s = makeinit(f,1);
513 } else {
514 s = f->initstat;
516 patbeg = (char *) p;
517 patlen = -1;
518 do {
519 q = p;
520 do {
521 if (f->out[s]) /* final state */
522 patlen = q-p;
523 /* assert(*q < NCHARS); */
524 if ((ns = f->gototab[s][*q]) != 0)
525 s = ns;
526 else
527 s = cgoto(f, s, *q);
528 if (s == 1) { /* no transition */
529 if (patlen >= 0) {
530 patbeg = (char *) p;
531 return(1);
533 else
534 goto nextin; /* no match */
536 } while (*q++ != 0);
537 if (f->out[s])
538 patlen = q-p-1; /* don't count $ */
539 if (patlen >= 0) {
540 patbeg = (char *) p;
541 return(1);
543 nextin:
544 s = 2;
545 if (f->reset) {
546 for (i = 2; i <= f->curstat; i++)
547 xfree(f->posns[i]);
548 k = *f->posns[0];
549 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
550 overflo("out of space in pmatch");
551 for (i = 0; i <= k; i++)
552 (f->posns[2])[i] = (f->posns[0])[i];
553 f->initstat = f->curstat = 2;
554 f->out[2] = f->out[0];
555 for (i = 0; i < NCHARS; i++)
556 f->gototab[2][i] = 0;
558 } while (*p++ != 0);
559 return (0);
562 int nematch(fa *f, const char *p0) /* non-empty match, for sub */
564 int s, ns;
565 uschar *p = (uschar *) p0;
566 uschar *q;
567 int i, k;
569 /* s = f->reset ? makeinit(f,1) : f->initstat; */
570 if (f->reset) {
571 f->initstat = s = makeinit(f,1);
572 } else {
573 s = f->initstat;
575 patlen = -1;
576 while (*p) {
577 q = p;
578 do {
579 if (f->out[s]) /* final state */
580 patlen = q-p;
581 /* assert(*q < NCHARS); */
582 if ((ns = f->gototab[s][*q]) != 0)
583 s = ns;
584 else
585 s = cgoto(f, s, *q);
586 if (s == 1) { /* no transition */
587 if (patlen > 0) {
588 patbeg = (char *) p;
589 return(1);
590 } else
591 goto nnextin; /* no nonempty match */
593 } while (*q++ != 0);
594 if (f->out[s])
595 patlen = q-p-1; /* don't count $ */
596 if (patlen > 0 ) {
597 patbeg = (char *) p;
598 return(1);
600 nnextin:
601 s = 2;
602 if (f->reset) {
603 for (i = 2; i <= f->curstat; i++)
604 xfree(f->posns[i]);
605 k = *f->posns[0];
606 if ((f->posns[2] = (int *) calloc(k+1, sizeof(int))) == NULL)
607 overflo("out of state space");
608 for (i = 0; i <= k; i++)
609 (f->posns[2])[i] = (f->posns[0])[i];
610 f->initstat = f->curstat = 2;
611 f->out[2] = f->out[0];
612 for (i = 0; i < NCHARS; i++)
613 f->gototab[2][i] = 0;
615 p++;
617 return (0);
620 Node *reparse(const char *p) /* parses regular expression pointed to by p */
621 { /* uses relex() to scan regular expression */
622 Node *np;
624 dprintf( ("reparse <%s>\n", p) );
625 lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */
626 rtok = relex();
627 /* GNU compatibility: an empty regexp matches anything */
628 if (rtok == '\0') {
629 /* FATAL("empty regular expression"); previous */
630 return(op2(EMPTYRE, NIL, NIL));
632 np = regexp();
633 if (rtok != '\0')
634 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
635 return(np);
638 Node *regexp(void) /* top-level parse of reg expr */
640 return (alt(concat(primary())));
643 Node *primary(void)
645 Node *np;
647 switch (rtok) {
648 case CHAR:
649 np = op2(CHAR, NIL, itonp(rlxval));
650 rtok = relex();
651 return (unary(np));
652 case ALL:
653 rtok = relex();
654 return (unary(op2(ALL, NIL, NIL)));
655 case EMPTYRE:
656 rtok = relex();
657 return (unary(op2(ALL, NIL, NIL)));
658 case DOT:
659 rtok = relex();
660 return (unary(op2(DOT, NIL, NIL)));
661 case CCL:
662 np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr));
663 rtok = relex();
664 return (unary(np));
665 case NCCL:
666 np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr));
667 rtok = relex();
668 return (unary(np));
669 case '^':
670 rtok = relex();
671 return (unary(op2(CHAR, NIL, itonp(HAT))));
672 case '$':
673 rtok = relex();
674 return (unary(op2(CHAR, NIL, NIL)));
675 case '(':
676 rtok = relex();
677 if (rtok == ')') { /* special pleading for () */
678 rtok = relex();
679 return unary(op2(CCL, NIL, (Node *) tostring("")));
681 np = regexp();
682 if (rtok == ')') {
683 rtok = relex();
684 return (unary(np));
686 else
687 FATAL("syntax error in regular expression %s at %s", lastre, prestr);
688 default:
689 FATAL("illegal primary in regular expression %s at %s", lastre, prestr);
691 return 0; /*NOTREACHED*/
694 Node *concat(Node *np)
696 switch (rtok) {
697 case CHAR: case DOT: case ALL: case EMPTYRE: case CCL: case NCCL: case '$': case '(':
698 return (concat(op2(CAT, np, primary())));
700 return (np);
703 Node *alt(Node *np)
705 if (rtok == OR) {
706 rtok = relex();
707 return (alt(op2(OR, np, concat(primary()))));
709 return (np);
712 Node *unary(Node *np)
714 switch (rtok) {
715 case STAR:
716 rtok = relex();
717 return (unary(op2(STAR, np, NIL)));
718 case PLUS:
719 rtok = relex();
720 return (unary(op2(PLUS, np, NIL)));
721 case QUEST:
722 rtok = relex();
723 return (unary(op2(QUEST, np, NIL)));
724 default:
725 return (np);
730 * Character class definitions conformant to the POSIX locale as
731 * defined in IEEE P1003.1 draft 7 of June 2001, assuming the source
732 * and operating character sets are both ASCII (ISO646) or supersets
733 * thereof.
735 * Note that to avoid overflowing the temporary buffer used in
736 * relex(), the expanded character class (prior to range expansion)
737 * must be less than twice the size of their full name.
740 /* Because isblank doesn't show up in any of the header files on any
741 * system i use, it's defined here. if some other locale has a richer
742 * definition of "blank", define HAS_ISBLANK and provide your own
743 * version.
744 * the parentheses here are an attempt to find a path through the maze
745 * of macro definition and/or function and/or version provided. thanks
746 * to nelson beebe for the suggestion; let's see if it works everywhere.
749 /* #define HAS_ISBLANK */
750 #ifndef HAS_ISBLANK
752 int (xisblank)(int c)
754 return c==' ' || c=='\t';
757 #endif
759 struct charclass {
760 const char *cc_name;
761 int cc_namelen;
762 int (*cc_func)(int);
763 } charclasses[] = {
764 { "alnum", 5, isalnum },
765 { "alpha", 5, isalpha },
766 #ifndef HAS_ISBLANK
767 { "blank", 5, isspace }, /* was isblank */
768 #else
769 { "blank", 5, isblank },
770 #endif
771 { "cntrl", 5, iscntrl },
772 { "digit", 5, isdigit },
773 { "graph", 5, isgraph },
774 { "lower", 5, islower },
775 { "print", 5, isprint },
776 { "punct", 5, ispunct },
777 { "space", 5, isspace },
778 { "upper", 5, isupper },
779 { "xdigit", 6, isxdigit },
780 { NULL, 0, NULL },
784 int relex(void) /* lexical analyzer for reparse */
786 int c, n;
787 int cflag;
788 static uschar *buf = NULL;
789 static int bufsz = 100;
790 uschar *bp;
791 struct charclass *cc;
792 int i;
794 switch (c = *prestr++) {
795 case '|': return OR;
796 case '*': return STAR;
797 case '+': return PLUS;
798 case '?': return QUEST;
799 case '.': return DOT;
800 case '\0': prestr--; return '\0';
801 case '^':
802 case '$':
803 case '(':
804 case ')':
805 return c;
806 case '\\':
807 rlxval = quoted(&prestr);
808 return CHAR;
809 default:
810 rlxval = c;
811 return CHAR;
812 case '[':
813 if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
814 FATAL("out of space in reg expr %.10s..", lastre);
815 bp = buf;
816 if (*prestr == '^') {
817 cflag = 1;
818 prestr++;
820 else
821 cflag = 0;
822 n = 2 * strlen((const char *) prestr)+1;
823 if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
824 FATAL("out of space for reg expr %.10s...", lastre);
825 for (; ; ) {
826 if ((c = *prestr++) == '\\') {
827 *bp++ = '\\';
828 if ((c = *prestr++) == '\0')
829 FATAL("nonterminated character class %.20s...", lastre);
830 *bp++ = c;
831 /* } else if (c == '\n') { */
832 /* FATAL("newline in character class %.20s...", lastre); */
833 } else if (c == '[' && *prestr == ':') {
834 /* POSIX char class names, Dag-Erling Smorgrav, des@ofug.org */
835 for (cc = charclasses; cc->cc_name; cc++)
836 if (strncmp((const char *) prestr + 1, (const char *) cc->cc_name, cc->cc_namelen) == 0)
837 break;
838 if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
839 prestr[2 + cc->cc_namelen] == ']') {
840 prestr += cc->cc_namelen + 3;
841 for (i = 1; i < NCHARS; i++) {
842 if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2"))
843 FATAL("out of space for reg expr %.10s...", lastre);
844 if (cc->cc_func(i)) {
845 *bp++ = i;
846 n++;
849 } else
850 *bp++ = c;
851 } else if (c == '\0') {
852 FATAL("nonterminated character class %.20s", lastre);
853 } else if (bp == buf) { /* 1st char is special */
854 *bp++ = c;
855 } else if (c == ']') {
856 *bp++ = 0;
857 rlxstr = (uschar *) tostring((char *) buf);
858 if (cflag == 0)
859 return CCL;
860 else
861 return NCCL;
862 } else
863 *bp++ = c;
868 int cgoto(fa *f, int s, int c)
870 int i, j, k;
871 int *p, *q;
873 assert(c == HAT || c < NCHARS);
874 while (f->accept >= maxsetvec) { /* guessing here! */
875 maxsetvec *= 4;
876 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
877 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
878 if (setvec == NULL || tmpset == NULL)
879 overflo("out of space in cgoto()");
881 for (i = 0; i <= f->accept; i++)
882 setvec[i] = 0;
883 setcnt = 0;
884 /* compute positions of gototab[s,c] into setvec */
885 p = f->posns[s];
886 for (i = 1; i <= *p; i++) {
887 if ((k = f->re[p[i]].ltype) != FINAL) {
888 if ((k == CHAR && c == ptoi(f->re[p[i]].lval.np))
889 || (k == DOT && c != 0 && c != HAT)
890 || (k == ALL && c != 0)
891 || (k == EMPTYRE && c != 0)
892 || (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
893 || (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
894 q = f->re[p[i]].lfollow;
895 for (j = 1; j <= *q; j++) {
896 if (q[j] >= maxsetvec) {
897 maxsetvec *= 4;
898 setvec = (int *) realloc(setvec, maxsetvec * sizeof(int));
899 tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int));
900 if (setvec == NULL || tmpset == NULL)
901 overflo("cgoto overflow");
903 if (setvec[q[j]] == 0) {
904 setcnt++;
905 setvec[q[j]] = 1;
911 /* determine if setvec is a previous state */
912 tmpset[0] = setcnt;
913 j = 1;
914 for (i = f->accept; i >= 0; i--)
915 if (setvec[i]) {
916 tmpset[j++] = i;
918 /* tmpset == previous state? */
919 for (i = 1; i <= f->curstat; i++) {
920 p = f->posns[i];
921 if ((k = tmpset[0]) != p[0])
922 goto different;
923 for (j = 1; j <= k; j++)
924 if (tmpset[j] != p[j])
925 goto different;
926 /* setvec is state i */
927 f->gototab[s][c] = i;
928 return i;
929 different:;
932 /* add tmpset to current set of states */
933 if (f->curstat >= NSTATES-1) {
934 f->curstat = 2;
935 f->reset = 1;
936 for (i = 2; i < NSTATES; i++)
937 xfree(f->posns[i]);
938 } else
939 ++(f->curstat);
940 for (i = 0; i < NCHARS; i++)
941 f->gototab[f->curstat][i] = 0;
942 xfree(f->posns[f->curstat]);
943 if ((p = (int *) calloc(setcnt+1, sizeof(int))) == NULL)
944 overflo("out of space in cgoto");
946 f->posns[f->curstat] = p;
947 f->gototab[s][c] = f->curstat;
948 for (i = 0; i <= setcnt; i++)
949 p[i] = tmpset[i];
950 if (setvec[f->accept])
951 f->out[f->curstat] = 1;
952 else
953 f->out[f->curstat] = 0;
954 return f->curstat;
958 void freefa(fa *f) /* free a finite automaton */
960 int i;
962 if (f == NULL)
963 return;
964 for (i = 0; i <= f->curstat; i++)
965 xfree(f->posns[i]);
966 for (i = 0; i <= f->accept; i++) {
967 xfree(f->re[i].lfollow);
968 if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL)
969 xfree((f->re[i].lval.np));
971 xfree(f->restr);
972 xfree(f);