8990 /opt/onbld/gk is useless
[unleashed.git] / usr / src / tools / cscope-fast / cgrep.c
blob78757893644a9e3bdd64e84b7a2ecbf64682122c
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
22 /* Copyright (c) 1990 AT&T */
23 /* All Rights Reserved */
27 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
31 #pragma ident "%Z%%M% %I% %E% SMI"
34 * cscope - interactive C symbol or text cross-reference
36 * text searching functions
39 #include <fcntl.h>
40 #include <setjmp.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <memory.h>
44 #include <ctype.h>
45 #include <unistd.h>
46 #include "library.h"
48 typedef enum {
49 NO = 0,
50 YES = 1
51 } BOOL;
53 typedef struct re_bm {
54 int delta0[256];
55 int *delta2;
56 uchar_t *cmap;
57 uchar_t *bmpat;
58 int patlen;
59 } re_bm;
61 typedef struct Link {
62 uchar_t lit;
63 struct Node *node;
64 struct Link *next;
65 } Link;
67 typedef struct Node {
68 short out;
69 short d;
70 short shift1;
71 short shift2;
72 long id;
73 struct Node **tab;
74 Link *alts;
75 struct Node *fail;
76 } Node;
78 typedef struct re_cw {
79 int maxdepth, mindepth;
80 long nodeid;
81 int step[256];
82 uchar_t *cmap;
83 struct Node *root;
84 } re_cw;
86 typedef enum {
87 /* lit expression types */
88 Literal, Dot, Charclass, EOP,
90 /* non-lit expression types */
91 Cat, Alternate, Star, Plus, Quest,
93 /* not really expression types, just helping */
94 Lpar, Rpar, Backslash
96 } Exprtype;
98 typedef int ID;
100 typedef struct Expr {
101 Exprtype type; /* Type of expression (in grammar) */
102 ID id; /* unique ID of lit expression */
103 int lit; /* Literal character or tag */
104 int flen; /* Number of following lit expressions */
105 ID *follow; /* Array of IDs of following lit expressions */
106 struct Expr *l; /* pointer to Left child (or ccl count) */
107 struct Expr *r; /* pointer to Right child (or ccl mask) */
108 struct Expr *parent; /* pointer to Parent */
109 } Expr;
111 typedef struct State {
112 struct State *tab[256];
113 int cnt; /* number of matched chars on full match, -1 otherwise */
114 int npos; /* number of IDs in position set for this state. */
115 int pos; /* index into posbase for beginning of IDs */
116 } State;
118 typedef struct FID {
119 ID id; /* Lit Expression id */
120 int fcount; /* Number of Lit exp matches before this one. */
121 } FID;
123 typedef struct Positionset {
124 int count; /* Number of lit exps in position set */
125 ID last; /* ID of last lit exp in position set */
126 FID *base; /* array of MAXID FIDS */
127 /* 0 means not in position set */
128 /* -1 means first in position set */
129 /* n (>0) is ID of prev member of position set. */
130 } Positionset;
132 typedef struct re_re {
133 FID *posbase; /* Array of IDs from all states */
134 int nposalloc; /* Allocated size of posbase */
135 int posnext; /* Index into free space in posbase */
136 int posreset; /* Index into end of IDs for initial state in posbase */
137 int maxid; /* Number of (also maximum ID of) lit expressions */
138 Expr *root; /* Pointer to root (EOP) expression */
139 Expr **ptr; /* Pointer to array of ptrs to lit expressions. */
140 uchar_t *cmap; /* Character mapping array */
141 Positionset firstpos;
142 Positionset tmp;
143 int nstates; /* Number of current states defined */
144 int statelim; /* Limit on number of states before flushing */
145 State *states; /* Array of states */
146 State istate; /* Initial state */
147 } re_re;
149 typedef struct {
150 uchar_t *cmap;
151 re_re *re_ptr;
152 re_bm *bm_ptr;
153 re_cw *cw_ptr;
154 BOOL fullmatch;
155 BOOL (*procfn)();
156 BOOL (*succfn)();
157 uchar_t *loc1;
158 uchar_t *loc2;
159 uchar_t *expression;
160 } PATTERN;
162 typedef enum {
163 BEGIN, /* File is not yet in buffer at all */
164 MORE, /* File is partly in buffer */
165 NO_MORE /* File has been completely read into buffer */
166 } FILE_STAT;
168 typedef struct {
169 uchar_t *prntbuf; /* current line of input from data file */
170 uchar_t *newline; /* end of line (real or sentinel \n) */
171 long ln; /* line number */
172 } LINE;
174 #define NL '\n'
176 #define CCL_SIZ 32
177 #define CCL_SET(a, c) ((a)[(c) >> 3] |= bittab[(c) & 07])
178 #define CCL_CLR(a, c) ((a)[(c) >> 3] &= ~bittab[(c) & 07])
179 #define CCL_CHK(a, c) ((a)[(c) >> 3] & bittab[(c) & 07])
181 #define ESIZE (BUFSIZ)
182 #define MAXBUFSIZE (64*BUFSIZ)
184 #define MAXMALLOCS 1024
185 #define MAXLIT 256 /* is plenty big enough */
186 #define LARGE MAXBUFSIZE+ESIZE+2
188 #define CLEAR(r, rps) (void) memset((char *)(rps)->base, 0, \
189 (int)((r)->maxid * sizeof (FID))), \
190 (rps)->count = 0, (rps)->last = -1
191 #define SET(rps, n, cnt) { \
192 if ((rps)->base[n].id == 0) {\
193 (rps)->count++;\
194 (rps)->base[n].fcount = (cnt);\
195 (rps)->base[n].id = (rps)->last;\
196 (rps)->last = (n);\
197 } else if ((cnt) > (rps)->base[n].fcount) {\
198 (rps)->base[n].fcount = (cnt);\
201 #define START { _p = tmp; }
202 #define ADDL(c) { if (_p >= &tmp[MAXLIT]) _p--; *_p++ = c; }
203 #define FINISH { ADDL(0) if ((_p-tmp) > bestlen) \
204 (void) memmove(best, tmp, bestlen = _p-tmp); }
207 #define NEW(N) (froot?(t = froot, froot = froot->next, t->next = NULL, \
208 t->node = N, t): newlink(0, N))
209 #define ADD(N) if (qtail) qtail = qtail->next = NEW(N); \
210 else qtail = qhead = NEW(N)
211 #define DEL() { Link *_l = qhead; if ((qhead = qhead->next) == NULL) \
212 qtail = NULL; _l->next = froot; froot = _l; }
214 static uchar_t *buffer;
215 static uchar_t *bufend;
216 static FILE *output;
217 static char *format;
218 static char *file;
219 static int file_desc;
220 static FILE_STAT file_stat;
221 static PATTERN match_pattern;
222 static uchar_t char_map[2][256];
223 static int iflag;
224 static Exprtype toktype;
225 static int toklit, parno, maxid;
226 static uchar_t tmp[MAXLIT], best[MAXLIT];
227 static uchar_t *_p;
228 static int bestlen;
229 static Node *next_node;
230 static Link *froot, *next_link;
231 static jmp_buf env;
233 static int nmalloc;
234 static uchar_t *mallocs[MAXMALLOCS];
236 static uchar_t bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
238 #ifdef DEBUG
239 #define TRACE(n) (n < debug)
240 #define EPRINTSIZE 50000
241 static void spr(int c, int *p, uchar_t *buf);
242 void epr(Expr *e, uchar_t *res);
243 static int debug = 12;
244 #endif
246 static void init_file(LINE *cur_ptr);
247 static void get_line(LINE *cur_ptr, uchar_t *s);
248 static void get_ncount(LINE *cur_ptr, uchar_t *s);
249 static int execute(void);
250 static State *startstate(re_re *r);
251 static State *stateof(re_re *r, Positionset *ps);
252 static State *nextstate(re_re *r, State *s, int a);
253 static State *addstate(re_re *r, Positionset *ps, int cnt);
254 static BOOL match(Expr *e, int a);
255 static BOOL first_lit(Positionset *fpos, Expr *e);
256 static void eptr(re_re *r, Expr *e);
257 static void efollow(re_re *r, Positionset *fpos, Expr *e);
258 static void follow(Positionset *fpos, Expr *e);
259 static void followstate(re_re *r, State *s, int a, Positionset *fpos);
260 static Expr *eall(re_re *r, PATTERN *pat);
261 static Expr *d0(re_re *r, PATTERN *pat);
262 static Expr *d1(re_re *r, PATTERN *pat);
263 static Expr *d2(re_re *r, PATTERN *pat);
264 static Expr *d3(re_re *r, PATTERN *pat);
265 static Expr *newexpr(Exprtype t, int lit, Expr *left, Expr *right);
266 static void lex(re_re *r, PATTERN *pat);
267 static int re_lit(PATTERN *pat, uchar_t **b, uchar_t **e);
268 static void traverse(PATTERN *pat, Expr *e);
269 static int ccl(PATTERN *pat, uchar_t *tab);
270 static BOOL altlist(), word();
271 static BOOL altlist(Expr *e, uchar_t *buf, re_cw *pat);
272 static Node *newnode(re_cw *c, int d);
273 static Link *newlink(uchar_t lit, Node *n);
274 static void fail(Node *root);
275 static void zeroroot(Node *root, Node *n);
276 static void shift(re_cw *c);
277 static void shifttab(Node *n);
278 static void shiftprop(re_cw *c, Node *n);
279 static void delta_2(re_bm *b);
280 static int getstate(re_re *r, Positionset *ps);
281 static void savestate(re_re *r);
282 static void stateinit(re_re *r);
283 static re_bm *re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap);
284 static re_cw *re_cwinit(uchar_t *cmap);
285 static re_cw *re_recw(re_re *r, uchar_t *map);
286 static re_re *egprep(PATTERN *pat);
287 static void re_cwadd(re_cw *c, uchar_t *s, uchar_t *e);
288 static void re_cwcomp(re_cw *c);
289 static void eginit(re_re *r);
290 static BOOL re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb,
291 uchar_t **me);
292 static BOOL re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb,
293 uchar_t **me);
294 static BOOL re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb,
295 uchar_t **me);
296 static uchar_t *egmalloc(size_t n);
297 static void fgetfile(LINE *cur_ptr);
298 static void dogre(PATTERN *pat);
299 static BOOL pattern_match(PATTERN *pat, LINE *lptr);
300 static BOOL fixloc(uchar_t **mb, uchar_t **me);
301 static BOOL grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me);
302 static void err(char *s);
304 static char *message;
306 void
307 egrepcaseless(int i)
309 iflag = i; /* simulate "egrep -i" */
312 char *
313 egrepinit(char *expression)
315 static int firsttime = 1;
316 int i;
318 if (firsttime) {
319 firsttime = 0;
320 for (i = 0; i < 256; i++) {
321 char_map[0][i] = (uchar_t)i;
322 char_map[1][i] = tolower(i);
325 for (i = 0; i < nmalloc; i ++)
326 free(mallocs[i]);
327 nmalloc = 0;
328 message = NULL;
330 match_pattern.expression = (uchar_t *)expression;
331 match_pattern.cmap = char_map[iflag];
332 if (setjmp(env) == 0) {
333 dogre(&match_pattern);
334 #ifdef DEBUG
336 PATTERN *p = match_pattern;
337 if (p->procfn == re_bmexec)
338 if (!p->fullmatch)
339 if (p->succfn == re_reexec)
340 printf("PARTIAL BOYER_MOORE\n");
341 else
342 printf("PARTIAL B_M with GREP\n");
343 else
344 printf("FULL BOYER_MOORE\n");
345 else if (p->procfn == re_cwexec)
346 printf("C_W\n");
347 else
348 printf("GENERAL\n");
350 #endif
352 return (message);
355 static void
356 dogre(PATTERN *pat)
358 uchar_t *lb, *le;
360 #ifdef DEBUG
361 printf("PATTERN %s\n", pat->expression);
362 #endif
363 pat->re_ptr = egprep(pat);
364 bestlen = re_lit(pat, &lb, &le);
366 if (bestlen && pat->fullmatch) { /* Full Boyer Moore */
367 #ifdef DEBUG
368 printf("BESTLEN %d\n", bestlen);
370 uchar_t *p;
371 for (p = lb; p < le; p++) printf("%c", *p);
372 printf("\n");
374 #endif
375 pat->bm_ptr = re_bmcomp(lb, le, pat->cmap);
376 pat->procfn = re_bmexec;
377 return;
379 if (bestlen > 1) {
380 /* Partial Boyer Moore */
381 pat->bm_ptr = re_bmcomp(lb, le, pat->cmap);
382 pat->procfn = re_bmexec;
383 pat->fullmatch = NO;
384 } else {
385 pat->fullmatch = YES;
386 if ((pat->cw_ptr = re_recw(pat->re_ptr, pat->cmap)) != NULL) {
387 pat->procfn = re_cwexec; /* CW */
388 return;
391 /* general egrep regular expression */
392 pat->succfn = re_reexec;
394 if (pat->fullmatch) {
395 pat->procfn = pat->succfn;
396 pat->succfn = NULL;
400 static BOOL
401 fixloc(uchar_t **mb, uchar_t **me)
403 /* Handle match to null string */
405 while (*me <= *mb)
406 (*me)++;
408 if (*(*me - 1) != NL)
409 while (**me != NL)
410 (*me)++;
412 /* Handle match to new-line only */
414 if (*mb == *me - 1 && **mb == NL) {
415 (*me)++;
418 /* Handle match including beginning or ending new-line */
420 if (**mb == NL)
421 (*mb)++;
422 if (*(*me - 1) == NL)
423 (*me)--;
424 return (YES);
427 static BOOL
428 grepmatch(PATTERN *pat, uchar_t **mb, uchar_t **me)
430 uchar_t *s, *f;
432 if (pat->fullmatch)
433 return (fixloc(mb, me));
435 for (f = *me - 1; *f != NL; f++) {
437 f++;
438 for (s = *mb; *s != NL; s--) {
441 if ((*pat->succfn)(pat, s, f, mb, me)) {
442 return (YES);
443 } else {
444 *mb = f;
445 return (NO);
449 static void
450 eginit(re_re *r)
452 unsigned int n;
454 r->ptr = (Expr **)egmalloc(r->maxid * sizeof (Expr *));
455 eptr(r, r->root);
456 n = r->maxid * sizeof (FID);
457 r->firstpos.base = (FID *)egmalloc(n);
458 r->tmp.base = (FID *)egmalloc(n);
459 CLEAR(r, &r->firstpos);
460 if (!first_lit(&r->firstpos, r->root->l)) {
462 * This expression matches the null string!!!!
463 * Add EOP to beginning position set.
465 SET(&r->firstpos, r->root->id, 0)
466 /* (void) printf("first of root->l == 0, b=%s\n", b); */
468 stateinit(r);
469 (void) addstate(r, &r->firstpos, 0);
470 savestate(r);
473 static void
474 eptr(re_re *r, Expr *e)
476 if ((e->id < 0) || (e->id >= r->maxid)) {
477 err("internal error");
479 r->ptr[e->id] = e;
480 if (e->type != Charclass) {
481 if (e->l) eptr(r, e->l);
482 if (e->r) eptr(r, e->r);
486 static BOOL
487 re_reexec(PATTERN *pat, uchar_t *b, uchar_t *e, uchar_t **mb, uchar_t **me)
489 re_re *r = pat->re_ptr;
490 State *s, *t;
492 #ifdef DEBUG
493 if (TRACE(10)) {
494 uchar_t buf[EPRINTSIZE];
495 epr(r->root, buf);
496 (void) printf("expr='%s'\n", buf);
498 #endif
499 s = startstate(r);
501 for (;;) {
502 uchar_t c;
504 if (s->cnt >= 0) {
505 #ifdef DEBUG
506 if (TRACE(6))
507 (void) printf("match at input '%s'\n", b);
508 #endif
509 *mb = b - s->cnt;
510 *me = b;
511 if (fixloc(mb, me))
512 return (YES);
515 if (b >= e) break;
516 c = pat->cmap[*b];
517 #ifdef DEBUG
518 if (TRACE(4))
519 (void) printf("state %d: char '%c'\n", s-r->states, *b);
520 #endif
521 if ((t = s->tab[c]) != NULL) s = t;
522 else s = nextstate(r, s, (int)c);
523 b++;
525 #ifdef DEBUG
526 if (TRACE(3)) {
527 uchar_t buf[EPRINTSIZE];
529 epr(r->root, buf);
530 (void) printf("pat = %s\n", buf);
532 #endif
533 return (NO);
536 static BOOL
537 match(Expr *e, int a)
539 switch (e->type) {
540 case Dot: return ((BOOL)(a != NL));
541 case Literal: return ((BOOL)(a == e->lit));
542 case Charclass: return ((BOOL)(CCL_CHK((uchar_t *)e->r, a)));
543 default: return (NO);
548 * generates the followset for a node in fpos
550 static void
551 follow(Positionset *fpos, Expr *e)
553 Expr *p;
555 if (e->type == EOP)
556 return;
557 else
558 p = e->parent;
559 switch (p->type) {
560 case EOP:
561 SET(fpos, p->id, 0)
562 break;
563 case Plus:
564 case Star:
565 (void) first_lit(fpos, e);
566 follow(fpos, p);
567 break;
568 case Quest:
569 case Alternate:
570 follow(fpos, p);
571 break;
572 case Cat:
573 if (e == p->r || !first_lit(fpos, p->r))
574 follow(fpos, p);
575 break;
576 default:
577 break;
582 * first_lit returns NO if e is nullable and in the process,
583 * ets up fpos.
585 static BOOL
586 first_lit(Positionset *fpos, Expr *e)
588 BOOL k;
590 switch (e->type) {
591 case Literal:
592 case Dot:
593 case Charclass:
594 SET(fpos, e->id, 0)
595 return (YES);
596 case EOP:
597 case Star:
598 case Quest:
599 (void) first_lit(fpos, e->l);
600 return (NO);
601 case Plus:
602 return (first_lit(fpos, e->l));
603 case Cat:
604 return ((BOOL)(first_lit(fpos, e->l) || first_lit(fpos, e->r)));
605 case Alternate:
606 k = first_lit(fpos, e->r);
607 return ((BOOL)(first_lit(fpos, e->l) && k));
608 default:
609 err("internal error");
611 return (NO);
614 static void
615 efollow(re_re *r, Positionset *fpos, Expr *e)
617 ID i, *p;
619 CLEAR(r, fpos);
620 follow(fpos, e);
621 e->flen = fpos->count;
622 e->follow = (ID *)egmalloc(e->flen * sizeof (ID));
623 p = e->follow;
624 #ifdef DEBUG
625 printf("ID = %d LIT %c FLEN = %d\n", e->id, e->lit, e->flen);
626 #endif
627 for (i = fpos->last; i > 0; i = fpos->base[i].id) {
628 *p++ = i;
629 #ifdef DEBUG
630 printf("FOLLOW ID = %d LIT %c\n", r->ptr[i]->id, r->ptr[i]->lit);
631 #endif
633 if (p != e->follow + e->flen) {
634 err("internal error");
638 static State *
639 addstate(re_re *r, Positionset *ps, int cnt)
641 ID j;
642 FID *p, *q;
643 State *s;
645 if (cnt) {
646 s = r->states + getstate(r, ps);
647 (void) memset((char *)s->tab, 0, sizeof (s->tab));
648 s->cnt = r->istate.cnt;
649 } else {
650 s = &r->istate;
651 s->cnt = -1;
653 s->pos = r->posnext;
654 r->posnext += ps->count;
655 s->npos = ps->count;
656 p = r->posbase + s->pos;
657 for (j = ps->last; j > 0; p++, j = q->id) {
658 q = &ps->base[j];
659 p->id = j;
660 p->fcount = q->fcount;
661 if (p->id == r->root->id && s->cnt < p->fcount)
662 s->cnt = p->fcount;
664 #ifdef DEBUG
665 if (TRACE(3)) {
666 uchar_t buf[2000];
667 spr(s->npos, s->pos+r->posbase, buf);
668 (void) printf("new state[%d] %s%s\n", s-r->states, buf,
669 s->cnt?" cnt":"");
671 #endif
672 return (s);
675 static State *
676 nextstate(re_re *r, State *s, int a)
678 State *news;
680 CLEAR(r, &r->tmp);
681 followstate(r, s, a, &r->tmp);
682 if (s != &r->istate) followstate(r, &r->istate, a, &r->tmp);
684 #ifdef DEBUG
685 if (TRACE(5)) {
686 uchar_t buf[2000];
687 ppr(&r->tmp, buf);
688 (void) printf("nextstate(%d, '%c'): found %s\n", s-r->states,
689 a, buf);
691 #endif
692 if (r->tmp.count == 0)
693 news = &r->istate;
694 else if ((news = stateof(r, &r->tmp)) == NULL)
695 news = addstate(r, &r->tmp, 1);
696 s->tab[a] = news;
697 #ifdef DEBUG
698 if (TRACE(5)) {
699 (void) printf("nextstate(%d, '%c'): returning %ld\n",
700 s-r->states, a, news);
702 #endif
703 return (news);
706 static void
707 followstate(re_re *r, State *s, int a, Positionset *fpos)
709 int j;
710 ID *q, *eq;
711 Expr *e;
713 for (j = s->pos; j < (s->pos + s->npos); j++) {
714 e = r->ptr[r->posbase[j].id];
715 if (e->type == EOP) continue;
716 if (match(e, a)) {
717 if (e->follow == NULL) efollow(r, &r->firstpos, e);
718 for (q = e->follow, eq = q + e->flen; q < eq; q++) {
719 SET(fpos, *q, r->posbase[j].fcount + 1)
720 #ifdef DEBUG
721 printf("CHAR %c FC %c COUNT %d\n",
723 r->ptr[*q]->lit,
724 r->posbase[j].fcount+1);
725 #endif
731 static uchar_t *
732 egmalloc(size_t n)
734 uchar_t *x;
736 x = (uchar_t *)mymalloc(n);
737 mallocs[nmalloc++] = x;
738 if (nmalloc >= MAXMALLOCS)
739 nmalloc = MAXMALLOCS - 1;
740 return (x);
743 #ifdef DEBUG
744 void
745 ppr(Positionse *ps, char *p)
747 ID n;
749 if (ps->count < 1) {
750 (void) sprintf(p, "{}");
751 return;
753 *p++ = '{';
754 for (n = ps->last; n > 0; n = ps->base[n].id) {
755 (void) sprintf(p, "%d,", n);
756 p = strchr(p, 0);
758 p[-1] = '}';
761 void
762 epr(Expr *e, uchar_t *res)
764 uchar_t r1[EPRINTSIZE], r2[EPRINTSIZE];
765 int i;
767 if (e == NULL) {
768 (void) sprintf(res, "!0!");
769 return;
771 switch (e->type) {
772 case Dot:
773 case Literal:
774 spr(e->flen, e->follow, r1);
775 (void) sprintf(res, "%c%s", e->lit, r1);
776 break;
777 case Charclass:
778 *res++ = '[';
779 for (i = 0; i < 256; i++)
780 if (CCL_CHK((uchar_t *)e->r, i)) {
781 *res++ = i;
783 *res++ = ']';
784 *res = '\0';
785 break;
786 case Cat:
787 epr(e->l, r1);
788 epr(e->r, r2);
789 (void) sprintf(res, "%s%s", r1, r2);
790 break;
791 case Alternate:
792 epr(e->l, r1);
793 epr(e->r, r2);
794 (void) sprintf(res, "(%s|%s)", r1, r2);
795 break;
796 case Star:
797 epr(e->l, r1);
798 (void) sprintf(res, "(%s)*", r1);
799 break;
800 case Plus:
801 epr(e->l, r1);
802 (void) sprintf(res, "(%s)+", r1);
803 break;
804 case Quest:
805 epr(e->l, r1);
806 (void) sprintf(res, "(%s)?", r1);
807 break;
808 case EOP:
809 epr(e->l, r1);
810 (void) sprintf(res, "%s<EOP>", r1);
811 break;
812 default:
813 (void) sprintf(res, "<undef type %d>", e->type);
814 err(res);
815 break;
819 static void
820 spr(int c, int *p, uchar_t *buf)
822 if (c > 0) {
823 *buf++ = '{';
824 *buf = '\0';
825 while (--c > 0) {
826 (void) sprintf(buf, "%d,", *p++);
827 buf = strchr(buf, 0);
829 (void) sprintf(buf, "%d}", *p);
830 } else
831 (void) sprintf(buf, "{}");
833 #endif
835 static void
836 stateinit(re_re *r)
838 /* CONSTANTCONDITION */
839 r->statelim = (sizeof (int) < 4 ? 32 : 128);
840 r->states = (State *)egmalloc(r->statelim * sizeof (State));
842 /* CONSTANTCONDITION */
843 r->nposalloc = (sizeof (int) < 4 ? 2048 : 8192);
844 r->posbase = (FID *)egmalloc(r->nposalloc * sizeof (FID));
845 r->nstates = r->posnext = 0;
848 static void
849 clrstates(re_re *r)
851 r->nstates = 0; /* reclaim space for states and positions */
852 r->posnext = r->posreset;
853 (void) memset((char *)r->istate.tab, 0, sizeof (r->istate.tab));
856 static void
857 savestate(re_re *r)
859 r->posreset = r->posnext; /* save for reset */
862 static State *
863 startstate(re_re *r)
865 return (&r->istate);
868 static int
869 getstate(re_re *r, Positionset *ps)
871 if (r->nstates >= r->statelim ||
872 r->posnext + ps->count >= r->nposalloc) {
873 clrstates(r);
874 #ifdef DEBUG
875 printf("%d STATES FLUSHED\n", r->statelim);
876 #endif
878 return (r->nstates++);
881 static State *
882 stateof(re_re *r, Positionset *ps)
884 State *s;
885 int i;
886 FID *p, *e;
888 for (i = 0, s = r->states; i < r->nstates; i++, s++) {
889 if (s->npos == ps->count) {
890 for (p = s->pos+r->posbase, e = p+s->npos; p < e; p++)
891 if (ps->base[p->id].id == 0 ||
892 ps->base[p->id].fcount != p->fcount)
893 goto next;
894 return (s);
896 next:;
898 return (NULL);
901 static re_re *
902 egprep(PATTERN *pat)
904 re_re *r;
906 r = (re_re *)egmalloc(sizeof (re_re));
907 (void) memset((char *)r, 0, sizeof (re_re));
909 pat->loc1 = pat->expression;
910 pat->loc2 = pat->expression + strlen((char *)pat->expression);
912 parno = 0;
913 maxid = 1;
914 r->cmap = pat->cmap;
915 lex(r, pat);
916 r->root = newexpr(EOP, '#', eall(r, pat), (Expr *)NULL);
917 r->maxid = maxid;
919 eginit(r);
920 return (r);
923 static Expr *
924 newexpr(Exprtype t, int lit, Expr *left, Expr *right)
926 Expr *e = (Expr *)egmalloc(sizeof (Expr));
928 e->type = t;
929 e->parent = NULL;
930 e->lit = lit;
932 if (e->lit) e->id = maxid++;
933 else e->id = 0;
935 if ((e->l = left) != NULL) {
936 left->parent = e;
938 if ((e->r = right) != NULL) {
939 right->parent = e;
941 e->follow = NULL;
942 e->flen = 0;
943 return (e);
946 static void
947 lex(re_re *r, PATTERN *pat)
949 if (pat->loc1 == pat->loc2) {
950 toktype = EOP;
951 toklit = -1;
952 } else switch (toklit = *pat->loc1++) {
953 case '.': toktype = Dot; break;
954 case '*': toktype = Star; break;
955 case '+': toktype = Plus; break;
956 case '?': toktype = Quest; break;
957 case '[': toktype = Charclass; break;
958 case '|': toktype = Alternate; break;
959 case '(': toktype = Lpar; break;
960 case ')': toktype = Rpar; break;
961 case '\\': toktype = Backslash;
962 if (pat->loc1 == pat->loc2) {
963 err("syntax error - missing character "
964 "after \\");
965 } else
966 toklit = r->cmap[*pat->loc1++];
967 break;
968 case '^': case '$': toktype = Literal; toklit = NL; break;
969 default: toktype = Literal; toklit = r->cmap[toklit]; break;
973 static int
974 ccl(PATTERN *pat, uchar_t *tab)
976 int i;
977 int range = 0;
978 int lastc = -1;
979 int count = 0;
980 BOOL comp = NO;
982 (void) memset((char *)tab, 0, CCL_SIZ * sizeof (uchar_t));
983 if (*pat->loc1 == '^') {
984 pat->loc1++;
985 comp = YES;
987 if (*pat->loc1 == ']') {
988 uchar_t c = pat->cmap[*pat->loc1];
989 CCL_SET(tab, c);
990 lastc = *pat->loc1++;
992 /* scan for chars */
993 for (; (pat->loc1 < pat->loc2) && (*pat->loc1 != ']');
994 pat->loc1++) {
995 if (*pat->loc1 == '-') {
996 if (lastc < 0) CCL_SET(tab, pat->cmap['-']);
997 else range = 1;
998 continue;
1000 if (range) {
1001 for (i = *pat->loc1; i >= lastc; i--) {
1002 CCL_SET(tab, pat->cmap[i]);
1004 } else {
1005 uchar_t c = pat->cmap[*pat->loc1];
1006 CCL_SET(tab, c);
1009 range = 0;
1011 lastc = *pat->loc1;
1013 if (range) CCL_SET(tab, pat->cmap['-']);
1015 if (pat->loc1 < pat->loc2) pat->loc1++;
1016 else err("syntax error - missing ]");
1018 if (comp) {
1019 CCL_SET(tab, pat->cmap[NL]);
1020 for (i = 0; i < CCL_SIZ; i++) tab[i] ^= 0xff;
1022 for (i = 0; i < 256; i++) {
1023 if (pat->cmap[i] != i) CCL_CLR(tab, i);
1024 if (CCL_CHK(tab, i)) {
1025 lastc = i;
1026 count++;
1029 if (count == 1)
1030 *tab = (char)lastc;
1031 return (count);
1035 * egrep patterns:
1037 * Alternation: d0: d1 { '|' d1 }*
1038 * Concatenation: d1: d2 { d2 }*
1039 * Repetition: d2: d3 { '*' | '?' | '+' }
1040 * Literal: d3: lit | '.' | '[]' | '(' d0 ')'
1043 static Expr *
1044 d3(re_re *r, PATTERN *pat)
1046 Expr *e;
1047 uchar_t *tab;
1048 int count;
1050 switch (toktype) {
1051 case Backslash:
1052 case Literal:
1053 e = newexpr(Literal, toklit, (Expr *)NULL, (Expr *)NULL);
1054 lex(r, pat);
1055 break;
1056 case Dot:
1057 e = newexpr(Dot, '.', (Expr *)NULL, (Expr *)NULL);
1058 lex(r, pat);
1059 break;
1060 case Charclass:
1061 tab = egmalloc(CCL_SIZ * sizeof (uchar_t));
1062 count = ccl(pat, tab);
1063 if (count == 1) {
1064 toklit = *tab;
1065 e = newexpr(Literal, toklit, (Expr *)NULL,
1066 (Expr *)NULL);
1067 } else {
1068 e = newexpr(Charclass, '[', (Expr *)NULL, (Expr *)NULL);
1069 e->l = (Expr *)count; /* number of chars */
1070 e->r = (Expr *)tab; /* bitmap of chars */
1072 lex(r, pat);
1073 break;
1074 case Lpar:
1075 lex(r, pat);
1076 count = ++parno;
1077 e = d0(r, pat);
1078 if (toktype == Rpar)
1079 lex(r, pat);
1080 else
1081 err("syntax error - missing )");
1082 return (e);
1083 default:
1084 err("syntax error");
1085 e = NULL;
1087 return (e);
1090 static Expr *
1091 d2(re_re *r, PATTERN *pat)
1093 Expr *e;
1094 Exprtype t;
1096 e = d3(r, pat);
1097 while ((toktype == Star) || (toktype == Plus) || (toktype == Quest)) {
1098 t = toktype;
1099 lex(r, pat);
1100 e = newexpr(t, 0, e, (Expr *)NULL);
1102 return (e);
1105 static Expr *
1106 d1(re_re *r, PATTERN *pat)
1108 Expr *e, *f;
1110 e = d2(r, pat);
1111 while ((toktype == Literal) || (toktype == Dot) || (toktype == Lpar) ||
1112 (toktype == Backslash) || (toktype == Charclass)) {
1113 f = d2(r, pat);
1114 e = newexpr(Cat, 0, e, f);
1116 return (e);
1119 static Expr *
1120 d0(re_re *r, PATTERN *pat)
1122 Expr *e, *f;
1124 e = d1(r, pat);
1125 while (toktype == Alternate) {
1126 lex(r, pat);
1127 if (toktype == EOP)
1128 continue;
1129 f = d1(r, pat);
1130 e = newexpr(Alternate, 0, e, f);
1132 return (e);
1135 static Expr *
1136 eall(re_re *r, PATTERN *pat)
1138 Expr *e;
1140 while (toktype == Alternate) /* bogus but user-friendly */
1141 lex(r, pat);
1142 e = d0(r, pat);
1143 while (toktype == Alternate) /* bogus but user-friendly */
1144 lex(r, pat);
1145 if (toktype != EOP)
1146 err("syntax error");
1147 return (e);
1150 static void
1151 err(char *s)
1153 message = s;
1154 longjmp(env, 1);
1157 static int
1158 re_lit(PATTERN *pat, uchar_t **b, uchar_t **e)
1160 bestlen = 0;
1161 pat->fullmatch = YES;
1162 START
1163 traverse(pat, pat->re_ptr->root->l);
1164 FINISH
1165 if (bestlen < 2)
1166 return (0);
1167 *b = egmalloc(bestlen * sizeof (uchar_t));
1168 (void) memmove(*b, best, bestlen);
1169 *e = *b + bestlen - 1;
1170 return (bestlen - 1);
1173 static void
1174 traverse(PATTERN *pat, Expr *e)
1176 switch (e->type) {
1177 case Literal:
1178 ADDL(e->lit)
1179 break;
1180 case Cat:
1181 traverse(pat, e->l);
1182 traverse(pat, e->r);
1183 break;
1184 case Plus:
1185 traverse(pat, e->l);
1186 FINISH /* can't go on past a + */
1187 pat->fullmatch = NO;
1188 START /* but we can start with one! */
1189 traverse(pat, e->l);
1190 break;
1191 default:
1192 FINISH
1193 pat->fullmatch = NO;
1194 START
1195 break;
1199 static re_bm *
1200 re_bmcomp(uchar_t *pb, uchar_t *pe, uchar_t *cmap)
1202 int j;
1203 int delta[256];
1204 re_bm *b;
1206 b = (re_bm *)egmalloc(sizeof (*b));
1208 b->patlen = pe - pb;
1209 b->cmap = cmap;
1210 b->bmpat = pb;
1212 delta_2(b);
1214 for (j = 0; j < 256; j++)
1215 delta[j] = b->patlen;
1217 for (pe--; pb < pe; pb++)
1218 delta[b->cmap[*pb]] = pe - pb;
1220 delta[b->cmap[*pb]] = LARGE;
1222 for (j = 0; j < 256; j++)
1223 b->delta0[j] = delta[b->cmap[j]];
1224 return (b);
1227 static void
1228 delta_2(re_bm *b)
1230 int m = b->patlen;
1231 int i, k, j;
1233 b->delta2 = (int *)egmalloc(m * sizeof (int));
1235 for (j = 0; j < m; j++) {
1236 k = 1;
1237 again:
1238 while (k <= j && b->bmpat[j-k] == b->bmpat[j]) k++;
1240 for (i = j + 1; i < m; i++) {
1241 if (k <= i && b->bmpat[i-k] != b->bmpat[i]) {
1242 k++;
1243 goto again;
1246 b->delta2[j] = k + m - j - 1;
1250 static BOOL
1251 re_bmexec(PATTERN *pat, uchar_t *s, uchar_t *e, uchar_t **mb, uchar_t **me)
1253 re_bm *b = pat->bm_ptr;
1254 uchar_t *sp;
1255 int k;
1257 s += b->patlen - 1;
1258 while ((unsigned long)s < (unsigned long)e) {
1259 while ((unsigned long)(s += b->delta0[*s]) < (unsigned long)e)
1261 if ((unsigned long)s < (unsigned long)(e + b->patlen))
1262 return (NO); /* no match */
1263 s -= LARGE;
1264 for (k = b->patlen-2, sp = s-1; k >= 0; k--) {
1265 if (b->cmap[*sp--] != b->bmpat[k]) break;
1267 if (k < 0) {
1268 *mb = ++sp;
1269 *me = s+1;
1270 if (grepmatch(pat, mb, me))
1271 return (YES);
1272 s = *mb;
1273 } else if (k < 0) {
1274 s++;
1275 } else {
1276 int j;
1277 j = b->delta2[k];
1278 k = b->delta0[*++sp];
1279 if ((j > k) || (k == (int)LARGE))
1280 k = j;
1281 s = sp + k;
1284 return (NO);
1287 static re_cw *
1288 re_recw(re_re *r, uchar_t *map)
1290 Expr *e, *root = r->root;
1291 re_cw *pat;
1292 uchar_t *buf;
1294 if (root->type != EOP)
1295 return (NULL);
1296 e = root->l;
1297 pat = re_cwinit(map);
1298 buf = (uchar_t *)egmalloc(20000 * sizeof (uchar_t));
1299 if (!altlist(e, buf, pat)) {
1300 return (NULL);
1302 re_cwcomp(pat);
1303 return (pat);
1306 static BOOL
1307 altlist(Expr *e, uchar_t *buf, re_cw *pat)
1309 if (e->type == Alternate)
1310 return ((BOOL)(altlist(e->l, buf, pat) &&
1311 altlist(e->r, buf, pat)));
1312 return (word(e, buf, pat));
1315 static BOOL
1316 word(Expr *e, uchar_t *buf, re_cw *pat)
1318 static uchar_t *p;
1320 if (buf) p = buf;
1322 if (e->type == Cat) {
1323 if (!word(e->l, (uchar_t *)NULL, pat))
1324 return (NO);
1325 if (!word(e->r, (uchar_t *)NULL, pat))
1326 return (NO);
1327 } else if (e->type == Literal)
1328 *p++ = e->lit;
1329 else
1330 return (NO);
1332 if (buf)
1333 re_cwadd(pat, buf, p);
1334 return (YES);
1337 static re_cw *
1338 re_cwinit(uchar_t *cmap)
1340 re_cw *c;
1342 froot = NULL;
1343 next_node = NULL;
1344 next_link = NULL;
1345 c = (re_cw *)egmalloc(sizeof (*c));
1346 c->nodeid = 0;
1347 c->maxdepth = 0;
1348 c->mindepth = 10000;
1349 c->root = newnode(c, 0);
1350 c->cmap = cmap;
1351 return (c);
1354 static void
1355 re_cwadd(re_cw *c, uchar_t *s, uchar_t *e)
1357 Node *p, *state;
1358 Link *l;
1359 int depth;
1361 state = c->root;
1362 while (s <= --e) {
1363 for (l = state->alts; l; l = l->next)
1364 if (l->lit == *e)
1365 break;
1366 if (l == NULL)
1367 break;
1368 else
1369 state = l->node;
1371 if (s <= e) {
1372 depth = state->d+1;
1373 l = newlink(*e--, p = newnode(c, depth++));
1374 l->next = state->alts;
1375 state->alts = l;
1376 state = p;
1377 while (s <= e) {
1378 state->alts = newlink(*e--, p = newnode(c, depth++));
1379 state = p;
1381 if (c->mindepth >= depth) c->mindepth = depth-1;
1383 state->out = 1;
1386 #ifdef DEBUG
1387 static
1388 pr(Node *n)
1390 Link *l;
1392 printf("%d[%d,%d]: ", n->id, n->shift1, n->shift2);
1393 for (l = n->alts; l; l = l->next) {
1394 printf("edge from \"%d\" to \"%d\" label {\"%c\"};",
1395 n->id, l->node->id, l->lit);
1396 if (l->node->out) {
1397 printf(" draw \"%d\" as Doublecircle;", l->node->id);
1399 if (l->node->fail) {
1400 printf(" edge from \"%d\" to \"%d\" dashed;",
1401 l->node->id, l->node->fail->id);
1403 printf("\n");
1404 pr(l->node);
1407 #endif
1409 static void
1410 fail(Node *root)
1412 Link *qhead = NULL, *qtail = NULL;
1413 Link *l, *ll;
1414 Link *t;
1415 Node *state, *r, *s;
1416 int a;
1418 for (l = root->alts; l; l = l->next) {
1419 ADD(l->node);
1420 l->node->fail = root;
1422 while (qhead) {
1423 r = qhead->node;
1424 DEL();
1425 for (l = r->alts; l; l = l->next) {
1426 s = l->node;
1427 a = l->lit;
1428 ADD(s);
1429 state = r->fail;
1430 while (state) {
1431 for (ll = state->alts; ll; ll = ll->next)
1432 if (ll->lit == a)
1433 break;
1434 if (ll || (state == root)) {
1435 if (ll)
1436 state = ll->node;
1438 * do it here as only other exit is
1439 * state 0
1441 if (state->out) {
1442 s->out = 1;
1444 break;
1445 } else
1446 state = state->fail;
1448 s->fail = state;
1451 zeroroot(root, root);
1454 static void
1455 zeroroot(Node *root, Node *n)
1457 Link *l;
1459 if (n->fail == root)
1460 n->fail = NULL;
1461 for (l = n->alts; l; l = l->next)
1462 zeroroot(root, l->node);
1465 static void
1466 shift(re_cw *c)
1468 Link *qhead = NULL, *qtail = NULL;
1469 Link *l;
1470 Link *t;
1471 Node *state, *r, *s;
1472 int k;
1474 for (k = 0; k < 256; k++)
1475 c->step[k] = c->mindepth+1;
1476 c->root->shift1 = 1;
1477 c->root->shift2 = c->mindepth;
1478 for (l = c->root->alts; l; l = l->next) {
1479 l->node->shift2 = c->root->shift2;
1480 ADD(l->node);
1481 l->node->fail = NULL;
1483 while (qhead) {
1484 r = qhead->node;
1485 DEL();
1486 r->shift1 = c->mindepth;
1487 r->shift2 = c->mindepth;
1488 if ((state = r->fail) != NULL) {
1489 do {
1490 k = r->d - state->d;
1491 if (k < state->shift1) {
1492 state->shift1 = k;
1494 if (r->out && (k < state->shift2)) {
1495 state->shift2 = k;
1497 } while ((state = state->fail) != NULL);
1499 for (l = r->alts; l; l = l->next) {
1500 s = l->node;
1501 ADD(s);
1504 shiftprop(c, c->root);
1505 shifttab(c->root);
1506 c->step[0] = 1;
1509 static void
1510 shifttab(Node *n)
1512 Link *l;
1513 Node **nn;
1515 n->tab = nn = (Node **)egmalloc(256 * sizeof (Node *));
1516 (void) memset((char *)n->tab, 0, 256 * sizeof (Node *));
1517 for (l = n->alts; l; l = l->next)
1518 nn[l->lit] = l->node;
1521 static void
1522 shiftprop(re_cw *c, Node *n)
1524 Link *l;
1525 Node *nn;
1527 for (l = n->alts; l; l = l->next) {
1528 if (c->step[l->lit] > l->node->d)
1529 c->step[l->lit] = l->node->d;
1530 nn = l->node;
1531 if (n->shift2 < nn->shift2)
1532 nn->shift2 = n->shift2;
1533 shiftprop(c, nn);
1537 static void
1538 re_cwcomp(re_cw *c)
1540 fail(c->root);
1541 shift(c);
1544 static BOOL
1545 re_cwexec(PATTERN *pat, uchar_t *rs, uchar_t *re, uchar_t **mb, uchar_t **me)
1547 Node *state;
1548 Link *l;
1549 uchar_t *sp;
1550 uchar_t *s;
1551 uchar_t *e;
1552 Node *ostate;
1553 int k;
1554 re_cw *c = pat->cw_ptr;
1555 uchar_t fake[1];
1556 uchar_t mappedsp;
1558 fake[0] = 0;
1559 state = c->root;
1561 s = rs+c->mindepth-1;
1562 e = re;
1563 while (s < e) {
1564 /* scan */
1565 for (sp = s; (ostate = state) != NULL; ) {
1566 mappedsp = c->cmap[*sp];
1567 if (state->tab) {
1568 if ((state = state->tab[mappedsp]) == NULL)
1569 goto nomatch;
1570 } else {
1571 for (l = state->alts; ; l = l->next) {
1572 if (l == NULL)
1573 goto nomatch;
1574 if (l->lit == mappedsp) {
1575 state = l->node;
1576 break;
1580 if (state->out) {
1581 *mb = sp;
1582 *me = s+1;
1583 if (fixloc(mb, me))
1584 return (YES);
1586 if (--sp < rs) {
1587 sp = fake;
1588 break;
1591 nomatch:
1592 k = c->step[c->cmap[*sp]] - ostate->d - 1;
1593 if (k < ostate->shift1)
1594 k = ostate->shift1;
1595 if (k > ostate->shift2)
1596 k = ostate->shift2;
1597 s += k;
1598 state = c->root;
1600 return (NO);
1603 static Node *
1604 newnode(re_cw *c, int d)
1606 static Node *lim = NULL;
1607 static int incr = 256;
1609 if (!next_node) lim = NULL;
1610 if (next_node == lim) {
1611 next_node = (Node *)egmalloc(incr * sizeof (Node));
1612 lim = next_node + incr;
1614 next_node->d = d;
1615 if (d > c->maxdepth) c->maxdepth = d;
1616 next_node->id = c->nodeid++;
1617 next_node->alts = NULL;
1618 next_node->tab = NULL;
1619 next_node->out = 0;
1620 return (next_node++);
1623 static Link *
1624 newlink(uchar_t lit, Node *n)
1626 static Link *lim = NULL;
1627 static int incr = 256;
1629 if (!next_link) lim = NULL;
1630 if (next_link == lim) {
1631 next_link = (Link *)egmalloc(incr * sizeof (Link));
1632 lim = next_link + incr;
1634 next_link->lit = lit;
1635 next_link->node = n;
1636 next_link->next = NULL;
1637 return (next_link++);
1641 egrep(char *f, FILE *o, char *fo)
1643 uchar_t buff[MAXBUFSIZE + ESIZE];
1645 buffer = buff;
1646 *buffer++ = NL; /* New line precedes buffer to prevent runover */
1647 file = f;
1648 output = o;
1649 format = fo;
1650 return (execute());
1653 static int
1654 execute(void)
1656 LINE current;
1657 BOOL matched;
1658 BOOL all_searched; /* file being matched until end */
1660 if ((file_desc = open(file, O_RDONLY)) < 0) {
1661 return (-1);
1663 init_file(&current);
1664 /* while there is more get more text from the data stream */
1665 for (;;) {
1666 all_searched = NO;
1669 * Find next new-line in buffer.
1670 * Begin after previous new line character
1673 current.prntbuf = current.newline + 1;
1674 current.ln++; /* increment line number */
1676 if (current.prntbuf < bufend) {
1677 /* There is more text in buffer */
1680 * Take our next
1681 * "line" as the entire remaining buffer.
1682 * However, if there is more of the file yet
1683 * to be read in, exclude any incomplete
1684 * line at end.
1686 if (file_stat == NO_MORE) {
1687 all_searched = YES;
1688 current.newline = bufend - 1;
1689 if (*current.newline != NL) {
1690 current.newline = bufend;
1692 } else {
1694 * Find end of the last
1695 * complete line in the buffer.
1697 current.newline = bufend;
1698 while (*--current.newline != NL) {
1700 if (current.newline < current.prntbuf) {
1701 /* end not found */
1702 current.newline = bufend;
1705 } else {
1706 /* There is no more text in the buffer. */
1707 current.newline = bufend;
1709 if (current.newline >= bufend) {
1711 * There is no more text in the buffer,
1712 * or no new line was found.
1714 switch (file_stat) {
1715 case MORE: /* file partly unread */
1716 case BEGIN:
1717 fgetfile(&current);
1719 current.ln--;
1720 continue; /* with while loop */
1721 case NO_MORE:
1722 break;
1724 /* Nothing more to read in for this file. */
1725 if (current.newline <= current.prntbuf) {
1726 /* Nothing in the buffer, either */
1727 /* We are done with the file. */
1728 current.ln--;
1729 break; /* out of while loop */
1731 /* There is no NL at the end of the file */
1734 matched = pattern_match(&match_pattern, &current);
1735 if (matched) {
1736 int nc;
1738 get_ncount(&current, match_pattern.loc1);
1739 get_line(&current, match_pattern.loc2);
1741 nc = current.newline + 1 - current.prntbuf;
1742 (void) fprintf(output, format, file, current.ln);
1743 (void) fwrite((char *)current.prntbuf, 1, nc, output);
1744 } else {
1745 if (all_searched)
1746 break; /* out of while loop */
1748 get_ncount(&current, current.newline + 1);
1752 (void) close(file_desc);
1753 return (0);
1756 static void
1757 init_file(LINE *cur_ptr)
1759 file_stat = BEGIN;
1760 cur_ptr->ln = 0;
1761 bufend = buffer;
1762 cur_ptr->newline = buffer - 1;
1765 static BOOL
1766 pattern_match(PATTERN *pat, LINE *lptr)
1768 if ((*pat->procfn)(pat, lptr->prntbuf - 1, lptr->newline + 1,
1769 &pat->loc1, &pat->loc2)) {
1770 return (YES);
1771 } else {
1772 pat->loc1 = lptr->prntbuf;
1773 pat->loc2 = lptr->newline - 1;
1774 return (NO);
1776 } /* end of function pattern_match() */
1778 static void
1779 fgetfile(LINE *cur_ptr)
1782 * This function reads as much of the current file into the buffer
1783 * as will fit.
1785 int bytes; /* bytes read in current buffer */
1786 int bufsize = MAXBUFSIZE; /* free space in data buffer */
1787 int save_current;
1788 uchar_t *begin = cur_ptr->prntbuf;
1791 * Bytes of current incomplete line, if any, save_current in buffer.
1792 * These must be saved.
1794 save_current = (int)(bufend - begin);
1796 if (file_stat == MORE) {
1798 * A portion of the file fills the buffer. We must clear
1799 * out the dead wood to make room for more of the file.
1802 int k = 0;
1804 k = begin - buffer;
1805 if (!k) {
1807 * We have one humungous current line,
1808 * which fills the whole buffer.
1809 * Toss it.
1811 begin = bufend;
1812 k = begin - buffer;
1813 save_current = 0;
1816 begin = buffer;
1817 bufend = begin + save_current;
1819 bufsize = MAXBUFSIZE - save_current;
1821 if (save_current > 0) {
1823 * Must save portion of current line.
1824 * Copy to beginning of buffer.
1826 (void) memmove(buffer, buffer + k, save_current);
1830 /* Now read in the file. */
1832 do {
1833 bytes = read(file_desc, (char *)bufend, (unsigned int)bufsize);
1834 if (bytes < 0) {
1835 /* can't read any more of file */
1836 bytes = 0;
1838 bufend += bytes;
1839 bufsize -= bytes;
1840 } while (bytes > 0 && bufsize > 0);
1843 if (begin >= bufend) {
1844 /* No new lines or incomplete line in buffer */
1845 file_stat = NO_MORE;
1846 } else if (bufsize) {
1847 /* Still space in the buffer, so we have read entire file */
1848 file_stat = NO_MORE;
1849 } else {
1850 /* We filled entire buffer, so there may be more yet to read */
1851 file_stat = MORE;
1853 /* Note: bufend is 1 past last good char */
1854 *bufend = NL; /* Sentinel new-line character */
1855 /* Set newline to character preceding the current line */
1856 cur_ptr->newline = begin - 1;
1859 static void
1860 get_line(LINE *cur_ptr, uchar_t *s)
1862 while (*s++ != NL) {
1864 cur_ptr->newline = --s;
1865 cur_ptr->ln++;
1868 static void
1869 get_ncount(LINE *cur_ptr, uchar_t *s)
1871 uchar_t *p = cur_ptr->prntbuf;
1873 while (*--s != NL) {
1875 cur_ptr->newline = s++;
1876 while ((s > p) &&
1877 (p = (uchar_t *)memchr((char *)p, NL, s - p)) != NULL) {
1878 cur_ptr->ln++;
1879 ++p;
1881 cur_ptr->ln--;
1882 cur_ptr->prntbuf = s;