Copy the whole envarc directory to Scalos:Storage so
[AROS-Contrib.git] / fish / grep / grep.c
blob850b086bfc13a9dd0511a35bb1a65001e19c5545
/*
 *
 * The information in this document is subject to change
 * without notice and should not be construed as a commitment
 * by Digital Equipment Corporation or by DECUS.
 *
 * Neither Digital Equipment Corporation, DECUS, nor the authors
 * assume any responsibility for the use or reliability of this
 * document or the described software.
 *
 * Copyright (C) 1980, DECUS
 *
 * General permission to copy or modify, but not for profit, is
 * hereby granted, provided that the above copyright notice is
 * included and reference made to the fact that reproduction
 * privileges were granted by DECUS.
 *
 */
21 #include <stdio.h>
22 #include <ctype.h>
23 #include <stdlib.h>
/*
 * Forward declarations for every routine defined in this file.
 * Parameter names mirror the ones used by the definitions.
 */
void  file(char *s);
void  cant(char *s);
void  help(char **hp);
void  usage(char *s);
void  compile(char *source);
char *cclass(char *source, char *src);
void  store(int op);
void  badpat(char *message, char *source, char *stop);
void  grep(FILE *fp, char *fn);
int   match(void);
char *pmatch(char *line, char *pattern);
void  error(char *s);
/*
 * grep.
 *
 * Runs on the Decus compiler or on vms.
 * Converted for BDS compiler (under CP/M-80), 20-Jan-83, by Chris Kern.
 *
 * Converted to IBM PC with CI-C86 C Compiler June 1983 by David N. Smith
 *
 * On vms, define as:
 *
 * grep :== "$disk:[account]grep" (native)
 * grep :== "$disk:[account]grep grep" (Decus)
 *
 * See below for more information.
 *
 */
/*
 * Command-line help text, one output line per entry.
 * help() prints entries in order until it reaches the null sentinel.
 */
char *documentation[] = {
    "grep searches a file for a given pattern. Execute by",
    " grep [flags] regular_expression file_list",
    "",
    "Flags are single characters preceeded by '-':",
    " -c Only a count of matching lines is printed",
    " -f Print file name for matching lines switch, see below",
    " -n Each line is preceeded by its line number",
    " -v Only print non-matching lines",
    "",
    "The file_list is a list of files (wildcards are acceptable on RSX modes).",
    "",
    "The file name is normally printed if there is a file given.",
    "The -f flag reverses this action (print name no file, not if more).",
    "",
    NULL
};
/*
 * Description of the pattern syntax, one output line per entry.
 * help() walks the array until it finds a null pointer, so the
 * trailing NULL sentinel is mandatory; without it help() would run
 * off the end of the array.
 */
char *patdoc[] = {
    "The regular_expression defines the pattern to search for. Upper- and",
    "lower-case are always ignored. Blank lines never match. The expression",
    "should be quoted to prevent file-name translation.",
    "x An ordinary character (not mentioned below) matches that character.",
    "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
    "'^' A circumflex at the beginning of an expression matches the",
    " beginning of a line.",
    "'$' A dollar-sign at the end of an expression matches the end of a line.",
    "'.' A period matches any character except \"new-line\".",
    "':a' A colon matches a class of characters described by the following",
    "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
    "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
    "': ' other control characters, such as new-line.",
    "'*' An expression followed by an asterisk matches zero or more",
    " occurrances of that expression: \"fo*\" matches \"f\", \"fo\"",
    " \"foo\", etc.",
    "'+' An expression followed by a plus sign matches one or more",
    " occurrances of that expression: \"fo+\" matches \"fo\", etc.",
    "'-' An expression followed by a minus sign optionally matches",
    " the expression.",
    "'[]' A string enclosed in square brackets matches any character in",
    " that string, but no others. If the first character in the",
    " string is a circumflex, the expression matches any character",
    " except \"new-line\" and the characters in the string. For",
    " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
    " matches \"abc\" but not \"axb\". A range of characters may be",
    " specified by two characters separated by \"-\". Note that,",
    " [a-z] matches alphabetics, while [z-a] never matches.",
    "The concatenation of regular expressions is a regular expression.",
    NULL
};
101 /*$ifdef vms */
102 /*$define VMS VMS native compiler */
103 /*$define error(s) _error(s) */
104 /*$endif */
#define LMAX 512            /* size of the input line buffer lbuf[]       */
#define PMAX 256            /* size of the compiled pattern buffer pbuf[] */

/*
 * Opcodes of the compiled pattern.  compile() and cclass() write them
 * into pbuf[]; pmatch() interprets them.
 */
enum {
    CHAR   = 1,             /* followed by one literal (lower-cased) char */
    BOL    = 2,             /* matches only at the start of lbuf[]        */
    EOL    = 3,             /* matches only at the terminating NUL        */
    ANY    = 4,             /* any character other than NUL               */
    CLASS  = 5,             /* followed by a count byte and the members   */
    NCLASS = 6,             /* same layout as CLASS, sense inverted       */
    STAR   = 7,             /* zero or more of the operand that follows   */
    PLUS   = 8,             /* one or more of the operand that follows    */
    MINUS  = 9,             /* optional operand follows                   */
    ALPHA  = 10,            /* a letter                                   */
    DIGIT  = 11,            /* a decimal digit                            */
    NALPHA = 12,            /* a letter or a digit                        */
    PUNCT  = 13,            /* a non-NUL character <= ' '                 */
    RANGE  = 14,            /* inside a class: followed by low, high      */
    ENDPAT = 15             /* ends an operand and the whole pattern      */
};
122 int cflag;
123 int fflag;
124 int nflag;
125 int vflag;
126 int nfile;
127 int debug = 0; /* Set for debug code */
128 char *pp;
129 #ifndef vms
130 char file_name[81];
131 #endif
132 char lbuf[LMAX];
133 char pbuf[PMAX];
134 /*******************************************************/
135 int main(argc, argv)
136 int argc;
137 char *argv[];
139 register char *p;
140 register int c, i;
141 int gotpattern;
142 FILE *f;
143 if (argc <= 1)
144 usage("No arguments");
145 if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
146 help(documentation);
147 help(patdoc);
148 return (0);
150 nfile = argc-1;
151 gotpattern = 0;
152 for (i=1; i < argc; ++i) {
153 p = argv[i];
154 if (*p == '-') {
155 ++p;
156 while ((c = *p++)) {
157 switch(tolower(c)) {
158 case '?':
159 help(documentation);
160 break;
161 case 'C':
162 case 'c':
163 ++cflag;
164 break;
165 case 'D':
166 case 'd':
167 ++debug;
168 break;
169 case 'F':
170 case 'f':
171 ++fflag;
172 break;
173 case 'n':
174 case 'N':
175 ++nflag;
176 break;
177 case 'v':
178 case 'V':
179 ++vflag;
180 break;
181 default:
182 usage("Unknown flag");
185 argv[i] = 0;
186 --nfile;
187 } else if (!gotpattern) {
188 compile(p);
189 argv[i] = 0;
190 ++gotpattern;
191 --nfile;
194 if (!gotpattern)
195 usage("No pattern");
196 if (nfile == 0)
197 grep(stdin, 0);
198 else {
199 fflag = fflag ^ (nfile > 0);
200 for (i=1; i < argc; ++i) {
201 if ((p = argv[i])) {
202 if ((f=fopen(p, "r")) == NULL)
203 cant(p);
204 else {
205 grep(f, p);
206 fclose(f);
211 return (0);
213 /*******************************************************/
/*
 * Write the "File <name>:" banner for file name s to stdout.
 */
void file(char *s)
{
    fprintf(stdout, "File %s:\n", s);
}
219 /*******************************************************/
/*
 * Report on stderr that the file named s could not be opened.
 */
void cant(char *s)
{
    fprintf(stderr, "%s: cannot open\n", s);
}
225 /*******************************************************/
/*
 * Give good help: print every string of the null-terminated array hp
 * on its own line of stdout.
 */
void help(char **hp)
{
    char **entry = hp;

    while (*entry != NULL) {
        puts(*entry);       /* puts() supplies the trailing newline */
        ++entry;
    }
}
236 /*******************************************************/
/*
 * Print the error text s followed by a one-line usage summary on
 * stderr, then terminate the program with exit status 1.
 */
void usage(char *s)
{
    fprintf(stderr, "?GREP-E-%s\n", s);
    fputs("Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n", stderr);
    exit(1);
}
245 /*******************************************************/
246 void compile(source)
247 char *source; /* Pattern to compile */
249 * Compile the pattern into global pbuf[]
252 register char *s; /* Source string pointer */
253 register char *lp; /* Last pattern pointer */
254 register int c; /* Current character */
255 int o; /* Temp */
256 char *spp; /* Save beginning of pattern */
257 char *cclass(); /* Compile class routine */
258 s = source;
259 if (debug)
260 printf("Pattern = \"%s\"\n", s);
261 lp = pp = pbuf;
262 while ((c = *s++)) {
264 * STAR, PLUS and MINUS are special.
266 if (c == '*' || c == '+' || c == '-') {
267 if (pp == pbuf ||
268 (o=pp[-1]) == BOL ||
269 o == EOL ||
270 o == STAR ||
271 o == PLUS ||
272 o == MINUS)
273 badpat("Illegal occurrance op.", source, s);
274 store(ENDPAT);
275 store(ENDPAT);
276 spp = pp; /* Save pattern end */
277 while (--pp > lp) /* Move pattern down */
278 *pp = pp[-1]; /* one byte */
279 *pp = (c == '*') ? STAR :
280 (c == '-') ? MINUS : PLUS;
281 pp = spp; /* Restore pattern end */
282 continue;
285 * All the rest.
287 lp = pp; /* Remember start */
288 switch(c) {
289 case '^':
290 store(BOL);
291 break;
292 case '$':
293 store(EOL);
294 break;
295 case '.':
296 store(ANY);
297 break;
298 case '[':
299 s = cclass(source, s);
300 break;
301 case ':':
302 if (*s) {
303 c = *s++;
304 switch(tolower(c)) {
305 case 'a':
306 case 'A':
307 store(ALPHA);
308 break;
309 case 'd':
310 case 'D':
311 store(DIGIT);
312 break;
313 case 'n':
314 case 'N':
315 store(NALPHA);
316 break;
317 case ' ':
318 store(PUNCT);
319 break;
320 default:
321 badpat("Unknown : type", source, s);
323 break;
325 else badpat("No : type", source, s);
326 case '\\':
327 if (*s)
328 c = *s++;
329 default:
330 store(CHAR);
331 store(tolower(c));
334 store(ENDPAT);
335 store(0); /* Terminate string */
336 if (debug) {
337 for (lp = pbuf; lp < pp;) {
338 if ((c = (*lp++ & 0377)) < ' ')
339 printf("\\%o ", c);
340 else printf("%c ", c);
342 printf("\n");
345 /*******************************************************/
346 char *
347 cclass(source, src)
348 char *source; /* Pattern start -- for error msg. */
349 char *src; /* Class start */
351 * Compile a class (within [])
354 register char *s; /* Source pointer */
355 register char *cp; /* Pattern start */
356 register int c; /* Current character */
357 int o; /* Temp */
358 s = src;
359 o = CLASS;
360 if (*s == '^') {
361 ++s;
362 o = NCLASS;
364 store(o);
365 cp = pp;
366 store(0); /* Byte count */
367 while ((c = *s++) && c!=']') {
368 if (c == '\\') { /* Store quoted char */
369 if ((c = *s++) == '\0') /* Gotta get something */
370 badpat("Class terminates badly", source, s);
371 else store(tolower(c));
373 else if (c == '-' &&
374 (pp - cp) > 1 && *s != ']' && *s != '\0') {
375 c = pp[-1]; /* Range start */
376 pp[-1] = RANGE; /* Range signal */
377 store(c); /* Re-store start */
378 c = *s++; /* Get end char and*/
379 store(tolower(c)); /* Store it */
381 else {
382 store(tolower(c)); /* Store normal char */
385 if (c != ']')
386 badpat("Unterminated class", source, s);
387 if ((c = (pp - cp)) >= 256)
388 badpat("Class too large", source, s);
389 if (c == 0)
390 badpat("Empty class", source, s);
391 *cp = c;
392 return(s);
394 /*******************************************************/
395 void store(op)
396 int op;
398 if (pp > &pbuf[PMAX-1])
399 error("Pattern too complex\n");
400 *pp++ = op;
402 /*******************************************************/
/*
 * Report a malformed pattern on stderr and terminate via error().
 *
 * message: what is wrong.
 * source:  start of the pattern text.
 * stop:    one past the character at which compilation gave up.
 */
void badpat(char *message, char *source, char *stop)
{
    int offset = (int)(stop - source);
    char culprit = stop[-1];

    fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
    fprintf(stderr, "-GREP-E-Stopped at byte %d, '%c'\n", offset, culprit);
    error("?GREP-E-Bad pattern\n");
}
413 /*******************************************************/
414 void grep(fp, fn)
415 FILE *fp; /* File to process */
416 char *fn; /* File name (for -f option) */
418 * Scan the file for the pattern in pbuf[]
421 register int lno, count, m;
422 lno = 0;
423 count = 0;
424 while (fgets(lbuf, LMAX, fp)) {
425 ++lno;
426 m = match();
427 if ((m && !vflag) || (!m && vflag)) {
428 ++count;
429 if (!cflag) {
430 if (fflag && fn) {
431 file(fn);
432 fn = 0;
434 if (nflag)
435 printf("%d\t", lno);
436 printf("%s\n", lbuf);
440 if (cflag) {
441 if (fflag && fn)
442 file(fn);
443 printf("%d\n", count);
446 /*******************************************************/
447 int match()
449 * Match the current line (in lbuf[]), return 1 if it does.
452 register char *l; /* Line pointer */
453 char *pmatch();
454 for (l = lbuf; *l; l++) {
455 if (pmatch(l, pbuf))
456 return(1);
458 return(0);
460 /*******************************************************/
461 char *
462 pmatch(line, pattern)
463 char *line; /* (partial) line to match */
464 char *pattern; /* (partial) pattern to match */
466 register char *l; /* Current line pointer */
467 register char *p; /* Current pattern pointer */
468 register char c; /* Current character */
469 char *e; /* End for STAR and PLUS match */
470 int op; /* Pattern operation */
471 int n; /* Class counter */
472 char *are; /* Start of STAR match */
473 l = line;
474 if (debug > 1)
475 printf("pmatch(\"%s\")\n", line);
476 p = pattern;
477 while ((op = *p++) != ENDPAT) {
478 if (debug > 1)
479 printf("byte[%d] = 0%o, '%c', op = 0%o\n",
480 (int)(l-line), *l, *l, op);
481 switch(op) {
482 case CHAR:
483 if (tolower(*l) != *p++)
484 return(0);
485 l++;
486 break;
487 case BOL:
488 if (l != lbuf)
489 return(0);
490 break;
491 case EOL:
492 if (*l != '\0')
493 return(0);
494 break;
495 case ANY:
496 if (*l++ == '\0')
497 return(0);
498 break;
499 case DIGIT:
500 if ((c = *l++) < '0' || (c > '9'))
501 return(0);
502 break;
503 case ALPHA:
504 c = tolower(*l);
505 l++;
506 if (c < 'a' || c > 'z')
507 return(0);
508 break;
509 case NALPHA:
510 c = tolower(*l);
511 l++;
512 if (c >= 'a' && c <= 'z')
513 break;
514 else if (c < '0' || c > '9')
515 return(0);
516 break;
517 case PUNCT:
518 c = *l++;
519 if (c == 0 || c > ' ')
520 return(0);
521 break;
522 case CLASS:
523 case NCLASS:
524 c = tolower(*l);
525 l++;
526 n = *p++ & 0377;
527 do {
528 if (*p == RANGE) {
529 p += 3;
530 n -= 2;
531 if (c >= p[-2] && c <= p[-1])
532 break;
534 else if (c == *p++)
535 break;
536 } while (--n > 1);
537 if ((op == CLASS) == (n <= 1))
538 return(0);
539 if (op == CLASS)
540 p += n - 2;
541 break;
542 case MINUS:
543 e = pmatch(l, p); /* Look for a match */
544 while (*p++ != ENDPAT); /* Skip over pattern */
545 if (e) /* Got a match? */
546 l = e; /* Yes, update string */
547 break; /* Always succeeds */
548 case PLUS: /* One or more ... */
549 if ((l = pmatch(l, p)) == 0)
550 return(0); /* Gotta have a match */
551 case STAR: /* Zero or more ... */
552 are = l; /* Remember line start */
553 while (*l && (e = pmatch(l, p)))
554 l = e; /* Get longest match */
555 while (*p++ != ENDPAT); /* Skip over pattern */
556 while (l >= are) { /* Try to match rest */
557 if ((e = pmatch(l, p)))
558 return(e);
559 --l; /* Nope, try earlier */
561 return(0); /* Nothing else worked */
562 default:
563 printf("Bad op code %d\n", op);
564 error("Cannot happen -- match\n");
567 return(l);
569 /*******************************************************/
/*
 * Write the message s to stderr exactly as given (no newline is
 * added) and terminate the program with exit status 1.
 */
void error(char *s)
{
    fputs(s, stderr);
    exit(1);
}