Add a comment.
[tinycc.git] / tests / tests2 / 46_grep.c
blob3123bc397c77399f07187475ffed97aafb56ece1
/*
 * The information in this document is subject to change
 * without notice and should not be construed as a commitment
 * by Digital Equipment Corporation or by DECUS.
 *
 * Neither Digital Equipment Corporation, DECUS, nor the authors
 * assume any responsibility for the use or reliability of this
 * document or the described software.
 *
 * Copyright (C) 1980, DECUS
 *
 * General permission to copy or modify, but not for profit, is
 * hereby granted, provided that the above copyright notice is
 * included and reference made to the fact that reproduction
 * privileges were granted by DECUS.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * grep
 *
 * Runs on the Decus compiler or on VMS. On VMS, define as:
 *      grep :== "$disk:[account]grep"      (native)
 *      grep :== "$disk:[account]grep grep" (Decus)
 * See below for more information.
 */
/*
 * Usage text, one output line per entry.  help() prints the entries in
 * order and stops at the terminating null pointer.
 */
char *documentation[] = {
    "grep searches a file for a given pattern. Execute by",
    " grep [flags] regular_expression file_list\n",
    "Flags are single characters preceded by '-':",
    " -c Only a count of matching lines is printed",
    " -f Print file name for matching lines switch, see below",
    " -n Each line is preceded by its line number",
    " -v Only print non-matching lines\n",
    "The file_list is a list of files (wildcards are acceptable on RSX modes).",
    "\nThe file name is normally printed if there is a file given.",
    "The -f flag reverses this action (print name no file, not if more).\n",
    NULL
};
/*
 * Description of the pattern syntax, one output line per entry.  Printed
 * by help(); the list ends with a null pointer.
 */
char *patdoc[] = {
    "The regular_expression defines the pattern to search for. Upper- and",
    "lower-case are always ignored. Blank lines never match. The expression",
    "should be quoted to prevent file-name translation.",
    "x An ordinary character (not mentioned below) matches that character.",
    "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
    "'^' A circumflex at the beginning of an expression matches the",
    " beginning of a line.",
    "'$' A dollar-sign at the end of an expression matches the end of a line.",
    "'.' A period matches any character except \"new-line\".",
    "':a' A colon matches a class of characters described by the following",
    "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
    "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
    "': ' other control characters, such as new-line.",
    "'*' An expression followed by an asterisk matches zero or more",
    " occurrences of that expression: \"fo*\" matches \"f\", \"fo\"",
    " \"foo\", etc.",
    "'+' An expression followed by a plus sign matches one or more",
    " occurrences of that expression: \"fo+\" matches \"fo\", etc.",
    "'-' An expression followed by a minus sign optionally matches",
    " the expression.",
    "'[]' A string enclosed in square brackets matches any character in",
    " that string, but no others. If the first character in the",
    " string is a circumflex, the expression matches any character",
    " except \"new-line\" and the characters in the string. For",
    " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
    " matches \"abc\" but not \"axb\". A range of characters may be",
    " specified by two characters separated by \"-\". Note that,",
    " [a-z] matches alphabetics, while [z-a] never matches.",
    "The concatenation of regular expressions is a regular expression.",
    NULL
};
/* Buffer capacities: LMAX sizes the line buffer, PMAX the pattern buffer. */
enum {
    LMAX = 512,
    PMAX = 256
};

/*
 * Opcodes of the compiled pattern stored in pbuf[].  The numeric values
 * are what the debug dump of the pattern prints.
 */
enum {
    CHAR   = 1,
    BOL    = 2,
    EOL    = 3,
    ANY    = 4,
    CLASS  = 5,
    NCLASS = 6,
    STAR   = 7,
    PLUS   = 8,
    MINUS  = 9,
    ALPHA  = 10,
    DIGIT  = 11,
    NALPHA = 12,
    PUNCT  = 13,
    RANGE  = 14,
    ENDPAT = 15
};
93 int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0;
95 char *pp, lbuf[LMAX], pbuf[PMAX];
/*
 * Forward declarations for routines that are called before they are
 * defined.  All of them are full prototypes so the compiler checks the
 * argument lists at every call site.
 */
char *cclass(char *source, char *src);
char *pmatch(char *line, char *pattern);
void store(int op);
void error(char *s);
void badpat(char *message, char *source, char *stop);
int match(void);
/*** Print the "File <name>:" banner on standard output ***/
void file(char *s)
{
    fprintf(stdout, "File %s:\n", s);
}
/*** Tell the user, on stderr, that file `s` could not be opened ***/
void cant(char *s)
{
    fprintf(stderr,
            "%s: cannot open\n",
            s);
}
/*** Print a null-terminated list of strings, one per line ***/
void help(char **hp)
{
    /* hp is our own copy of the pointer, so it can serve as the cursor. */
    while (*hp != NULL) {
        puts(*hp);              /* puts() appends the newline itself */
        ++hp;
    }
}
/*** Print error text `s` plus a usage summary on stderr, then exit(1) ***/
void usage(char *s)
{
    fprintf(stderr, "?GREP-E-%s\n", s);
    fputs("Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n",
          stderr);
    exit(1);
}
135 /*** Compile the pattern into global pbuf[] ************/
136 void compile(char *source)
138 char *s; /* Source string pointer */
139 char *lp; /* Last pattern pointer */
140 int c; /* Current character */
141 int o; /* Temp */
142 char *spp; /* Save beginning of pattern */
144 s = source;
145 if (debug)
146 printf("Pattern = \"%s\"\n", s);
147 pp = pbuf;
148 while (c = *s++) {
150 * STAR, PLUS and MINUS are special.
152 if (c == '*' || c == '+' || c == '-') {
153 if (pp == pbuf ||
154 (o=pp[-1]) == BOL ||
155 o == EOL ||
156 o == STAR ||
157 o == PLUS ||
158 o == MINUS)
159 badpat("Illegal occurrence op.", source, s);
160 store(ENDPAT);
161 store(ENDPAT);
162 spp = pp; /* Save pattern end */
163 while (--pp > lp) /* Move pattern down */
164 *pp = pp[-1]; /* one byte */
165 *pp = (c == '*') ? STAR :
166 (c == '-') ? MINUS : PLUS;
167 pp = spp; /* Restore pattern end */
168 continue;
171 * All the rest.
173 lp = pp; /* Remember start */
174 switch(c) {
176 case '^':
177 store(BOL);
178 break;
180 case '$':
181 store(EOL);
182 break;
184 case '.':
185 store(ANY);
186 break;
188 case '[':
189 s = cclass(source, s);
190 break;
192 case ':':
193 if (*s) {
194 switch(tolower(c = *s++)) {
196 case 'a':
197 case 'A':
198 store(ALPHA);
199 break;
201 case 'd':
202 case 'D':
203 store(DIGIT);
204 break;
206 case 'n':
207 case 'N':
208 store(NALPHA);
209 break;
211 case ' ':
212 store(PUNCT);
213 break;
215 default:
216 badpat("Unknown : type", source, s);
219 break;
221 else badpat("No : type", source, s);
223 case '\\':
224 if (*s)
225 c = *s++;
227 default:
228 store(CHAR);
229 store(tolower(c));
232 store(ENDPAT);
233 store(0); /* Terminate string */
234 if (debug) {
235 for (lp = pbuf; lp < pp;) {
236 if ((c = (*lp++ & 0377)) < ' ')
237 printf("\\%o ", c);
238 else printf("%c ", c);
240 printf("\n");
244 /*** Compile a class (within []) ***********************/
245 char *cclass(char *source, char *src)
246 /* char *source; // Pattern start -- for error msg. */
247 /* char *src; // Class start */
249 char *s; /* Source pointer */
250 char *cp; /* Pattern start */
251 int c; /* Current character */
252 int o; /* Temp */
254 s = src;
255 o = CLASS;
256 if (*s == '^') {
257 ++s;
258 o = NCLASS;
260 store(o);
261 cp = pp;
262 store(0); /* Byte count */
263 while ((c = *s++) && c!=']') {
264 if (c == '\\') { /* Store quoted char */
265 if ((c = *s++) == '\0') /* Gotta get something */
266 badpat("Class terminates badly", source, s);
267 else store(tolower(c));
269 else if (c == '-' &&
270 (pp - cp) > 1 && *s != ']' && *s != '\0') {
271 c = pp[-1]; /* Range start */
272 pp[-1] = RANGE; /* Range signal */
273 store(c); /* Re-store start */
274 c = *s++; /* Get end char and*/
275 store(tolower(c)); /* Store it */
277 else {
278 store(tolower(c)); /* Store normal char */
281 if (c != ']')
282 badpat("Unterminated class", source, s);
283 if ((c = (pp - cp)) >= 256)
284 badpat("Class too large", source, s);
285 if (c == 0)
286 badpat("Empty class", source, s);
287 *cp = c;
288 return(s);
291 /*** Store an entry in the pattern buffer **************/
292 void store(int op)
294 if (pp >= &pbuf[PMAX])
295 error("Pattern too complex\n");
296 *pp++ = op;
/*** Report a bad pattern specification ****************/
/*
 * message: error text.
 * source:  start of the pattern.
 * stop:    one past the character at which compilation stopped.
 *
 * Prints the diagnostic on stderr and ends the program through error().
 */
void badpat(char *message, char *source, char *stop)
{
    fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
    /* stop - source is a ptrdiff_t; convert it to match the %ld below. */
    fprintf(stderr, "-GREP-E-Stopped at byte %ld, '%c'\n",
            (long)(stop - source), stop[-1]);
    error("?GREP-E-Bad pattern\n");
}
311 /*** Scan the file for the pattern in pbuf[] ***********/
312 void grep(FILE *fp, char *fn)
313 /* FILE *fp; // File to process */
314 /* char *fn; // File name (for -f option) */
316 int lno, count, m;
318 lno = 0;
319 count = 0;
320 while (fgets(lbuf, LMAX, fp)) {
321 ++lno;
322 m = match();
323 if ((m && !vflag) || (!m && vflag)) {
324 ++count;
325 if (!cflag) {
326 if (fflag && fn) {
327 file(fn);
328 fn = 0;
330 if (nflag)
331 printf("%d\t", lno);
332 printf("%s\n", lbuf);
336 if (cflag) {
337 if (fflag && fn)
338 file(fn);
339 printf("%d\n", count);
343 /*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
344 int match()
346 char *l; /* Line pointer */
348 for (l = lbuf; *l; ++l) {
349 if (pmatch(l, pbuf))
350 return(1);
352 return(0);
355 /*** Match partial line with pattern *******************/
356 char *pmatch(char *line, char *pattern)
357 /* char *line; // (partial) line to match */
358 /* char *pattern; // (partial) pattern to match */
360 char *l; /* Current line pointer */
361 char *p; /* Current pattern pointer */
362 char c; /* Current character */
363 char *e; /* End for STAR and PLUS match */
364 int op; /* Pattern operation */
365 int n; /* Class counter */
366 char *are; /* Start of STAR match */
368 l = line;
369 if (debug > 1)
370 printf("pmatch(\"%s\")\n", line);
371 p = pattern;
372 while ((op = *p++) != ENDPAT) {
373 if (debug > 1)
374 printf("byte[%ld] = 0%o, '%c', op = 0%o\n",
375 l-line, *l, *l, op);
376 switch(op) {
378 case CHAR:
379 if (tolower(*l++) != *p++)
380 return(0);
381 break;
383 case BOL:
384 if (l != lbuf)
385 return(0);
386 break;
388 case EOL:
389 if (*l != '\0')
390 return(0);
391 break;
393 case ANY:
394 if (*l++ == '\0')
395 return(0);
396 break;
398 case DIGIT:
399 if ((c = *l++) < '0' || (c > '9'))
400 return(0);
401 break;
403 case ALPHA:
404 c = tolower(*l++);
405 if (c < 'a' || c > 'z')
406 return(0);
407 break;
409 case NALPHA:
410 c = tolower(*l++);
411 if (c >= 'a' && c <= 'z')
412 break;
413 else if (c < '0' || c > '9')
414 return(0);
415 break;
417 case PUNCT:
418 c = *l++;
419 if (c == 0 || c > ' ')
420 return(0);
421 break;
423 case CLASS:
424 case NCLASS:
425 c = tolower(*l++);
426 n = *p++ & 0377;
427 do {
428 if (*p == RANGE) {
429 p += 3;
430 n -= 2;
431 if (c >= p[-2] && c <= p[-1])
432 break;
434 else if (c == *p++)
435 break;
436 } while (--n > 1);
437 if ((op == CLASS) == (n <= 1))
438 return(0);
439 if (op == CLASS)
440 p += n - 2;
441 break;
443 case MINUS:
444 e = pmatch(l, p); /* Look for a match */
445 while (*p++ != ENDPAT); /* Skip over pattern */
446 if (e) /* Got a match? */
447 l = e; /* Yes, update string */
448 break; /* Always succeeds */
450 case PLUS: /* One or more ... */
451 if ((l = pmatch(l, p)) == 0)
452 return(0); /* Gotta have a match */
453 case STAR: /* Zero or more ... */
454 are = l; /* Remember line start */
455 while (*l && (e = pmatch(l, p)))
456 l = e; /* Get longest match */
457 while (*p++ != ENDPAT); /* Skip over pattern */
458 while (l >= are) { /* Try to match rest */
459 if (e = pmatch(l, p))
460 return(e);
461 --l; /* Nope, try earlier */
463 return(0); /* Nothing else worked */
465 default:
466 printf("Bad op code %d\n", op);
467 error("Cannot happen -- match\n");
470 return(l);
/*** Write message `s` to stderr and exit with status 1 ***/
void error(char *s)
{
    fputs(s, stderr);
    exit(1);
}
480 /*** Main program - parse arguments & grep *************/
481 int main(int argc, char **argv)
483 char *p;
484 int c, i;
485 int gotpattern;
487 FILE *f;
489 if (argc <= 1)
490 usage("No arguments");
491 if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
492 help(documentation);
493 help(patdoc);
494 return 0;
496 nfile = argc-1;
497 gotpattern = 0;
498 for (i=1; i < argc; ++i) {
499 p = argv[i];
500 if (*p == '-') {
501 ++p;
502 while (c = *p++) {
503 switch(tolower(c)) {
505 case '?':
506 help(documentation);
507 break;
509 case 'C':
510 case 'c':
511 ++cflag;
512 break;
514 case 'D':
515 case 'd':
516 ++debug;
517 break;
519 case 'F':
520 case 'f':
521 ++fflag;
522 break;
524 case 'n':
525 case 'N':
526 ++nflag;
527 break;
529 case 'v':
530 case 'V':
531 ++vflag;
532 break;
534 default:
535 usage("Unknown flag");
538 argv[i] = 0;
539 --nfile;
540 } else if (!gotpattern) {
541 compile(p);
542 argv[i] = 0;
543 ++gotpattern;
544 --nfile;
547 if (!gotpattern)
548 usage("No pattern");
549 if (nfile == 0)
550 grep(stdin, 0);
551 else {
552 fflag = fflag ^ (nfile > 0);
553 for (i=1; i < argc; ++i) {
554 if (p = argv[i]) {
555 if ((f=fopen(p, "r")) == NULL)
556 cant(p);
557 else {
558 grep(f, p);
559 fclose(f);
564 return 0;
567 /* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/