configure: cleanup
[tinycc.git] / tests / tests2 / 46_grep.c
blob5f52220ad4e3ec053ded7238cd156cdae93727a3
/*
 * The information in this document is subject to change
 * without notice and should not be construed as a commitment
 * by Digital Equipment Corporation or by DECUS.
 *
 * Neither Digital Equipment Corporation, DECUS, nor the authors
 * assume any responsibility for the use or reliability of this
 * document or the described software.
 *
 * Copyright (C) 1980, DECUS
 *
 * General permission to copy or modify, but not for profit, is
 * hereby granted, provided that the above copyright notice is
 * included and reference made to the fact that reproduction
 * privileges were granted by DECUS.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * grep
 *
 * Runs on the Decus compiler or on VMS. On VMS, define as:
 *      grep :== "$disk:[account]grep"      (native)
 *      grep :== "$disk:[account]grep grep" (Decus)
 * See below for more information.
 */
/*
 * Help text tables. Each is a list of lines terminated by a null pointer;
 * main() hands them to help(), which prints one entry per line. They must be
 * compiled in because main() refers to both arrays unconditionally.
 */
char *documentation[] = {
   "grep searches a file for a given pattern. Execute by",
   " grep [flags] regular_expression file_list\n",
   "Flags are single characters preceeded by '-':",
   " -c Only a count of matching lines is printed",
   " -f Print file name for matching lines switch, see below",
   " -n Each line is preceeded by its line number",
   " -v Only print non-matching lines\n",
   "The file_list is a list of files (wildcards are acceptable on RSX modes).",
   "\nThe file name is normally printed if there is a file given.",
   "The -f flag reverses this action (print name no file, not if more).\n",
   0 };

/* Description of the regular-expression syntax accepted by compile(). */
char *patdoc[] = {
   "The regular_expression defines the pattern to search for. Upper- and",
   "lower-case are always ignored. Blank lines never match. The expression",
   "should be quoted to prevent file-name translation.",
   "x An ordinary character (not mentioned below) matches that character.",
   "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
   "'^' A circumflex at the beginning of an expression matches the",
   " beginning of a line.",
   "'$' A dollar-sign at the end of an expression matches the end of a line.",
   "'.' A period matches any character except \"new-line\".",
   "':a' A colon matches a class of characters described by the following",
   "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
   "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
   "': ' other control characters, such as new-line.",
   "'*' An expression followed by an asterisk matches zero or more",
   " occurrances of that expression: \"fo*\" matches \"f\", \"fo\"",
   " \"foo\", etc.",
   "'+' An expression followed by a plus sign matches one or more",
   " occurrances of that expression: \"fo+\" matches \"fo\", etc.",
   "'-' An expression followed by a minus sign optionally matches",
   " the expression.",
   "'[]' A string enclosed in square brackets matches any character in",
   " that string, but no others. If the first character in the",
   " string is a circumflex, the expression matches any character",
   " except \"new-line\" and the characters in the string. For",
   " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
   " matches \"abc\" but not \"axb\". A range of characters may be",
   " specified by two characters separated by \"-\". Note that,",
   " [a-z] matches alphabetics, while [z-a] never matches.",
   "The concatenation of regular expressions is a regular expression.",
   0};
/* Buffer sizes: LMAX bounds the input line buffer, PMAX the compiled pattern. */
#define LMAX    512
#define PMAX    256

/*
 * Opcodes written into the compiled pattern buffer by compile()/cclass()
 * and interpreted by pmatch().
 */
enum {
   CHAR   = 1,    /* Literal character; the character follows      */
   BOL    = 2,    /* '^'                                           */
   EOL    = 3,    /* '$'                                           */
   ANY    = 4,    /* '.'                                           */
   CLASS  = 5,    /* '[...]'; a count byte and members follow      */
   NCLASS = 6,    /* '[^...]'; same layout as CLASS                */
   STAR   = 7,    /* '*' applied to the element that follows       */
   PLUS   = 8,    /* '+' applied to the element that follows       */
   MINUS  = 9,    /* '-' applied to the element that follows       */
   ALPHA  = 10,   /* ':a'                                          */
   DIGIT  = 11,   /* ':d'                                          */
   NALPHA = 12,   /* ':n'                                          */
   PUNCT  = 13,   /* ': '                                          */
   RANGE  = 14,   /* Inside a class: low and high bounds follow    */
   ENDPAT = 15    /* End of a (sub)pattern                         */
};
94 int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0;
96 char *pp, lbuf[LMAX], pbuf[PMAX];
98 char *cclass();
99 char *pmatch();
/*** Display a file name *******************************/
/* Writes the heading "File <s>:" on standard output. */
void file(char *s)
{
   fprintf(stdout, "File %s:\n", s);
}
/*** Report unopenable file ****************************/
/* Writes "<s>: cannot open" on standard error; the caller carries on. */
void cant(char *s)
{
   fprintf(stderr, "%s%s", s, ": cannot open\n");
}
/*** Give good help ************************************/
/* Prints every string of the null-terminated table hp, one per line. */
void help(char **hp)
{
   char **entry = hp;

   while (*entry != NULL) {
      puts(*entry);         /* puts() appends the newline itself */
      ++entry;
   }
}
/*** Display usage summary *****************************/
/* Reports the problem s and a one-line synopsis on standard error, then
 * terminates the program with exit status 1. */
void usage(char *s)
{
   fprintf(stderr, "?GREP-E-%s\n", s);
   fputs("Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n", stderr);
   exit(1);
}
132 /*** Compile the pattern into global pbuf[] ************/
133 void compile(char *source)
135 char *s; /* Source string pointer */
136 char *lp; /* Last pattern pointer */
137 int c; /* Current character */
138 int o; /* Temp */
139 char *spp; /* Save beginning of pattern */
141 s = source;
142 if (debug)
143 printf("Pattern = \"%s\"\n", s);
144 pp = pbuf;
145 while (c = *s++) {
147 * STAR, PLUS and MINUS are special.
149 if (c == '*' || c == '+' || c == '-') {
150 if (pp == pbuf ||
151 (o=pp[-1]) == BOL ||
152 o == EOL ||
153 o == STAR ||
154 o == PLUS ||
155 o == MINUS)
156 badpat("Illegal occurrance op.", source, s);
157 store(ENDPAT);
158 store(ENDPAT);
159 spp = pp; /* Save pattern end */
160 while (--pp > lp) /* Move pattern down */
161 *pp = pp[-1]; /* one byte */
162 *pp = (c == '*') ? STAR :
163 (c == '-') ? MINUS : PLUS;
164 pp = spp; /* Restore pattern end */
165 continue;
168 * All the rest.
170 lp = pp; /* Remember start */
171 switch(c) {
173 case '^':
174 store(BOL);
175 break;
177 case '$':
178 store(EOL);
179 break;
181 case '.':
182 store(ANY);
183 break;
185 case '[':
186 s = cclass(source, s);
187 break;
189 case ':':
190 if (*s) {
191 switch(tolower(c = *s++)) {
193 case 'a':
194 case 'A':
195 store(ALPHA);
196 break;
198 case 'd':
199 case 'D':
200 store(DIGIT);
201 break;
203 case 'n':
204 case 'N':
205 store(NALPHA);
206 break;
208 case ' ':
209 store(PUNCT);
210 break;
212 default:
213 badpat("Unknown : type", source, s);
216 break;
218 else badpat("No : type", source, s);
220 case '\\':
221 if (*s)
222 c = *s++;
224 default:
225 store(CHAR);
226 store(tolower(c));
229 store(ENDPAT);
230 store(0); /* Terminate string */
231 if (debug) {
232 for (lp = pbuf; lp < pp;) {
233 if ((c = (*lp++ & 0377)) < ' ')
234 printf("\\%o ", c);
235 else printf("%c ", c);
237 printf("\n");
241 /*** Compile a class (within []) ***********************/
242 char *cclass(char *source, char *src)
243 /* char *source; // Pattern start -- for error msg. */
244 /* char *src; // Class start */
246 char *s; /* Source pointer */
247 char *cp; /* Pattern start */
248 int c; /* Current character */
249 int o; /* Temp */
251 s = src;
252 o = CLASS;
253 if (*s == '^') {
254 ++s;
255 o = NCLASS;
257 store(o);
258 cp = pp;
259 store(0); /* Byte count */
260 while ((c = *s++) && c!=']') {
261 if (c == '\\') { /* Store quoted char */
262 if ((c = *s++) == '\0') /* Gotta get something */
263 badpat("Class terminates badly", source, s);
264 else store(tolower(c));
266 else if (c == '-' &&
267 (pp - cp) > 1 && *s != ']' && *s != '\0') {
268 c = pp[-1]; /* Range start */
269 pp[-1] = RANGE; /* Range signal */
270 store(c); /* Re-store start */
271 c = *s++; /* Get end char and*/
272 store(tolower(c)); /* Store it */
274 else {
275 store(tolower(c)); /* Store normal char */
278 if (c != ']')
279 badpat("Unterminated class", source, s);
280 if ((c = (pp - cp)) >= 256)
281 badpat("Class too large", source, s);
282 if (c == 0)
283 badpat("Empty class", source, s);
284 *cp = c;
285 return(s);
288 /*** Store an entry in the pattern buffer **************/
289 void store(int op)
291 if (pp >= &pbuf[PMAX])
292 error("Pattern too complex\n");
293 *pp++ = op;
/*** Report a bad pattern specification ****************/
/*
 * Prints a diagnostic for a malformed pattern on standard error and then
 * terminates the program through error().
 *
 *   message  description of the problem
 *   source   start of the pattern text
 *   stop     position just past the character where compilation stopped
 */
void badpat(char *message, char *source, char *stop)
{
   fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
   /* The pointer difference is a ptrdiff_t; convert it for "%d". */
   fprintf(stderr, "-GREP-E-Stopped at byte %d, '%c'\n",
           (int)(stop - source), stop[-1]);
   error("?GREP-E-Bad pattern\n");
}
308 /*** Scan the file for the pattern in pbuf[] ***********/
309 void grep(FILE *fp, char *fn)
310 /* FILE *fp; // File to process */
311 /* char *fn; // File name (for -f option) */
313 int lno, count, m;
315 lno = 0;
316 count = 0;
317 while (fgets(lbuf, LMAX, fp)) {
318 ++lno;
319 m = match();
320 if ((m && !vflag) || (!m && vflag)) {
321 ++count;
322 if (!cflag) {
323 if (fflag && fn) {
324 file(fn);
325 fn = 0;
327 if (nflag)
328 printf("%d\t", lno);
329 printf("%s\n", lbuf);
333 if (cflag) {
334 if (fflag && fn)
335 file(fn);
336 printf("%d\n", count);
340 /*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
341 void match()
343 char *l; /* Line pointer */
345 for (l = lbuf; *l; ++l) {
346 if (pmatch(l, pbuf))
347 return(1);
349 return(0);
352 /*** Match partial line with pattern *******************/
353 char *pmatch(char *line, char *pattern)
354 /* char *line; // (partial) line to match */
355 /* char *pattern; // (partial) pattern to match */
357 char *l; /* Current line pointer */
358 char *p; /* Current pattern pointer */
359 char c; /* Current character */
360 char *e; /* End for STAR and PLUS match */
361 int op; /* Pattern operation */
362 int n; /* Class counter */
363 char *are; /* Start of STAR match */
365 l = line;
366 if (debug > 1)
367 printf("pmatch(\"%s\")\n", line);
368 p = pattern;
369 while ((op = *p++) != ENDPAT) {
370 if (debug > 1)
371 printf("byte[%d] = 0%o, '%c', op = 0%o\n",
372 l-line, *l, *l, op);
373 switch(op) {
375 case CHAR:
376 if (tolower(*l++) != *p++)
377 return(0);
378 break;
380 case BOL:
381 if (l != lbuf)
382 return(0);
383 break;
385 case EOL:
386 if (*l != '\0')
387 return(0);
388 break;
390 case ANY:
391 if (*l++ == '\0')
392 return(0);
393 break;
395 case DIGIT:
396 if ((c = *l++) < '0' || (c > '9'))
397 return(0);
398 break;
400 case ALPHA:
401 c = tolower(*l++);
402 if (c < 'a' || c > 'z')
403 return(0);
404 break;
406 case NALPHA:
407 c = tolower(*l++);
408 if (c >= 'a' && c <= 'z')
409 break;
410 else if (c < '0' || c > '9')
411 return(0);
412 break;
414 case PUNCT:
415 c = *l++;
416 if (c == 0 || c > ' ')
417 return(0);
418 break;
420 case CLASS:
421 case NCLASS:
422 c = tolower(*l++);
423 n = *p++ & 0377;
424 do {
425 if (*p == RANGE) {
426 p += 3;
427 n -= 2;
428 if (c >= p[-2] && c <= p[-1])
429 break;
431 else if (c == *p++)
432 break;
433 } while (--n > 1);
434 if ((op == CLASS) == (n <= 1))
435 return(0);
436 if (op == CLASS)
437 p += n - 2;
438 break;
440 case MINUS:
441 e = pmatch(l, p); /* Look for a match */
442 while (*p++ != ENDPAT); /* Skip over pattern */
443 if (e) /* Got a match? */
444 l = e; /* Yes, update string */
445 break; /* Always succeeds */
447 case PLUS: /* One or more ... */
448 if ((l = pmatch(l, p)) == 0)
449 return(0); /* Gotta have a match */
450 case STAR: /* Zero or more ... */
451 are = l; /* Remember line start */
452 while (*l && (e = pmatch(l, p)))
453 l = e; /* Get longest match */
454 while (*p++ != ENDPAT); /* Skip over pattern */
455 while (l >= are) { /* Try to match rest */
456 if (e = pmatch(l, p))
457 return(e);
458 --l; /* Nope, try earlier */
460 return(0); /* Nothing else worked */
462 default:
463 printf("Bad op code %d\n", op);
464 error("Cannot happen -- match\n");
467 return(l);
/*** Report an error ***********************************/
/* Writes s unchanged on standard error and terminates the program with
 * exit status 1. */
void error(char *s)
{
   fputs(s, stderr);
   exit(1);
}
477 /*** Main program - parse arguments & grep *************/
478 int main(int argc, char **argv)
480 char *p;
481 int c, i;
482 int gotpattern;
484 FILE *f;
486 if (argc <= 1)
487 usage("No arguments");
488 if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
489 help(documentation);
490 help(patdoc);
491 return 0;
493 nfile = argc-1;
494 gotpattern = 0;
495 for (i=1; i < argc; ++i) {
496 p = argv[i];
497 if (*p == '-') {
498 ++p;
499 while (c = *p++) {
500 switch(tolower(c)) {
502 case '?':
503 help(documentation);
504 break;
506 case 'C':
507 case 'c':
508 ++cflag;
509 break;
511 case 'D':
512 case 'd':
513 ++debug;
514 break;
516 case 'F':
517 case 'f':
518 ++fflag;
519 break;
521 case 'n':
522 case 'N':
523 ++nflag;
524 break;
526 case 'v':
527 case 'V':
528 ++vflag;
529 break;
531 default:
532 usage("Unknown flag");
535 argv[i] = 0;
536 --nfile;
537 } else if (!gotpattern) {
538 compile(p);
539 argv[i] = 0;
540 ++gotpattern;
541 --nfile;
544 if (!gotpattern)
545 usage("No pattern");
546 if (nfile == 0)
547 grep(stdin, 0);
548 else {
549 fflag = fflag ^ (nfile > 0);
550 for (i=1; i < argc; ++i) {
551 if (p = argv[i]) {
552 if ((f=fopen(p, "r")) == NULL)
553 cant(p);
554 else {
555 grep(f, p);
556 fclose(f);
561 return 0;
564 /* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/