9088 fgrep and egrep don't need to have separate man pages
[unleashed.git] / usr / src / cmd / grep / grep.c
blobeabd465cca1a3d43f3f3f8da9de20e4753bd386b
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * grep - pattern matching program - combined grep, egrep, and fgrep.
29 * Based on MKS grep command, with XCU & Solaris mods.
33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
38 * Copyright 2018 Nexenta Systems, Inc.
39 * Copyright 2013 Damian Bogel. All rights reserved.
42 #include <string.h>
43 #include <stdlib.h>
44 #include <ctype.h>
45 #include <stdarg.h>
46 #include <regex.h>
47 #include <limits.h>
48 #include <sys/types.h>
49 #include <sys/stat.h>
50 #include <fcntl.h>
51 #include <stdio.h>
52 #include <locale.h>
53 #include <wchar.h>
54 #include <errno.h>
55 #include <unistd.h>
56 #include <wctype.h>
57 #include <ftw.h>
58 #include <sys/param.h>
60 #define STDIN_FILENAME gettext("(standard input)")
62 #define BSIZE 512 /* Size of block for -b */
63 #define BUFSIZE 8192 /* Input buffer size */
64 #define MAX_DEPTH 1000 /* how deep to recurse */
66 #define AFTER 1 /* 'After' Context */
67 #define BEFORE 2 /* 'Before' Context */
68 #define CONTEXT (AFTER|BEFORE) /* Full Context */
70 #define M_CSETSIZE 256 /* singlebyte chars */
71 static int bmglen; /* length of BMG pattern */
72 static char *bmgpat; /* BMG pattern */
73 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */
75 typedef struct _PATTERN {
76 char *pattern; /* original pattern */
77 wchar_t *wpattern; /* wide, lowercased pattern */
78 struct _PATTERN *next;
79 regex_t re; /* compiled pattern */
80 } PATTERN;
82 static PATTERN *patterns;
83 static char errstr[128]; /* regerror string buffer */
84 static int regflags = 0; /* regcomp options */
85 static int matched = 0; /* return of the grep() */
86 static int errors = 0; /* count of errors */
87 static uchar_t fgrep = 0; /* Invoked as fgrep */
88 static uchar_t egrep = 0; /* Invoked as egrep */
89 static boolean_t nvflag = B_TRUE; /* Print matching lines */
90 static uchar_t cflag; /* Count of matches */
91 static uchar_t iflag; /* Case insensitve matching */
92 static uchar_t Hflag; /* Precede lines by file name */
93 static uchar_t hflag; /* Supress printing of filename */
94 static uchar_t lflag; /* Print file names of matches */
95 static uchar_t nflag; /* Precede lines by line number */
96 static uchar_t rflag; /* Search directories recursively */
97 static uchar_t bflag; /* Preccede matches by block number */
98 static uchar_t sflag; /* Suppress file error messages */
99 static uchar_t qflag; /* Suppress standard output */
100 static uchar_t wflag; /* Search for expression as a word */
101 static uchar_t xflag; /* Anchoring */
102 static uchar_t Eflag; /* Egrep or -E flag */
103 static uchar_t Fflag; /* Fgrep or -F flag */
104 static uchar_t Rflag; /* Like rflag, but follow symlinks */
105 static uchar_t outfn; /* Put out file name */
106 static uchar_t conflag; /* show context of matches */
107 static char *cmdname;
109 static int use_wchar, use_bmg, mblocale;
111 static size_t outbuflen, prntbuflen, conbuflen;
112 static unsigned long conalen, conblen, conmatches;
113 static char *prntbuf, *conbuf;
114 static wchar_t *outline;
116 static void addfile(const char *fn);
117 static void addpattern(char *s);
118 static void fixpatterns(void);
119 static void usage(void);
120 static int grep(int, const char *);
121 static void bmgcomp(char *, int);
122 static char *bmgexec(char *, char *);
123 static int recursive(const char *, const struct stat *, int, struct FTW *);
124 static void process_path(const char *);
125 static void process_file(const char *, int);
128 * mainline for grep
131 main(int argc, char **argv)
133 char *ap, *test;
134 int c;
135 int fflag = 0;
136 int i, n_pattern = 0, n_file = 0;
137 char **pattern_list = NULL;
138 char **file_list = NULL;
140 (void) setlocale(LC_ALL, "");
141 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
142 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
143 #endif
144 (void) textdomain(TEXT_DOMAIN);
147 * true if this is running on the multibyte locale
149 mblocale = (MB_CUR_MAX > 1);
151 * Skip leading slashes
153 cmdname = argv[0];
154 if (ap = strrchr(cmdname, '/'))
155 cmdname = ap + 1;
157 ap = cmdname;
159 * Detect egrep/fgrep via command name, map to -E and -F options.
161 if (*ap == 'e' || *ap == 'E') {
162 regflags |= REG_EXTENDED;
163 egrep++;
164 } else {
165 if (*ap == 'f' || *ap == 'F') {
166 fgrep++;
170 /* check for non-standard "-line-count" option */
171 for (i = 1; i < argc; i++) {
172 if (strcmp(argv[i], "--") == 0)
173 break;
175 /* isdigit() check prevents negative arguments */
176 if ((argv[i][0] == '-') && isdigit(argv[i][1])) {
177 if (strlen(&argv[i][1]) !=
178 strspn(&argv[i][1], "0123456789")) {
179 (void) fprintf(stderr, gettext(
180 "%s: Bad number flag\n"), argv[0]);
181 usage();
184 errno = 0;
185 conalen = conblen = strtoul(&argv[i][1], (char **)NULL,
186 10);
188 if (errno != 0 || conalen >= ULONG_MAX) {
189 (void) fprintf(stderr, gettext(
190 "%s: Bad context argument\n"), argv[0]);
191 } else if (conalen)
192 conflag = CONTEXT;
194 while (i < argc) {
195 argv[i] = argv[i + 1];
196 i++;
198 argc--;
202 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIRA:B:C:")) != EOF) {
203 unsigned long tval;
204 switch (c) {
205 case 'v': /* POSIX: negate matches */
206 nvflag = B_FALSE;
207 break;
209 case 'c': /* POSIX: write count */
210 cflag++;
211 break;
213 case 'i': /* POSIX: ignore case */
214 iflag++;
215 regflags |= REG_ICASE;
216 break;
218 case 'l': /* POSIX: Write filenames only */
219 lflag++;
220 break;
222 case 'n': /* POSIX: Write line numbers */
223 nflag++;
224 break;
226 case 'r': /* Solaris: search recursively */
227 rflag++;
228 break;
230 case 'b': /* Solaris: Write file block numbers */
231 bflag++;
232 break;
234 case 's': /* POSIX: No error msgs for files */
235 sflag++;
236 break;
238 case 'e': /* POSIX: pattern list */
239 n_pattern++;
240 pattern_list = realloc(pattern_list,
241 sizeof (char *) * n_pattern);
242 if (pattern_list == NULL) {
243 (void) fprintf(stderr,
244 gettext("%s: out of memory\n"),
245 cmdname);
246 exit(2);
248 *(pattern_list + n_pattern - 1) = optarg;
249 break;
251 case 'f': /* POSIX: pattern file */
252 fflag = 1;
253 n_file++;
254 file_list = realloc(file_list,
255 sizeof (char *) * n_file);
256 if (file_list == NULL) {
257 (void) fprintf(stderr,
258 gettext("%s: out of memory\n"),
259 cmdname);
260 exit(2);
262 *(file_list + n_file - 1) = optarg;
263 break;
265 /* based on options order h or H is set as in GNU grep */
266 case 'h': /* Solaris: supress printing of file name */
267 hflag = 1;
268 Hflag = 0;
269 break;
270 /* Solaris: precede every matching with file name */
271 case 'H':
272 Hflag = 1;
273 hflag = 0;
274 break;
276 case 'q': /* POSIX: quiet: status only */
277 qflag++;
278 break;
280 case 'w': /* Solaris: treat pattern as word */
281 wflag++;
282 break;
284 case 'x': /* POSIX: full line matches */
285 xflag++;
286 break;
288 case 'E': /* POSIX: Extended RE's */
289 regflags |= REG_EXTENDED;
290 Eflag++;
291 break;
293 case 'F': /* POSIX: strings, not RE's */
294 Fflag++;
295 break;
297 case 'R': /* Solaris: like rflag, but follow symlinks */
298 Rflag++;
299 rflag++;
300 break;
302 case 'A': /* print N lines after each match */
303 errno = 0;
304 conalen = strtoul(optarg, &test, 10);
305 /* *test will be non-null if optarg is negative */
306 if (errno != 0 || *test != '\0' ||
307 conalen >= ULONG_MAX) {
308 (void) fprintf(stderr, gettext(
309 "%s: Bad context argument: %s\n"),
310 argv[0], optarg);
311 exit(2);
313 if (conalen)
314 conflag |= AFTER;
315 else
316 conflag &= ~AFTER;
317 break;
318 case 'B': /* print N lines before each match */
319 errno = 0;
320 conblen = strtoul(optarg, &test, 10);
321 /* *test will be non-null if optarg is negative */
322 if (errno != 0 || *test != '\0' ||
323 conblen >= ULONG_MAX) {
324 (void) fprintf(stderr, gettext(
325 "%s: Bad context argument: %s\n"),
326 argv[0], optarg);
327 exit(2);
329 if (conblen)
330 conflag |= BEFORE;
331 else
332 conflag &= ~BEFORE;
333 break;
334 case 'C': /* print N lines around each match */
335 errno = 0;
336 tval = strtoul(optarg, &test, 10);
337 /* *test will be non-null if optarg is negative */
338 if (errno != 0 || *test != '\0' || tval >= ULONG_MAX) {
339 (void) fprintf(stderr, gettext(
340 "%s: Bad context argument: %s\n"),
341 argv[0], optarg);
342 exit(2);
344 if (tval) {
345 if ((conflag & BEFORE) == 0)
346 conblen = tval;
347 if ((conflag & AFTER) == 0)
348 conalen = tval;
349 conflag = CONTEXT;
351 break;
353 default:
354 usage();
358 * If we're invoked as egrep or fgrep we need to do some checks
361 if (egrep || fgrep) {
363 * Use of -E or -F with egrep or fgrep is illegal
365 if (Eflag || Fflag)
366 usage();
368 * Don't allow use of wflag with egrep / fgrep
370 if (wflag)
371 usage();
373 * For Solaris the -s flag is equivalent to XCU -q
375 if (sflag)
376 qflag++;
378 * done with above checks - set the appropriate flags
380 if (egrep)
381 Eflag++;
382 else /* Else fgrep */
383 Fflag++;
386 if (wflag && (Eflag || Fflag)) {
388 * -w cannot be specified with grep -F
390 usage();
394 * -E and -F flags are mutually exclusive - check for this
396 if (Eflag && Fflag)
397 usage();
400 * -l overrides -H like in GNU grep
402 if (lflag)
403 Hflag = 0;
406 * -c, -l and -q flags are mutually exclusive
407 * We have -c override -l like in Solaris.
408 * -q overrides -l & -c programmatically in grep() function.
410 if (cflag && lflag)
411 lflag = 0;
413 argv += optind - 1;
414 argc -= optind - 1;
417 * Now handling -e and -f option
419 if (pattern_list) {
420 for (i = 0; i < n_pattern; i++) {
421 addpattern(pattern_list[i]);
423 free(pattern_list);
425 if (file_list) {
426 for (i = 0; i < n_file; i++) {
427 addfile(file_list[i]);
429 free(file_list);
433 * No -e or -f? Make sure there is one more arg, use it as the pattern.
435 if (patterns == NULL && !fflag) {
436 if (argc < 2)
437 usage();
438 addpattern(argv[1]);
439 argc--;
440 argv++;
444 * If -x flag is not specified or -i flag is specified
445 * with fgrep in a multibyte locale, need to use
446 * the wide character APIs. Otherwise, byte-oriented
447 * process will be done.
449 use_wchar = Fflag && mblocale && (!xflag || iflag);
452 * Compile Patterns and also decide if BMG can be used
454 fixpatterns();
456 /* Process all files: stdin, or rest of arg list */
457 if (argc < 2) {
458 matched = grep(0, STDIN_FILENAME);
459 } else {
460 if (Hflag || (argc > 2 && hflag == 0))
461 outfn = 1; /* Print filename on match line */
462 for (argv++; *argv != NULL; argv++) {
463 process_path(*argv);
467 * Return() here is used instead of exit
470 (void) fflush(stdout);
472 if (errors)
473 return (2);
474 return (matched ? 0 : 1);
477 static void
478 process_path(const char *path)
480 struct stat st;
481 int walkflags = FTW_CHDIR;
482 char *buf = NULL;
484 if (rflag) {
485 if (stat(path, &st) != -1 &&
486 (st.st_mode & S_IFMT) == S_IFDIR) {
487 outfn = 1; /* Print filename */
490 * Add trailing slash if arg
491 * is directory, to resolve symlinks.
493 if (path[strlen(path) - 1] != '/') {
494 (void) asprintf(&buf, "%s/", path);
495 if (buf != NULL)
496 path = buf;
500 * Search through subdirs if path is directory.
501 * Don't follow symlinks if Rflag is not set.
503 if (!Rflag)
504 walkflags |= FTW_PHYS;
506 if (nftw(path, recursive, MAX_DEPTH, walkflags) != 0) {
507 if (!sflag)
508 (void) fprintf(stderr,
509 gettext("%s: can't open \"%s\"\n"),
510 cmdname, path);
511 errors = 1;
513 return;
516 process_file(path, 0);
520 * Read and process all files in directory recursively.
522 static int
523 recursive(const char *name, const struct stat *statp, int info, struct FTW *ftw)
526 * Process files and follow symlinks if Rflag set.
528 if (info != FTW_F) {
529 /* Report broken symlinks and unreadable files */
530 if (!sflag &&
531 (info == FTW_SLN || info == FTW_DNR || info == FTW_NS)) {
532 (void) fprintf(stderr,
533 gettext("%s: can't open \"%s\"\n"), cmdname, name);
535 return (0);
539 /* Skip devices and pipes if Rflag is not set */
540 if (!Rflag && !S_ISREG(statp->st_mode))
541 return (0);
542 /* Pass offset to relative name from FTW_CHDIR */
543 process_file(name, ftw->base);
544 return (0);
548 * Opens file and call grep function.
550 static void
551 process_file(const char *name, int base)
553 int fd;
555 if ((fd = open(name + base, O_RDONLY)) == -1) {
556 errors = 1;
557 if (!sflag) /* Silent mode */
558 (void) fprintf(stderr, gettext(
559 "%s: can't open \"%s\"\n"),
560 cmdname, name);
561 return;
563 matched |= grep(fd, name);
564 (void) close(fd);
566 if (ferror(stdout)) {
567 (void) fprintf(stderr, gettext(
568 "%s: error writing to stdout\n"),
569 cmdname);
570 (void) fflush(stdout);
571 exit(2);
577 * Add a file of strings to the pattern list.
579 static void
580 addfile(const char *fn)
582 FILE *fp;
583 char *inbuf;
584 char *bufp;
585 size_t bufsiz, buflen, bufused;
588 * Open the pattern file
590 if ((fp = fopen(fn, "r")) == NULL) {
591 (void) fprintf(stderr, gettext("%s: can't open \"%s\"\n"),
592 cmdname, fn);
593 exit(2);
595 bufsiz = BUFSIZE;
596 if ((inbuf = malloc(bufsiz)) == NULL) {
597 (void) fprintf(stderr,
598 gettext("%s: out of memory\n"), cmdname);
599 exit(2);
601 bufp = inbuf;
602 bufused = 0;
604 * Read in the file, reallocing as we need more memory
606 while (fgets(bufp, bufsiz - bufused, fp) != NULL) {
607 buflen = strlen(bufp);
608 bufused += buflen;
609 if (bufused + 1 == bufsiz && bufp[buflen - 1] != '\n') {
611 * if this line does not fit to the buffer,
612 * realloc larger buffer
614 bufsiz += BUFSIZE;
615 if ((inbuf = realloc(inbuf, bufsiz)) == NULL) {
616 (void) fprintf(stderr,
617 gettext("%s: out of memory\n"),
618 cmdname);
619 exit(2);
621 bufp = inbuf + bufused;
622 continue;
624 if (bufp[buflen - 1] == '\n') {
625 bufp[--buflen] = '\0';
627 addpattern(inbuf);
629 bufp = inbuf;
630 bufused = 0;
632 free(inbuf);
633 free(prntbuf);
634 free(conbuf);
635 (void) fclose(fp);
639 * Add a string to the pattern list.
641 static void
642 addpattern(char *s)
644 PATTERN *pp;
645 char *wordbuf;
646 char *np;
648 for (; ; ) {
649 np = strchr(s, '\n');
650 if (np != NULL)
651 *np = '\0';
652 if ((pp = malloc(sizeof (PATTERN))) == NULL) {
653 (void) fprintf(stderr, gettext(
654 "%s: out of memory\n"),
655 cmdname);
656 exit(2);
658 if (wflag) {
660 * Solaris wflag support: Add '<' '>' to pattern to
661 * select it as a word. Doesn't make sense with -F
662 * but we're Libertarian.
664 size_t slen, wordlen;
666 slen = strlen(s);
667 wordlen = slen + 5; /* '\\' '<' s '\\' '>' '\0' */
668 if ((wordbuf = malloc(wordlen)) == NULL) {
669 (void) fprintf(stderr,
670 gettext("%s: out of memory\n"),
671 cmdname);
672 exit(2);
674 (void) strcpy(wordbuf, "\\<");
675 (void) strcpy(wordbuf + 2, s);
676 (void) strcpy(wordbuf + 2 + slen, "\\>");
677 } else {
678 if ((wordbuf = strdup(s)) == NULL) {
679 (void) fprintf(stderr,
680 gettext("%s: out of memory\n"),
681 cmdname);
682 exit(2);
685 pp->pattern = wordbuf;
686 pp->next = patterns;
687 patterns = pp;
688 if (np == NULL)
689 break;
690 s = np + 1;
695 * Fix patterns.
696 * Must do after all arguments read, in case later -i option.
698 static void
699 fixpatterns(void)
701 PATTERN *pp;
702 int rv, fix_pattern, npatterns;
705 * Fix the specified pattern if -x is specified.
707 fix_pattern = !Fflag && xflag;
709 for (npatterns = 0, pp = patterns; pp != NULL; pp = pp->next) {
710 npatterns++;
711 if (fix_pattern) {
712 char *cp, *cq;
713 size_t plen, nplen;
715 plen = strlen(pp->pattern);
716 /* '^' pattern '$' */
717 nplen = 1 + plen + 1 + 1;
718 if ((cp = malloc(nplen)) == NULL) {
719 (void) fprintf(stderr,
720 gettext("%s: out of memory\n"),
721 cmdname);
722 exit(2);
724 cq = cp;
725 *cq++ = '^';
726 cq = strcpy(cq, pp->pattern) + plen;
727 *cq++ = '$';
728 *cq = '\0';
729 free(pp->pattern);
730 pp->pattern = cp;
733 if (Fflag) {
734 if (use_wchar) {
736 * Fflag && mblocale && iflag
737 * Fflag && mblocale && !xflag
739 size_t n;
740 n = strlen(pp->pattern) + 1;
741 if ((pp->wpattern =
742 malloc(sizeof (wchar_t) * n)) == NULL) {
743 (void) fprintf(stderr,
744 gettext("%s: out of memory\n"),
745 cmdname);
746 exit(2);
748 if (mbstowcs(pp->wpattern, pp->pattern, n) ==
749 (size_t)-1) {
750 (void) fprintf(stderr,
751 gettext("%s: failed to convert "
752 "\"%s\" to wide-characters\n"),
753 cmdname, pp->pattern);
754 exit(2);
756 if (iflag) {
757 wchar_t *wp;
758 for (wp = pp->wpattern; *wp != L'\0';
759 wp++) {
760 *wp = towlower((wint_t)*wp);
763 free(pp->pattern);
764 } else {
766 * Fflag && mblocale && !iflag
767 * Fflag && !mblocale && iflag
768 * Fflag && !mblocale && !iflag
770 if (iflag) {
771 unsigned char *cp;
772 for (cp = (unsigned char *)pp->pattern;
773 *cp != '\0'; cp++) {
774 *cp = tolower(*cp);
779 * fgrep: No regular expressions.
781 continue;
785 * For non-fgrep, compile the regular expression,
786 * give an informative error message, and exit if
787 * it didn't compile.
789 if ((rv = regcomp(&pp->re, pp->pattern, regflags)) != 0) {
790 (void) regerror(rv, &pp->re, errstr, sizeof (errstr));
791 (void) fprintf(stderr,
792 gettext("%s: RE error in %s: %s\n"),
793 cmdname, pp->pattern, errstr);
794 exit(2);
796 free(pp->pattern);
800 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
801 * Use the Boyer-Moore-Gosper algorithm if:
802 * - fgrep (Fflag)
803 * - singlebyte locale (!mblocale)
804 * - no ignoring case (!iflag)
805 * - no printing line numbers (!nflag)
806 * - no negating the output (nvflag)
807 * - only one pattern (npatterns == 1)
808 * - non zero length pattern (strlen(patterns->pattern) != 0)
809 * - no context required (conflag == 0)
811 * It's guaranteed patterns->pattern is still alive
812 * when Fflag && !mblocale.
814 use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
815 (npatterns == 1) && (strlen(patterns->pattern) != 0) &&
816 conflag == 0;
820 * Search a newline from the beginning of the string
822 static char *
823 find_nl(const char *ptr, size_t len)
825 while (len-- != 0) {
826 if (*ptr++ == '\n') {
827 return ((char *)--ptr);
830 return (NULL);
834 * Search a newline from the end of the string
836 static char *
837 rfind_nl(const char *ptr, size_t len)
839 const char *uptr = ptr + len;
840 while (len--) {
841 if (*--uptr == '\n') {
842 return ((char *)uptr);
845 return (NULL);
849 * Duplicate the specified string converting each character
850 * into a lower case.
852 static char *
853 istrdup(const char *s1)
855 static size_t ibuflen = 0;
856 static char *ibuf = NULL;
857 size_t slen;
858 char *p;
860 slen = strlen(s1);
861 if (slen >= ibuflen) {
862 /* ibuf does not fit to s1 */
863 ibuflen = slen + 1;
864 ibuf = realloc(ibuf, ibuflen);
865 if (ibuf == NULL) {
866 (void) fprintf(stderr,
867 gettext("%s: out of memory\n"), cmdname);
868 exit(2);
871 p = ibuf;
872 do {
873 *p++ = tolower(*s1);
874 } while (*s1++ != '\0');
875 return (ibuf);
879 * Do grep on a single file.
880 * Return true in any lines matched.
882 * We have two strategies:
883 * The fast one is used when we have a single pattern with
884 * a string known to occur in the pattern. We can then
885 * do a BMG match on the whole buffer.
886 * This is an order of magnitude faster.
887 * Otherwise we split the buffer into lines,
888 * and check for a match on each line.
890 static int
891 grep(int fd, const char *fn)
893 PATTERN *pp;
894 off_t data_len; /* length of the data chunk */
895 off_t line_len; /* length of the current line */
896 off_t line_offset; /* current line's offset from the beginning */
897 off_t blkoffset; /* line_offset but context-compatible */
898 long long lineno, linenum;
899 long long matches = 0; /* Number of matching lines */
900 long long conacnt = 0, conbcnt = 0; /* context line count */
901 int newlinep; /* 0 if the last line of file has no newline */
902 char *ptr, *ptrend, *prntptr, *prntptrend;
903 char *nextptr = NULL, *nextend = NULL;
904 char *conptr = NULL, *conptrend = NULL;
905 char *matchptr = NULL;
906 int conaprnt = 0, conbprnt = 0, lastmatch = 0;
907 boolean_t nearmatch; /* w/in N+1 of last match */
908 boolean_t havematch = B_FALSE; /* have a match in context */
909 size_t prntlen;
911 if (patterns == NULL)
912 return (0); /* no patterns to match -- just return */
914 pp = patterns;
916 if (use_bmg) {
917 bmgcomp(pp->pattern, strlen(pp->pattern));
920 if (use_wchar && outline == NULL) {
921 outbuflen = BUFSIZE + 1;
922 outline = malloc(sizeof (wchar_t) * outbuflen);
923 if (outline == NULL) {
924 (void) fprintf(stderr, gettext("%s: out of memory\n"),
925 cmdname);
926 exit(2);
930 if (prntbuf == NULL) {
931 prntbuflen = BUFSIZE;
932 if ((prntbuf = malloc(prntbuflen + 1)) == NULL) {
933 (void) fprintf(stderr, gettext("%s: out of memory\n"),
934 cmdname);
935 exit(2);
939 if (conflag != 0 && (conbuf == NULL)) {
940 conbuflen = BUFSIZE;
941 if ((conbuf = malloc(BUFSIZE+1)) == NULL) {
942 (void) fprintf(stderr, gettext("%s: out of memory\n"),
943 cmdname);
944 exit(2);
948 nearmatch = (conmatches != 0);
949 blkoffset = line_offset = 0;
950 lineno = 0;
951 linenum = 1;
952 newlinep = 1;
953 data_len = 0;
954 for (; ; ) {
955 long count;
956 off_t offset = 0;
957 char separate;
958 boolean_t last_ctx = B_FALSE, eof = B_FALSE;
960 if (data_len == 0) {
962 * If no data in the buffer, reset ptr
964 ptr = prntbuf;
965 if (conflag != 0 && conptr == NULL) {
966 conptr = conbuf;
967 conptrend = conptr - 1;
970 if (ptr == prntbuf) {
972 * The current data chunk starts from prntbuf.
973 * This means either the buffer has no data
974 * or the buffer has no newline.
975 * So, read more data from input.
977 count = read(fd, ptr + data_len, prntbuflen - data_len);
978 if (count < 0) {
979 /* read error */
980 if (cflag) {
981 if (outfn && !rflag) {
982 (void) fprintf(stdout,
983 "%s:", fn);
985 if (!qflag && !rflag) {
986 (void) fprintf(stdout, "%lld\n",
987 matches);
990 return (0);
991 } else if (count == 0) {
992 /* no new data */
993 eof = B_TRUE;
995 if (data_len == 0) {
996 /* end of file already reached */
997 if (conflag != 0) {
998 if (conptrend >= conptr)
999 *conptrend = '\n';
1000 last_ctx = B_TRUE;
1001 goto L_next_line;
1002 } else {
1003 goto out;
1006 /* last line of file has no newline */
1007 ptrend = ptr + data_len;
1008 newlinep = 0;
1009 goto L_start_process;
1011 offset = data_len;
1012 data_len += count;
1016 * Look for newline in the chunk
1017 * between ptr + offset and ptr + data_len - offset.
1019 ptrend = find_nl(ptr + offset, data_len - offset);
1020 if (ptrend == NULL) {
1021 /* no newline found in this chunk */
1022 if (ptr > prntbuf) {
1024 * Move remaining data to the beginning
1025 * of the buffer.
1026 * Remaining data lie from ptr for
1027 * data_len bytes.
1029 (void) memmove(prntbuf, ptr, data_len);
1031 if (data_len == prntbuflen) {
1033 * Not enough room in the buffer
1035 if (prntbuflen > SIZE_MAX - BUFSIZE) {
1036 (void) fprintf(stderr,
1037 gettext("%s: buflen would"
1038 " overflow\n"),
1039 cmdname);
1040 exit(2);
1043 prntbuflen += BUFSIZE;
1044 prntbuf = realloc(prntbuf, prntbuflen + 1);
1045 if (prntbuf == NULL) {
1046 (void) fprintf(stderr,
1047 gettext("%s: out of memory\n"),
1048 cmdname);
1049 exit(2);
1052 ptr = prntbuf;
1053 /* read the next input */
1054 continue;
1056 L_start_process:
1059 * Beginning of the chunk: ptr
1060 * End of the chunk: ptr + data_len
1061 * Beginning of the line: ptr
1062 * End of the line: ptrend
1064 * conptr: Beginning of the context.
1065 * conptrend: If context is empty, conptr - 1 (invalid memory).
1066 * Otherwise, Last newline in the context.
1069 if (use_bmg) {
1071 * Use Boyer-Moore-Gosper algorithm to find out if
1072 * this chunk (not this line) contains the specified
1073 * pattern. If not, restart from the last line
1074 * of this chunk.
1076 char *bline;
1077 bline = bmgexec(ptr, ptr + data_len);
1078 if (bline == NULL) {
1080 * No pattern found in this chunk.
1081 * Need to find the last line
1082 * in this chunk.
1084 ptrend = rfind_nl(ptr, data_len);
1087 * When this chunk does not contain newline,
1088 * ptrend becomes NULL, which should happen
1089 * when the last line of file does not end
1090 * with a newline. At such a point,
1091 * newlinep should have been set to 0.
1092 * Therefore, just after jumping to
1093 * L_skip_line, the main for-loop quits,
1094 * and the line_len value won't be
1095 * used.
1097 line_len = ptrend - ptr;
1098 goto L_skip_line;
1100 if (bline > ptrend) {
1102 * Pattern found not in the first line
1103 * of this chunk.
1104 * Discard the first line.
1106 line_len = ptrend - ptr;
1107 goto L_skip_line;
1110 * Pattern found in the first line of this chunk.
1111 * Using this result.
1113 *ptrend = '\0';
1114 line_len = ptrend - ptr;
1117 * before jumping to L_next_line,
1118 * need to handle xflag if specified
1120 if (xflag && (line_len != bmglen ||
1121 strcmp(bmgpat, ptr) != 0)) {
1122 /* didn't match */
1123 pp = NULL;
1124 } else {
1125 pp = patterns; /* to make it happen */
1127 goto L_next_line;
1129 lineno++;
1131 * Line starts from ptr and ends at ptrend.
1132 * line_len will be the length of the line.
1134 *ptrend = '\0';
1135 line_len = ptrend - ptr;
1138 * From now, the process will be performed based
1139 * on the line from ptr to ptrend.
1141 if (use_wchar) {
1142 size_t len;
1144 if (line_len >= outbuflen) {
1145 outbuflen = line_len + 1;
1146 outline = realloc(outline,
1147 sizeof (wchar_t) * outbuflen);
1148 if (outline == NULL) {
1149 (void) fprintf(stderr,
1150 gettext("%s: out of memory\n"),
1151 cmdname);
1152 exit(2);
1156 len = mbstowcs(outline, ptr, line_len);
1157 if (len == (size_t)-1) {
1158 (void) fprintf(stderr, gettext(
1159 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1160 cmdname, fn, lineno);
1161 /* never match a line with invalid sequence */
1162 goto L_skip_line;
1164 outline[len] = L'\0';
1166 if (iflag) {
1167 wchar_t *cp;
1168 for (cp = outline; *cp != '\0'; cp++) {
1169 *cp = towlower((wint_t)*cp);
1173 if (xflag) {
1174 for (pp = patterns; pp; pp = pp->next) {
1175 if (outline[0] == pp->wpattern[0] &&
1176 wcscmp(outline,
1177 pp->wpattern) == 0) {
1178 /* matched */
1179 break;
1182 } else {
1183 for (pp = patterns; pp; pp = pp->next) {
1184 if (wcswcs(outline, pp->wpattern)
1185 != NULL) {
1186 /* matched */
1187 break;
1191 } else if (Fflag) {
1192 /* fgrep in byte-oriented handling */
1193 char *fptr;
1194 if (iflag) {
1195 fptr = istrdup(ptr);
1196 } else {
1197 fptr = ptr;
1199 if (xflag) {
1200 /* fgrep -x */
1201 for (pp = patterns; pp; pp = pp->next) {
1202 if (fptr[0] == pp->pattern[0] &&
1203 strcmp(fptr, pp->pattern) == 0) {
1204 /* matched */
1205 break;
1208 } else {
1209 for (pp = patterns; pp; pp = pp->next) {
1210 if (strstr(fptr, pp->pattern) != NULL) {
1211 /* matched */
1212 break;
1216 } else {
1217 /* grep or egrep */
1218 for (pp = patterns; pp; pp = pp->next) {
1219 int rv;
1221 rv = regexec(&pp->re, ptr, 0, NULL, 0);
1222 if (rv == REG_OK) {
1223 /* matched */
1224 break;
1227 switch (rv) {
1228 case REG_NOMATCH:
1229 break;
1230 case REG_ECHAR:
1231 (void) fprintf(stderr, gettext(
1232 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1233 cmdname, fn, lineno);
1234 break;
1235 default:
1236 (void) regerror(rv, &pp->re, errstr,
1237 sizeof (errstr));
1238 (void) fprintf(stderr, gettext(
1239 "%s: input file \"%s\": line %lld: %s\n"),
1240 cmdname, fn, lineno, errstr);
1241 exit(2);
1247 * Context is set up as follows:
1248 * For a 'Before' context, we maintain a set of pointers
1249 * containing 'N' lines of context. If the current number of
1250 * lines contained is greater than N, and N isn't a match, the
1251 * start pointer is moved forward to the next newline.
1253 * If we ever find a match, we print out immediately.
1254 * 'nearmatch' tells us if we're within N+1 lines of the last
1255 * match ; if we are, and we find another match, we don't
1256 * separate the matches. 'nearmatch' becomes false when
1257 * a line gets rotated out of the context.
1259 * For an 'After' context, we simply wait until we've found a
1260 * match, then create a context N+1 lines big. If we don't find
1261 * a match within the context, we print out the current context.
1262 * Otherwise, we save a reference to the new matching line,
1263 * print out the other context, and reset our context pointers
1264 * to the new matching line.
1266 * 'nearmatch' becomes false when we find a non-matching line
1267 * that isn't a part of any context.
1269 * A full-context is implemented as a combination of the
1270 * 'Before' and 'After' context logic. Before we find a match,
1271 * we follow the Before logic. When we find a match, we
1272 * follow the After logic. 'nearmatch' is handled by the Before
1273 * logic.
1276 if (conflag == 0)
1277 goto L_next_line;
1279 /* Do we have room to add this line to the context buffer? */
1280 if ((line_len + 1) > (conbuflen -
1281 (conptrend >= conptr) ? conptrend - conbuf : 0)) {
1282 char *oldconbuf = conbuf;
1283 char *oldconptr = conptr;
1284 long tmp = matchptr - conptr;
1286 if (conbuflen > SIZE_MAX - BUFSIZE) {
1287 (void) fprintf(stderr,
1288 gettext("%s: buflen would overflow\n"),
1289 cmdname);
1290 exit(2);
1293 conbuflen += BUFSIZE;
1294 conbuf = realloc(conbuf, conbuflen + 1);
1295 if (conbuf == NULL) {
1296 (void) fprintf(stderr,
1297 gettext("%s: out of memory\n"),
1298 cmdname);
1299 exit(2);
1302 conptr = conbuf + (conptr - oldconbuf);
1303 conptrend = conptr + (conptrend - oldconptr);
1304 if (matchptr)
1305 matchptr = conptr + tmp;
1307 (void) memcpy(conptrend + 1, ptr, line_len);
1308 conptrend += line_len + 1;
1309 *conptrend = '\n';
1311 if (nvflag == (pp != NULL)) {
1312 /* matched */
1313 if (havematch) {
1314 if ((conflag & AFTER) != 0) {
1315 conaprnt = 1;
1316 nextend = conptrend;
1317 conptrend = conptr + lastmatch;
1318 nextptr = conptrend + 1;
1319 *nextend = '\n';
1321 } else {
1322 if (conflag == AFTER) {
1323 conptr = conptrend - (line_len);
1324 linenum = lineno;
1326 blkoffset = line_offset -
1327 (conptrend - conptr - line_len);
1330 if (conflag == BEFORE)
1331 conbprnt = 1;
1333 lastmatch = conptrend - conptr;
1334 havematch = B_TRUE;
1335 goto L_next_line;
1338 if (!havematch) {
1339 if ((conflag & BEFORE) != 0) {
1340 if (conbcnt >= conblen) {
1341 char *tmp = conptr;
1342 conptr = find_nl(conptr,
1343 conptrend - conptr) + 1;
1344 if (bflag)
1345 blkoffset += conptr - tmp;
1346 linenum++;
1347 nearmatch = B_TRUE;
1348 } else {
1349 conbcnt++;
1352 if (conflag == AFTER)
1353 nearmatch = B_TRUE;
1354 } else {
1355 if (++conacnt >= conalen && !conaprnt && conalen)
1356 conaprnt = 1;
1357 else
1358 lastmatch = conptrend - conptr;
1361 L_next_line:
1363 * Here, if pp points to non-NULL, something has been matched
1364 * to the pattern.
1366 if (!last_ctx && nvflag == (pp != NULL)) {
1367 matches++;
1368 if (!nextend)
1369 matchptr = (conflag != 0) ? conptrend : ptrend;
1373 * Set up some print context so that we can treat
1374 * single-line matches as a zero-N context.
1375 * Apply CLI flags to each line of the context.
1377 * For context, we only print if we both have a match and are
1378 * either at the end of the data stream, or we've previously
1379 * declared that we want to print for a particular context.
1381 if (havematch && (eof || conaprnt || conbprnt)) {
1384 * We'd normally do this earlier, but we had to
1385 * escape early because we reached the end of the data.
1387 if (eof && nextptr)
1388 conptrend = nextend;
1390 prntlen = conptrend - conptr + 1;
1391 prntptr = conptr;
1392 if (conmatches++ && nearmatch && !cflag)
1393 (void) fwrite("--\n", 1, 3, stdout);
1394 } else if (conflag == 0 && nvflag == (pp != NULL)) {
1395 *ptrend = '\n';
1396 prntlen = line_len + 1;
1397 prntptr = ptr;
1398 linenum = lineno;
1399 blkoffset = line_offset;
1400 } else if (eof) {
1401 /* No match and no more data */
1402 goto out;
1403 } else {
1404 /* No match, or we're not done building context */
1405 goto L_skip_line;
1408 prntptrend = prntptr - 1;
1409 while ((prntptrend = find_nl(prntptrend + 1,
1410 prntlen)) != NULL) {
1413 * GNU grep uses '-' for context lines and ':' for
1414 * matching lines, so replicate that here.
1416 if (prntptrend == matchptr) {
1417 if (eof && nextptr) {
1418 matchptr = nextend;
1419 nextptr = NULL;
1420 } else {
1421 matchptr = NULL;
1423 separate = ':';
1424 } else {
1425 separate = '-';
1429 * Handle q, l, and c flags.
1431 if (qflag) {
1432 /* no need to continue */
1434 * End of this line is ptrend.
1435 * We have read up to ptr + data_len.
1437 off_t pos;
1438 pos = ptr + data_len - (ptrend + 1);
1439 (void) lseek(fd, -pos, SEEK_CUR);
1440 exit(0);
1442 if (lflag) {
1443 (void) printf("%s\n", fn);
1444 goto out;
1446 if (!cflag) {
1447 if (Hflag || outfn) {
1448 (void) printf("%s%c", fn, separate);
1450 if (bflag) {
1451 (void) printf("%lld%c", (offset_t)
1452 (blkoffset / BSIZE), separate);
1454 if (nflag) {
1455 (void) printf("%lld%c", linenum,
1456 separate);
1458 (void) fwrite(prntptr, 1,
1459 prntptrend - prntptr + 1, stdout);
1461 if (ferror(stdout)) {
1462 return (0);
1464 linenum++;
1465 prntlen -= prntptrend - prntptr + 1;
1466 blkoffset += prntptrend - prntptr + 1;
1467 prntptr = prntptrend + 1;
1470 if (eof)
1471 goto out;
1474 * Update context buffer and variables post-print
1476 if (conflag != 0) {
1477 conptr = conbuf;
1478 conaprnt = conbprnt = 0;
1479 nearmatch = B_FALSE;
1480 conacnt = conbcnt = 0;
1482 if (nextptr) {
1483 (void) memmove(conbuf, nextptr,
1484 nextend - nextptr + 1);
1485 blkoffset += nextptr - conptrend - 1;
1486 conptrend = conptr + (nextend - nextptr);
1487 matchptr = conptrend;
1488 linenum = lineno;
1489 lastmatch = conptrend - conptr;
1490 havematch = B_TRUE;
1491 } else {
1492 conptrend = conptr - 1;
1493 conacnt = 0;
1494 lastmatch = 0;
1495 havematch = B_FALSE;
1497 nextptr = nextend = NULL;
1500 L_skip_line:
1501 if (!newlinep)
1502 break;
1504 data_len -= line_len + 1;
1505 line_offset += line_len + 1;
1506 ptr = ptrend + 1;
1509 out:
1510 if (cflag) {
1511 if (Hflag || outfn) {
1512 (void) printf("%s:", fn);
1514 if (!qflag) {
1515 (void) printf("%lld\n", matches);
1518 return (matches != 0);
1522 * usage message for grep
1524 static void
1525 usage(void)
1527 (void) fprintf(stderr, gettext("usage: %5s"), cmdname);
1528 if (!egrep && !fgrep)
1529 (void) fprintf(stderr, gettext(" [-E|-F]"));
1530 (void) fprintf(stderr, gettext(" [-bchHilnqrRsvx] [-A num] [-B num] "
1531 "[-C num|-num]\n [-e pattern_list]... "
1532 "[-f pattern_file]... [pattern_list] [file]...\n"));
1533 exit(2);
1534 /* NOTREACHED */
1538 * Compile literal pattern into BMG tables
1540 static void
1541 bmgcomp(char *pat, int len)
1543 int i;
1544 int tlen;
1545 unsigned char *uc = (unsigned char *)pat;
1547 bmglen = len;
1548 bmgpat = pat;
1550 for (i = 0; i < M_CSETSIZE; i++) {
1551 bmgtab[i] = len;
1554 len--;
1555 for (tlen = len, i = 0; i <= len; i++, tlen--) {
1556 bmgtab[*uc++] = tlen;
1561 * BMG search.
1563 static char *
1564 bmgexec(char *str, char *end)
1566 int t;
1567 char *k, *s, *p;
1569 k = str + bmglen - 1;
1570 if (bmglen == 1) {
1571 return (memchr(str, bmgpat[0], end - str));
1573 for (; ; ) {
1574 /* inner loop, should be most optimized */
1575 while (k < end && (t = bmgtab[(unsigned char)*k]) != 0) {
1576 k += t;
1578 if (k >= end) {
1579 return (NULL);
1581 for (s = k, p = bmgpat + bmglen - 1; *--s == *--p; ) {
1582 if (p == bmgpat) {
1583 return (s);
1586 k++;
1588 /* NOTREACHED */