4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * grep - pattern matching program - combined grep, egrep, and fgrep.
29 * Based on MKS grep command, with XCU & Solaris mods.
33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
38 * Copyright 2018 Nexenta Systems, Inc.
39 * Copyright 2013 Damian Bogel. All rights reserved.
48 #include <sys/types.h>
58 #include <sys/param.h>
60 #define STDIN_FILENAME gettext("(standard input)")
62 #define BSIZE 512 /* Size of block for -b */
63 #define BUFSIZE 8192 /* Input buffer size */
64 #define MAX_DEPTH 1000 /* how deep to recurse */
66 #define AFTER 1 /* 'After' Context */
67 #define BEFORE 2 /* 'Before' Context */
68 #define CONTEXT (AFTER|BEFORE) /* Full Context */
70 #define M_CSETSIZE 256 /* singlebyte chars */
71 static int bmglen
; /* length of BMG pattern */
72 static char *bmgpat
; /* BMG pattern */
73 static int bmgtab
[M_CSETSIZE
]; /* BMG delta1 table */
75 typedef struct _PATTERN
{
76 char *pattern
; /* original pattern */
77 wchar_t *wpattern
; /* wide, lowercased pattern */
78 struct _PATTERN
*next
;
79 regex_t re
; /* compiled pattern */
82 static PATTERN
*patterns
;
83 static char errstr
[128]; /* regerror string buffer */
84 static int regflags
= 0; /* regcomp options */
85 static int matched
= 0; /* return of the grep() */
86 static int errors
= 0; /* count of errors */
87 static uchar_t fgrep
= 0; /* Invoked as fgrep */
88 static uchar_t egrep
= 0; /* Invoked as egrep */
89 static boolean_t nvflag
= B_TRUE
; /* Print matching lines */
90 static uchar_t cflag
; /* Count of matches */
91 static uchar_t iflag
; /* Case insensitve matching */
92 static uchar_t Hflag
; /* Precede lines by file name */
93 static uchar_t hflag
; /* Supress printing of filename */
94 static uchar_t lflag
; /* Print file names of matches */
95 static uchar_t nflag
; /* Precede lines by line number */
96 static uchar_t rflag
; /* Search directories recursively */
97 static uchar_t bflag
; /* Preccede matches by block number */
98 static uchar_t sflag
; /* Suppress file error messages */
99 static uchar_t qflag
; /* Suppress standard output */
100 static uchar_t wflag
; /* Search for expression as a word */
101 static uchar_t xflag
; /* Anchoring */
102 static uchar_t Eflag
; /* Egrep or -E flag */
103 static uchar_t Fflag
; /* Fgrep or -F flag */
104 static uchar_t Rflag
; /* Like rflag, but follow symlinks */
105 static uchar_t outfn
; /* Put out file name */
106 static uchar_t conflag
; /* show context of matches */
107 static char *cmdname
;
109 static int use_wchar
, use_bmg
, mblocale
;
111 static size_t outbuflen
, prntbuflen
, conbuflen
;
112 static unsigned long conalen
, conblen
, conmatches
;
113 static char *prntbuf
, *conbuf
;
114 static wchar_t *outline
;
116 static void addfile(const char *fn
);
117 static void addpattern(char *s
);
118 static void fixpatterns(void);
119 static void usage(void);
120 static int grep(int, const char *);
121 static void bmgcomp(char *, int);
122 static char *bmgexec(char *, char *);
123 static int recursive(const char *, const struct stat
*, int, struct FTW
*);
124 static void process_path(const char *);
125 static void process_file(const char *, int);
131 main(int argc
, char **argv
)
136 int i
, n_pattern
= 0, n_file
= 0;
137 char **pattern_list
= NULL
;
138 char **file_list
= NULL
;
140 (void) setlocale(LC_ALL
, "");
141 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
142 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
144 (void) textdomain(TEXT_DOMAIN
);
147 * true if this is running on the multibyte locale
149 mblocale
= (MB_CUR_MAX
> 1);
151 * Skip leading slashes
154 if (ap
= strrchr(cmdname
, '/'))
159 * Detect egrep/fgrep via command name, map to -E and -F options.
161 if (*ap
== 'e' || *ap
== 'E') {
162 regflags
|= REG_EXTENDED
;
165 if (*ap
== 'f' || *ap
== 'F') {
170 /* check for non-standard "-line-count" option */
171 for (i
= 1; i
< argc
; i
++) {
172 if (strcmp(argv
[i
], "--") == 0)
175 /* isdigit() check prevents negative arguments */
176 if ((argv
[i
][0] == '-') && isdigit(argv
[i
][1])) {
177 if (strlen(&argv
[i
][1]) !=
178 strspn(&argv
[i
][1], "0123456789")) {
179 (void) fprintf(stderr
, gettext(
180 "%s: Bad number flag\n"), argv
[0]);
185 conalen
= conblen
= strtoul(&argv
[i
][1], (char **)NULL
,
188 if (errno
!= 0 || conalen
>= ULONG_MAX
) {
189 (void) fprintf(stderr
, gettext(
190 "%s: Bad context argument\n"), argv
[0]);
195 argv
[i
] = argv
[i
+ 1];
202 while ((c
= getopt(argc
, argv
, "vwchHilnrbse:f:qxEFIRA:B:C:")) != EOF
) {
205 case 'v': /* POSIX: negate matches */
209 case 'c': /* POSIX: write count */
213 case 'i': /* POSIX: ignore case */
215 regflags
|= REG_ICASE
;
218 case 'l': /* POSIX: Write filenames only */
222 case 'n': /* POSIX: Write line numbers */
226 case 'r': /* Solaris: search recursively */
230 case 'b': /* Solaris: Write file block numbers */
234 case 's': /* POSIX: No error msgs for files */
238 case 'e': /* POSIX: pattern list */
240 pattern_list
= realloc(pattern_list
,
241 sizeof (char *) * n_pattern
);
242 if (pattern_list
== NULL
) {
243 (void) fprintf(stderr
,
244 gettext("%s: out of memory\n"),
248 *(pattern_list
+ n_pattern
- 1) = optarg
;
251 case 'f': /* POSIX: pattern file */
254 file_list
= realloc(file_list
,
255 sizeof (char *) * n_file
);
256 if (file_list
== NULL
) {
257 (void) fprintf(stderr
,
258 gettext("%s: out of memory\n"),
262 *(file_list
+ n_file
- 1) = optarg
;
265 /* based on options order h or H is set as in GNU grep */
266 case 'h': /* Solaris: supress printing of file name */
270 /* Solaris: precede every matching with file name */
276 case 'q': /* POSIX: quiet: status only */
280 case 'w': /* Solaris: treat pattern as word */
284 case 'x': /* POSIX: full line matches */
288 case 'E': /* POSIX: Extended RE's */
289 regflags
|= REG_EXTENDED
;
293 case 'F': /* POSIX: strings, not RE's */
297 case 'R': /* Solaris: like rflag, but follow symlinks */
302 case 'A': /* print N lines after each match */
304 conalen
= strtoul(optarg
, &test
, 10);
305 /* *test will be non-null if optarg is negative */
306 if (errno
!= 0 || *test
!= '\0' ||
307 conalen
>= ULONG_MAX
) {
308 (void) fprintf(stderr
, gettext(
309 "%s: Bad context argument: %s\n"),
318 case 'B': /* print N lines before each match */
320 conblen
= strtoul(optarg
, &test
, 10);
321 /* *test will be non-null if optarg is negative */
322 if (errno
!= 0 || *test
!= '\0' ||
323 conblen
>= ULONG_MAX
) {
324 (void) fprintf(stderr
, gettext(
325 "%s: Bad context argument: %s\n"),
334 case 'C': /* print N lines around each match */
336 tval
= strtoul(optarg
, &test
, 10);
337 /* *test will be non-null if optarg is negative */
338 if (errno
!= 0 || *test
!= '\0' || tval
>= ULONG_MAX
) {
339 (void) fprintf(stderr
, gettext(
340 "%s: Bad context argument: %s\n"),
345 if ((conflag
& BEFORE
) == 0)
347 if ((conflag
& AFTER
) == 0)
358 * If we're invoked as egrep or fgrep we need to do some checks
361 if (egrep
|| fgrep
) {
363 * Use of -E or -F with egrep or fgrep is illegal
368 * Don't allow use of wflag with egrep / fgrep
373 * For Solaris the -s flag is equivalent to XCU -q
378 * done with above checks - set the appropriate flags
382 else /* Else fgrep */
386 if (wflag
&& (Eflag
|| Fflag
)) {
388 * -w cannot be specified with grep -F
394 * -E and -F flags are mutually exclusive - check for this
400 * -l overrides -H like in GNU grep
406 * -c, -l and -q flags are mutually exclusive
407 * We have -c override -l like in Solaris.
408 * -q overrides -l & -c programmatically in grep() function.
417 * Now handling -e and -f option
420 for (i
= 0; i
< n_pattern
; i
++) {
421 addpattern(pattern_list
[i
]);
426 for (i
= 0; i
< n_file
; i
++) {
427 addfile(file_list
[i
]);
433 * No -e or -f? Make sure there is one more arg, use it as the pattern.
435 if (patterns
== NULL
&& !fflag
) {
444 * If -x flag is not specified or -i flag is specified
445 * with fgrep in a multibyte locale, need to use
446 * the wide character APIs. Otherwise, byte-oriented
447 * process will be done.
449 use_wchar
= Fflag
&& mblocale
&& (!xflag
|| iflag
);
452 * Compile Patterns and also decide if BMG can be used
456 /* Process all files: stdin, or rest of arg list */
458 matched
= grep(0, STDIN_FILENAME
);
460 if (Hflag
|| (argc
> 2 && hflag
== 0))
461 outfn
= 1; /* Print filename on match line */
462 for (argv
++; *argv
!= NULL
; argv
++) {
467 * Return() here is used instead of exit
470 (void) fflush(stdout
);
474 return (matched
? 0 : 1);
478 process_path(const char *path
)
481 int walkflags
= FTW_CHDIR
;
485 if (stat(path
, &st
) != -1 &&
486 (st
.st_mode
& S_IFMT
) == S_IFDIR
) {
487 outfn
= 1; /* Print filename */
490 * Add trailing slash if arg
491 * is directory, to resolve symlinks.
493 if (path
[strlen(path
) - 1] != '/') {
494 (void) asprintf(&buf
, "%s/", path
);
500 * Search through subdirs if path is directory.
501 * Don't follow symlinks if Rflag is not set.
504 walkflags
|= FTW_PHYS
;
506 if (nftw(path
, recursive
, MAX_DEPTH
, walkflags
) != 0) {
508 (void) fprintf(stderr
,
509 gettext("%s: can't open \"%s\"\n"),
516 process_file(path
, 0);
520 * Read and process all files in directory recursively.
523 recursive(const char *name
, const struct stat
*statp
, int info
, struct FTW
*ftw
)
526 * Process files and follow symlinks if Rflag set.
529 /* Report broken symlinks and unreadable files */
531 (info
== FTW_SLN
|| info
== FTW_DNR
|| info
== FTW_NS
)) {
532 (void) fprintf(stderr
,
533 gettext("%s: can't open \"%s\"\n"), cmdname
, name
);
539 /* Skip devices and pipes if Rflag is not set */
540 if (!Rflag
&& !S_ISREG(statp
->st_mode
))
542 /* Pass offset to relative name from FTW_CHDIR */
543 process_file(name
, ftw
->base
);
548 * Opens file and call grep function.
551 process_file(const char *name
, int base
)
555 if ((fd
= open(name
+ base
, O_RDONLY
)) == -1) {
557 if (!sflag
) /* Silent mode */
558 (void) fprintf(stderr
, gettext(
559 "%s: can't open \"%s\"\n"),
563 matched
|= grep(fd
, name
);
566 if (ferror(stdout
)) {
567 (void) fprintf(stderr
, gettext(
568 "%s: error writing to stdout\n"),
570 (void) fflush(stdout
);
577 * Add a file of strings to the pattern list.
580 addfile(const char *fn
)
585 size_t bufsiz
, buflen
, bufused
;
588 * Open the pattern file
590 if ((fp
= fopen(fn
, "r")) == NULL
) {
591 (void) fprintf(stderr
, gettext("%s: can't open \"%s\"\n"),
596 if ((inbuf
= malloc(bufsiz
)) == NULL
) {
597 (void) fprintf(stderr
,
598 gettext("%s: out of memory\n"), cmdname
);
604 * Read in the file, reallocing as we need more memory
606 while (fgets(bufp
, bufsiz
- bufused
, fp
) != NULL
) {
607 buflen
= strlen(bufp
);
609 if (bufused
+ 1 == bufsiz
&& bufp
[buflen
- 1] != '\n') {
611 * if this line does not fit to the buffer,
612 * realloc larger buffer
615 if ((inbuf
= realloc(inbuf
, bufsiz
)) == NULL
) {
616 (void) fprintf(stderr
,
617 gettext("%s: out of memory\n"),
621 bufp
= inbuf
+ bufused
;
624 if (bufp
[buflen
- 1] == '\n') {
625 bufp
[--buflen
] = '\0';
639 * Add a string to the pattern list.
649 np
= strchr(s
, '\n');
652 if ((pp
= malloc(sizeof (PATTERN
))) == NULL
) {
653 (void) fprintf(stderr
, gettext(
654 "%s: out of memory\n"),
660 * Solaris wflag support: Add '<' '>' to pattern to
661 * select it as a word. Doesn't make sense with -F
662 * but we're Libertarian.
664 size_t slen
, wordlen
;
667 wordlen
= slen
+ 5; /* '\\' '<' s '\\' '>' '\0' */
668 if ((wordbuf
= malloc(wordlen
)) == NULL
) {
669 (void) fprintf(stderr
,
670 gettext("%s: out of memory\n"),
674 (void) strcpy(wordbuf
, "\\<");
675 (void) strcpy(wordbuf
+ 2, s
);
676 (void) strcpy(wordbuf
+ 2 + slen
, "\\>");
678 if ((wordbuf
= strdup(s
)) == NULL
) {
679 (void) fprintf(stderr
,
680 gettext("%s: out of memory\n"),
685 pp
->pattern
= wordbuf
;
696 * Must do after all arguments read, in case later -i option.
702 int rv
, fix_pattern
, npatterns
;
705 * Fix the specified pattern if -x is specified.
707 fix_pattern
= !Fflag
&& xflag
;
709 for (npatterns
= 0, pp
= patterns
; pp
!= NULL
; pp
= pp
->next
) {
715 plen
= strlen(pp
->pattern
);
716 /* '^' pattern '$' */
717 nplen
= 1 + plen
+ 1 + 1;
718 if ((cp
= malloc(nplen
)) == NULL
) {
719 (void) fprintf(stderr
,
720 gettext("%s: out of memory\n"),
726 cq
= strcpy(cq
, pp
->pattern
) + plen
;
736 * Fflag && mblocale && iflag
737 * Fflag && mblocale && !xflag
740 n
= strlen(pp
->pattern
) + 1;
742 malloc(sizeof (wchar_t) * n
)) == NULL
) {
743 (void) fprintf(stderr
,
744 gettext("%s: out of memory\n"),
748 if (mbstowcs(pp
->wpattern
, pp
->pattern
, n
) ==
750 (void) fprintf(stderr
,
751 gettext("%s: failed to convert "
752 "\"%s\" to wide-characters\n"),
753 cmdname
, pp
->pattern
);
758 for (wp
= pp
->wpattern
; *wp
!= L
'\0';
760 *wp
= towlower((wint_t)*wp
);
766 * Fflag && mblocale && !iflag
767 * Fflag && !mblocale && iflag
768 * Fflag && !mblocale && !iflag
772 for (cp
= (unsigned char *)pp
->pattern
;
779 * fgrep: No regular expressions.
785 * For non-fgrep, compile the regular expression,
786 * give an informative error message, and exit if
789 if ((rv
= regcomp(&pp
->re
, pp
->pattern
, regflags
)) != 0) {
790 (void) regerror(rv
, &pp
->re
, errstr
, sizeof (errstr
));
791 (void) fprintf(stderr
,
792 gettext("%s: RE error in %s: %s\n"),
793 cmdname
, pp
->pattern
, errstr
);
800 * Decide if we are able to run the Boyer-Moore-Gosper algorithm.
801 * Use the Boyer-Moore-Gosper algorithm if:
803 * - singlebyte locale (!mblocale)
804 * - no ignoring case (!iflag)
805 * - no printing line numbers (!nflag)
806 * - no negating the output (nvflag)
807 * - only one pattern (npatterns == 1)
808 * - non zero length pattern (strlen(patterns->pattern) != 0)
809 * - no context required (conflag == 0)
811 * It's guaranteed patterns->pattern is still alive
812 * when Fflag && !mblocale.
814 use_bmg
= Fflag
&& !mblocale
&& !iflag
&& !nflag
&& nvflag
&&
815 (npatterns
== 1) && (strlen(patterns
->pattern
) != 0) &&
820 * Search a newline from the beginning of the string
823 find_nl(const char *ptr
, size_t len
)
826 if (*ptr
++ == '\n') {
827 return ((char *)--ptr
);
834 * Search a newline from the end of the string
837 rfind_nl(const char *ptr
, size_t len
)
839 const char *uptr
= ptr
+ len
;
841 if (*--uptr
== '\n') {
842 return ((char *)uptr
);
849 * Duplicate the specified string converting each character
853 istrdup(const char *s1
)
855 static size_t ibuflen
= 0;
856 static char *ibuf
= NULL
;
861 if (slen
>= ibuflen
) {
862 /* ibuf does not fit to s1 */
864 ibuf
= realloc(ibuf
, ibuflen
);
866 (void) fprintf(stderr
,
867 gettext("%s: out of memory\n"), cmdname
);
874 } while (*s1
++ != '\0');
879 * Do grep on a single file.
880 * Return true in any lines matched.
882 * We have two strategies:
883 * The fast one is used when we have a single pattern with
884 * a string known to occur in the pattern. We can then
885 * do a BMG match on the whole buffer.
886 * This is an order of magnitude faster.
887 * Otherwise we split the buffer into lines,
888 * and check for a match on each line.
891 grep(int fd
, const char *fn
)
894 off_t data_len
; /* length of the data chunk */
895 off_t line_len
; /* length of the current line */
896 off_t line_offset
; /* current line's offset from the beginning */
897 off_t blkoffset
; /* line_offset but context-compatible */
898 long long lineno
, linenum
;
899 long long matches
= 0; /* Number of matching lines */
900 long long conacnt
= 0, conbcnt
= 0; /* context line count */
901 int newlinep
; /* 0 if the last line of file has no newline */
902 char *ptr
, *ptrend
, *prntptr
, *prntptrend
;
903 char *nextptr
= NULL
, *nextend
= NULL
;
904 char *conptr
= NULL
, *conptrend
= NULL
;
905 char *matchptr
= NULL
;
906 int conaprnt
= 0, conbprnt
= 0, lastmatch
= 0;
907 boolean_t nearmatch
; /* w/in N+1 of last match */
908 boolean_t havematch
= B_FALSE
; /* have a match in context */
911 if (patterns
== NULL
)
912 return (0); /* no patterns to match -- just return */
917 bmgcomp(pp
->pattern
, strlen(pp
->pattern
));
920 if (use_wchar
&& outline
== NULL
) {
921 outbuflen
= BUFSIZE
+ 1;
922 outline
= malloc(sizeof (wchar_t) * outbuflen
);
923 if (outline
== NULL
) {
924 (void) fprintf(stderr
, gettext("%s: out of memory\n"),
930 if (prntbuf
== NULL
) {
931 prntbuflen
= BUFSIZE
;
932 if ((prntbuf
= malloc(prntbuflen
+ 1)) == NULL
) {
933 (void) fprintf(stderr
, gettext("%s: out of memory\n"),
939 if (conflag
!= 0 && (conbuf
== NULL
)) {
941 if ((conbuf
= malloc(BUFSIZE
+1)) == NULL
) {
942 (void) fprintf(stderr
, gettext("%s: out of memory\n"),
948 nearmatch
= (conmatches
!= 0);
949 blkoffset
= line_offset
= 0;
958 boolean_t last_ctx
= B_FALSE
, eof
= B_FALSE
;
962 * If no data in the buffer, reset ptr
965 if (conflag
!= 0 && conptr
== NULL
) {
967 conptrend
= conptr
- 1;
970 if (ptr
== prntbuf
) {
972 * The current data chunk starts from prntbuf.
973 * This means either the buffer has no data
974 * or the buffer has no newline.
975 * So, read more data from input.
977 count
= read(fd
, ptr
+ data_len
, prntbuflen
- data_len
);
981 if (outfn
&& !rflag
) {
982 (void) fprintf(stdout
,
985 if (!qflag
&& !rflag
) {
986 (void) fprintf(stdout
, "%lld\n",
991 } else if (count
== 0) {
996 /* end of file already reached */
998 if (conptrend
>= conptr
)
1006 /* last line of file has no newline */
1007 ptrend
= ptr
+ data_len
;
1009 goto L_start_process
;
1016 * Look for newline in the chunk
1017 * between ptr + offset and ptr + data_len - offset.
1019 ptrend
= find_nl(ptr
+ offset
, data_len
- offset
);
1020 if (ptrend
== NULL
) {
1021 /* no newline found in this chunk */
1022 if (ptr
> prntbuf
) {
1024 * Move remaining data to the beginning
1026 * Remaining data lie from ptr for
1029 (void) memmove(prntbuf
, ptr
, data_len
);
1031 if (data_len
== prntbuflen
) {
1033 * Not enough room in the buffer
1035 if (prntbuflen
> SIZE_MAX
- BUFSIZE
) {
1036 (void) fprintf(stderr
,
1037 gettext("%s: buflen would"
1043 prntbuflen
+= BUFSIZE
;
1044 prntbuf
= realloc(prntbuf
, prntbuflen
+ 1);
1045 if (prntbuf
== NULL
) {
1046 (void) fprintf(stderr
,
1047 gettext("%s: out of memory\n"),
1053 /* read the next input */
1059 * Beginning of the chunk: ptr
1060 * End of the chunk: ptr + data_len
1061 * Beginning of the line: ptr
1062 * End of the line: ptrend
1064 * conptr: Beginning of the context.
1065 * conptrend: If context is empty, conptr - 1 (invalid memory).
1066 * Otherwise, Last newline in the context.
1071 * Use Boyer-Moore-Gosper algorithm to find out if
1072 * this chunk (not this line) contains the specified
1073 * pattern. If not, restart from the last line
1077 bline
= bmgexec(ptr
, ptr
+ data_len
);
1078 if (bline
== NULL
) {
1080 * No pattern found in this chunk.
1081 * Need to find the last line
1084 ptrend
= rfind_nl(ptr
, data_len
);
1087 * When this chunk does not contain newline,
1088 * ptrend becomes NULL, which should happen
1089 * when the last line of file does not end
1090 * with a newline. At such a point,
1091 * newlinep should have been set to 0.
1092 * Therefore, just after jumping to
1093 * L_skip_line, the main for-loop quits,
1094 * and the line_len value won't be
1097 line_len
= ptrend
- ptr
;
1100 if (bline
> ptrend
) {
1102 * Pattern found not in the first line
1104 * Discard the first line.
1106 line_len
= ptrend
- ptr
;
1110 * Pattern found in the first line of this chunk.
1111 * Using this result.
1114 line_len
= ptrend
- ptr
;
1117 * before jumping to L_next_line,
1118 * need to handle xflag if specified
1120 if (xflag
&& (line_len
!= bmglen
||
1121 strcmp(bmgpat
, ptr
) != 0)) {
1125 pp
= patterns
; /* to make it happen */
1131 * Line starts from ptr and ends at ptrend.
1132 * line_len will be the length of the line.
1135 line_len
= ptrend
- ptr
;
1138 * From now, the process will be performed based
1139 * on the line from ptr to ptrend.
1144 if (line_len
>= outbuflen
) {
1145 outbuflen
= line_len
+ 1;
1146 outline
= realloc(outline
,
1147 sizeof (wchar_t) * outbuflen
);
1148 if (outline
== NULL
) {
1149 (void) fprintf(stderr
,
1150 gettext("%s: out of memory\n"),
1156 len
= mbstowcs(outline
, ptr
, line_len
);
1157 if (len
== (size_t)-1) {
1158 (void) fprintf(stderr
, gettext(
1159 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1160 cmdname
, fn
, lineno
);
1161 /* never match a line with invalid sequence */
1164 outline
[len
] = L
'\0';
1168 for (cp
= outline
; *cp
!= '\0'; cp
++) {
1169 *cp
= towlower((wint_t)*cp
);
1174 for (pp
= patterns
; pp
; pp
= pp
->next
) {
1175 if (outline
[0] == pp
->wpattern
[0] &&
1177 pp
->wpattern
) == 0) {
1183 for (pp
= patterns
; pp
; pp
= pp
->next
) {
1184 if (wcswcs(outline
, pp
->wpattern
)
1192 /* fgrep in byte-oriented handling */
1195 fptr
= istrdup(ptr
);
1201 for (pp
= patterns
; pp
; pp
= pp
->next
) {
1202 if (fptr
[0] == pp
->pattern
[0] &&
1203 strcmp(fptr
, pp
->pattern
) == 0) {
1209 for (pp
= patterns
; pp
; pp
= pp
->next
) {
1210 if (strstr(fptr
, pp
->pattern
) != NULL
) {
1218 for (pp
= patterns
; pp
; pp
= pp
->next
) {
1221 rv
= regexec(&pp
->re
, ptr
, 0, NULL
, 0);
1231 (void) fprintf(stderr
, gettext(
1232 "%s: input file \"%s\": line %lld: invalid multibyte character\n"),
1233 cmdname
, fn
, lineno
);
1236 (void) regerror(rv
, &pp
->re
, errstr
,
1238 (void) fprintf(stderr
, gettext(
1239 "%s: input file \"%s\": line %lld: %s\n"),
1240 cmdname
, fn
, lineno
, errstr
);
1247 * Context is set up as follows:
1248 * For a 'Before' context, we maintain a set of pointers
1249 * containing 'N' lines of context. If the current number of
1250 * lines contained is greater than N, and N isn't a match, the
1251 * start pointer is moved forward to the next newline.
1253 * If we ever find a match, we print out immediately.
1254 * 'nearmatch' tells us if we're within N+1 lines of the last
1255 * match ; if we are, and we find another match, we don't
1256 * separate the matches. 'nearmatch' becomes false when
1257 * a line gets rotated out of the context.
1259 * For an 'After' context, we simply wait until we've found a
1260 * match, then create a context N+1 lines big. If we don't find
1261 * a match within the context, we print out the current context.
1262 * Otherwise, we save a reference to the new matching line,
1263 * print out the other context, and reset our context pointers
1264 * to the new matching line.
1266 * 'nearmatch' becomes false when we find a non-matching line
1267 * that isn't a part of any context.
1269 * A full-context is implemented as a combination of the
1270 * 'Before' and 'After' context logic. Before we find a match,
1271 * we follow the Before logic. When we find a match, we
1272 * follow the After logic. 'nearmatch' is handled by the Before
1279 /* Do we have room to add this line to the context buffer? */
1280 if ((line_len
+ 1) > (conbuflen
-
1281 (conptrend
>= conptr
) ? conptrend
- conbuf
: 0)) {
1282 char *oldconbuf
= conbuf
;
1283 char *oldconptr
= conptr
;
1284 long tmp
= matchptr
- conptr
;
1286 if (conbuflen
> SIZE_MAX
- BUFSIZE
) {
1287 (void) fprintf(stderr
,
1288 gettext("%s: buflen would overflow\n"),
1293 conbuflen
+= BUFSIZE
;
1294 conbuf
= realloc(conbuf
, conbuflen
+ 1);
1295 if (conbuf
== NULL
) {
1296 (void) fprintf(stderr
,
1297 gettext("%s: out of memory\n"),
1302 conptr
= conbuf
+ (conptr
- oldconbuf
);
1303 conptrend
= conptr
+ (conptrend
- oldconptr
);
1305 matchptr
= conptr
+ tmp
;
1307 (void) memcpy(conptrend
+ 1, ptr
, line_len
);
1308 conptrend
+= line_len
+ 1;
1311 if (nvflag
== (pp
!= NULL
)) {
1314 if ((conflag
& AFTER
) != 0) {
1316 nextend
= conptrend
;
1317 conptrend
= conptr
+ lastmatch
;
1318 nextptr
= conptrend
+ 1;
1322 if (conflag
== AFTER
) {
1323 conptr
= conptrend
- (line_len
);
1326 blkoffset
= line_offset
-
1327 (conptrend
- conptr
- line_len
);
1330 if (conflag
== BEFORE
)
1333 lastmatch
= conptrend
- conptr
;
1339 if ((conflag
& BEFORE
) != 0) {
1340 if (conbcnt
>= conblen
) {
1342 conptr
= find_nl(conptr
,
1343 conptrend
- conptr
) + 1;
1345 blkoffset
+= conptr
- tmp
;
1352 if (conflag
== AFTER
)
1355 if (++conacnt
>= conalen
&& !conaprnt
&& conalen
)
1358 lastmatch
= conptrend
- conptr
;
1363 * Here, if pp points to non-NULL, something has been matched
1366 if (!last_ctx
&& nvflag
== (pp
!= NULL
)) {
1369 matchptr
= (conflag
!= 0) ? conptrend
: ptrend
;
1373 * Set up some print context so that we can treat
1374 * single-line matches as a zero-N context.
1375 * Apply CLI flags to each line of the context.
1377 * For context, we only print if we both have a match and are
1378 * either at the end of the data stream, or we've previously
1379 * declared that we want to print for a particular context.
1381 if (havematch
&& (eof
|| conaprnt
|| conbprnt
)) {
1384 * We'd normally do this earlier, but we had to
1385 * escape early because we reached the end of the data.
1388 conptrend
= nextend
;
1390 prntlen
= conptrend
- conptr
+ 1;
1392 if (conmatches
++ && nearmatch
&& !cflag
)
1393 (void) fwrite("--\n", 1, 3, stdout
);
1394 } else if (conflag
== 0 && nvflag
== (pp
!= NULL
)) {
1396 prntlen
= line_len
+ 1;
1399 blkoffset
= line_offset
;
1401 /* No match and no more data */
1404 /* No match, or we're not done building context */
1408 prntptrend
= prntptr
- 1;
1409 while ((prntptrend
= find_nl(prntptrend
+ 1,
1410 prntlen
)) != NULL
) {
1413 * GNU grep uses '-' for context lines and ':' for
1414 * matching lines, so replicate that here.
1416 if (prntptrend
== matchptr
) {
1417 if (eof
&& nextptr
) {
1429 * Handle q, l, and c flags.
1432 /* no need to continue */
1434 * End of this line is ptrend.
1435 * We have read up to ptr + data_len.
1438 pos
= ptr
+ data_len
- (ptrend
+ 1);
1439 (void) lseek(fd
, -pos
, SEEK_CUR
);
1443 (void) printf("%s\n", fn
);
1447 if (Hflag
|| outfn
) {
1448 (void) printf("%s%c", fn
, separate
);
1451 (void) printf("%lld%c", (offset_t
)
1452 (blkoffset
/ BSIZE
), separate
);
1455 (void) printf("%lld%c", linenum
,
1458 (void) fwrite(prntptr
, 1,
1459 prntptrend
- prntptr
+ 1, stdout
);
1461 if (ferror(stdout
)) {
1465 prntlen
-= prntptrend
- prntptr
+ 1;
1466 blkoffset
+= prntptrend
- prntptr
+ 1;
1467 prntptr
= prntptrend
+ 1;
1474 * Update context buffer and variables post-print
1478 conaprnt
= conbprnt
= 0;
1479 nearmatch
= B_FALSE
;
1480 conacnt
= conbcnt
= 0;
1483 (void) memmove(conbuf
, nextptr
,
1484 nextend
- nextptr
+ 1);
1485 blkoffset
+= nextptr
- conptrend
- 1;
1486 conptrend
= conptr
+ (nextend
- nextptr
);
1487 matchptr
= conptrend
;
1489 lastmatch
= conptrend
- conptr
;
1492 conptrend
= conptr
- 1;
1495 havematch
= B_FALSE
;
1497 nextptr
= nextend
= NULL
;
1504 data_len
-= line_len
+ 1;
1505 line_offset
+= line_len
+ 1;
1511 if (Hflag
|| outfn
) {
1512 (void) printf("%s:", fn
);
1515 (void) printf("%lld\n", matches
);
1518 return (matches
!= 0);
1522 * usage message for grep
1527 (void) fprintf(stderr
, gettext("usage: %5s"), cmdname
);
1528 if (!egrep
&& !fgrep
)
1529 (void) fprintf(stderr
, gettext(" [-E|-F]"));
1530 (void) fprintf(stderr
, gettext(" [-bchHilnqrRsvx] [-A num] [-B num] "
1531 "[-C num|-num]\n [-e pattern_list]... "
1532 "[-f pattern_file]... [pattern_list] [file]...\n"));
1538 * Compile literal pattern into BMG tables
1541 bmgcomp(char *pat
, int len
)
1545 unsigned char *uc
= (unsigned char *)pat
;
1550 for (i
= 0; i
< M_CSETSIZE
; i
++) {
1555 for (tlen
= len
, i
= 0; i
<= len
; i
++, tlen
--) {
1556 bmgtab
[*uc
++] = tlen
;
1564 bmgexec(char *str
, char *end
)
1569 k
= str
+ bmglen
- 1;
1571 return (memchr(str
, bmgpat
[0], end
- str
));
1574 /* inner loop, should be most optimized */
1575 while (k
< end
&& (t
= bmgtab
[(unsigned char)*k
]) != 0) {
1581 for (s
= k
, p
= bmgpat
+ bmglen
- 1; *--s
== *--p
; ) {