1 /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <sys/types.h>
49 #ifndef WITHOUT_FASTMATCH
50 #include "fastmatch.h"
60 * Default messages to use when NLS is disabled or no catalogue
63 const char *errstr
[] = {
65 /* 1*/ "(standard input)",
66 /* 2*/ "cannot read bzip2 compressed file",
67 /* 3*/ "unknown %s option",
68 /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n",
69 /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
70 /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
71 /* 7*/ "\t[--null] [pattern] [file ...]\n",
72 /* 8*/ "Binary file %s matches\n",
73 /* 9*/ "%s (BSD grep) %s\n",
74 /* 10*/ "%s (BSD grep, GNU compatible) %s\n",
77 /* Flags passed to regcomp() and regexec() */
78 int cflags
= REG_NOSUB
| REG_NEWLINE
;
79 int eflags
= REG_STARTEND
;
81 /* XXX TODO: Get rid of this flag.
82 * matchall is a gross hack that means that an empty pattern was passed to us.
83 * It is a necessary evil at the moment because our regex(3) implementation
84 * does not allow for empty patterns, as supported by POSIX's definition of
85 * grammar for BREs/EREs. When libregex becomes available, it would be wise
86 * to remove this and let regex(3) handle the dirty details of empty patterns.
90 /* Searching patterns */
91 unsigned int patterns
;
92 static unsigned int pattern_sz
;
95 #ifndef WITHOUT_FASTMATCH
96 fastmatch_t
*fg_pattern
;
99 /* Filename exclusion/inclusion patterns */
100 unsigned int fpatterns
, dpatterns
;
101 static unsigned int fpattern_sz
, dpattern_sz
;
102 struct epat
*dpattern
, *fpattern
;
104 /* For regex errors */
105 char re_error
[RE_ERROR_BUF
+ 1];
107 /* Command-line flags */
108 long long Aflag
; /* -A x: print x lines trailing each match */
109 long long Bflag
; /* -B x: print x lines leading each match */
110 bool Hflag
; /* -H: always print file name */
111 bool Lflag
; /* -L: only show names of files with no matches */
112 bool bflag
; /* -b: show block numbers for each match */
113 bool cflag
; /* -c: only show a count of matching lines */
114 bool hflag
; /* -h: don't print filename headers */
115 bool iflag
; /* -i: ignore case */
116 bool lflag
; /* -l: only show names of files with matches */
117 bool mflag
; /* -m x: stop reading the files after x matches */
118 long long mcount
; /* count for -m */
119 long long mlimit
; /* requested value for -m */
120 char fileeol
; /* indicator for eol */
121 bool nflag
; /* -n: show line numbers in front of matching lines */
122 bool oflag
; /* -o: print only matching part */
123 bool qflag
; /* -q: quiet mode (don't output anything) */
124 bool sflag
; /* -s: silent mode (ignore errors) */
125 bool vflag
; /* -v: only show non-matching lines */
126 bool wflag
; /* -w: pattern must start and end on word boundaries */
127 bool xflag
; /* -x: pattern must match entire line */
128 bool lbflag
; /* --line-buffered */
129 bool nullflag
; /* --null */
130 char *label
; /* --label */
131 const char *color
; /* --color */
132 int grepbehave
= GREP_BASIC
; /* -EFGP: type of the regex */
133 int binbehave
= BINFILE_BIN
; /* -aIU: handling of binary files */
134 int filebehave
= FILE_STDIO
; /* -JZ: normal, gzip or bzip2 file */
135 int devbehave
= DEV_READ
; /* -D: handling of devices */
136 int dirbehave
= DIR_READ
; /* -dRr: handling of directories */
137 int linkbehave
= LINK_READ
; /* -OpS: handling of symlinks */
139 bool dexclude
, dinclude
; /* --exclude-dir and --include-dir */
140 bool fexclude
, finclude
; /* --exclude and --include */
143 BIN_OPT
= CHAR_MAX
+ 1,
156 static inline const char *init_color(const char *);
159 bool file_err
; /* file reading error */
162 * Prints usage information and returns 2.
167 fprintf(stderr
, getstr(4), getprogname());
168 fprintf(stderr
, "%s", getstr(5));
169 fprintf(stderr
, "%s", getstr(6));
170 fprintf(stderr
, "%s", getstr(7));
174 static const char *optstr
= "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXyz";
176 static const struct option long_options
[] =
178 {"binary-files", required_argument
, NULL
, BIN_OPT
},
179 {"help", no_argument
, NULL
, HELP_OPT
},
180 {"mmap", no_argument
, NULL
, MMAP_OPT
},
181 {"line-buffered", no_argument
, NULL
, LINEBUF_OPT
},
182 {"label", required_argument
, NULL
, LABEL_OPT
},
183 {"null", no_argument
, NULL
, NULL_OPT
},
184 {"color", optional_argument
, NULL
, COLOR_OPT
},
185 {"colour", optional_argument
, NULL
, COLOR_OPT
},
186 {"exclude", required_argument
, NULL
, R_EXCLUDE_OPT
},
187 {"include", required_argument
, NULL
, R_INCLUDE_OPT
},
188 {"exclude-dir", required_argument
, NULL
, R_DEXCLUDE_OPT
},
189 {"include-dir", required_argument
, NULL
, R_DINCLUDE_OPT
},
190 {"after-context", required_argument
, NULL
, 'A'},
191 {"text", no_argument
, NULL
, 'a'},
192 {"before-context", required_argument
, NULL
, 'B'},
193 {"byte-offset", no_argument
, NULL
, 'b'},
194 {"context", optional_argument
, NULL
, 'C'},
195 {"count", no_argument
, NULL
, 'c'},
196 {"devices", required_argument
, NULL
, 'D'},
197 {"directories", required_argument
, NULL
, 'd'},
198 {"extended-regexp", no_argument
, NULL
, 'E'},
199 {"regexp", required_argument
, NULL
, 'e'},
200 {"fixed-strings", no_argument
, NULL
, 'F'},
201 {"file", required_argument
, NULL
, 'f'},
202 {"basic-regexp", no_argument
, NULL
, 'G'},
203 {"no-filename", no_argument
, NULL
, 'h'},
204 {"with-filename", no_argument
, NULL
, 'H'},
205 {"ignore-case", no_argument
, NULL
, 'i'},
206 {"bz2decompress", no_argument
, NULL
, 'J'},
207 {"files-with-matches", no_argument
, NULL
, 'l'},
208 {"files-without-match", no_argument
, NULL
, 'L'},
209 {"max-count", required_argument
, NULL
, 'm'},
210 {"lzma", no_argument
, NULL
, 'M'},
211 {"line-number", no_argument
, NULL
, 'n'},
212 {"only-matching", no_argument
, NULL
, 'o'},
213 {"quiet", no_argument
, NULL
, 'q'},
214 {"silent", no_argument
, NULL
, 'q'},
215 {"recursive", no_argument
, NULL
, 'r'},
216 {"no-messages", no_argument
, NULL
, 's'},
217 {"binary", no_argument
, NULL
, 'U'},
218 {"unix-byte-offsets", no_argument
, NULL
, 'u'},
219 {"invert-match", no_argument
, NULL
, 'v'},
220 {"version", no_argument
, NULL
, 'V'},
221 {"word-regexp", no_argument
, NULL
, 'w'},
222 {"line-regexp", no_argument
, NULL
, 'x'},
223 {"xz", no_argument
, NULL
, 'X'},
224 {"null-data", no_argument
, NULL
, 'z'},
225 {"decompress", no_argument
, NULL
, 'Z'},
226 {NULL
, no_argument
, NULL
, 0}
230 * Adds a searching pattern to the internal array.
233 add_pattern(char *pat
, size_t len
)
236 /* Do not add further patterns if we already match everything */
240 /* Check if we can do a shortcut */
243 for (unsigned int i
= 0; i
< patterns
; i
++) {
244 free(pattern
[i
].pat
);
246 pattern
= grep_realloc(pattern
, sizeof(struct pat
));
247 pattern
[0].pat
= NULL
;
252 /* Increase size if necessary */
253 if (patterns
== pattern_sz
) {
255 pattern
= grep_realloc(pattern
, ++pattern_sz
*
258 if (len
> 0 && pat
[len
- 1] == '\n')
260 /* pat may not be NUL-terminated */
261 pattern
[patterns
].pat
= grep_malloc(len
+ 1);
262 memcpy(pattern
[patterns
].pat
, pat
, len
);
263 pattern
[patterns
].len
= len
;
264 pattern
[patterns
].pat
[len
] = '\0';
269 * Adds a file include/exclude pattern to the internal array.
272 add_fpattern(const char *pat
, int mode
)
275 /* Increase size if necessary */
276 if (fpatterns
== fpattern_sz
) {
278 fpattern
= grep_realloc(fpattern
, ++fpattern_sz
*
279 sizeof(struct epat
));
281 fpattern
[fpatterns
].pat
= grep_strdup(pat
);
282 fpattern
[fpatterns
].mode
= mode
;
287 * Adds a directory include/exclude pattern to the internal array.
290 add_dpattern(const char *pat
, int mode
)
293 /* Increase size if necessary */
294 if (dpatterns
== dpattern_sz
) {
296 dpattern
= grep_realloc(dpattern
, ++dpattern_sz
*
297 sizeof(struct epat
));
299 dpattern
[dpatterns
].pat
= grep_strdup(pat
);
300 dpattern
[dpatterns
].mode
= mode
;
305 * Reads searching patterns from a file and adds them with add_pattern().
308 read_patterns(const char *fn
)
316 if ((f
= fopen(fn
, "r")) == NULL
)
318 if ((fstat(fileno(f
), &st
) == -1) || (S_ISDIR(st
.st_mode
))) {
324 while ((rlen
= getline(&line
, &len
, f
)) != -1) {
327 add_pattern(line
, line
[0] == '\n' ? 0 : (size_t)rlen
);
336 static inline const char *
337 init_color(const char *d
)
341 c
= getenv("GREP_COLOR");
342 return (c
!= NULL
&& c
[0] != '\0' ? c
: d
);
346 main(int argc
, char *argv
[])
348 char **aargv
, **eargv
, *eopts
;
352 unsigned int aargc
, eargc
, i
;
353 int c
, lastc
, needpattern
, newarg
, prevoptind
;
355 setlocale(LC_ALL
, "");
358 catalog
= catopen("grep", NL_CAT_LOCALE
);
361 /* Check what is the program name of the binary. In this
362 way we can have all the functionalities in one binary
363 without the need of scripting and using ugly hacks. */
365 if (pn
[0] == 'b' && pn
[1] == 'z') {
366 filebehave
= FILE_BZIP
;
368 } else if (pn
[0] == 'x' && pn
[1] == 'z') {
369 filebehave
= FILE_XZ
;
371 } else if (pn
[0] == 'l' && pn
[1] == 'z') {
372 filebehave
= FILE_LZMA
;
374 } else if (pn
[0] == 'r') {
375 dirbehave
= DIR_RECURSE
;
377 } else if (pn
[0] == 'z') {
378 filebehave
= FILE_GZIP
;
383 grepbehave
= GREP_EXTENDED
;
386 grepbehave
= GREP_FIXED
;
396 eopts
= getenv("GREP_OPTIONS");
398 /* support for extra arguments in GREP_OPTIONS */
400 if (eopts
!= NULL
&& eopts
[0] != '\0') {
403 /* make an estimation of how many extra arguments we have */
404 for (unsigned int j
= 0; j
< strlen(eopts
); j
++)
408 eargv
= (char **)grep_malloc(sizeof(char *) * (eargc
+ 1));
411 /* parse extra arguments */
412 while ((str
= strsep(&eopts
, " ")) != NULL
)
414 eargv
[eargc
++] = grep_strdup(str
);
416 aargv
= (char **)grep_calloc(eargc
+ argc
+ 1,
420 for (i
= 0; i
< eargc
; i
++)
421 aargv
[i
+ 1] = eargv
[i
];
422 for (int j
= 1; j
< argc
; j
++, i
++)
423 aargv
[i
+ 1] = argv
[j
];
425 aargc
= eargc
+ argc
;
431 while (((c
= getopt_long(aargc
, aargv
, optstr
, long_options
, NULL
)) !=
434 case '0': case '1': case '2': case '3': case '4':
435 case '5': case '6': case '7': case '8': case '9':
436 if (newarg
|| !isdigit(lastc
))
438 else if (Aflag
> LLONG_MAX
/ 10 - 1) {
443 Aflag
= Bflag
= (Aflag
* 10) + (c
- '0');
446 if (optarg
== NULL
) {
455 l
= strtoll(optarg
, &ep
, 10);
456 if (errno
== ERANGE
|| errno
== EINVAL
)
458 else if (ep
[0] != '\0') {
463 err(2, "context argument must be non-negative");
474 binbehave
= BINFILE_TEXT
;
483 if (strcasecmp(optarg
, "skip") == 0)
484 devbehave
= DEV_SKIP
;
485 else if (strcasecmp(optarg
, "read") == 0)
486 devbehave
= DEV_READ
;
488 errx(2, getstr(3), "--devices");
491 if (strcasecmp("recurse", optarg
) == 0) {
493 dirbehave
= DIR_RECURSE
;
494 } else if (strcasecmp("skip", optarg
) == 0)
495 dirbehave
= DIR_SKIP
;
496 else if (strcasecmp("read", optarg
) == 0)
497 dirbehave
= DIR_READ
;
499 errx(2, getstr(3), "--directories");
502 grepbehave
= GREP_EXTENDED
;
507 char *string
= optarg
;
509 while ((token
= strsep(&string
, "\n")) != NULL
)
510 add_pattern(token
, strlen(token
));
515 grepbehave
= GREP_FIXED
;
518 read_patterns(optarg
);
522 grepbehave
= GREP_BASIC
;
532 binbehave
= BINFILE_SKIP
;
542 err(2, "bzip2 support was disabled at compile-time");
544 filebehave
= FILE_BZIP
;
557 mlimit
= mcount
= strtoll(optarg
, &ep
, 10);
558 if (((errno
== ERANGE
) && (mcount
== LLONG_MAX
)) ||
559 ((errno
== EINVAL
) && (mcount
== 0)))
561 else if (ep
[0] != '\0') {
567 filebehave
= FILE_LZMA
;
573 linkbehave
= LINK_EXPLICIT
;
577 cflags
&= ~REG_NOSUB
;
580 linkbehave
= LINK_SKIP
;
586 linkbehave
= LINK_READ
;
590 dirbehave
= DIR_RECURSE
;
597 binbehave
= BINFILE_BIN
;
601 filebehave
= FILE_MMAP
;
605 printf(getstr(10), getprogname(), VERSION
);
607 printf(getstr(9), getprogname(), VERSION
);
615 cflags
&= ~REG_NOSUB
;
619 cflags
&= ~REG_NOSUB
;
622 filebehave
= FILE_XZ
;
628 filebehave
= FILE_GZIP
;
631 if (strcasecmp("binary", optarg
) == 0)
632 binbehave
= BINFILE_BIN
;
633 else if (strcasecmp("without-match", optarg
) == 0)
634 binbehave
= BINFILE_SKIP
;
635 else if (strcasecmp("text", optarg
) == 0)
636 binbehave
= BINFILE_TEXT
;
638 errx(2, getstr(3), "--binary-files");
642 if (optarg
== NULL
|| strcasecmp("auto", optarg
) == 0 ||
643 strcasecmp("tty", optarg
) == 0 ||
644 strcasecmp("if-tty", optarg
) == 0) {
647 term
= getenv("TERM");
648 if (isatty(STDOUT_FILENO
) && term
!= NULL
&&
649 strcasecmp(term
, "dumb") != 0)
650 color
= init_color("01;31");
651 } else if (strcasecmp("always", optarg
) == 0 ||
652 strcasecmp("yes", optarg
) == 0 ||
653 strcasecmp("force", optarg
) == 0) {
654 color
= init_color("01;31");
655 } else if (strcasecmp("never", optarg
) != 0 &&
656 strcasecmp("none", optarg
) != 0 &&
657 strcasecmp("no", optarg
) != 0)
658 errx(2, getstr(3), "--color");
659 cflags
&= ~REG_NOSUB
;
672 add_fpattern(optarg
, INCL_PAT
);
676 add_fpattern(optarg
, EXCL_PAT
);
680 add_dpattern(optarg
, INCL_PAT
);
684 add_dpattern(optarg
, EXCL_PAT
);
691 newarg
= optind
!= prevoptind
;
697 /* Empty pattern file matches nothing */
698 if (!needpattern
&& (patterns
== 0))
701 /* Fail if we don't have any pattern */
702 if (aargc
== 0 && needpattern
)
705 /* Process patterns from command line */
706 if (aargc
!= 0 && needpattern
) {
708 char *string
= *aargv
;
710 while ((token
= strsep(&string
, "\n")) != NULL
)
711 add_pattern(token
, strlen(token
));
716 switch (grepbehave
) {
721 * regex(3) implementations that support fixed-string searches generally
722 * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
723 * here. If neither are defined, GREP_FIXED later implies that the
724 * internal literal matcher should be used. Other cflags that have
725 * the same interpretation as REG_NOSPEC and REG_LITERAL should be
726 * similarly added here, and grep.h should be amended to take this into
727 * consideration when defining WITH_INTERNAL_NOSPEC.
729 #if defined(REG_NOSPEC)
730 cflags
|= REG_NOSPEC
;
731 #elif defined(REG_LITERAL)
732 cflags
|= REG_LITERAL
;
736 cflags
|= REG_EXTENDED
;
743 #ifndef WITHOUT_FASTMATCH
744 fg_pattern
= grep_calloc(patterns
, sizeof(*fg_pattern
));
746 r_pattern
= grep_calloc(patterns
, sizeof(*r_pattern
));
748 /* Don't process any patterns if we have a blank one */
749 #ifdef WITH_INTERNAL_NOSPEC
750 if (!matchall
&& grepbehave
!= GREP_FIXED
) {
754 /* Check if cheating is allowed (always is for fgrep). */
755 for (i
= 0; i
< patterns
; ++i
) {
756 #ifndef WITHOUT_FASTMATCH
758 * Attempt compilation with fastmatch regex and
759 * fallback to regex(3) if it fails.
761 if (fastncomp(&fg_pattern
[i
], pattern
[i
].pat
,
762 pattern
[i
].len
, cflags
) == 0)
765 c
= regcomp(&r_pattern
[i
], pattern
[i
].pat
, cflags
);
767 regerror(c
, &r_pattern
[i
], re_error
,
769 errx(2, "%s", re_error
);
777 if ((aargc
== 0 || aargc
== 1) && !Hflag
)
780 if (aargc
== 0 && dirbehave
!= DIR_RECURSE
)
781 exit(!procfile("-"));
783 if (dirbehave
== DIR_RECURSE
)
784 c
= grep_tree(aargv
);
786 for (c
= 0; aargc
--; ++aargv
) {
787 if ((finclude
|| fexclude
) && !file_matching(*aargv
))
789 c
+= procfile(*aargv
);
796 /* Find out the correct return value according to the
797 results and the command line option. */
798 exit(c
? (file_err
? (qflag
? 0 : 2) : 0) : (file_err
? 2 : 1));