From FreeBSD:
[dragonfly/vkernel-mp.git] / gnu / usr.bin / grep / grep.c
blob7f9114f295fc660707ea50ce7e82797625ab401e
1 /* grep.c - main driver file for grep.
2 Copyright (C) 1992, 1997, 1998, 1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17 02111-1307, USA. */
19 /* Written July 1992 by Mike Haertel. */
20 /* Builtin decompression 1997 by Wolfram Schneider <wosch@FreeBSD.org>. */
22 /* $FreeBSD: src/gnu/usr.bin/grep/grep.c,v 1.20.2.1 2000/06/13 07:17:27 ru Exp $ */
23 /* $DragonFly: src/gnu/usr.bin/grep/grep.c,v 1.3 2005/10/08 13:28:23 corecode Exp $ */
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #if defined(HAVE_MMAP)
31 # include <sys/mman.h>
32 #endif
33 #if defined(HAVE_SETRLIMIT)
34 # include <sys/time.h>
35 # include <sys/resource.h>
36 #endif
37 #include <stdio.h>
38 #include "system.h"
39 #include "getopt.h"
40 #include "getpagesize.h"
41 #include "grep.h"
42 #include "savedir.h"
44 #undef MAX
45 #define MAX(A,B) ((A) > (B) ? (A) : (B))
/* One link in the chain of stat buffers built while descending
   directories.  grepdir() walks the PARENT links to detect loops.  */
struct stats
{
  struct stats *parent;		/* Entry of the enclosing directory, if any.  */
  struct stat stat;		/* stat data of this file or directory.  */
};
53 /* base of chain of stat buffers, used to detect directory loops */
54 static struct stats stats_base;
56 /* if non-zero, display usage information and exit */
57 static int show_help;
59 /* If non-zero, print the version on standard output and exit. */
60 static int show_version;
62 /* If nonzero, use mmap if possible. */
63 static int mmap_option;
65 /* If zero, output nulls after filenames. */
66 static int filename_mask;
68 /* Short options. */
69 static char const short_options[] =
70 "0123456789A:B:C::EFGHIORUVX:abcd:e:f:hiLlnqrsuvwxyZz";
/* Non-boolean long options that have no corresponding short equivalents.
   Values start above CHAR_MAX so they cannot collide with any
   short-option character returned by getopt.  */
enum
{
  BINARY_FILES_OPTION = CHAR_MAX + 1
};
78 /* Long options equivalences. */
79 static struct option long_options[] =
81 {"after-context", required_argument, NULL, 'A'},
82 {"basic-regexp", no_argument, NULL, 'G'},
83 {"before-context", required_argument, NULL, 'B'},
84 {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
85 {"byte-offset", no_argument, NULL, 'b'},
86 {"context", optional_argument, NULL, 'C'},
87 {"count", no_argument, NULL, 'c'},
88 {"directories", required_argument, NULL, 'd'},
89 {"extended-regexp", no_argument, NULL, 'E'},
90 {"file", required_argument, NULL, 'f'},
91 {"files-with-matches", no_argument, NULL, 'l'},
92 {"files-without-match", no_argument, NULL, 'L'},
93 {"fixed-regexp", no_argument, NULL, 'F'},
94 {"fixed-strings", no_argument, NULL, 'F'},
95 {"help", no_argument, &show_help, 1},
96 {"ignore-case", no_argument, NULL, 'i'},
97 {"line-number", no_argument, NULL, 'n'},
98 {"line-regexp", no_argument, NULL, 'x'},
99 {"mmap", no_argument, &mmap_option, 1},
100 {"no-filename", no_argument, NULL, 'h'},
101 {"no-messages", no_argument, NULL, 's'},
102 #if HAVE_LIBZ > 0
103 {"decompress", no_argument, NULL, 'Z'},
104 {"null", no_argument, &filename_mask, 0},
105 #else
106 {"null", no_argument, NULL, 'Z'},
107 #endif
108 {"null-data", no_argument, NULL, 'z'},
109 {"only-files", no_argument, NULL, 'O'},
110 {"quiet", no_argument, NULL, 'q'},
111 {"recursive", no_argument, NULL, 'r'},
112 {"regexp", required_argument, NULL, 'e'},
113 {"invert-match", no_argument, NULL, 'v'},
114 {"silent", no_argument, NULL, 'q'},
115 {"text", no_argument, NULL, 'a'},
116 {"binary", no_argument, NULL, 'U'},
117 {"unix-byte-offsets", no_argument, NULL, 'u'},
118 {"version", no_argument, NULL, 'V'},
119 {"with-filename", no_argument, NULL, 'H'},
120 {"word-regexp", no_argument, NULL, 'w'},
121 {0, 0, 0, 0}
124 /* Define flags declared in grep.h. */
125 char const *matcher;
126 int match_icase;
127 int match_words;
128 int match_lines;
129 unsigned char eolbyte;
131 /* For error messages. */
132 static char *prog;
133 static char const *filename;
134 static int errseen;
/* How to handle directories.  */
static enum
{
  READ_DIRECTORIES,		/* Read them like ordinary files.  */
  RECURSE_DIRECTORIES,		/* Descend into them.  */
  SKIP_DIRECTORIES		/* Silently ignore them.  */
} directories;
/* If nonzero (-O, --only-files), skip special files and symlinked directories.  */
145 static int only_files;
147 static int ck_atoi PARAMS ((char const *, int *));
148 static void usage PARAMS ((int)) __attribute__((noreturn));
149 static void error PARAMS ((const char *, int));
150 static void setmatcher PARAMS ((char const *));
151 static int install_matcher PARAMS ((char const *));
152 static int prepend_args PARAMS ((char const *, char *, char **));
153 static void prepend_default_options PARAMS ((char const *, int *, char ***));
154 static char *page_alloc PARAMS ((size_t, char **));
155 static int reset PARAMS ((int, char const *, struct stats *));
156 static int fillbuf PARAMS ((size_t, struct stats *));
157 static int grepbuf PARAMS ((char *, char *));
158 static void prtext PARAMS ((char *, char *, int *));
159 static void prpending PARAMS ((char *));
160 static void prline PARAMS ((char *, char *, int));
161 static void print_offset_sep PARAMS ((off_t, int));
162 static void nlscan PARAMS ((char *));
163 static int grep PARAMS ((int, char const *, struct stats *));
164 static int grepdir PARAMS ((char const *, struct stats *));
165 static int grepfile PARAMS ((char const *, struct stats *));
166 #if O_BINARY
167 static inline int undossify_input PARAMS ((register char *, size_t));
168 #endif
170 /* Functions we'll use to search. */
171 static void (*compile) PARAMS ((char *, size_t));
172 static char *(*execute) PARAMS ((char *, size_t, char **));
174 /* Print a message and possibly an error string. Remember
175 that something awful happened. */
176 static void
177 error (const char *mesg, int errnum)
179 if (errnum)
180 fprintf (stderr, "%s: %s: %s\n", prog, mesg, strerror (errnum));
181 else
182 fprintf (stderr, "%s: %s\n", prog, mesg);
183 errseen = 1;
/* Like error (), but die horribly after printing: the process exits
   with status 2 and this function never returns.  */
void
fatal (const char *mesg, int errnum)
{
  error (mesg, errnum);
  exit (2);
}
/* Interface to handle errors and fix library lossage.  Allocate SIZE
   bytes; a failed allocation of a nonzero SIZE is fatal.  A null
   result for SIZE == 0 is passed back to the caller unchanged.  */
char *
xmalloc (size_t size)
{
  char *result;

  result = malloc (size);
  if (size && !result)
    fatal (_("memory exhausted"), 0);
  return result;
}
/* Interface to handle errors and fix some library lossage.  Resize PTR
   to SIZE bytes; a null PTR is routed to malloc explicitly rather than
   relying on realloc accepting it.  A failed allocation of a nonzero
   SIZE is fatal.  */
char *
xrealloc (char *ptr, size_t size)
{
  char *result;

  if (ptr)
    result = realloc (ptr, size);
  else
    result = malloc (size);
  if (size && !result)
    fatal (_("memory exhausted"), 0);
  return result;
}
/* Convert STR to a non-negative integer, storing the result in *OUT.
   If STR contains a non-digit character or its value does not fit in
   an int, return -1 and leave *OUT untouched; otherwise return 0.
   An empty STR yields 0, as atoi would.

   The conversion works on STR itself; it must not consult the global
   `optarg', which only happens to equal STR at some call sites.  */
static int
ck_atoi (char const *str, int *out)
{
  char const *p;
  int value = 0;

  for (p = str; *p; p++)
    {
      int digit;

      if (*p < '0' || *p > '9')
	return -1;
      digit = *p - '0';
      /* Reject the digit if value * 10 + digit would exceed INT_MAX.  */
      if (value > (INT_MAX - digit) / 10)
	return -1;
      value = value * 10 + digit;
    }

  *out = value;
  return 0;
}
236 /* Hairy buffering mechanism for grep. The intent is to keep
237 all reads aligned on a page boundary and multiples of the
238 page size. */
240 static char *ubuffer; /* Unaligned base of buffer. */
241 static char *buffer; /* Base of buffer. */
242 static size_t bufsalloc; /* Allocated size of buffer save region. */
243 static size_t bufalloc; /* Total buffer size. */
244 #define PREFERRED_SAVE_FACTOR 5 /* Preferred value of bufalloc / bufsalloc. */
245 static int bufdesc; /* File descriptor. */
246 static char *bufbeg; /* Beginning of user-visible stuff. */
247 static char *buflim; /* Limit of user-visible stuff. */
248 static size_t pagesize; /* alignment of memory pages */
249 static off_t bufoffset; /* Read offset; defined on regular files. */
251 #if defined(HAVE_MMAP)
252 static int bufmapped; /* True if buffer is memory-mapped. */
253 static off_t initial_bufoffset; /* Initial value of bufoffset. */
254 #endif
256 #if HAVE_LIBZ > 0
257 #include <zlib.h>
258 static gzFile gzbufdesc; /* zlib file descriptor. */
259 static int Zflag; /* uncompress before searching. */
260 #endif
/* Return VAL aligned to the next multiple of ALIGNMENT.  VAL can be
   an integer or a pointer.  Both args must be free of side effects,
   since each is evaluated more than once.  */
#define ALIGN_TO(val, alignment) \
  ((size_t) (val) % (alignment) == 0 \
   ? (val) \
   : (val) + ((alignment) - (size_t) (val) % (alignment)))
269 /* Return the address of a page-aligned buffer of size SIZE,
270 reallocating it from *UP. Set *UP to the newly allocated (but
271 possibly unaligned) buffer used to build the aligned buffer. To
272 free the buffer, free (*UP). */
273 static char *
274 page_alloc (size_t size, char **up)
276 size_t asize = size + pagesize - 1;
277 if (size <= asize)
279 char *p = *up ? realloc (*up, asize) : malloc (asize);
280 if (p)
282 *up = p;
283 return ALIGN_TO (p, pagesize);
286 return NULL;
289 /* Reset the buffer for a new file, returning zero if we should skip it.
290 Initialize on the first time through. */
291 static int
292 reset (int fd, char const *file, struct stats *stats)
294 if (pagesize)
295 bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize);
296 else
298 size_t ubufsalloc;
299 pagesize = getpagesize ();
300 if (pagesize == 0)
301 abort ();
302 #ifndef BUFSALLOC
303 ubufsalloc = MAX (8192, pagesize);
304 #else
305 ubufsalloc = BUFSALLOC;
306 #endif
307 bufsalloc = ALIGN_TO (ubufsalloc, pagesize);
308 bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
309 /* The 1 byte of overflow is a kludge for dfaexec(), which
310 inserts a sentinel newline at the end of the buffer
311 being searched. There's gotta be a better way... */
312 if (bufsalloc < ubufsalloc
313 || bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc
314 || bufalloc + 1 < bufalloc
315 || ! (buffer = page_alloc (bufalloc + 1, &ubuffer)))
316 fatal (_("memory exhausted"), 0);
318 #if HAVE_LIBZ > 0
319 if (Zflag)
321 gzbufdesc = gzdopen(fd, "r");
322 if (gzbufdesc == NULL)
323 fatal(_("memory exhausted"), 0);
325 #endif
327 buflim = buffer;
328 bufdesc = fd;
330 if (fstat (fd, &stats->stat) != 0)
332 error ("fstat", errno);
333 return 0;
335 if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
336 return 0;
337 if (
338 #if HAVE_LIBZ > 0
339 Zflag ||
340 #endif
341 S_ISREG (stats->stat.st_mode))
343 if (file)
344 bufoffset = 0;
345 else
347 bufoffset = lseek (fd, 0, SEEK_CUR);
348 if (bufoffset < 0)
350 error ("lseek", errno);
351 return 0;
354 #ifdef HAVE_MMAP
355 initial_bufoffset = bufoffset;
356 bufmapped = mmap_option && bufoffset % pagesize == 0;
357 #endif
359 else
361 #ifdef HAVE_MMAP
362 bufmapped = 0;
363 #endif
365 return 1;
368 /* Read new stuff into the buffer, saving the specified
369 amount of old stuff. When we're done, 'bufbeg' points
370 to the beginning of the buffer contents, and 'buflim'
371 points just after the end. Return zero if there's an error. */
372 static int
373 fillbuf (size_t save, struct stats *stats)
375 size_t fillsize = 0;
376 int cc = 1;
377 size_t readsize;
379 /* Offset from start of unaligned buffer to start of old stuff
380 that we want to save. */
381 size_t saved_offset = buflim - ubuffer - save;
383 if (bufsalloc < save)
385 size_t aligned_save = ALIGN_TO (save, pagesize);
386 size_t maxalloc = (size_t) -1;
387 size_t newalloc;
389 if (S_ISREG (stats->stat.st_mode))
391 /* Calculate an upper bound on how much memory we should allocate.
392 We can't use ALIGN_TO here, since off_t might be longer than
393 size_t. Watch out for arithmetic overflow. */
394 off_t to_be_read = stats->stat.st_size - bufoffset;
395 size_t slop = to_be_read % pagesize;
396 off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0);
397 off_t maxalloc_off = aligned_save + aligned_to_be_read;
398 if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off)
399 maxalloc = maxalloc_off;
402 /* Grow bufsalloc until it is at least as great as `save'; but
403 if there is an overflow, just grow it to the next page boundary. */
404 while (bufsalloc < save)
405 if (bufsalloc < bufsalloc * 2)
406 bufsalloc *= 2;
407 else
409 bufsalloc = aligned_save;
410 break;
413 /* Grow the buffer size to be PREFERRED_SAVE_FACTOR times
414 bufsalloc.... */
415 newalloc = PREFERRED_SAVE_FACTOR * bufsalloc;
416 if (maxalloc < newalloc)
418 /* ... except don't grow it more than a pagesize past the
419 file size, as that might cause unnecessary memory
420 exhaustion if the file is large. */
421 newalloc = maxalloc;
422 bufsalloc = aligned_save;
425 /* Check that the above calculations made progress, which might
426 not occur if there is arithmetic overflow. If there's no
427 progress, or if the new buffer size is larger than the old
428 and buffer reallocation fails, report memory exhaustion. */
429 if (bufsalloc < save || newalloc < save
430 || (newalloc == save && newalloc != maxalloc)
431 || (bufalloc < newalloc
432 && ! (buffer
433 = page_alloc ((bufalloc = newalloc) + 1, &ubuffer))))
434 fatal (_("memory exhausted"), 0);
437 bufbeg = buffer + bufsalloc - save;
438 memmove (bufbeg, ubuffer + saved_offset, save);
439 readsize = bufalloc - bufsalloc;
441 #if defined(HAVE_MMAP)
442 if (bufmapped)
444 size_t mmapsize = readsize;
446 /* Don't mmap past the end of the file; some hosts don't allow this.
447 Use `read' on the last page. */
448 if (stats->stat.st_size - bufoffset < mmapsize)
450 mmapsize = stats->stat.st_size - bufoffset;
451 mmapsize -= mmapsize % pagesize;
454 if (mmapsize
455 && (mmap ((caddr_t) (buffer + bufsalloc), mmapsize,
456 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED,
457 bufdesc, bufoffset)
458 != (caddr_t) -1))
460 /* Do not bother to use madvise with MADV_SEQUENTIAL or
461 MADV_WILLNEED on the mmapped memory. One might think it
462 would help, but it slows us down about 30% on SunOS 4.1. */
463 fillsize = mmapsize;
465 else
467 /* Stop using mmap on this file. Synchronize the file
468 offset. Do not warn about mmap failures. On some hosts
469 (e.g. Solaris 2.5) mmap can fail merely because some
470 other process has an advisory read lock on the file.
471 There's no point alarming the user about this misfeature. */
472 bufmapped = 0;
473 if (bufoffset != initial_bufoffset
474 && lseek (bufdesc, bufoffset, SEEK_SET) < 0)
476 error ("lseek", errno);
477 cc = 0;
481 #endif /*HAVE_MMAP*/
483 if (! fillsize)
485 ssize_t bytesread;
487 #if HAVE_LIBZ > 0
488 if (Zflag)
489 bytesread = gzread (gzbufdesc, buffer + bufsalloc, readsize);
490 else
491 #endif
492 bytesread = read (bufdesc, buffer + bufsalloc, readsize);
493 while (bytesread < 0 && errno == EINTR);
494 if (bytesread < 0)
495 cc = 0;
496 else
497 fillsize = bytesread;
500 bufoffset += fillsize;
501 #if O_BINARY
502 if (fillsize)
503 fillsize = undossify_input (buffer + bufsalloc, fillsize);
504 #endif
505 buflim = buffer + bufsalloc + fillsize;
506 return cc;
/* Flags controlling the style of output. */
static enum
{
  BINARY_BINARY_FILES,
  TEXT_BINARY_FILES,
  WITHOUT_MATCH_BINARY_FILES
} binary_files;		/* How to handle binary files.  */
516 static int out_quiet; /* Suppress all normal output. */
517 static int out_invert; /* Print nonmatching stuff. */
518 static int out_file; /* Print filenames. */
519 static int out_line; /* Print line numbers. */
520 static int out_byte; /* Print byte offsets. */
521 static int out_before; /* Lines of leading context. */
522 static int out_after; /* Lines of trailing context. */
523 static int count_matches; /* Count matching lines. */
524 static int list_files; /* List matching files. */
525 static int no_filenames; /* Suppress file names. */
526 static int suppress_errors; /* Suppress diagnostics. */
528 /* Internal variables to keep track of byte count, context, etc. */
529 static off_t totalcc; /* Total character count before bufbeg. */
530 static char *lastnl; /* Pointer after last newline counted. */
531 static char *lastout; /* Pointer after last character output;
532 NULL if no character has been output
533 or if it's conceptually before bufbeg. */
534 static off_t totalnl; /* Total newline count before lastnl. */
535 static int pending; /* Pending lines of output. */
536 static int done_on_match; /* Stop scanning file on first match */
538 #if O_BINARY
539 # include "dosbuf.c"
540 #endif
542 static void
543 nlscan (char *lim)
545 char *beg;
546 for (beg = lastnl; (beg = memchr (beg, eolbyte, lim - beg)); beg++)
547 totalnl++;
548 lastnl = lim;
/* Write POS in decimal on stdout, followed by the separator byte SEP.  */
static void
print_offset_sep (off_t pos, int sep)
{
  /* Do not rely on printf to print pos, since off_t may be longer than long,
     and long long is not portable.  */

  char buf[sizeof pos * CHAR_BIT];
  char *p = buf + sizeof buf - 1;
  *p = sep;

  /* Emit digits right to left; do/while so that 0 still prints "0".  */
  do
    *--p = '0' + pos % 10;
  while ((pos /= 10) != 0);

  fwrite (p, 1, buf + sizeof buf - p, stdout);
}
568 static void
569 prline (char *beg, char *lim, int sep)
571 if (out_file)
572 printf ("%s%c", filename, sep & filename_mask);
573 if (out_line)
575 nlscan (beg);
576 print_offset_sep (++totalnl, sep);
577 lastnl = lim;
579 if (out_byte)
581 off_t pos = totalcc + (beg - bufbeg);
582 #if O_BINARY
583 pos = dossified_pos (pos);
584 #endif
585 print_offset_sep (pos, sep);
587 fwrite (beg, 1, lim - beg, stdout);
588 if (ferror (stdout))
589 error (_("writing output"), errno);
590 lastout = lim;
593 /* Print pending lines of trailing context prior to LIM. */
594 static void
595 prpending (char *lim)
597 char *nl;
599 if (!lastout)
600 lastout = bufbeg;
601 while (pending > 0 && lastout < lim)
603 --pending;
604 if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0)
605 ++nl;
606 else
607 nl = lim;
608 prline (lastout, nl, '-');
612 /* Print the lines between BEG and LIM. Deal with context crap.
613 If NLINESP is non-null, store a count of lines between BEG and LIM. */
614 static void
615 prtext (char *beg, char *lim, int *nlinesp)
617 static int used; /* avoid printing "--" before any output */
618 char *bp, *p, *nl;
619 char eol = eolbyte;
620 int i, n;
622 if (!out_quiet && pending > 0)
623 prpending (beg);
625 p = beg;
627 if (!out_quiet)
629 /* Deal with leading context crap. */
631 bp = lastout ? lastout : bufbeg;
632 for (i = 0; i < out_before; ++i)
633 if (p > bp)
635 --p;
636 while (p > bp && p[-1] != eol);
638 /* We only print the "--" separator if our output is
639 discontiguous from the last output in the file. */
640 if ((out_before || out_after) && used && p != lastout)
641 puts ("--");
643 while (p < beg)
645 nl = memchr (p, eol, beg - p);
646 prline (p, nl + 1, '-');
647 p = nl + 1;
651 if (nlinesp)
653 /* Caller wants a line count. */
654 for (n = 0; p < lim; ++n)
656 if ((nl = memchr (p, eol, lim - p)) != 0)
657 ++nl;
658 else
659 nl = lim;
660 if (!out_quiet)
661 prline (p, nl, ':');
662 p = nl;
664 *nlinesp = n;
666 else
667 if (!out_quiet)
668 prline (beg, lim, ':');
670 pending = out_quiet ? 0 : out_after;
671 used = 1;
674 /* Scan the specified portion of the buffer, matching lines (or
675 between matching lines if OUT_INVERT is true). Return a count of
676 lines printed. */
677 static int
678 grepbuf (char *beg, char *lim)
680 int nlines, n;
681 register char *p, *b;
682 char *endp;
683 char eol = eolbyte;
685 nlines = 0;
686 p = beg;
687 while ((b = (*execute)(p, lim - p, &endp)) != 0)
689 /* Avoid matching the empty line at the end of the buffer. */
690 if (b == lim && ((b > beg && b[-1] == eol) || b == beg))
691 break;
692 if (!out_invert)
694 prtext (b, endp, (int *) 0);
695 nlines += 1;
696 if (done_on_match)
697 return nlines;
699 else if (p < b)
701 prtext (p, b, &n);
702 nlines += n;
704 p = endp;
706 if (out_invert && p < lim)
708 prtext (p, lim, &n);
709 nlines += n;
711 return nlines;
714 /* Search a given file. Normally, return a count of lines printed;
715 but if the file is a directory and we search it recursively, then
716 return -2 if there was a match, and -1 otherwise. */
717 static int
718 grep (int fd, char const *file, struct stats *stats)
720 int nlines, i;
721 int not_text;
722 size_t residue, save;
723 char *beg, *lim;
724 char eol = eolbyte;
726 if (!reset (fd, file, stats))
727 return 0;
729 if (file && directories == RECURSE_DIRECTORIES
730 && S_ISDIR (stats->stat.st_mode))
732 /* Close fd now, so that we don't open a lot of file descriptors
733 when we recurse deeply. */
734 #if HAVE_LIBZ > 0
735 if (Zflag)
736 gzclose(gzbufdesc);
737 else
738 #endif
739 if (close (fd) != 0)
740 error (file, errno);
741 return grepdir (file, stats) - 2;
744 totalcc = 0;
745 lastout = 0;
746 totalnl = 0;
747 pending = 0;
749 nlines = 0;
750 residue = 0;
751 save = 0;
753 if (! fillbuf (save, stats))
755 if (! (is_EISDIR (errno, file) && suppress_errors))
756 error (filename, errno);
757 return 0;
760 not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet)
761 || binary_files == WITHOUT_MATCH_BINARY_FILES)
762 && memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg));
763 if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES)
764 return 0;
765 done_on_match += not_text;
766 out_quiet += not_text;
768 for (;;)
770 lastnl = bufbeg;
771 if (lastout)
772 lastout = bufbeg;
773 if (buflim - bufbeg == save)
774 break;
775 beg = bufbeg + save - residue;
776 for (lim = buflim; lim > beg && lim[-1] != eol; --lim)
778 residue = buflim - lim;
779 if (beg < lim)
781 nlines += grepbuf (beg, lim);
782 if (pending)
783 prpending (lim);
784 if (nlines && done_on_match && !out_invert)
785 goto finish_grep;
787 i = 0;
788 beg = lim;
789 while (i < out_before && beg > bufbeg && beg != lastout)
791 ++i;
793 --beg;
794 while (beg > bufbeg && beg[-1] != eol);
796 if (beg != lastout)
797 lastout = 0;
798 save = residue + lim - beg;
799 totalcc += buflim - bufbeg - save;
800 if (out_line)
801 nlscan (beg);
802 if (! fillbuf (save, stats))
804 if (! (is_EISDIR (errno, file) && suppress_errors))
805 error (filename, errno);
806 goto finish_grep;
809 if (residue)
811 *buflim++ = eol;
812 nlines += grepbuf (bufbeg + save - residue, buflim);
813 if (pending)
814 prpending (buflim);
817 finish_grep:
818 done_on_match -= not_text;
819 out_quiet -= not_text;
820 if ((not_text & ~out_quiet) && nlines != 0)
821 printf (_("Binary file %s matches\n"), filename);
822 return nlines;
825 static int
826 grepfile (char const *file, struct stats *stats)
828 int desc;
829 int count;
830 int status;
832 if (! file)
834 desc = 0;
835 filename = _("(standard input)");
837 else
839 if (only_files)
841 if (stat(file, &stats->stat) != 0)
842 return 1;
843 if (S_ISDIR(stats->stat.st_mode))
845 if (directories != RECURSE_DIRECTORIES)
846 return 1;
847 if (lstat(file, &stats->stat) != 0)
848 return 1;
849 if (!S_ISDIR(stats->stat.st_mode))
850 return 1;
852 else if (!S_ISREG(stats->stat.st_mode))
853 return 1;
855 while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
856 continue;
858 if (desc < 0)
860 int e = errno;
862 if (is_EISDIR (e, file) && directories == RECURSE_DIRECTORIES)
864 return grepdir (file, stats);
867 if (!suppress_errors)
869 if (directories == SKIP_DIRECTORIES)
870 switch (e)
872 #ifdef EISDIR
873 case EISDIR:
874 return 1;
875 #endif
876 case EACCES:
877 /* When skipping directories, don't worry about
878 directories that can't be opened. */
879 if (stat (file, &stats->stat) == 0
880 && S_ISDIR (stats->stat.st_mode))
881 return 1;
882 break;
885 error (file, e);
888 return 1;
891 filename = file;
894 #if O_BINARY
895 /* Set input to binary mode. Pipes are simulated with files
896 on DOS, so this includes the case of "foo | grep bar". */
897 if (!isatty (desc))
898 SET_BINARY (desc);
899 #endif
901 count = grep (desc, file, stats);
902 if (count < 0)
903 status = count + 2;
904 else
906 if (count_matches)
908 if (out_file)
909 printf ("%s%c", filename, ':' & filename_mask);
910 printf ("%d\n", count);
913 status = !count;
914 if (list_files == 1 - 2 * status)
915 printf ("%s%c", filename, '\n' & filename_mask);
917 #if HAVE_LIBZ > 0
918 if (Zflag)
919 gzclose(gzbufdesc);
920 else
921 #endif
922 if (file)
923 while (close (desc) != 0)
924 if (errno != EINTR)
926 error (file, errno);
927 break;
931 return status;
934 static int
935 grepdir (char const *dir, struct stats *stats)
937 int status = 1;
938 struct stats *ancestor;
939 char *name_space;
941 for (ancestor = stats; (ancestor = ancestor->parent) != 0; )
942 if (ancestor->stat.st_ino == stats->stat.st_ino
943 && ancestor->stat.st_dev == stats->stat.st_dev)
945 if (!suppress_errors)
946 fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir,
947 _("recursive directory loop"));
948 return 1;
951 name_space = savedir (dir, (unsigned) stats->stat.st_size);
953 if (! name_space)
955 if (errno)
957 if (!suppress_errors)
958 error (dir, errno);
960 else
961 fatal (_("Memory exhausted"), 0);
963 else
965 size_t dirlen = strlen (dir);
966 int needs_slash = ! (dirlen == FILESYSTEM_PREFIX_LEN (dir)
967 || IS_SLASH (dir[dirlen - 1]));
968 char *file = NULL;
969 char *namep = name_space;
970 struct stats child;
971 child.parent = stats;
972 out_file += !no_filenames;
973 while (*namep)
975 size_t namelen = strlen (namep);
976 file = xrealloc (file, dirlen + 1 + namelen + 1);
977 strcpy (file, dir);
978 file[dirlen] = '/';
979 strcpy (file + dirlen + needs_slash, namep);
980 namep += namelen + 1;
981 status &= grepfile (file, &child);
983 out_file -= !no_filenames;
984 if (file)
985 free (file);
986 free (name_space);
989 return status;
/* Print usage information and exit with STATUS.  A nonzero STATUS
   prints a two-line hint on stderr; a zero STATUS prints the full
   option summary on stdout.  This function does not return.

   NOTE(review): the text below still carries line-number residue and
   is missing its brace and `\n\' continuation lines; the help strings
   are left untouched because their original spacing cannot be
   recovered from here -- restore them from the repository copy.  */
992 static void
993 usage (int status)
995 if (status != 0)
997 fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), prog);
998 fprintf (stderr, _("Try `%s --help' for more information.\n"), prog);
1000 else
1002 printf (_("Usage: %s [OPTION]... PATTERN [FILE] ...\n"), prog);
1003 printf (_("\
1004 Search for PATTERN in each FILE or standard input.\n\
1005 Example: %s -i 'hello world' menu.h main.c\n\
1007 Regexp selection and interpretation:\n"), prog);
1008 printf (_("\
1009 -E, --extended-regexp PATTERN is an extended regular expression\n\
1010 -F, --fixed-strings PATTERN is a set of newline-separated strings\n\
1011 -G, --basic-regexp PATTERN is a basic regular expression\n"));
1012 printf (_("\
1013 -e, --regexp=PATTERN use PATTERN as a regular expression\n\
1014 -f, --file=FILE obtain PATTERN from FILE\n\
1015 -i, --ignore-case ignore case distinctions\n\
1016 -w, --word-regexp force PATTERN to match only whole words\n\
1017 -x, --line-regexp force PATTERN to match only whole lines\n\
1018 -z, --null-data a data line ends in 0 byte, not newline\n"));
1019 printf (_("\
1021 Miscellaneous:\n\
1022 -s, --no-messages suppress error messages\n\
1023 -v, --invert-match select non-matching lines\n\
1024 -V, --version print version information and exit\n\
1025 --help display this help and exit\n\
1026 -Z, --decompress decompress input before searching (HAVE_LIBZ=1)\n\
1027 --mmap use memory-mapped input if possible\n"));
1028 printf (_("\
1030 Output control:\n\
1031 -b, --byte-offset print the byte offset with output lines\n\
1032 -n, --line-number print line number with output lines\n\
1033 -H, --with-filename print the filename for each match\n\
1034 -h, --no-filename suppress the prefixing filename on output\n\
1035 -q, --quiet, --silent suppress all normal output\n\
1036 --binary-files=TYPE assume that binary files are TYPE\n\
1037 TYPE is 'binary', 'text', or 'without-match'.\n\
1038 -a, --text equivalent to --binary-files=text\n\
1039 -I equivalent to --binary-files=without-match\n\
1040 -d, --directories=ACTION how to handle directories\n\
1041 ACTION is 'read', 'recurse', or 'skip'.\n\
1042 -r, --recursive equivalent to --directories=recurse.\n\
1043 -O, --only-files Ignore special files, except symlinks.\n\
1044 When recursing into directories, ignore\n\
1045 symlinked directories as well.\n\
1046 -L, --files-without-match only print FILE names containing no match\n\
1047 -l, --files-with-matches only print FILE names containing matches\n\
1048 -c, --count only print a count of matching lines per FILE\n\
1049 --null print 0 byte after FILE name\n"));
1050 printf (_("\
1052 Context control:\n\
1053 -B, --before-context=NUM print NUM lines of leading context\n\
1054 -A, --after-context=NUM print NUM lines of trailing context\n\
1055 -C, --context[=NUM] print NUM (default 2) lines of output context\n\
1056 unless overridden by -A or -B\n\
1057 -NUM same as --context=NUM\n\
1058 -U, --binary do not strip CR characters at EOL (MSDOS)\n\
1059 -u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\
1061 `egrep' means `grep -E'. `fgrep' means `grep -F'.\n\
1062 With no FILE, or when FILE is -, read standard input. If less than\n\
1063 two FILEs given, assume -h. Exit status is 0 if match, 1 if no match,\n\
1064 and 2 if trouble.\n"));
1065 printf (_("\nReport bugs to <bug-gnu-utils@gnu.org>.\n"));
1067 exit (status);
1070 /* Set the matcher to M, reporting any conflicts. */
1071 static void
1072 setmatcher (char const *m)
1074 if (matcher && strcmp (matcher, m) != 0)
1075 fatal (_("conflicting matchers specified"), 0);
1076 matcher = m;
1079 /* Go through the matchers vector and look for the specified matcher.
1080 If we find it, install it in compile and execute, and return 1. */
1081 static int
1082 install_matcher (char const *name)
1084 int i;
1085 #ifdef HAVE_SETRLIMIT
1086 struct rlimit rlim;
1087 #endif
1089 for (i = 0; matchers[i].name; ++i)
1090 if (strcmp (name, matchers[i].name) == 0)
1092 compile = matchers[i].compile;
1093 execute = matchers[i].execute;
1094 #if HAVE_SETRLIMIT && defined(RLIMIT_STACK)
1095 /* I think every platform needs to do this, so that regex.c
1096 doesn't oveflow the stack. The default value of
1097 `re_max_failures' is too large for some platforms: it needs
1098 more than 3MB-large stack.
1100 The test for HAVE_SETRLIMIT should go into `configure'. */
1101 if (!getrlimit (RLIMIT_STACK, &rlim))
1103 long newlim;
1104 extern long int re_max_failures; /* from regex.c */
1106 /* Approximate the amount regex.c needs, plus some more. */
1107 newlim = re_max_failures * 2 * 20 * sizeof (char *);
1108 if (newlim > rlim.rlim_max)
1110 newlim = rlim.rlim_max;
1111 re_max_failures = newlim / (2 * 20 * sizeof (char *));
1113 if (rlim.rlim_cur < newlim)
1114 rlim.rlim_cur = newlim;
1116 setrlimit (RLIMIT_STACK, &rlim);
1118 #endif
1119 return 1;
1121 return 0;
/* Find the white-space-separated options specified by OPTIONS, and
   using BUF to store copies of these options, set ARGV[0], ARGV[1],
   etc. to the option copies.  Return the number N of options found.
   Do not set ARGV[N] to NULL.  If ARGV is NULL, do not store ARGV[0]
   etc.  Backslash can be used to escape whitespace (and backslashes).

   BUF must have room for strlen (OPTIONS) + 1 bytes.  */
static int
prepend_args (char const *options, char *buf, char **argv)
{
  char const *o = options;
  char *b = buf;
  int n = 0;

  for (;;)
    {
      while (ISSPACE ((unsigned char) *o))
	o++;
      if (!*o)
	return n;
      if (argv)
	argv[n] = b;
      n++;

      /* Copy one option; a backslash is replaced by the byte that
	 follows it, unless it is the last byte of OPTIONS.  */
      do
	if ((*b++ = *o++) == '\\' && *o)
	  b[-1] = *o++;
      while (*o && ! ISSPACE ((unsigned char) *o));

      *b++ = '\0';
    }
}
/* Prepend the whitespace-separated options in OPTIONS to the argument
   vector of a main program with argument count *PARGC and argument
   vector *PARGV.  Do nothing if OPTIONS is null.

   The new vector and the buffer holding the option copies are
   allocated here and never freed; *PARGV points into them on return.  */
static void
prepend_default_options (char const *options, int *pargc, char ***pargv)
{
  if (options)
    {
      char *buf = xmalloc (strlen (options) + 1);
      /* First pass only counts the options.  */
      int prepended = prepend_args (options, buf, (char **) NULL);
      int argc = *pargc;
      char * const *argv = *pargv;
      char **pp = (char **) xmalloc ((prepended + argc + 1) * sizeof *pp);
      *pargc = prepended + argc;
      *pargv = pp;
      /* Program name first, then the new options, then the original
	 arguments including their terminating null pointer.  */
      *pp++ = *argv++;
      pp += prepend_args (options, buf, pp);
      while ((*pp++ = *argv++))
	continue;
    }
}
1178 main (int argc, char **argv)
1180 char *keys;
1181 size_t keycc, oldcc, keyalloc;
1182 int with_filenames;
1183 int opt, cc, status;
1184 int default_context;
1185 unsigned digit_args_val;
1186 FILE *fp;
1187 extern char *optarg;
1188 extern int optind;
1190 initialize_main (&argc, &argv);
1191 prog = argv[0];
1192 if (prog && strrchr (prog, '/'))
1193 prog = strrchr (prog, '/') + 1;
1195 #if HAVE_LIBZ > 0
1196 if (prog[0] == 'z') {
1197 Zflag = 1;
1198 ++prog;
1200 #endif
1202 #if defined(__MSDOS__) || defined(_WIN32)
1203 /* DOS and MS-Windows use backslashes as directory separators, and usually
1204 have an .exe suffix. They also have case-insensitive filesystems. */
1205 if (prog)
1207 char *p = prog;
1208 char *bslash = strrchr (argv[0], '\\');
1210 if (bslash && bslash >= prog) /* for mixed forward/backslash case */
1211 prog = bslash + 1;
1212 else if (prog == argv[0]
1213 && argv[0][0] && argv[0][1] == ':') /* "c:progname" */
1214 prog = argv[0] + 2;
1216 /* Collapse the letter-case, so `strcmp' could be used hence. */
1217 for ( ; *p; p++)
1218 if (*p >= 'A' && *p <= 'Z')
1219 *p += 'a' - 'A';
1221 /* Remove the .exe extension, if any. */
1222 if ((p = strrchr (prog, '.')) && strcmp (p, ".exe") == 0)
1223 *p = '\0';
1225 #endif
1227 keys = NULL;
1228 keycc = 0;
1229 with_filenames = 0;
1230 eolbyte = '\n';
1231 filename_mask = ~0;
1233 /* The value -1 means to use DEFAULT_CONTEXT. */
1234 out_after = out_before = -1;
1235 /* Default before/after context: chaged by -C/-NUM options */
1236 default_context = 0;
1237 /* Accumulated value of individual digits in a -NUM option */
1238 digit_args_val = 0;
1241 /* Internationalization. */
1242 #if HAVE_SETLOCALE
1243 setlocale (LC_ALL, "");
1244 #endif
1245 #if ENABLE_NLS
1246 bindtextdomain (PACKAGE, LOCALEDIR);
1247 textdomain (PACKAGE);
1248 #endif
1250 prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
1252 while ((opt = getopt_long (argc, argv, short_options, long_options, NULL))
1253 != -1)
1254 switch (opt)
1256 case '0':
1257 case '1':
1258 case '2':
1259 case '3':
1260 case '4':
1261 case '5':
1262 case '6':
1263 case '7':
1264 case '8':
1265 case '9':
1266 digit_args_val = 10 * digit_args_val + opt - '0';
1267 default_context = digit_args_val;
1268 break;
1269 case 'A':
1270 if (optarg)
1272 if (ck_atoi (optarg, &out_after))
1273 fatal (_("invalid context length argument"), 0);
1275 break;
1276 case 'B':
1277 if (optarg)
1279 if (ck_atoi (optarg, &out_before))
1280 fatal (_("invalid context length argument"), 0);
1282 break;
1283 case 'C':
1284 /* Set output match context, but let any explicit leading or
1285 trailing amount specified with -A or -B stand. */
1286 if (optarg)
1288 if (ck_atoi (optarg, &default_context))
1289 fatal (_("invalid context length argument"), 0);
1291 else
1292 default_context = 2;
1293 break;
1294 case 'E':
1295 setmatcher ("egrep");
1296 break;
1297 case 'F':
1298 setmatcher ("fgrep");
1299 break;
1300 case 'G':
1301 setmatcher ("grep");
1302 break;
1303 case 'H':
1304 with_filenames = 1;
1305 break;
1306 case 'I':
1307 binary_files = WITHOUT_MATCH_BINARY_FILES;
1308 break;
1309 case 'O':
1310 only_files = 1;
1311 break;
1312 case 'U':
1313 #if O_BINARY
1314 dos_use_file_type = DOS_BINARY;
1315 #endif
1316 break;
1317 case 'u':
1318 #if O_BINARY
1319 dos_report_unix_offset = 1;
1320 #endif
1321 break;
1322 case 'V':
1323 show_version = 1;
1324 break;
1325 case 'X':
1326 setmatcher (optarg);
1327 break;
1328 case 'a':
1329 binary_files = TEXT_BINARY_FILES;
1330 break;
1331 case 'b':
1332 out_byte = 1;
1333 break;
1334 case 'c':
1335 out_quiet = 1;
1336 count_matches = 1;
1337 break;
1338 case 'd':
1339 if (strcmp (optarg, "read") == 0)
1340 directories = READ_DIRECTORIES;
1341 else if (strcmp (optarg, "skip") == 0)
1342 directories = SKIP_DIRECTORIES;
1343 else if (strcmp (optarg, "recurse") == 0)
1344 directories = RECURSE_DIRECTORIES;
1345 else
1346 fatal (_("unknown directories method"), 0);
1347 break;
1348 case 'e':
1349 cc = strlen (optarg);
1350 keys = xrealloc (keys, keycc + cc + 1);
1351 strcpy (&keys[keycc], optarg);
1352 keycc += cc;
1353 keys[keycc++] = '\n';
1354 break;
1355 case 'f':
1356 fp = strcmp (optarg, "-") != 0 ? fopen (optarg, "r") : stdin;
1357 if (!fp)
1358 fatal (optarg, errno);
1359 for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
1361 keys = xrealloc (keys, keyalloc);
1362 oldcc = keycc;
1363 while (!feof (fp)
1364 && (cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) > 0)
1366 keycc += cc;
1367 if (keycc == keyalloc - 1)
1368 keys = xrealloc (keys, keyalloc *= 2);
1370 if (fp != stdin)
1371 fclose(fp);
1372 /* Append final newline if file ended in non-newline. */
1373 if (oldcc != keycc && keys[keycc - 1] != '\n')
1374 keys[keycc++] = '\n';
1375 break;
1376 case 'h':
1377 no_filenames = 1;
1378 break;
1379 case 'i':
1380 case 'y': /* For old-timers . . . */
1381 match_icase = 1;
1382 break;
1383 case 'L':
1384 /* Like -l, except list files that don't contain matches.
1385 Inspired by the same option in Hume's gre. */
1386 out_quiet = 1;
1387 list_files = -1;
1388 done_on_match = 1;
1389 break;
1390 case 'l':
1391 out_quiet = 1;
1392 list_files = 1;
1393 done_on_match = 1;
1394 break;
1395 case 'n':
1396 out_line = 1;
1397 break;
1398 case 'q':
1399 done_on_match = 1;
1400 out_quiet = 1;
1401 break;
1402 case 'R':
1403 case 'r':
1404 directories = RECURSE_DIRECTORIES;
1405 break;
1406 case 's':
1407 suppress_errors = 1;
1408 break;
1409 case 'v':
1410 out_invert = 1;
1411 break;
1412 case 'w':
1413 match_words = 1;
1414 break;
1415 case 'x':
1416 match_lines = 1;
1417 break;
1418 case 'Z':
1419 #if HAVE_LIBZ > 0
1420 Zflag = 1;
1421 #else
1422 filename_mask = 0;
1423 #endif
1424 break;
1425 case 'z':
1426 eolbyte = '\0';
1427 break;
1428 case BINARY_FILES_OPTION:
1429 if (strcmp (optarg, "binary") == 0)
1430 binary_files = BINARY_BINARY_FILES;
1431 else if (strcmp (optarg, "text") == 0)
1432 binary_files = TEXT_BINARY_FILES;
1433 else if (strcmp (optarg, "without-match") == 0)
1434 binary_files = WITHOUT_MATCH_BINARY_FILES;
1435 else
1436 fatal (_("unknown binary-files type"), 0);
1437 break;
1438 case 0:
1439 /* long options */
1440 break;
1441 default:
1442 usage (2);
1443 break;
1446 if (out_after < 0)
1447 out_after = default_context;
1448 if (out_before < 0)
1449 out_before = default_context;
1451 if (! matcher)
1452 matcher = prog;
1454 if (show_version)
1456 printf (_("%s (GNU grep) %s\n"), matcher, VERSION);
1457 printf ("\n");
1458 printf (_("\
1459 Copyright (C) 1988, 1992-1998, 1999 Free Software Foundation, Inc.\n"));
1460 printf (_("\
1461 This is free software; see the source for copying conditions. There is NO\n\
1462 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"));
1463 printf ("\n");
1464 exit (0);
1467 if (show_help)
1468 usage (0);
1470 if (keys)
1472 if (keycc == 0)
1473 /* No keys were specified (e.g. -f /dev/null). Match nothing. */
1474 out_invert ^= 1;
1475 else
1476 /* Strip trailing newline. */
1477 --keycc;
1479 else
1480 if (optind < argc)
1482 keys = argv[optind++];
1483 keycc = strlen (keys);
1485 else
1486 usage (2);
1488 if (!install_matcher (matcher) && !install_matcher ("default"))
1489 abort ();
1491 (*compile)(keys, keycc);
1493 if ((argc - optind > 1 && !no_filenames) || with_filenames)
1494 out_file = 1;
1496 #if O_BINARY
1497 /* Output is set to binary mode because we shouldn't convert
1498 NL to CR-LF pairs, especially when grepping binary files. */
1499 if (!isatty (1))
1500 SET_BINARY (1);
1501 #endif
1504 if (optind < argc)
1506 status = 1;
1509 char *file = argv[optind];
1510 status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file,
1511 &stats_base);
1513 while ( ++optind < argc);
1515 else
1516 status = grepfile ((char *) NULL, &stats_base);
1518 if (fclose (stdout) == EOF)
1519 error (_("writing output"), errno);
1521 exit (errseen ? 2 : status);