1 /* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */
3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2017 Kyle Evans <kevans@FreeBSD.org>
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 #include <sys/types.h>
50 #ifndef WITHOUT_FASTMATCH
51 #include "fastmatch.h"
54 #include "freebsd-compat.h"
/*
 * File-scope flag, initially true.  procfile() tests it together with
 * same_file and last_outed in its -B context / separator handling.
 * NOTE(review): the place where it is cleared is not visible in this listing.
 */
56 static bool first_match
= true;
59 * Parsing context; used to hold things like matches made and
/*
 * Members of the parsing context that procfile(), procline() and printline()
 * reach through pc (struct parsec).
 * NOTE(review): the line that opens the struct is not present in this
 * listing (the embedded numbering skips from 59 to 63).
 */
63 regmatch_t matches
[MAX_MATCHES
]; /* Matches made */
64 struct str ln
; /* Current line */
65 size_t lnstart
; /* Position in line */
66 size_t matchidx
; /* Latest match index */
67 int printed
; /* Metadata printed? */
68 bool binary
; /* Binary file? */
/*
 * Forward declarations of the static helpers defined further down.
 * The litexec() prototype sits under WITH_INTERNAL_NOSPEC.
 * NOTE(review): the matching #endif is not present in this listing.
 */
71 #ifdef WITH_INTERNAL_NOSPEC
72 static int litexec(const struct pat
*pat
, const char *string
,
73 size_t nmatch
, regmatch_t pmatch
[]);
75 static int procline(struct parsec
*pc
);
76 static void printline(struct parsec
*pc
, int sep
);
77 static void printline_metadata(struct str
*line
, int sep
);
/*
 * file_matching: test a file name against the file include/exclude patterns.
 *
 * Each of the fpatterns entries of fpattern[] is tried with fnmatch() against
 * both fname as given and the basename() of a private copy of it.  A hit on
 * an entry whose mode is EXCL_PAT is handled separately from other hits.
 *
 * NOTE(review): the embedded line numbers skip (for example 87 -> 89 and
 * 94 -> 106), so the return type, the handling of a failed strdup(), the
 * effect of a pattern hit on ret and the return itself are not shown here.
 */
80 file_matching(const char *fname
)
82 char *fname_base
, *fname_buf
;
/* ret starts out false when finclude is set and true otherwise. */
85 ret
= finclude
? false : true;
/* basename() is given a duplicate, not the string owned by the caller. */
86 fname_buf
= strdup(fname
);
87 if (fname_buf
== NULL
)
89 fname_base
= basename(fname_buf
);
91 for (unsigned int i
= 0; i
< fpatterns
; ++i
) {
92 if (fnmatch(fpattern
[i
].pat
, fname
, 0) == 0 ||
93 fnmatch(fpattern
[i
].pat
, fname_base
, 0) == 0) {
94 if (fpattern
[i
].mode
== EXCL_PAT
) {
/*
 * dir_matching: test a directory name against the directory include/exclude
 * patterns.  Each of the dpatterns entries of dpattern[] is tried with
 * fnmatch() against dname; a hit on an EXCL_PAT entry is singled out.
 *
 * NOTE(review): the embedded line numbers skip (106 -> 110, 112 -> 114 and
 * past 115), so the first half of the if condition, the effect of a hit on
 * ret and the return are not shown here.
 */
106 dir_matching(const char *dname
)
/* ret starts out false when dinclude is set and true otherwise. */
110 ret
= dinclude
? false : true;
112 for (unsigned int i
= 0; i
< dpatterns
; ++i
) {
114 fnmatch(dpattern
[i
].pat
, dname
, 0) == 0) {
115 if (dpattern
[i
].mode
== EXCL_PAT
)
125 * Processes a directory when a recursive search is performed with
126 * the -R option. Each appropriate file is passed to procfile().
/*
 * grep_tree: traverse the hierarchies named in argv with fts(3) and pass
 * entries to procfile(), accumulating its results in c.  If argv[0] is NULL
 * the traversal starts from wd, i.e. the current directory.
 *
 * fts_flags is set to one of FTS_COMFOLLOW, FTS_PHYSICAL or FTS_LOGICAL (the
 * selecting condition is not visible in this listing) and then always gets
 * FTS_NOSTAT | FTS_NOCHDIR added.
 *
 * NOTE(review): the embedded line numbers skip (for example 158 -> 164 and
 * 176 -> 180), so the case labels of the switch, the return type and the
 * return value are not shown here.
 */
129 grep_tree(char **argv
)
135 const char *wd
[] = { ".", NULL
};
141 fts_flags
= FTS_COMFOLLOW
;
144 fts_flags
= FTS_PHYSICAL
;
147 fts_flags
= FTS_LOGICAL
;
151 fts_flags
|= FTS_NOSTAT
| FTS_NOCHDIR
;
153 fts
= fts_open((argv
[0] == NULL
) ?
154 __DECONST(char * const *, wd
) : argv
, fts_flags
, NULL
);
157 while ((p
= fts_read(fts
)) != NULL
) {
158 switch (p
->fts_info
) {
/* Report the error recorded by fts for this entry. */
164 warnx("%s: %s", p
->fts_path
, strerror(p
->fts_errno
));
/*
 * With directory patterns active, prune the entry when either its
 * name or its full path fails dir_matching().
 */
169 if (dexclude
|| dinclude
)
170 if (!dir_matching(p
->fts_name
) ||
171 !dir_matching(p
->fts_path
))
172 fts_set(fts
, p
, FTS_SKIP
);
175 /* Print a warning for recursive directory loop */
176 warnx("warning: %s: recursive directory loop",
180 /* Check for file exclusion/inclusion */
182 if (fexclude
|| finclude
)
183 ok
&= file_matching(p
->fts_path
);
/* Add the result of procfile() for this path to the running total. */
186 c
+= procfile(p
->fts_path
);
196 * Opens a file and processes it. Each file is processed line-by-line
197 * passing the lines to procline().
/*
 * procfile: run the per-line matcher over one input named by fn.
 *
 * A name consisting of a single dash is replaced for display by label when
 * that is set, otherwise by getstr(1).  For other names the file type from
 * stat(2) is compared against dirbehave (DIR_SKIP) and devbehave (DEV_SKIP).
 * Each line obtained from grep_fgetln() is handed to procline(); a result of
 * 0 is treated as a match.  After the loop the name is printed for lflag
 * (c != 0) or Lflag (c == 0), terminated by NUL when nullflag is set and by
 * a newline otherwise, and the getstr(8) message is printed when c is
 * non-zero for a binary file under BINFILE_BIN with none of cflag, lflag,
 * Lflag or qflag set.
 *
 * NOTE(review): the embedded line numbers skip (for example 221 -> 235 and
 * 327 -> 338), so the return type, the opening and closing of the file, the
 * updates of c and the return are not shown in this listing.
 */
200 procfile(const char *fn
)
208 int c
, last_outed
, t
;
209 bool doctx
, printmatch
, same_file
;
211 if (strcmp(fn
, "-") == 0) {
212 fn
= label
!= NULL
? label
: getstr(1);
215 if (!stat(fn
, &sb
)) {
216 /* Check if we need to process the file */
217 s
= sb
.st_mode
& S_IFMT
;
218 if (s
== S_IFDIR
&& dirbehave
== DIR_SKIP
)
220 if ((s
== S_IFIFO
|| s
== S_IFCHR
|| s
== S_IFBLK
221 || s
== S_IFSOCK
) && devbehave
== DEV_SKIP
)
/* Give the parsing context its own copy of the name. */
235 pc
.ln
.file
= grep_malloc(strlen(fn
) + 1);
236 strcpy(pc
.ln
.file
, fn
);
241 pc
.binary
= f
->binary
;
248 if ((pc
.binary
&& binbehave
== BINFILE_BIN
) || cflag
|| qflag
||
251 if (printmatch
&& (Aflag
!= 0 || Bflag
!= 0))
/* Keep reading lines; once c is non-zero, lflag or qflag ends the loop. */
255 for (c
= 0; c
== 0 || !(lflag
|| qflag
); ) {
256 /* Reset per-line statistics */
/* Move the offset past the previous line (the +1 is presumably its terminator). */
261 pc
.ln
.off
+= pc
.ln
.len
+ 1;
262 if ((pc
.ln
.dat
= grep_fgetln(f
, &pc
.ln
.len
)) == NULL
||
/* Does the line just read end with the fileeol character? */
266 if (pc
.ln
.len
> 0 && pc
.ln
.dat
[pc
.ln
.len
- 1] == fileeol
)
270 /* Return if we need to skip a binary file */
271 if (pc
.binary
&& binbehave
== BINFILE_SKIP
) {
278 if ((t
= procline(&pc
)) == 0)
281 /* Deal with any -B context or context separators */
282 if (t
== 0 && doctx
) {
283 if (!first_match
&& (!same_file
|| last_outed
> 0))
289 /* Print the matching line, but only if not quiet/binary */
290 if (t
== 0 && printmatch
) {
292 while (pc
.matchidx
>= MAX_MATCHES
) {
293 /* Reset matchidx and try again */
295 if (procline(&pc
) == 0)
304 if (t
!= 0 && doctx
) {
305 /* Deal with any -A context */
307 grep_printline(&pc
.ln
, '-');
313 * Enqueue non-matching lines for -B context.
314 * If we're not actually doing -B context or if
315 * the enqueue resulted in a line being rotated
316 * out, then go ahead and increment last_outed
317 * to signify a gap between context/match.
319 if (Bflag
== 0 || (Bflag
> 0 && enqueue(ln
)))
324 /* Count the matches if we have a match limit */
325 if (t
== 0 && mflag
) {
327 if (mflag
&& mcount
<= 0)
338 printf("%s:", pc
.ln
.file
);
341 if (lflag
&& !qflag
&& c
!= 0)
342 printf("%s%c", fn
, nullflag
? 0 : '\n');
343 if (Lflag
&& !qflag
&& c
== 0)
344 printf("%s%c", fn
, nullflag
? 0 : '\n');
345 if (c
&& !cflag
&& !lflag
&& !Lflag
&&
346 binbehave
== BINFILE_BIN
&& f
->binary
&& !qflag
)
347 printf(getstr(8), fn
);
354 #ifdef WITH_INTERNAL_NOSPEC
356 * Internal implementation of literal string search within a string, modeled
357 * after regexec(3), for use when the regex(3) implementation doesn't offer
358 * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD
359 * config, but in other scenarios such as building against libgnuregex or on
360 * some non-FreeBSD OSes.
/*
 * litexec: literal substring search over a bounded part of string.
 *
 * On entry pmatch[0].rm_so and pmatch[0].rm_eo are read as the start offset
 * (ofs) and the length of the subject (stringlen); REG_NOMATCH is returned
 * at once when ofs >= stringlen.  The search runs on a strndup() copy of the
 * first stringlen bytes, using strcasestr when cflags has REG_ICASE set.
 * Each hit is written to pmatch[idx] and the search resumes one byte past
 * the start of that hit.  The result is 0 when n ends up greater than zero
 * and REG_NOMATCH otherwise.
 *
 * NOTE(review): the embedded line numbers skip (for example 372 -> 376 and
 * 398 -> 401), so the case-sensitive search function, the updates of idx and
 * n, the loop exits and the release of subject are not shown here.
 */
363 litexec(const struct pat
*pat
, const char *string
, size_t nmatch
,
366 char *(*strstr_fn
)(const char *, const char *);
369 size_t idx
, n
, ofs
, stringlen
;
371 if (cflags
& REG_ICASE
)
372 strstr_fn
= strcasestr
;
/* The caller passes the search window in through pmatch[0]. */
376 ofs
= pmatch
[0].rm_so
;
377 stringlen
= pmatch
[0].rm_eo
;
378 if (ofs
>= stringlen
)
379 return (REG_NOMATCH
);
380 subject
= strndup(string
, stringlen
);
383 for (n
= 0; ofs
< stringlen
;) {
384 search
= (subject
+ ofs
);
/* Is the pattern longer than what is left of the subject? */
385 if ((unsigned long)pat
->len
> strlen(search
))
387 sub
= strstr_fn(search
, pat
->pat
);
389 * Ignoring the empty string possibility due to context: grep optimizes
390 * for empty patterns and will never reach this point.
395 /* Fill in pmatch if necessary */
/* Offsets are relative to the start of the subject, not to search. */
397 pmatch
[idx
].rm_so
= ofs
+ (sub
- search
);
398 pmatch
[idx
].rm_eo
= pmatch
[idx
].rm_so
+ pat
->len
;
/* Resume one byte past the start of this hit. */
401 ofs
= pmatch
[idx
].rm_so
+ 1;
403 /* We only needed to know if we match or not */
/* Mark the pmatch slots from idx up to nmatch as unused (-1). */
407 if (n
> 0 && nmatch
> 0)
408 for (n
= idx
; n
< nmatch
; ++n
)
409 pmatch
[n
].rm_so
= pmatch
[n
].rm_eo
= -1;
411 return (n
> 0 ? 0 : REG_NOMATCH
);
413 #endif /* WITH_INTERNAL_NOSPEC */
/*
 * iswword: non-zero when the wide character x can be part of a word, that
 * is, when iswalnum() accepts it or it is an underscore.
 *
 * This is a static inline function rather than a function-like macro so
 * that the argument is evaluated exactly once; the macro form evaluated it
 * a second time whenever iswalnum() returned zero.
 */
static inline int
iswword(wint_t x)
{
	return (iswalnum(x) || x == (wint_t)L'_');
}
418 * Processes a line comparing it with the specified patterns. Each pattern
419 * is looped to be compared along with the full string, saving each and every
420 * match, which is necessary to colorize the output and to count the
421 * matches. The matching lines are passed to printline() to display the
422 * appropriate output.
/*
 * procline: match the current line (pc->ln) against all patterns.
 *
 * The outer loop walks the line by offset st; the inner loop tries each of
 * the patterns through litexec(), fastexec() or regexec(), depending on the
 * WITH_INTERNAL_NOSPEC / WITHOUT_FASTMATCH build options, grepbehave and
 * fg_pattern[i].pattern.  A hit is checked against xflag (it must span
 * offset 0 to pc->ln.len) and, for wflag or fg_pattern[i].word, against the
 * characters on either side of it.  Hits are stored in pc->matches[]; a hit
 * that starts earlier than the most recently stored one, or starts at the
 * same offset and is longer, overwrites that entry instead of being
 * appended.  The local matchidx is loaded from pc->matchidx at the top and
 * written back at the bottom.
 *
 * NOTE(review): the embedded line numbers skip throughout (for example
 * 442 -> 451, 515 -> 525 and 584 onward), so the return type, the
 * initialisation of st and retry, most branch bodies and the return are not
 * shown in this listing.  procfile() treats a return of 0 as a match.
 */
425 procline(struct parsec
*pc
)
427 regmatch_t pmatch
, lastmatch
, chkmatch
;
428 wchar_t wbegin
, wend
;
431 int c
= 0, r
= 0, lastmatches
= 0, leflags
= eflags
;
432 size_t startm
= 0, matchidx
;
435 matchidx
= pc
->matchidx
;
437 /* Special case: empty pattern with -w flag, check first character */
438 if (matchall
&& wflag
) {
442 if (sscanf(&pc
->ln
.dat
[0], "%lc", &wend
) != 1 || iswword(wend
))
451 /* Initialize to avoid a false positive warning from GCC. */
452 lastmatch
.rm_so
= lastmatch
.rm_eo
= 0;
454 /* Loop to process the whole line */
455 while (st
<= pc
->ln
.len
) {
/* Past offset 0 and not right after a fileeol: add REG_NOTBOL. */
459 if (st
> 0 && pc
->ln
.dat
[st
- 1] != fileeol
)
460 leflags
|= REG_NOTBOL
;
461 /* Loop to compare with all the patterns */
462 for (i
= 0; i
< patterns
; i
++) {
/* pmatch carries the end of the search window in; litexec() reads it as such. */
464 pmatch
.rm_eo
= pc
->ln
.len
;
465 #ifdef WITH_INTERNAL_NOSPEC
466 if (grepbehave
== GREP_FIXED
)
467 r
= litexec(&pattern
[i
], pc
->ln
.dat
, 1, &pmatch
);
470 #ifndef WITHOUT_FASTMATCH
471 if (fg_pattern
[i
].pattern
)
472 r
= fastexec(&fg_pattern
[i
],
473 pc
->ln
.dat
, 1, &pmatch
, leflags
);
476 r
= regexec(&r_pattern
[i
], pc
->ln
.dat
, 1,
480 /* Check for full match */
481 if (xflag
&& (pmatch
.rm_so
!= 0 ||
482 (size_t)pmatch
.rm_eo
!= pc
->ln
.len
))
484 /* Check for whole word match */
485 #ifndef WITHOUT_FASTMATCH
486 if (wflag
|| fg_pattern
[i
].word
) {
/*
 * Both neighbours default to a space and are then read with sscanf()
 * from the byte before rm_so and from the byte at rm_eo.
 */
490 wbegin
= wend
= L
' ';
491 if (pmatch
.rm_so
!= 0 &&
492 sscanf(&pc
->ln
.dat
[pmatch
.rm_so
- 1],
493 "%lc", &wbegin
) != 1)
495 else if ((size_t)pmatch
.rm_eo
!=
497 sscanf(&pc
->ln
.dat
[pmatch
.rm_eo
],
500 else if (iswword(wbegin
) ||
504 * If we're doing whole word matching and we
505 * matched once, then we should try the pattern
506 * again after advancing just past the start of
507 * the earliest match. This allows the pattern
508 * to match later on in the line and possibly
509 * still match a whole word.
511 if (r
== REG_NOMATCH
&&
512 (retry
== pc
->lnstart
||
513 (unsigned int)pmatch
.rm_so
+ 1 < retry
))
514 retry
= pmatch
.rm_so
+ 1;
515 if (r
== REG_NOMATCH
)
525 * Replace previous match if the new one is earlier
526 * and/or longer. This will lead to some amount of
527 * extra work if -o/--color are specified, but it's
528 * worth it from a correctness point of view.
530 if (matchidx
> startm
) {
531 chkmatch
= pc
->matches
[matchidx
- 1];
532 if (pmatch
.rm_so
< chkmatch
.rm_so
||
533 (pmatch
.rm_so
== chkmatch
.rm_so
&&
534 (pmatch
.rm_eo
- pmatch
.rm_so
) >
535 (chkmatch
.rm_eo
- chkmatch
.rm_so
))) {
536 pc
->matches
[matchidx
- 1] = pmatch
;
540 /* Advance as normal if not */
541 pc
->matches
[matchidx
++] = pmatch
;
544 /* avoid excessive matching - skip further patterns */
545 if ((color
== NULL
&& !oflag
) || qflag
|| lflag
||
546 matchidx
>= MAX_MATCHES
) {
554 * Advance to just past the start of the earliest match, try
555 * again just in case we still have a chance to match later in
558 if (lastmatches
== 0 && retry
> pc
->lnstart
) {
563 /* One pass if we are not recording matches */
564 if (!wflag
&& ((color
== NULL
&& !oflag
) || qflag
|| lflag
|| Lflag
))
567 /* If we didn't have any matches or REG_NOSUB set */
568 if (lastmatches
== 0 || (cflags
& REG_NOSUB
))
571 if (lastmatches
== 0)
574 else if (st
== nst
&& lastmatch
.rm_so
== lastmatch
.rm_eo
)
575 /* Zero-length match -- advance one more so we don't get stuck */
578 /* Advance st based on previous matches */
583 /* Reflect the new matchidx in the context */
584 pc
->matchidx
= matchidx
;
591 * Safe malloc() for internal use.
/*
 * grep_malloc: call malloc(size) and test the result against NULL.
 * NOTE(review): the action taken on failure and the return of ptr are on
 * lines not present in this listing (numbering skips 594 -> 598 and stops).
 */
594 grep_malloc(size_t size
)
598 if ((ptr
= malloc(size
)) == NULL
)
604 * Safe calloc() for internal use.
/*
 * grep_calloc: call calloc(nmemb, size) and test the result against NULL.
 * NOTE(review): the action taken on failure and the return of ptr are on
 * lines not present in this listing.
 */
607 grep_calloc(size_t nmemb
, size_t size
)
611 if ((ptr
= calloc(nmemb
, size
)) == NULL
)
617 * Safe realloc() for internal use.
/*
 * grep_realloc: call realloc(ptr, size) and test the result against NULL.
 * The parameter ptr itself is overwritten with the result before the test,
 * so the old pointer value is no longer held here when realloc() fails.
 * NOTE(review): the action taken on failure and the return are on lines not
 * present in this listing.
 */
620 grep_realloc(void *ptr
, size_t size
)
623 if ((ptr
= realloc(ptr
, size
)) == NULL
)
629 * Safe strdup() for internal use.
/*
 * grep_strdup: call strdup(str) and test the result against NULL.
 * NOTE(review): the action taken on failure and the return of ret are on
 * lines not present in this listing.
 */
632 grep_strdup(const char *str
)
636 if ((ret
= strdup(str
)) == NULL
)
642 * Print an entire line as-is, there are no inline matches to consider. This is
643 * used for printing context.
/*
 * grep_printline: write one line to stdout without any match highlighting.
 * The metadata is emitted first through printline_metadata(line, sep), then
 * line->len bytes starting at line->dat are written with fwrite().
 * NOTE(review): the lines after the fwrite() call, including the closing
 * brace, are not present in this listing.
 */
645 void grep_printline(struct str
*line
, int sep
) {
646 printline_metadata(line
, sep
);
647 fwrite(line
->dat
, line
->len
, 1, stdout
);
/*
 * printline_metadata: write the per-line prefix fields to stdout.
 *
 * The visible statements print line->file (once through fputs() and once
 * through printf()), line->line_no as a decimal number, and the sum
 * line->off + line->boff as a long long.
 *
 * NOTE(review): the embedded line numbers skip (652 -> 659 -> 662 -> 669 ->
 * 675), so the conditions selecting each field and the use of sep are not
 * shown in this listing.
 */
652 printline_metadata(struct str
*line
, int sep
)
659 fputs(line
->file
, stdout
);
662 printf("%s", line
->file
);
669 printf("%d", line
->line_no
);
675 printf("%lld", (long long)(line
->off
+ line
->boff
));
683 * Prints a matching line according to the command line options.
/*
 * printline: output the line held in pc->ln, using the matches recorded in
 * pc->matches[0 .. pc->matchidx).
 *
 * When oflag or color is set and at least one match was recorded, the
 * matches are walked in order.  Without oflag the metadata is printed once,
 * and only if pc->printed is 0.  For each match pc->ln.boff is set to the
 * start of the match before printline_metadata() is called, the text from
 * offset a up to the match is written, and the match itself is written
 * between two escape sequences, the first of which is built from color.
 * Whatever remains from offset a to pc->ln.len is written afterwards.
 * The closing grep_printline() call prints the line as-is.
 *
 * NOTE(review): the embedded line numbers skip (for example 706 -> 709 and
 * 723 -> 729), so the return type, the updates of a, the conditions that
 * choose between the fwrite()/fprintf() calls and the branch that reaches
 * grep_printline() are not shown in this listing.
 */
686 printline(struct parsec
*pc
, int sep
)
692 /* If matchall, everything matches but don't actually print for -o */
693 if (oflag
&& matchall
)
696 matchidx
= pc
->matchidx
;
699 if ((oflag
|| color
) && matchidx
> 0) {
700 /* Only print metadata once per line if --color */
701 if (!oflag
&& pc
->printed
== 0)
702 printline_metadata(&pc
->ln
, sep
);
703 for (i
= 0; i
< matchidx
; i
++) {
704 match
= pc
->matches
[i
];
705 /* Don't output zero length matches */
706 if (match
.rm_so
== match
.rm_eo
)
709 * Metadata is printed on a per-line basis, so every
710 * match gets file metadata with the -o flag.
713 pc
->ln
.boff
= match
.rm_so
;
714 printline_metadata(&pc
->ln
, sep
);
716 fwrite(pc
->ln
.dat
+ a
, match
.rm_so
- a
, 1,
719 fprintf(stdout
, "\33[%sm\33[K", color
);
720 fwrite(pc
->ln
.dat
+ match
.rm_so
,
721 match
.rm_eo
- match
.rm_so
, 1, stdout
);
723 fprintf(stdout
, "\33[m\33[K");
729 if (pc
->ln
.len
- a
> 0)
730 fwrite(pc
->ln
.dat
+ a
, pc
->ln
.len
- a
, 1,
735 grep_printline(&pc
->ln
, sep
);