1 /* exclude.c -- exclude file names
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2020 Free Software
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
46 # include "unlocked-io.h"
49 /* Non-GNU systems lack these options, so we don't need to check them. */
51 # define FNM_CASEFOLD 0
54 # define FNM_EXTMATCH 0
56 #ifndef FNM_LEADING_DIR
57 # define FNM_LEADING_DIR 0
60 verify (((EXCLUDE_ANCHORED
| EXCLUDE_INCLUDE
| EXCLUDE_WILDCARDS
)
61 & (FNM_PATHNAME
| FNM_NOESCAPE
| FNM_PERIOD
| FNM_LEADING_DIR
62 | FNM_CASEFOLD
| FNM_EXTMATCH
))
66 /* Exclusion patterns are grouped into a singly-linked list of
67 "exclusion segments". Each segment represents a set of patterns
68 that can be matches using the same algorithm. Non-wildcard
69 patterns are kept in hash tables, to speed up searches. Wildcard
70 patterns are stored as arrays of patterns. */
73 /* An exclude pattern-options pair. The options are fnmatch options
74 ORed with EXCLUDE_* options. */
86 /* An array of pattern-options pairs. */
88 struct exclude_pattern
90 struct patopts
*exclude
;
97 exclude_hash
, /* a hash table of excluded names */
98 exclude_pattern
/* an array of exclude patterns */
101 struct exclude_segment
103 struct exclude_segment
*next
; /* next segment in list */
104 enum exclude_type type
; /* type of this segment */
105 int options
; /* common options for this segment */
108 Hash_table
*table
; /* for type == exclude_hash */
109 struct exclude_pattern pat
; /* for type == exclude_pattern */
113 struct pattern_buffer
115 struct pattern_buffer
*next
;
119 /* The exclude structure keeps a singly-linked list of exclude segments,
120 maintained in reverse order. */
123 struct exclude_segment
*head
;
124 struct pattern_buffer
*patbuf
;
127 /* Register BUF in the pattern buffer list of EX. ADD_FUNC (see
128 add_exclude_file and add_exclude_fp below) can use this function
129 if it modifies the pattern, to ensure the allocated memory will be
130 properly reclaimed upon calling free_exclude. */
132 exclude_add_pattern_buffer (struct exclude
*ex
, char *buf
)
134 struct pattern_buffer
*pbuf
= xmalloc (sizeof *pbuf
);
136 pbuf
->next
= ex
->patbuf
;
140 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
141 Return false if STR definitely does not have wildcards. */
143 fnmatch_pattern_has_wildcards (const char *str
, int options
)
154 if (options
& EXCLUDE_REGEX
)
159 if (options
& EXCLUDE_REGEX
)
162 str
+= ! (options
& FNM_NOESCAPE
) && *str
;
165 case '+': case '@': case '!':
166 if (options
& FNM_EXTMATCH
&& *str
== '(')
170 case '?': case '*': case '[':
180 unescape_pattern (char *str
)
184 q
+= *q
== '\\' && q
[1];
185 while ((*str
++ = *q
++));
188 /* Return a newly allocated and empty exclude list. */
193 return xzalloc (sizeof *new_exclude ());
196 /* Calculate the hash of string. */
198 string_hasher (void const *data
, size_t n_buckets
)
200 char const *p
= data
;
201 return hash_string (p
, n_buckets
);
204 /* Ditto, for case-insensitive hashes */
206 string_hasher_ci (void const *data
, size_t n_buckets
)
208 char const *p
= data
;
209 mbui_iterator_t iter
;
212 for (mbui_init (iter
, p
); mbui_avail (iter
); mbui_advance (iter
))
214 mbchar_t m
= mbui_cur (iter
);
218 wc
= towlower (m
.wc
);
222 value
= (value
* 31 + wc
) % n_buckets
;
228 /* compare two strings for equality */
230 string_compare (void const *data1
, void const *data2
)
232 char const *p1
= data1
;
233 char const *p2
= data2
;
234 return strcmp (p1
, p2
) == 0;
237 /* compare two strings for equality, case-insensitive */
239 string_compare_ci (void const *data1
, void const *data2
)
241 char const *p1
= data1
;
242 char const *p2
= data2
;
243 return mbscasecmp (p1
, p2
) == 0;
247 string_free (void *data
)
252 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
253 to the head of EX. */
255 new_exclude_segment (struct exclude
*ex
, enum exclude_type type
, int options
)
257 struct exclude_segment
*sp
= xzalloc (sizeof (struct exclude_segment
));
259 sp
->options
= options
;
262 case exclude_pattern
:
266 sp
->v
.table
= hash_initialize (0, NULL
,
267 (options
& FNM_CASEFOLD
) ?
270 (options
& FNM_CASEFOLD
) ?
280 /* Free a single exclude segment */
282 free_exclude_segment (struct exclude_segment
*seg
)
288 case exclude_pattern
:
289 for (i
= 0; i
< seg
->v
.pat
.exclude_count
; i
++)
291 if (seg
->v
.pat
.exclude
[i
].options
& EXCLUDE_REGEX
)
292 regfree (&seg
->v
.pat
.exclude
[i
].v
.re
);
294 free (seg
->v
.pat
.exclude
);
298 hash_free (seg
->v
.table
);
304 /* Free the storage associated with an exclude list. */
306 free_exclude (struct exclude
*ex
)
308 struct exclude_segment
*seg
;
309 struct pattern_buffer
*pbuf
;
311 for (seg
= ex
->head
; seg
; )
313 struct exclude_segment
*next
= seg
->next
;
314 free_exclude_segment (seg
);
318 for (pbuf
= ex
->patbuf
; pbuf
; )
320 struct pattern_buffer
*next
= pbuf
->next
;
329 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
330 (unlike fnmatch) wildcards are disabled in PATTERN. */
333 fnmatch_no_wildcards (char const *pattern
, char const *f
, int options
)
335 if (! (options
& FNM_LEADING_DIR
))
336 return ((options
& FNM_CASEFOLD
)
337 ? mbscasecmp (pattern
, f
)
338 : strcmp (pattern
, f
));
339 else if (! (options
& FNM_CASEFOLD
))
341 size_t patlen
= strlen (pattern
);
342 int r
= strncmp (pattern
, f
, patlen
);
353 /* Walk through a copy of F, seeing whether P matches any prefix
356 FIXME: This is an O(N**2) algorithm; it should be O(N).
357 Also, the copy should not be necessary. However, fixing this
358 will probably involve a change to the mbs* API. */
360 char *fcopy
= xstrdup (f
);
363 for (p
= fcopy
; ; *p
++ = '/')
368 r
= mbscasecmp (pattern
, fcopy
);
378 exclude_fnmatch (char const *pattern
, char const *f
, int options
)
380 int (*matcher
) (char const *, char const *, int) =
381 (options
& EXCLUDE_WILDCARDS
383 : fnmatch_no_wildcards
);
384 bool matched
= ((*matcher
) (pattern
, f
, options
) == 0);
387 if (! (options
& EXCLUDE_ANCHORED
))
388 for (p
= f
; *p
&& ! matched
; p
++)
389 if (*p
== '/' && p
[1] != '/')
390 matched
= ((*matcher
) (pattern
, p
+ 1, options
) == 0);
396 exclude_patopts (struct patopts
const *opts
, char const *f
)
398 int options
= opts
->options
;
400 return (options
& EXCLUDE_REGEX
)
401 ? regexec (&opts
->v
.re
, f
, 0, NULL
, 0) == 0
402 : exclude_fnmatch (opts
->v
.pattern
, f
, options
);
405 /* Return true if the exclude_pattern segment SEG matches F. */
408 file_pattern_matches (struct exclude_segment
const *seg
, char const *f
)
410 size_t exclude_count
= seg
->v
.pat
.exclude_count
;
411 struct patopts
const *exclude
= seg
->v
.pat
.exclude
;
414 for (i
= 0; i
< exclude_count
; i
++)
416 if (exclude_patopts (exclude
+ i
, f
))
422 /* Return true if the exclude_hash segment SEG matches F.
423 BUFFER is an auxiliary storage of the same length as F (with nul
424 terminator included) */
426 file_name_matches (struct exclude_segment
const *seg
, char const *f
,
429 int options
= seg
->options
;
430 Hash_table
*table
= seg
->v
.table
;
434 /* initialize the pattern */
439 if (hash_lookup (table
, buffer
))
441 if (options
& FNM_LEADING_DIR
)
443 char *p
= strrchr (buffer
, '/');
453 if (!(options
& EXCLUDE_ANCHORED
))
467 /* Return true if EX excludes F. */
470 excluded_file_name (struct exclude
const *ex
, char const *f
)
472 struct exclude_segment
*seg
;
474 char *filename
= NULL
;
476 /* If no patterns are given, the default is to include. */
480 /* Scan through the segments, reporting the status of the first match.
481 The segments are in reverse order, so this reports the status of
482 the last match in the original option list. */
483 for (seg
= ex
->head
; ; seg
= seg
->next
)
485 if (seg
->type
== exclude_hash
)
488 filename
= xmalloc (strlen (f
) + 1);
489 if (file_name_matches (seg
, f
, filename
))
494 if (file_pattern_matches (seg
, f
))
500 /* If patterns are given but none match, the default is the
501 opposite of the last segment (i.e., the first in the
502 original option list). For example, in the command
503 'grep -r --exclude="a*" --include="*b" pat dir', the
504 first option is --exclude so any file name matching
505 neither a* nor *b is included. */
512 return invert
^ ! (seg
->options
& EXCLUDE_INCLUDE
);
515 /* Append to EX the exclusion PATTERN with OPTIONS. */
518 add_exclude (struct exclude
*ex
, char const *pattern
, int options
)
520 struct exclude_segment
*seg
;
521 struct exclude_pattern
*pat
;
522 struct patopts
*patopts
;
524 if ((options
& (EXCLUDE_REGEX
|EXCLUDE_WILDCARDS
))
525 && fnmatch_pattern_has_wildcards (pattern
, options
))
527 if (! (ex
->head
&& ex
->head
->type
== exclude_pattern
528 && ((ex
->head
->options
& EXCLUDE_INCLUDE
)
529 == (options
& EXCLUDE_INCLUDE
))))
530 new_exclude_segment (ex
, exclude_pattern
, options
);
535 if (pat
->exclude_count
== pat
->exclude_alloc
)
536 pat
->exclude
= x2nrealloc (pat
->exclude
, &pat
->exclude_alloc
,
537 sizeof *pat
->exclude
);
538 patopts
= &pat
->exclude
[pat
->exclude_count
++];
540 patopts
->options
= options
;
541 if (options
& EXCLUDE_REGEX
)
544 int cflags
= REG_NOSUB
|REG_EXTENDED
|
545 ((options
& FNM_CASEFOLD
) ? REG_ICASE
: 0);
547 if (options
& FNM_LEADING_DIR
)
550 size_t len
= strlen (pattern
);
552 while (len
> 0 && ISSLASH (pattern
[len
-1]))
559 tmp
= xmalloc (len
+ 7);
560 memcpy (tmp
, pattern
, len
);
561 strcpy (tmp
+ len
, "(/.*)?");
562 rc
= regcomp (&patopts
->v
.re
, tmp
, cflags
);
567 rc
= regcomp (&patopts
->v
.re
, pattern
, cflags
);
571 pat
->exclude_count
--;
577 if (options
& EXCLUDE_ALLOC
)
579 pattern
= xstrdup (pattern
);
580 exclude_add_pattern_buffer (ex
, (char*) pattern
);
582 patopts
->v
.pattern
= pattern
;
588 int exclude_hash_flags
= (EXCLUDE_INCLUDE
| EXCLUDE_ANCHORED
589 | FNM_LEADING_DIR
| FNM_CASEFOLD
);
590 if (! (ex
->head
&& ex
->head
->type
== exclude_hash
591 && ((ex
->head
->options
& exclude_hash_flags
)
592 == (options
& exclude_hash_flags
))))
593 new_exclude_segment (ex
, exclude_hash
, options
);
596 str
= xstrdup (pattern
);
597 if ((options
& (EXCLUDE_WILDCARDS
| FNM_NOESCAPE
)) == EXCLUDE_WILDCARDS
)
598 unescape_pattern (str
);
599 p
= hash_insert (seg
->v
.table
, str
);
605 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
606 OPTIONS. LINE_END terminates each pattern in the file. If
607 LINE_END is a space character, ignore trailing spaces and empty
608 lines in FP. Return -1 on failure, 0 on success. */
611 add_exclude_fp (void (*add_func
) (struct exclude
*, char const *, int, void *),
612 struct exclude
*ex
, FILE *fp
, int options
,
620 size_t buf_alloc
= 0;
621 size_t buf_count
= 0;
625 while ((c
= getc (fp
)) != EOF
)
627 if (buf_count
== buf_alloc
)
628 buf
= x2realloc (buf
, &buf_alloc
);
629 buf
[buf_count
++] = c
;
635 buf
= xrealloc (buf
, buf_count
+ 1);
636 buf
[buf_count
] = line_end
;
637 lim
= buf
+ buf_count
+ ! (buf_count
== 0 || buf
[buf_count
- 1] == line_end
);
639 exclude_add_pattern_buffer (ex
, buf
);
643 for (p
= buf
; p
< lim
; p
++)
646 char *pattern_end
= p
;
648 if (isspace ((unsigned char) line_end
))
650 for (; ; pattern_end
--)
651 if (pattern_end
== pattern
)
653 else if (! isspace ((unsigned char) pattern_end
[-1]))
658 (*add_func
) (ex
, pattern
, options
, data
);
669 call_addfn (struct exclude
*ex
, char const *pattern
, int options
, void *data
)
671 void (**addfnptr
) (struct exclude
*, char const *, int) = data
;
672 (*addfnptr
) (ex
, pattern
, options
);
676 add_exclude_file (void (*add_func
) (struct exclude
*, char const *, int),
677 struct exclude
*ex
, char const *file_name
, int options
,
680 bool use_stdin
= file_name
[0] == '-' && !file_name
[1];
686 else if (! (in
= fopen (file_name
, "re")))
689 rc
= add_exclude_fp (call_addfn
, ex
, in
, options
, line_end
, &add_func
);
691 if (!use_stdin
&& fclose (in
) != 0)