exp2l: Work around a NetBSD 10.0/i386 bug.
[gnulib.git] / lib / exclude.c
blob568d1ec1c677a87a87757c827311358fa9226e1c
1 /* exclude.c -- exclude file names
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2024 Free Software
4 Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
24 #include <config.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #include <regex.h>
29 #include <stddef.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <uchar.h>
35 #include "exclude.h"
36 #include "filename.h"
37 #include <fnmatch.h>
38 #include "hash.h"
39 #if GNULIB_MCEL_PREFER
40 # include "mcel.h"
41 #else
42 # include "mbuiter.h"
43 #endif
44 #include "xalloc.h"
46 #if GNULIB_EXCLUDE_SINGLE_THREAD
47 # include "unlocked-io.h"
48 #endif
50 /* Non-GNU systems lack these options, so we don't need to check them. */
51 #ifndef FNM_CASEFOLD
52 # define FNM_CASEFOLD 0
53 #endif
54 #ifndef FNM_EXTMATCH
55 # define FNM_EXTMATCH 0
56 #endif
57 #ifndef FNM_LEADING_DIR
58 # define FNM_LEADING_DIR 0
59 #endif
61 static_assert (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
62 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
63 | FNM_CASEFOLD | FNM_EXTMATCH))
64 == 0);
67 /* Exclusion patterns are grouped into a singly-linked list of
68 "exclusion segments". Each segment represents a set of patterns
69 that can be matches using the same algorithm. Non-wildcard
70 patterns are kept in hash tables, to speed up searches. Wildcard
71 patterns are stored as arrays of patterns. */
74 /* An exclude pattern-options pair. The options are fnmatch options
75 ORed with EXCLUDE_* options. */
77 struct patopts
79 int options;
80 union
82 char const *pattern;
83 regex_t re;
84 } v;
87 /* An array of pattern-options pairs. */
89 struct exclude_pattern
91 struct patopts *exclude;
92 idx_t exclude_alloc;
93 idx_t exclude_count;
96 enum exclude_type
98 exclude_hash, /* a hash table of excluded names */
99 exclude_pattern /* an array of exclude patterns */
102 struct exclude_segment
104 struct exclude_segment *next; /* next segment in list */
105 enum exclude_type type; /* type of this segment */
106 int options; /* common options for this segment */
107 union
109 Hash_table *table; /* for type == exclude_hash */
110 struct exclude_pattern pat; /* for type == exclude_pattern */
111 } v;
114 struct pattern_buffer
116 struct pattern_buffer *next;
117 char *base;
120 /* The exclude structure keeps a singly-linked list of exclude segments,
121 maintained in reverse order. */
122 struct exclude
124 struct exclude_segment *head;
125 struct pattern_buffer *patbuf;
128 /* Register BUF in the pattern buffer list of EX. ADD_FUNC (see
129 add_exclude_file and add_exclude_fp below) can use this function
130 if it modifies the pattern, to ensure the allocated memory will be
131 properly reclaimed upon calling free_exclude. */
132 void
133 exclude_add_pattern_buffer (struct exclude *ex, char *buf)
135 struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
136 pbuf->base = buf;
137 pbuf->next = ex->patbuf;
138 ex->patbuf = pbuf;
141 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
142 Return false if STR definitely does not have wildcards. */
143 bool
144 fnmatch_pattern_has_wildcards (const char *str, int options)
146 while (true)
148 switch (*str++)
150 case '.':
151 case '{':
152 case '}':
153 case '(':
154 case ')':
155 if (options & EXCLUDE_REGEX)
156 return true;
157 break;
159 case '\\':
160 if (options & EXCLUDE_REGEX)
161 continue;
162 else
163 str += ! (options & FNM_NOESCAPE) && *str;
164 break;
166 case '+': case '@': case '!':
167 if (options & FNM_EXTMATCH && *str == '(')
168 return true;
169 break;
171 case '?': case '*': case '[':
172 return true;
174 case '\0':
175 return false;
180 static void
181 unescape_pattern (char *str)
183 char const *q = str;
185 q += *q == '\\' && q[1];
186 while ((*str++ = *q++));
189 /* Return a newly allocated and empty exclude list. */
191 struct exclude *
192 new_exclude (void)
194 return xzalloc (sizeof *new_exclude ());
197 /* Calculate the hash of string. */
198 static size_t
199 string_hasher (void const *data, size_t n_buckets)
201 return hash_string (data, n_buckets);
204 /* Ditto, for case-insensitive hashes */
205 static size_t
206 string_hasher_ci (void const *data, size_t n_buckets)
208 char const *p = data;
209 size_t value = 0;
211 #if GNULIB_MCEL_PREFER
212 while (*p)
214 mcel_t g = mcel_scanz (p);
215 value = value * 31 + (c32tolower (g.ch) - g.err);
216 p += g.len;
218 #else
219 mbui_iterator_t iter;
221 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
223 mbchar_t m = mbui_cur (iter);
224 char32_t wc;
226 if (m.wc_valid)
227 wc = c32tolower (m.wc);
228 else
229 wc = *m.ptr;
231 value = value * 31 + wc;
233 #endif
235 return value % n_buckets;
238 /* compare two strings for equality */
239 static bool
240 string_compare (void const *data1, void const *data2)
242 return strcmp (data1, data2) == 0;
245 /* compare two strings for equality, case-insensitive */
246 static bool
247 string_compare_ci (void const *data1, void const *data2)
249 return mbscasecmp (data1, data2) == 0;
252 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
253 to the head of EX. */
254 static void
255 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
257 struct exclude_segment *sp = xmalloc (sizeof (struct exclude_segment));
258 sp->type = type;
259 sp->options = options;
260 switch (type)
262 case exclude_pattern:
263 sp->v.pat = (struct exclude_pattern) {0};
264 break;
266 case exclude_hash:
267 sp->v.table = hash_initialize (0, nullptr,
268 (options & FNM_CASEFOLD
269 ? string_hasher_ci
270 : string_hasher),
271 (options & FNM_CASEFOLD
272 ? string_compare_ci
273 : string_compare),
274 free);
275 break;
277 sp->next = ex->head;
278 ex->head = sp;
281 /* Free a single exclude segment */
282 static void
283 free_exclude_segment (struct exclude_segment *seg)
285 switch (seg->type)
287 case exclude_pattern:
288 for (idx_t i = 0; i < seg->v.pat.exclude_count; i++)
289 if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
290 regfree (&seg->v.pat.exclude[i].v.re);
291 free (seg->v.pat.exclude);
292 break;
294 case exclude_hash:
295 hash_free (seg->v.table);
296 break;
298 free (seg);
301 /* Free the storage associated with an exclude list. */
302 void
303 free_exclude (struct exclude *ex)
305 for (struct exclude_segment *seg = ex->head; seg; )
307 struct exclude_segment *next = seg->next;
308 free_exclude_segment (seg);
309 seg = next;
312 for (struct pattern_buffer *pbuf = ex->patbuf; pbuf; )
314 struct pattern_buffer *next = pbuf->next;
315 free (pbuf->base);
316 free (pbuf);
317 pbuf = next;
320 free (ex);
323 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
324 (unlike fnmatch) wildcards are disabled in PATTERN. */
326 static int
327 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
329 if (! (options & FNM_LEADING_DIR))
330 return ((options & FNM_CASEFOLD)
331 ? mbscasecmp (pattern, f)
332 : strcmp (pattern, f));
333 else if (! (options & FNM_CASEFOLD))
335 idx_t patlen = strlen (pattern);
336 int r = strncmp (pattern, f, patlen);
337 if (! r)
339 r = f[patlen];
340 if (r == '/')
341 r = 0;
343 return r;
345 else
347 /* Walk through a copy of F, seeing whether P matches any prefix
348 of F.
350 FIXME: This is an O(N**2) algorithm; it should be O(N).
351 Also, the copy should not be necessary. However, fixing this
352 will probably involve a change to the mbs* API. */
354 char *fcopy = xstrdup (f);
355 int r;
356 for (char *p = fcopy; ; *p++ = '/')
358 p = strchr (p, '/');
359 if (p)
360 *p = '\0';
361 r = mbscasecmp (pattern, fcopy);
362 if (!p || r <= 0)
363 break;
365 free (fcopy);
366 return r;
370 bool
371 exclude_fnmatch (char const *pattern, char const *f, int options)
373 int (*matcher) (char const *, char const *, int) =
374 (options & EXCLUDE_WILDCARDS
375 ? fnmatch
376 : fnmatch_no_wildcards);
377 bool matched = matcher (pattern, f, options) == 0;
379 if (! (options & EXCLUDE_ANCHORED))
380 for (char const *p = f; *p && ! matched; p++)
381 if (*p == '/' && p[1] != '/')
382 matched = matcher (pattern, p + 1, options) == 0;
384 return matched;
387 static bool
388 exclude_patopts (struct patopts const *opts, char const *f)
390 int options = opts->options;
392 return (options & EXCLUDE_REGEX
393 ? regexec (&opts->v.re, f, 0, nullptr, 0) == 0
394 : exclude_fnmatch (opts->v.pattern, f, options));
397 /* Return true if the exclude_pattern segment SEG matches F. */
399 static bool
400 file_pattern_matches (struct exclude_segment const *seg, char const *f)
402 idx_t exclude_count = seg->v.pat.exclude_count;
403 struct patopts const *exclude = seg->v.pat.exclude;
405 for (idx_t i = 0; i < exclude_count; i++)
406 if (exclude_patopts (exclude + i, f))
407 return true;
408 return false;
411 /* Return true if the exclude_hash segment SEG matches F.
412 BUFFER is an auxiliary storage of the same length as F (with nul
413 terminator included) */
414 static bool
415 file_name_matches (struct exclude_segment const *seg, char const *f,
416 char *buffer)
418 int options = seg->options;
419 Hash_table *table = seg->v.table;
423 /* initialize the pattern */
424 strcpy (buffer, f);
426 while (true)
428 if (hash_lookup (table, buffer))
429 return true;
430 if (options & FNM_LEADING_DIR)
432 char *p = strrchr (buffer, '/');
433 if (p)
435 *p = '\0';
436 continue;
439 break;
442 if (!(options & EXCLUDE_ANCHORED))
444 f = strchr (f, '/');
445 if (f)
446 f++;
448 else
449 break;
451 while (f);
453 return false;
456 /* Return true if EX excludes F. */
458 bool
459 excluded_file_name (struct exclude const *ex, char const *f)
461 /* If no patterns are given, the default is to include. */
462 if (!ex->head)
463 return false;
465 bool invert = false;
466 char *filename = nullptr;
468 /* Scan through the segments, reporting the status of the first match.
469 The segments are in reverse order, so this reports the status of
470 the last match in the original option list. */
471 struct exclude_segment *seg;
472 for (seg = ex->head; ; seg = seg->next)
474 if (seg->type == exclude_hash)
476 if (!filename)
477 filename = xmalloc (strlen (f) + 1);
478 if (file_name_matches (seg, f, filename))
479 break;
481 else
483 if (file_pattern_matches (seg, f))
484 break;
487 if (! seg->next)
489 /* If patterns are given but none match, the default is the
490 opposite of the last segment (i.e., the first in the
491 original option list). For example, in the command
492 'grep -r --exclude="a*" --include="*b" pat dir', the
493 first option is --exclude so any file name matching
494 neither a* nor *b is included. */
495 invert = true;
496 break;
500 free (filename);
501 return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
504 /* Append to EX the exclusion PATTERN with OPTIONS. */
506 void
507 add_exclude (struct exclude *ex, char const *pattern, int options)
509 if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
510 && fnmatch_pattern_has_wildcards (pattern, options))
512 if (! (ex->head && ex->head->type == exclude_pattern
513 && ((ex->head->options & EXCLUDE_INCLUDE)
514 == (options & EXCLUDE_INCLUDE))))
515 new_exclude_segment (ex, exclude_pattern, options);
517 struct exclude_pattern *pat = &ex->head->v.pat;
518 if (pat->exclude_count == pat->exclude_alloc)
519 pat->exclude = xpalloc (pat->exclude, &pat->exclude_alloc, 1, -1,
520 sizeof *pat->exclude);
521 struct patopts *patopts = &pat->exclude[pat->exclude_count++];
523 patopts->options = options;
524 if (options & EXCLUDE_REGEX)
526 int rc;
527 int cflags = (REG_NOSUB | REG_EXTENDED
528 | (options & FNM_CASEFOLD ? REG_ICASE : 0));
530 if (! (options & FNM_LEADING_DIR))
531 rc = regcomp (&patopts->v.re, pattern, cflags);
532 else
533 for (idx_t len = strlen (pattern); ; len--)
535 if (len == 0)
537 rc = 1;
538 break;
540 if (!ISSLASH (pattern[len - 1]))
542 static char const patsuffix[] = "(/.*)?";
543 char *tmp = ximalloc (len + sizeof patsuffix);
544 memcpy (tmp, pattern, len);
545 strcpy (tmp + len, patsuffix);
546 rc = regcomp (&patopts->v.re, tmp, cflags);
547 free (tmp);
548 break;
552 if (rc)
554 pat->exclude_count--;
555 return;
558 else
560 if (options & EXCLUDE_ALLOC)
562 char *dup = xstrdup (pattern);
563 pattern = dup;
564 exclude_add_pattern_buffer (ex, dup);
566 patopts->v.pattern = pattern;
569 else
571 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
572 | FNM_LEADING_DIR | FNM_CASEFOLD);
573 if (! (ex->head && ex->head->type == exclude_hash
574 && ((ex->head->options & exclude_hash_flags)
575 == (options & exclude_hash_flags))))
576 new_exclude_segment (ex, exclude_hash, options);
578 char *str = xstrdup (pattern);
579 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
580 unescape_pattern (str);
581 if (hash_insert (ex->head->v.table, str) != str)
582 free (str);
586 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
587 OPTIONS. LINE_END terminates each pattern in the file. If
588 LINE_END is a space character, ignore trailing spaces and empty
589 lines in FP. Return -1 (setting errno) on failure, 0 on success. */
592 add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
593 struct exclude *ex, FILE *fp, int options,
594 char line_end, void *data)
596 char *buf = nullptr;
597 idx_t buf_alloc = 0;
598 idx_t buf_count = 0;
600 for (int c; (c = getc (fp)) != EOF; )
602 if (buf_count == buf_alloc)
603 buf = xpalloc (buf, &buf_alloc, 1, -1, 1);
604 buf[buf_count++] = c;
607 int e = ferror (fp) ? errno : 0;
609 buf = xirealloc (buf, buf_count + 1);
610 buf[buf_count] = line_end;
611 char const *lim = (buf + buf_count
612 + ! (buf_count == 0 || buf[buf_count - 1] == line_end));
614 exclude_add_pattern_buffer (ex, buf);
616 char *pattern = buf;
618 for (char *p = buf; p < lim; p++)
619 if (*p == line_end)
621 char *pattern_end = p;
623 if (isspace ((unsigned char) line_end))
625 /* Assume that no multi-byte character has a trailing byte
626 that satisfies isspace, and that nobody cares about
627 trailing white space containing non-single-byte characters.
628 If either assumption turns out to be false, presumably
629 the code should be changed to scan forward through the
630 entire pattern, one multi-byte character at a time. */
631 for (; ; pattern_end--)
632 if (pattern_end == pattern)
633 goto next_pattern;
634 else if (! isspace ((unsigned char) pattern_end[-1]))
635 break;
638 *pattern_end = '\0';
639 add_func (ex, pattern, options, data);
641 next_pattern:
642 pattern = p + 1;
645 errno = e;
646 return e ? -1 : 0;
649 static void
650 call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
652 void (**addfnptr) (struct exclude *, char const *, int) = data;
653 (*addfnptr) (ex, pattern, options);
657 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
658 struct exclude *ex, char const *file_name, int options,
659 char line_end)
661 if (strcmp (file_name, "-") == 0)
662 return add_exclude_fp (call_addfn, ex, stdin, options, line_end, &add_func);
664 FILE *in = fopen (file_name, "re");
665 if (!in)
666 return -1;
667 int rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
668 int e = errno;
669 if (fclose (in) < 0)
670 return -1;
671 errno = e;
672 return rc;