unistr/u{8,16,32}-uctomb: Avoid possible trouble with huge strings.
[gnulib.git] / lib / exclude.c
blob2b57a9b14485a827ec22e97f456d0e7a0da2c330
1 /* exclude.c -- exclude file names
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2020 Free Software
4 Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
24 #include <config.h>
26 #include <stdbool.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <stddef.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wctype.h>
35 #include <regex.h>
37 #include "exclude.h"
38 #include "hash.h"
39 #include "mbuiter.h"
40 #include "fnmatch.h"
41 #include "xalloc.h"
42 #include "verify.h"
43 #include "filename.h"
45 #if USE_UNLOCKED_IO
46 # include "unlocked-io.h"
47 #endif
49 /* Non-GNU systems lack these options, so we don't need to check them. */
50 #ifndef FNM_CASEFOLD
51 # define FNM_CASEFOLD 0
52 #endif
53 #ifndef FNM_EXTMATCH
54 # define FNM_EXTMATCH 0
55 #endif
56 #ifndef FNM_LEADING_DIR
57 # define FNM_LEADING_DIR 0
58 #endif
60 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
61 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
62 | FNM_CASEFOLD | FNM_EXTMATCH))
63 == 0);
66 /* Exclusion patterns are grouped into a singly-linked list of
67 "exclusion segments". Each segment represents a set of patterns
68 that can be matches using the same algorithm. Non-wildcard
69 patterns are kept in hash tables, to speed up searches. Wildcard
70 patterns are stored as arrays of patterns. */
73 /* An exclude pattern-options pair. The options are fnmatch options
74 ORed with EXCLUDE_* options. */
76 struct patopts
78 int options;
79 union
81 char const *pattern;
82 regex_t re;
83 } v;
86 /* An array of pattern-options pairs. */
88 struct exclude_pattern
90 struct patopts *exclude;
91 size_t exclude_alloc;
92 size_t exclude_count;
95 enum exclude_type
97 exclude_hash, /* a hash table of excluded names */
98 exclude_pattern /* an array of exclude patterns */
101 struct exclude_segment
103 struct exclude_segment *next; /* next segment in list */
104 enum exclude_type type; /* type of this segment */
105 int options; /* common options for this segment */
106 union
108 Hash_table *table; /* for type == exclude_hash */
109 struct exclude_pattern pat; /* for type == exclude_pattern */
110 } v;
113 struct pattern_buffer
115 struct pattern_buffer *next;
116 char *base;
119 /* The exclude structure keeps a singly-linked list of exclude segments,
120 maintained in reverse order. */
121 struct exclude
123 struct exclude_segment *head;
124 struct pattern_buffer *patbuf;
127 /* Register BUF in the pattern buffer list of EX. ADD_FUNC (see
128 add_exclude_file and add_exclude_fp below) can use this function
129 if it modifies the pattern, to ensure the allocated memory will be
130 properly reclaimed upon calling free_exclude. */
131 void
132 exclude_add_pattern_buffer (struct exclude *ex, char *buf)
134 struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf);
135 pbuf->base = buf;
136 pbuf->next = ex->patbuf;
137 ex->patbuf = pbuf;
140 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
141 Return false if STR definitely does not have wildcards. */
142 bool
143 fnmatch_pattern_has_wildcards (const char *str, int options)
145 while (1)
147 switch (*str++)
149 case '.':
150 case '{':
151 case '}':
152 case '(':
153 case ')':
154 if (options & EXCLUDE_REGEX)
155 return true;
156 break;
158 case '\\':
159 if (options & EXCLUDE_REGEX)
160 continue;
161 else
162 str += ! (options & FNM_NOESCAPE) && *str;
163 break;
165 case '+': case '@': case '!':
166 if (options & FNM_EXTMATCH && *str == '(')
167 return true;
168 break;
170 case '?': case '*': case '[':
171 return true;
173 case '\0':
174 return false;
179 static void
180 unescape_pattern (char *str)
182 char const *q = str;
184 q += *q == '\\' && q[1];
185 while ((*str++ = *q++));
188 /* Return a newly allocated and empty exclude list. */
190 struct exclude *
191 new_exclude (void)
193 return xzalloc (sizeof *new_exclude ());
196 /* Calculate the hash of string. */
197 static size_t
198 string_hasher (void const *data, size_t n_buckets)
200 char const *p = data;
201 return hash_string (p, n_buckets);
204 /* Ditto, for case-insensitive hashes */
205 static size_t
206 string_hasher_ci (void const *data, size_t n_buckets)
208 char const *p = data;
209 mbui_iterator_t iter;
210 size_t value = 0;
212 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
214 mbchar_t m = mbui_cur (iter);
215 wchar_t wc;
217 if (m.wc_valid)
218 wc = towlower (m.wc);
219 else
220 wc = *m.ptr;
222 value = (value * 31 + wc) % n_buckets;
225 return value;
228 /* compare two strings for equality */
229 static bool
230 string_compare (void const *data1, void const *data2)
232 char const *p1 = data1;
233 char const *p2 = data2;
234 return strcmp (p1, p2) == 0;
237 /* compare two strings for equality, case-insensitive */
238 static bool
239 string_compare_ci (void const *data1, void const *data2)
241 char const *p1 = data1;
242 char const *p2 = data2;
243 return mbscasecmp (p1, p2) == 0;
246 static void
247 string_free (void *data)
249 free (data);
252 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
253 to the head of EX. */
254 static void
255 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
257 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
258 sp->type = type;
259 sp->options = options;
260 switch (type)
262 case exclude_pattern:
263 break;
265 case exclude_hash:
266 sp->v.table = hash_initialize (0, NULL,
267 (options & FNM_CASEFOLD) ?
268 string_hasher_ci
269 : string_hasher,
270 (options & FNM_CASEFOLD) ?
271 string_compare_ci
272 : string_compare,
273 string_free);
274 break;
276 sp->next = ex->head;
277 ex->head = sp;
280 /* Free a single exclude segment */
281 static void
282 free_exclude_segment (struct exclude_segment *seg)
284 size_t i;
286 switch (seg->type)
288 case exclude_pattern:
289 for (i = 0; i < seg->v.pat.exclude_count; i++)
291 if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX)
292 regfree (&seg->v.pat.exclude[i].v.re);
294 free (seg->v.pat.exclude);
295 break;
297 case exclude_hash:
298 hash_free (seg->v.table);
299 break;
301 free (seg);
304 /* Free the storage associated with an exclude list. */
305 void
306 free_exclude (struct exclude *ex)
308 struct exclude_segment *seg;
309 struct pattern_buffer *pbuf;
311 for (seg = ex->head; seg; )
313 struct exclude_segment *next = seg->next;
314 free_exclude_segment (seg);
315 seg = next;
318 for (pbuf = ex->patbuf; pbuf; )
320 struct pattern_buffer *next = pbuf->next;
321 free (pbuf->base);
322 free (pbuf);
323 pbuf = next;
326 free (ex);
329 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
330 (unlike fnmatch) wildcards are disabled in PATTERN. */
332 static int
333 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
335 if (! (options & FNM_LEADING_DIR))
336 return ((options & FNM_CASEFOLD)
337 ? mbscasecmp (pattern, f)
338 : strcmp (pattern, f));
339 else if (! (options & FNM_CASEFOLD))
341 size_t patlen = strlen (pattern);
342 int r = strncmp (pattern, f, patlen);
343 if (! r)
345 r = f[patlen];
346 if (r == '/')
347 r = 0;
349 return r;
351 else
353 /* Walk through a copy of F, seeing whether P matches any prefix
354 of F.
356 FIXME: This is an O(N**2) algorithm; it should be O(N).
357 Also, the copy should not be necessary. However, fixing this
358 will probably involve a change to the mbs* API. */
360 char *fcopy = xstrdup (f);
361 char *p;
362 int r;
363 for (p = fcopy; ; *p++ = '/')
365 p = strchr (p, '/');
366 if (p)
367 *p = '\0';
368 r = mbscasecmp (pattern, fcopy);
369 if (!p || r <= 0)
370 break;
372 free (fcopy);
373 return r;
377 bool
378 exclude_fnmatch (char const *pattern, char const *f, int options)
380 int (*matcher) (char const *, char const *, int) =
381 (options & EXCLUDE_WILDCARDS
382 ? fnmatch
383 : fnmatch_no_wildcards);
384 bool matched = ((*matcher) (pattern, f, options) == 0);
385 char const *p;
387 if (! (options & EXCLUDE_ANCHORED))
388 for (p = f; *p && ! matched; p++)
389 if (*p == '/' && p[1] != '/')
390 matched = ((*matcher) (pattern, p + 1, options) == 0);
392 return matched;
395 static bool
396 exclude_patopts (struct patopts const *opts, char const *f)
398 int options = opts->options;
400 return (options & EXCLUDE_REGEX)
401 ? regexec (&opts->v.re, f, 0, NULL, 0) == 0
402 : exclude_fnmatch (opts->v.pattern, f, options);
405 /* Return true if the exclude_pattern segment SEG matches F. */
407 static bool
408 file_pattern_matches (struct exclude_segment const *seg, char const *f)
410 size_t exclude_count = seg->v.pat.exclude_count;
411 struct patopts const *exclude = seg->v.pat.exclude;
412 size_t i;
414 for (i = 0; i < exclude_count; i++)
416 if (exclude_patopts (exclude + i, f))
417 return true;
419 return false;
422 /* Return true if the exclude_hash segment SEG matches F.
423 BUFFER is an auxiliary storage of the same length as F (with nul
424 terminator included) */
425 static bool
426 file_name_matches (struct exclude_segment const *seg, char const *f,
427 char *buffer)
429 int options = seg->options;
430 Hash_table *table = seg->v.table;
434 /* initialize the pattern */
435 strcpy (buffer, f);
437 while (1)
439 if (hash_lookup (table, buffer))
440 return true;
441 if (options & FNM_LEADING_DIR)
443 char *p = strrchr (buffer, '/');
444 if (p)
446 *p = 0;
447 continue;
450 break;
453 if (!(options & EXCLUDE_ANCHORED))
455 f = strchr (f, '/');
456 if (f)
457 f++;
459 else
460 break;
462 while (f);
464 return false;
467 /* Return true if EX excludes F. */
469 bool
470 excluded_file_name (struct exclude const *ex, char const *f)
472 struct exclude_segment *seg;
473 bool invert = false;
474 char *filename = NULL;
476 /* If no patterns are given, the default is to include. */
477 if (!ex->head)
478 return false;
480 /* Scan through the segments, reporting the status of the first match.
481 The segments are in reverse order, so this reports the status of
482 the last match in the original option list. */
483 for (seg = ex->head; ; seg = seg->next)
485 if (seg->type == exclude_hash)
487 if (!filename)
488 filename = xmalloc (strlen (f) + 1);
489 if (file_name_matches (seg, f, filename))
490 break;
492 else
494 if (file_pattern_matches (seg, f))
495 break;
498 if (! seg->next)
500 /* If patterns are given but none match, the default is the
501 opposite of the last segment (i.e., the first in the
502 original option list). For example, in the command
503 'grep -r --exclude="a*" --include="*b" pat dir', the
504 first option is --exclude so any file name matching
505 neither a* nor *b is included. */
506 invert = true;
507 break;
511 free (filename);
512 return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
515 /* Append to EX the exclusion PATTERN with OPTIONS. */
517 void
518 add_exclude (struct exclude *ex, char const *pattern, int options)
520 struct exclude_segment *seg;
521 struct exclude_pattern *pat;
522 struct patopts *patopts;
524 if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS))
525 && fnmatch_pattern_has_wildcards (pattern, options))
527 if (! (ex->head && ex->head->type == exclude_pattern
528 && ((ex->head->options & EXCLUDE_INCLUDE)
529 == (options & EXCLUDE_INCLUDE))))
530 new_exclude_segment (ex, exclude_pattern, options);
532 seg = ex->head;
534 pat = &seg->v.pat;
535 if (pat->exclude_count == pat->exclude_alloc)
536 pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
537 sizeof *pat->exclude);
538 patopts = &pat->exclude[pat->exclude_count++];
540 patopts->options = options;
541 if (options & EXCLUDE_REGEX)
543 int rc;
544 int cflags = REG_NOSUB|REG_EXTENDED|
545 ((options & FNM_CASEFOLD) ? REG_ICASE : 0);
547 if (options & FNM_LEADING_DIR)
549 char *tmp;
550 size_t len = strlen (pattern);
552 while (len > 0 && ISSLASH (pattern[len-1]))
553 --len;
555 if (len == 0)
556 rc = 1;
557 else
559 tmp = xmalloc (len + 7);
560 memcpy (tmp, pattern, len);
561 strcpy (tmp + len, "(/.*)?");
562 rc = regcomp (&patopts->v.re, tmp, cflags);
563 free (tmp);
566 else
567 rc = regcomp (&patopts->v.re, pattern, cflags);
569 if (rc)
571 pat->exclude_count--;
572 return;
575 else
577 if (options & EXCLUDE_ALLOC)
579 pattern = xstrdup (pattern);
580 exclude_add_pattern_buffer (ex, (char*) pattern);
582 patopts->v.pattern = pattern;
585 else
587 char *str, *p;
588 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
589 | FNM_LEADING_DIR | FNM_CASEFOLD);
590 if (! (ex->head && ex->head->type == exclude_hash
591 && ((ex->head->options & exclude_hash_flags)
592 == (options & exclude_hash_flags))))
593 new_exclude_segment (ex, exclude_hash, options);
594 seg = ex->head;
596 str = xstrdup (pattern);
597 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
598 unescape_pattern (str);
599 p = hash_insert (seg->v.table, str);
600 if (p != str)
601 free (str);
605 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
606 OPTIONS. LINE_END terminates each pattern in the file. If
607 LINE_END is a space character, ignore trailing spaces and empty
608 lines in FP. Return -1 on failure, 0 on success. */
611 add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *),
612 struct exclude *ex, FILE *fp, int options,
613 char line_end,
614 void *data)
616 char *buf = NULL;
617 char *p;
618 char *pattern;
619 char const *lim;
620 size_t buf_alloc = 0;
621 size_t buf_count = 0;
622 int c;
623 int e = 0;
625 while ((c = getc (fp)) != EOF)
627 if (buf_count == buf_alloc)
628 buf = x2realloc (buf, &buf_alloc);
629 buf[buf_count++] = c;
632 if (ferror (fp))
633 e = errno;
635 buf = xrealloc (buf, buf_count + 1);
636 buf[buf_count] = line_end;
637 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
639 exclude_add_pattern_buffer (ex, buf);
641 pattern = buf;
643 for (p = buf; p < lim; p++)
644 if (*p == line_end)
646 char *pattern_end = p;
648 if (isspace ((unsigned char) line_end))
650 for (; ; pattern_end--)
651 if (pattern_end == pattern)
652 goto next_pattern;
653 else if (! isspace ((unsigned char) pattern_end[-1]))
654 break;
657 *pattern_end = '\0';
658 (*add_func) (ex, pattern, options, data);
660 next_pattern:
661 pattern = p + 1;
664 errno = e;
665 return e ? -1 : 0;
668 static void
669 call_addfn (struct exclude *ex, char const *pattern, int options, void *data)
671 void (**addfnptr) (struct exclude *, char const *, int) = data;
672 (*addfnptr) (ex, pattern, options);
676 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
677 struct exclude *ex, char const *file_name, int options,
678 char line_end)
680 bool use_stdin = file_name[0] == '-' && !file_name[1];
681 FILE *in;
682 int rc = 0;
684 if (use_stdin)
685 in = stdin;
686 else if (! (in = fopen (file_name, "re")))
687 return -1;
689 rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func);
691 if (!use_stdin && fclose (in) != 0)
692 rc = -1;
694 return rc;