<machine/stdint.h>: Check __cplusplus first.
[dragonfly.git] / contrib / diffutils / lib / exclude.c
bloba7dd9b36bf694adc3a598f6411882d35165ecbf5
1 /* exclude.c -- exclude file names
3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2013 Free Software
4 Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com>
20 and Sergey Poznyakoff <gray@gnu.org>.
21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22 for improvement suggestions. */
24 #include <config.h>
26 #include <stdbool.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <stddef.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wctype.h>
36 #include "exclude.h"
37 #include "hash.h"
38 #include "mbuiter.h"
39 #include "fnmatch.h"
40 #include "xalloc.h"
41 #include "verify.h"
43 #if USE_UNLOCKED_IO
44 # include "unlocked-io.h"
45 #endif
47 /* Non-GNU systems lack these options, so we don't need to check them. */
48 #ifndef FNM_CASEFOLD
49 # define FNM_CASEFOLD 0
50 #endif
51 #ifndef FNM_EXTMATCH
52 # define FNM_EXTMATCH 0
53 #endif
54 #ifndef FNM_LEADING_DIR
55 # define FNM_LEADING_DIR 0
56 #endif
58 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
59 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
60 | FNM_CASEFOLD | FNM_EXTMATCH))
61 == 0);
64 /* Exclusion patterns are grouped into a singly-linked list of
65 "exclusion segments". Each segment represents a set of patterns
66 that can be matches using the same algorithm. Non-wildcard
67 patterns are kept in hash tables, to speed up searches. Wildcard
68 patterns are stored as arrays of patterns. */
71 /* An exclude pattern-options pair. The options are fnmatch options
72 ORed with EXCLUDE_* options. */
74 struct patopts
76 char const *pattern;
77 int options;
80 /* An array of pattern-options pairs. */
82 struct exclude_pattern
84 struct patopts *exclude;
85 size_t exclude_alloc;
86 size_t exclude_count;
89 enum exclude_type
91 exclude_hash, /* a hash table of excluded names */
92 exclude_pattern /* an array of exclude patterns */
95 struct exclude_segment
97 struct exclude_segment *next; /* next segment in list */
98 enum exclude_type type; /* type of this segment */
99 int options; /* common options for this segment */
100 union
102 Hash_table *table; /* for type == exclude_hash */
103 struct exclude_pattern pat; /* for type == exclude_pattern */
104 } v;
107 /* The exclude structure keeps a singly-linked list of exclude segments,
108 maintained in reverse order. */
109 struct exclude
111 struct exclude_segment *head;
114 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
115 Return false if STR definitely does not have wildcards. */
116 bool
117 fnmatch_pattern_has_wildcards (const char *str, int options)
119 while (1)
121 switch (*str++)
123 case '\\':
124 str += ! (options & FNM_NOESCAPE) && *str;
125 break;
127 case '+': case '@': case '!':
128 if (options & FNM_EXTMATCH && *str == '(')
129 return true;
130 break;
132 case '?': case '*': case '[':
133 return true;
135 case '\0':
136 return false;
141 static void
142 unescape_pattern (char *str)
144 char const *q = str;
146 q += *q == '\\' && q[1];
147 while ((*str++ = *q++));
150 /* Return a newly allocated and empty exclude list. */
152 struct exclude *
153 new_exclude (void)
155 return xzalloc (sizeof *new_exclude ());
158 /* Calculate the hash of string. */
159 static size_t
160 string_hasher (void const *data, size_t n_buckets)
162 char const *p = data;
163 return hash_string (p, n_buckets);
166 /* Ditto, for case-insensitive hashes */
167 static size_t
168 string_hasher_ci (void const *data, size_t n_buckets)
170 char const *p = data;
171 mbui_iterator_t iter;
172 size_t value = 0;
174 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
176 mbchar_t m = mbui_cur (iter);
177 wchar_t wc;
179 if (m.wc_valid)
180 wc = towlower (m.wc);
181 else
182 wc = *m.ptr;
184 value = (value * 31 + wc) % n_buckets;
187 return value;
190 /* compare two strings for equality */
191 static bool
192 string_compare (void const *data1, void const *data2)
194 char const *p1 = data1;
195 char const *p2 = data2;
196 return strcmp (p1, p2) == 0;
199 /* compare two strings for equality, case-insensitive */
200 static bool
201 string_compare_ci (void const *data1, void const *data2)
203 char const *p1 = data1;
204 char const *p2 = data2;
205 return mbscasecmp (p1, p2) == 0;
208 static void
209 string_free (void *data)
211 free (data);
214 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
215 to the head of EX. */
216 static void
217 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
219 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
220 sp->type = type;
221 sp->options = options;
222 switch (type)
224 case exclude_pattern:
225 break;
227 case exclude_hash:
228 sp->v.table = hash_initialize (0, NULL,
229 (options & FNM_CASEFOLD) ?
230 string_hasher_ci
231 : string_hasher,
232 (options & FNM_CASEFOLD) ?
233 string_compare_ci
234 : string_compare,
235 string_free);
236 break;
238 sp->next = ex->head;
239 ex->head = sp;
242 /* Free a single exclude segment */
243 static void
244 free_exclude_segment (struct exclude_segment *seg)
246 switch (seg->type)
248 case exclude_pattern:
249 free (seg->v.pat.exclude);
250 break;
252 case exclude_hash:
253 hash_free (seg->v.table);
254 break;
256 free (seg);
259 /* Free the storage associated with an exclude list. */
260 void
261 free_exclude (struct exclude *ex)
263 struct exclude_segment *seg;
264 for (seg = ex->head; seg; )
266 struct exclude_segment *next = seg->next;
267 free_exclude_segment (seg);
268 seg = next;
270 free (ex);
273 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
274 (unlike fnmatch) wildcards are disabled in PATTERN. */
276 static int
277 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
279 if (! (options & FNM_LEADING_DIR))
280 return ((options & FNM_CASEFOLD)
281 ? mbscasecmp (pattern, f)
282 : strcmp (pattern, f));
283 else if (! (options & FNM_CASEFOLD))
285 size_t patlen = strlen (pattern);
286 int r = strncmp (pattern, f, patlen);
287 if (! r)
289 r = f[patlen];
290 if (r == '/')
291 r = 0;
293 return r;
295 else
297 /* Walk through a copy of F, seeing whether P matches any prefix
298 of F.
300 FIXME: This is an O(N**2) algorithm; it should be O(N).
301 Also, the copy should not be necessary. However, fixing this
302 will probably involve a change to the mbs* API. */
304 char *fcopy = xstrdup (f);
305 char *p;
306 int r;
307 for (p = fcopy; ; *p++ = '/')
309 p = strchr (p, '/');
310 if (p)
311 *p = '\0';
312 r = mbscasecmp (pattern, fcopy);
313 if (!p || r <= 0)
314 break;
316 free (fcopy);
317 return r;
321 bool
322 exclude_fnmatch (char const *pattern, char const *f, int options)
324 int (*matcher) (char const *, char const *, int) =
325 (options & EXCLUDE_WILDCARDS
326 ? fnmatch
327 : fnmatch_no_wildcards);
328 bool matched = ((*matcher) (pattern, f, options) == 0);
329 char const *p;
331 if (! (options & EXCLUDE_ANCHORED))
332 for (p = f; *p && ! matched; p++)
333 if (*p == '/' && p[1] != '/')
334 matched = ((*matcher) (pattern, p + 1, options) == 0);
336 return matched;
339 /* Return true if the exclude_pattern segment SEG matches F. */
341 static bool
342 file_pattern_matches (struct exclude_segment const *seg, char const *f)
344 size_t exclude_count = seg->v.pat.exclude_count;
345 struct patopts const *exclude = seg->v.pat.exclude;
346 size_t i;
348 for (i = 0; i < exclude_count; i++)
350 char const *pattern = exclude[i].pattern;
351 int options = exclude[i].options;
352 if (exclude_fnmatch (pattern, f, options))
353 return true;
355 return false;
358 /* Return true if the exclude_hash segment SEG matches F.
359 BUFFER is an auxiliary storage of the same length as F (with nul
360 terminator included) */
361 static bool
362 file_name_matches (struct exclude_segment const *seg, char const *f,
363 char *buffer)
365 int options = seg->options;
366 Hash_table *table = seg->v.table;
370 /* initialize the pattern */
371 strcpy (buffer, f);
373 while (1)
375 if (hash_lookup (table, buffer))
376 return true;
377 if (options & FNM_LEADING_DIR)
379 char *p = strrchr (buffer, '/');
380 if (p)
382 *p = 0;
383 continue;
386 break;
389 if (!(options & EXCLUDE_ANCHORED))
391 f = strchr (f, '/');
392 if (f)
393 f++;
395 else
396 break;
398 while (f);
400 return false;
403 /* Return true if EX excludes F. */
405 bool
406 excluded_file_name (struct exclude const *ex, char const *f)
408 struct exclude_segment *seg;
409 bool invert = false;
410 char *filename = NULL;
412 /* If no patterns are given, the default is to include. */
413 if (!ex->head)
414 return false;
416 /* Scan through the segments, reporting the status of the first match.
417 The segments are in reverse order, so this reports the status of
418 the last match in the original option list. */
419 for (seg = ex->head; ; seg = seg->next)
421 if (seg->type == exclude_hash)
423 if (!filename)
424 filename = xmalloc (strlen (f) + 1);
425 if (file_name_matches (seg, f, filename))
426 break;
428 else
430 if (file_pattern_matches (seg, f))
431 break;
434 if (! seg->next)
436 /* If patterns are given but none match, the default is the
437 opposite of the last segment (i.e., the first in the
438 original option list). For example, in the command
439 'grep -r --exclude="a*" --include="*b" pat dir', the
440 first option is --exclude so any file name matching
441 neither a* nor *b is included. */
442 invert = true;
443 break;
447 free (filename);
448 return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
451 /* Append to EX the exclusion PATTERN with OPTIONS. */
453 void
454 add_exclude (struct exclude *ex, char const *pattern, int options)
456 struct exclude_segment *seg;
458 if ((options & EXCLUDE_WILDCARDS)
459 && fnmatch_pattern_has_wildcards (pattern, options))
461 struct exclude_pattern *pat;
462 struct patopts *patopts;
464 if (! (ex->head && ex->head->type == exclude_pattern
465 && ((ex->head->options & EXCLUDE_INCLUDE)
466 == (options & EXCLUDE_INCLUDE))))
467 new_exclude_segment (ex, exclude_pattern, options);
468 seg = ex->head;
470 pat = &seg->v.pat;
471 if (pat->exclude_count == pat->exclude_alloc)
472 pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
473 sizeof *pat->exclude);
474 patopts = &pat->exclude[pat->exclude_count++];
475 patopts->pattern = pattern;
476 patopts->options = options;
478 else
480 char *str, *p;
481 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
482 | FNM_LEADING_DIR | FNM_CASEFOLD);
483 if (! (ex->head && ex->head->type == exclude_hash
484 && ((ex->head->options & exclude_hash_flags)
485 == (options & exclude_hash_flags))))
486 new_exclude_segment (ex, exclude_hash, options);
487 seg = ex->head;
489 str = xstrdup (pattern);
490 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
491 unescape_pattern (str);
492 p = hash_insert (seg->v.table, str);
493 if (p != str)
494 free (str);
498 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
499 OPTIONS. LINE_END terminates each pattern in the file. If
500 LINE_END is a space character, ignore trailing spaces and empty
501 lines in FILE. Return -1 on failure, 0 on success. */
504 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
505 struct exclude *ex, char const *file_name, int options,
506 char line_end)
508 bool use_stdin = file_name[0] == '-' && !file_name[1];
509 FILE *in;
510 char *buf = NULL;
511 char *p;
512 char const *pattern;
513 char const *lim;
514 size_t buf_alloc = 0;
515 size_t buf_count = 0;
516 int c;
517 int e = 0;
519 if (use_stdin)
520 in = stdin;
521 else if (! (in = fopen (file_name, "r")))
522 return -1;
524 while ((c = getc (in)) != EOF)
526 if (buf_count == buf_alloc)
527 buf = x2realloc (buf, &buf_alloc);
528 buf[buf_count++] = c;
531 if (ferror (in))
532 e = errno;
534 if (!use_stdin && fclose (in) != 0)
535 e = errno;
537 buf = xrealloc (buf, buf_count + 1);
538 buf[buf_count] = line_end;
539 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
540 pattern = buf;
542 for (p = buf; p < lim; p++)
543 if (*p == line_end)
545 char *pattern_end = p;
547 if (isspace ((unsigned char) line_end))
549 for (; ; pattern_end--)
550 if (pattern_end == pattern)
551 goto next_pattern;
552 else if (! isspace ((unsigned char) pattern_end[-1]))
553 break;
556 *pattern_end = '\0';
557 (*add_func) (ex, pattern, options);
559 next_pattern:
560 pattern = p + 1;
563 errno = e;
564 return e ? -1 : 0;