Bugfixes for Savannah bugs #19768 and #19766
[findutils.git] / locate / locate.c
blob37ab13e8d6dee3666d1fac6f15bd771053a32e9e
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <signal.h>
66 #include <ctype.h>
67 #include <sys/types.h>
68 #include <grp.h> /* for setgroups() */
69 #include <sys/stat.h>
70 #include <time.h>
71 #include <fnmatch.h>
72 #include <getopt.h>
73 #include <xstrtol.h>
75 /* The presence of unistd.h is assumed by gnulib these days, so we
76 * might as well assume it too.
78 /* We need <unistd.h> for isatty(). */
79 #include <unistd.h>
81 #if HAVE_FCNTL_H
82 /* We use fcntl() */
83 #include <fcntl.h>
84 #endif
86 #define NDEBUG
87 #include <assert.h>
88 #include <string.h>
91 #ifdef STDC_HEADERS
92 #include <stdlib.h>
93 #endif
95 #ifdef HAVE_ERRNO_H
96 #include <errno.h>
97 #else
98 extern int errno;
99 #endif
101 #ifdef HAVE_LOCALE_H
102 #include <locale.h>
103 #endif
105 #if ENABLE_NLS
106 # include <libintl.h>
107 # define _(Text) gettext (Text)
108 #else
109 # define _(Text) Text
110 #define textdomain(Domain)
111 #define bindtextdomain(Package, Directory)
112 #endif
113 #ifdef gettext_noop
114 # define N_(String) gettext_noop (String)
115 #else
116 /* We used to use (String) instead of just String, but apparently ISO C
117 * doesn't allow this (at least, that's what HP said when someone reported
118 * this as a compiler bug). This is HP case number 1205608192. See
119 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
120 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
121 * like: static const char buf[] = ("string");
123 # define N_(String) String
124 #endif
126 #include "locatedb.h"
127 #include <getline.h>
128 #include "../gnulib/lib/xalloc.h"
129 #include "../gnulib/lib/error.h"
130 #include "../gnulib/lib/human.h"
131 #include "dirname.h"
132 #include "closeout.h"
133 #include "nextelem.h"
134 #include "regex.h"
135 #include "quote.h"
136 #include "quotearg.h"
137 #include "printquoted.h"
138 #include "regextype.h"
139 #include "gnulib-version.h"
141 /* Note that this evaluates Ch many times. */
142 #ifdef _LIBC
143 # define TOUPPER(Ch) toupper (Ch)
144 # define TOLOWER(Ch) tolower (Ch)
145 #else
146 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
147 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
148 #endif
150 /* typedef enum {false, true} boolean; */
152 /* Warn if a database is older than this. 8 days allows for a weekly
153 update that takes up to a day to perform. */
154 static unsigned int warn_number_units = 8;
156 /* Printable name of units used in WARN_SECONDS */
157 static const char warn_name_units[] = N_("days");
158 #define SECONDS_PER_UNIT (60 * 60 * 24)
/* Verdict returned by a visitor function for the current database
   entry.  The values are distinct bits so that callers can test for
   several verdicts at once with a mask. */
enum visit_result
  {
    VISIT_CONTINUE = 1,  /* please call the next visitor */
    VISIT_ACCEPTED = 2,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 4,  /* rejected, process next file. */
    VISIT_ABORT    = 8   /* rejected, process no more files. */
  };
/* Which entries to keep, depending on whether the named file
   currently exists. */
enum ExistenceCheckType
  {
    ACCEPT_EITHER,       /* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING,     /* Corresponds to option -e */
    ACCEPT_NON_EXISTING  /* Corresponds to option -E */
  };
175 /* Check for existence of files before printing them out? */
176 enum ExistenceCheckType check_existence = ACCEPT_EITHER;
178 static int follow_symlinks = 1;
180 /* What to separate the results with. */
181 static int separator = '\n';
183 static struct quoting_options * quote_opts = NULL;
184 static bool stdout_is_a_tty;
185 static bool print_quoted_filename;
186 static bool results_were_filtered;
188 static const char *selected_secure_db = NULL;
191 /* Change the number of days old the database can be
192 * before we complain about it.
194 static void
195 set_max_db_age(const char *s)
197 char *end;
198 unsigned long int val;
199 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
201 if (0 == *s)
203 error(1, 0,
204 _("The argument argument for option --max-database-age must not be empty"));
208 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
209 * we would not be able to tell if that is the correct answer, or whether it
210 * signifies an error.
212 errno = 0;
213 val = strtoul(s, &end, 10);
215 /* Diagnose number too large, non-numbes and trailing junk. */
216 if ((ULONG_MAX == val && ERANGE == errno) ||
217 (0 == val && EINVAL == errno))
219 error(1, errno,
220 _("Invalid argument %s for option --max-database-age"),
221 quotearg_n_style(0, locale_quoting_style, s));
223 else if (*end)
225 /* errno wasn't set, don't print its message */
226 error(1, 0,
227 _("Invalid argument %s for option --max-database-age"),
228 quotearg_n_style(0, locale_quoting_style, s));
230 else
232 warn_number_units = val;
/* Read in a 16-bit int, high byte first (network byte order).
 *
 * The two bytes are combined as unsigned values and the result is then
 * interpreted as a two's-complement 16-bit quantity, which avoids
 * left-shifting a negative number (undefined behaviour in C).
 *
 * A byte that cannot be read (fgetc() returning EOF) is treated as
 * 0xff, so a call at end of file returns -1; callers that need to tell
 * this apart from real data must check the stream state.
 */
static short
get_short (FILE *fp)
{
  int hi = fgetc (fp);
  int lo = fgetc (fp);
  int value = ((hi & 0xff) << 8) | (lo & 0xff);

  if (value & 0x8000)
    value -= 0x10000;           /* sign-extend from 16 bits */

  return (short) value;
}
/* The characters that are treated as shell glob metacharacters. */
const char * const metacharacters = "*?[]\\";

/* Return nonzero if S contains any shell glob characters.
 */
static int
contains_metacharacter(const char *s)
{
  return strpbrk(s, metacharacters) != NULL;
}
264 /* locate_read_str()
266 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
267 * until we reach DELIMITER or end-of-file. We reallocate the buffer
268 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
269 * is made regarding the content of the data (i.e. the implementation is
270 * 8-bit clean, the only delimiter is DELIMITER).
272 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
273 * has been removed from gnulib.
275 * We call the function locate_read_str() to avoid a name clash with the curses
276 * function getstr().
278 static int
279 locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
281 char * p = NULL;
282 size_t sz = 0;
283 int nread;
284 size_t needed;
286 nread = getdelim(&p, &sz, delimiter, fp);
287 if (nread >= 0)
289 assert(p != NULL);
291 needed = offs + nread + 1u;
292 if (needed > (*siz))
294 char *pnew = realloc(*buf, needed);
295 if (NULL == pnew)
297 return -1; /* FAIL */
299 else
301 *siz = needed;
302 *buf = pnew;
305 memcpy((*buf)+offs, p, nread);
306 free(p);
308 return nread;
/* Bookkeeping shared by the limit and count visitors. */
struct locate_limits
{
  uintmax_t limit;           /* stop once this many items were accepted */
  uintmax_t items_accepted;  /* number of items accepted so far */
};
static struct locate_limits limits;
/* Counters accumulated for the statistics report. */
struct locate_stats
{
  uintmax_t compressed_bytes;
  uintmax_t total_filename_count;    /* number of filenames seen */
  uintmax_t total_filename_length;   /* sum of their lengths in bytes */
  uintmax_t whitespace_count;        /* filenames containing white space */
  uintmax_t newline_count;           /* filenames containing a newline */
  uintmax_t highbit_filename_count;  /* filenames with a high-bit byte */
};
static struct locate_stats statistics;
332 struct regular_expression
334 struct re_pattern_buffer regex; /* for --regex */
/* State shared between the database reader and the visitor functions
   while a single database is being scanned. */
struct process_data
{
  int c;                        /* An input byte.  */
  char itemcount;               /* Indicates we're at the beginning of an slocate db. */
  int count;                    /* The length of the prefix shared with the previous database entry.  */
  int len;
  char *original_filename;      /* The current input database entry. */
  size_t pathsize;              /* Amount allocated for it.  */
  char *munged_filename;        /* path or base_name(path) */
  FILE *fp;                     /* The pathname database.  */
  const char *dbfile;           /* Its name, or "<stdin>" */
  int  slocatedb_format;        /* Allows us to cope with slocate's format variant */
  /* for the old database format,
     the first and second characters of the most common bigrams.  */
  char bigram1[128];
  char bigram2[128];
};
/* A visitor inspects the current entry in PROCDATA, using its own
   CONTEXT, and returns one of the visit_result values. */
typedef int (*visitfunc)(struct process_data *procdata,
                         void *context);

/* One node of the singly linked list of visitors. */
struct visitor
{
  visitfunc      inspector;   /* function to call */
  void *         context;     /* passed to it as the second argument */
  struct visitor *next;       /* next visitor, or NULL */
};


/* Head and tail of the visitor list, and the first visitor that comes
   after the per-pattern visitors. */
static struct visitor *inspectors = NULL;
static struct visitor *lastinspector = NULL;
static struct visitor *past_pat_inspector = NULL;
372 /* 0 or 1 pattern(s) */
373 static int
374 process_simple(struct process_data *procdata)
376 int result = VISIT_CONTINUE;
377 const struct visitor *p = inspectors;
379 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
381 result = (p->inspector)(procdata, p->context);
382 p = p->next;
385 return result;
388 /* Accept if any pattern matches. */
389 static int
390 process_or (struct process_data *procdata)
392 int result = VISIT_CONTINUE;
393 const struct visitor *p = inspectors;
395 while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
397 result = (p->inspector)(procdata, p->context);
398 p = p->next;
401 if (result == VISIT_CONTINUE)
402 result = VISIT_REJECTED;
403 if (result & (VISIT_ABORT | VISIT_REJECTED))
404 return result;
406 p = past_pat_inspector;
407 result = VISIT_CONTINUE;
409 while ( (VISIT_CONTINUE == result) && (NULL != p) )
411 result = (p->inspector)(procdata, p->context);
412 p = p->next;
415 if (VISIT_CONTINUE == result)
416 return VISIT_ACCEPTED;
417 else
418 return result;
421 /* Accept if all pattern match. */
422 static int
423 process_and (struct process_data *procdata)
425 int result = VISIT_CONTINUE;
426 const struct visitor *p = inspectors;
428 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
430 result = (p->inspector)(procdata, p->context);
431 p = p->next;
434 if (result == VISIT_CONTINUE)
435 result = VISIT_REJECTED;
436 if (result & (VISIT_ABORT | VISIT_REJECTED))
437 return result;
439 p = past_pat_inspector;
440 result = VISIT_CONTINUE;
442 while ( (VISIT_CONTINUE == result) && (NULL != p) )
444 result = (p->inspector)(procdata, p->context);
445 p = p->next;
448 if (VISIT_CONTINUE == result)
449 return VISIT_ACCEPTED;
450 else
451 return result;
/* Signature shared by process_simple, process_or and process_and. */
typedef int (*processfunc)(struct process_data *procdata);

/* The processing strategy selected for the database being scanned. */
static processfunc mainprocessor = NULL;
458 static void
459 add_visitor(visitfunc fn, void *context)
461 struct visitor *p = xmalloc(sizeof(struct visitor));
462 p->inspector = fn;
463 p->context = context;
464 p->next = NULL;
466 if (NULL == lastinspector)
468 lastinspector = inspectors = p;
470 else
472 lastinspector->next = p;
473 lastinspector = p;
479 static int
480 visit_justprint_quoted(struct process_data *procdata, void *context)
482 (void) context;
483 print_quoted (stdout, quote_opts, stdout_is_a_tty,
484 "%s",
485 procdata->original_filename);
486 putchar(separator);
487 return VISIT_CONTINUE;
490 static int
491 visit_justprint_unquoted(struct process_data *procdata, void *context)
493 (void) context;
494 fputs(procdata->original_filename, stdout);
495 putchar(separator);
496 return VISIT_CONTINUE;
499 static int
500 visit_old_format(struct process_data *procdata, void *context)
502 register char *s;
503 (void) context;
505 /* Get the offset in the path where this path info starts. */
506 if (procdata->c == LOCATEDB_OLD_ESCAPE)
507 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
508 else
509 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
511 /* Overlay the old path with the remainder of the new. */
512 for (s = procdata->original_filename + procdata->count;
513 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
514 if (procdata->c < 0200)
515 *s++ = procdata->c; /* An ordinary character. */
516 else
518 /* Bigram markers have the high bit set. */
519 procdata->c &= 0177;
520 *s++ = procdata->bigram1[procdata->c];
521 *s++ = procdata->bigram2[procdata->c];
523 *s-- = '\0';
525 procdata->munged_filename = procdata->original_filename;
527 return VISIT_CONTINUE;
531 static int
532 visit_locate02_format(struct process_data *procdata, void *context)
534 register char *s;
535 int nread;
536 (void) context;
538 if (procdata->slocatedb_format)
540 if (procdata->itemcount == 0)
542 ungetc(procdata->c, procdata->fp);
543 procdata->count = 0;
544 procdata->len = 0;
546 else if (procdata->itemcount == 1)
548 procdata->count = procdata->len-1;
550 else
552 if (procdata->c == LOCATEDB_ESCAPE)
553 procdata->count += (short)get_short (procdata->fp);
554 else if (procdata->c > 127)
555 procdata->count += procdata->c - 256;
556 else
557 procdata->count += procdata->c;
560 else
562 if (procdata->c == LOCATEDB_ESCAPE)
563 procdata->count += (short)get_short (procdata->fp);
564 else if (procdata->c > 127)
565 procdata->count += procdata->c - 256;
566 else
567 procdata->count += procdata->c;
570 if (procdata->count > procdata->len || procdata->count < 0)
572 /* This should not happen generally , but since we're
573 * reading in data which is outside our control, we
574 * cannot prevent it.
576 error(1, 0, _("locate database %s is corrupt or invalid"),
577 quotearg_n_style(0, locale_quoting_style, procdata->dbfile));
580 /* Overlay the old path with the remainder of the new. */
581 nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
582 procdata->fp, 0, procdata->count);
583 if (nread < 0)
584 return VISIT_ABORT;
585 procdata->c = getc (procdata->fp);
586 procdata->len = procdata->count + nread;
587 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
588 assert (s[0] != '\0');
589 assert (s[1] == '\0'); /* Our terminator. */
590 assert (s[2] == '\0'); /* Added by locate_read_str. */
592 procdata->munged_filename = procdata->original_filename;
594 if (procdata->slocatedb_format)
596 /* Don't increment indefinitely, it might overflow. */
597 if (procdata->itemcount < 6)
599 ++(procdata->itemcount);
604 return VISIT_CONTINUE;
607 static int
608 visit_basename(struct process_data *procdata, void *context)
610 (void) context;
611 procdata->munged_filename = base_name(procdata->original_filename);
613 return VISIT_CONTINUE;
617 /* visit_existing_follow implements -L -e */
618 static int
619 visit_existing_follow(struct process_data *procdata, void *context)
621 struct stat st;
622 (void) context;
624 /* munged_filename has been converted in some way (to lower case,
625 * or is just the base name of the file), and original_filename has not.
626 * Hence only original_filename is still actually the name of the file
627 * whose existence we would need to check.
629 if (stat(procdata->original_filename, &st) != 0)
631 return VISIT_REJECTED;
633 else
635 return VISIT_CONTINUE;
639 /* visit_non_existing_follow implements -L -E */
640 static int
641 visit_non_existing_follow(struct process_data *procdata, void *context)
643 struct stat st;
644 (void) context;
646 /* munged_filename has been converted in some way (to lower case,
647 * or is just the base name of the file), and original_filename has not.
648 * Hence only original_filename is still actually the name of the file
649 * whose existence we would need to check.
651 if (stat(procdata->original_filename, &st) == 0)
653 return VISIT_REJECTED;
655 else
657 return VISIT_CONTINUE;
661 /* visit_existing_nofollow implements -P -e */
662 static int
663 visit_existing_nofollow(struct process_data *procdata, void *context)
665 struct stat st;
666 (void) context;
668 /* munged_filename has been converted in some way (to lower case,
669 * or is just the base name of the file), and original_filename has not.
670 * Hence only original_filename is still actually the name of the file
671 * whose existence we would need to check.
673 if (lstat(procdata->original_filename, &st) != 0)
675 return VISIT_REJECTED;
677 else
679 return VISIT_CONTINUE;
683 /* visit_non_existing_nofollow implements -P -E */
684 static int
685 visit_non_existing_nofollow(struct process_data *procdata, void *context)
687 struct stat st;
688 (void) context;
690 /* munged_filename has been converted in some way (to lower case,
691 * or is just the base name of the file), and original_filename has not.
692 * Hence only original_filename is still actually the name of the file
693 * whose existence we would need to check.
695 if (lstat(procdata->original_filename, &st) == 0)
697 return VISIT_REJECTED;
699 else
701 return VISIT_CONTINUE;
705 static int
706 visit_substring_match_nocasefold(struct process_data *procdata, void *context)
708 const char *pattern = context;
710 if (NULL != mbsstr(procdata->munged_filename, pattern))
711 return VISIT_ACCEPTED;
712 else
713 return VISIT_REJECTED;
716 static int
717 visit_substring_match_casefold(struct process_data *procdata, void *context)
719 const char *pattern = context;
721 if (NULL != mbscasestr(procdata->munged_filename, pattern))
722 return VISIT_ACCEPTED;
723 else
724 return VISIT_REJECTED;
728 static int
729 visit_globmatch_nofold(struct process_data *procdata, void *context)
731 const char *glob = context;
732 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
733 return VISIT_REJECTED;
734 else
735 return VISIT_ACCEPTED;
739 static int
740 visit_globmatch_casefold(struct process_data *procdata, void *context)
742 const char *glob = context;
743 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
744 return VISIT_REJECTED;
745 else
746 return VISIT_ACCEPTED;
750 static int
751 visit_regex(struct process_data *procdata, void *context)
753 struct regular_expression *p = context;
754 const size_t len = strlen(procdata->munged_filename);
756 int rv = re_search (&p->regex, procdata->munged_filename,
757 len, 0, len,
758 (struct re_registers *) NULL);
759 if (rv < 0)
761 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
763 else
765 return VISIT_ACCEPTED; /* match */
770 static int
771 visit_stats(struct process_data *procdata, void *context)
773 struct locate_stats *p = context;
774 size_t len = strlen(procdata->original_filename);
775 const char *s;
776 int highbit, whitespace, newline;
778 ++(p->total_filename_count);
779 p->total_filename_length += len;
781 highbit = whitespace = newline = 0;
782 for (s=procdata->original_filename; *s; ++s)
784 if ( (int)(*s) & 128 )
785 highbit = 1;
786 if ('\n' == *s)
788 newline = whitespace = 1;
790 else if (isspace((unsigned char)*s))
792 whitespace = 1;
796 if (highbit)
797 ++(p->highbit_filename_count);
798 if (whitespace)
799 ++(p->whitespace_count);
800 if (newline)
801 ++(p->newline_count);
803 return VISIT_CONTINUE;
807 static int
808 visit_limit(struct process_data *procdata, void *context)
810 struct locate_limits *p = context;
812 (void) procdata;
814 if (++p->items_accepted >= p->limit)
815 return VISIT_ABORT;
816 else
817 return VISIT_CONTINUE;
820 static int
821 visit_count(struct process_data *procdata, void *context)
823 struct locate_limits *p = context;
825 (void) procdata;
827 ++p->items_accepted;
828 return VISIT_CONTINUE;
831 /* Emit the statistics.
833 static void
834 print_stats(int argc, size_t database_file_size)
836 char hbuf[LONGEST_HUMAN_READABLE + 1];
838 printf(_("Locate database size: %s bytes\n"),
839 human_readable ((uintmax_t) database_file_size,
840 hbuf, human_ceiling, 1, 1));
842 printf( (results_were_filtered ?
843 _("Matching Filenames: %s ") :
844 _("All Filenames: %s ")),
845 human_readable (statistics.total_filename_count,
846 hbuf, human_ceiling, 1, 1));
847 printf(_("with a cumulative length of %s bytes"),
848 human_readable (statistics.total_filename_length,
849 hbuf, human_ceiling, 1, 1));
851 printf(_("\n\tof which %s contain whitespace, "),
852 human_readable (statistics.whitespace_count,
853 hbuf, human_ceiling, 1, 1));
854 printf(_("\n\t%s contain newline characters, "),
855 human_readable (statistics.newline_count,
856 hbuf, human_ceiling, 1, 1));
857 printf(_("\n\tand %s contain characters with the high bit set.\n"),
858 human_readable (statistics.highbit_filename_count,
859 hbuf, human_ceiling, 1, 1));
861 if (!argc)
863 if (results_were_filtered)
865 printf(_("Some filenames may have been filtered out, "
866 "so we cannot compute the compression ratio.\n"));
868 else
870 if (statistics.total_filename_length)
872 printf(_("Compression ratio %4.2f%%\n"),
873 100.0 * ((double)statistics.total_filename_length
874 - (double) database_file_size)
875 / (double) statistics.total_filename_length);
877 else
879 printf(_("Compression ratio is undefined\n"));
883 printf("\n");
887 * Return nonzero if the data we read in indicates that we are
888 * looking at a LOCATE02 locate database.
890 static int
891 looking_at_gnu_locatedb (const char *data, size_t len)
893 if (len < sizeof (LOCATEDB_MAGIC))
894 return 0;
895 else if (0 == memcmp (data, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
896 return 1; /* We saw the magic byte sequence */
897 else
898 return 0;
902 * Return nonzero if the data we read in indicates that we are
903 * looking at an slocate database.
905 static int
906 looking_at_slocate_locatedb (const char *filename,
907 const char *data,
908 size_t len,
909 int *seclevel)
911 assert(len <= 2);
913 if (len < 2)
915 return 0;
917 else
919 /* Check that the magic number is a one-byte string */
920 if (0 == data[1])
922 if (isdigit((unsigned char)data[0]))
924 /* looks promising. */
925 *seclevel = (data[0] - '0');
927 if (*seclevel > 1)
929 /* Hmm, well it's probably an slocate database
930 * of some awsomely huge security level, like 2.
931 * We don't know how to handle those.
933 error(0, 0,
934 _("locate database %s looks like an slocate "
935 "database but it seems to have security level %c, "
936 "which GNU findutils does not currently support"),
937 quotearg_n_style(0, locale_quoting_style, filename),
938 data[1]);
939 return 1;
941 else
943 return 1;
946 else
948 /* Not a digit. */
949 return 0;
952 else
954 /* Definitely not slocate. */
955 return 0;
960 /* Print or count the entries in DBFILE that match shell globbing patterns in
961 ARGV. Return the number of entries matched. */
963 static unsigned long
964 search_one_database (int argc,
965 char **argv,
966 const char *dbfile,
967 FILE *fp,
968 off_t filesize,
969 int ignore_case,
970 int enable_print,
971 int basename_only,
972 int use_limit,
973 struct locate_limits *plimit,
974 int stats,
975 int op_and,
976 int regex,
977 int regex_options)
979 char *pathpart; /* A pattern to consider. */
980 int argn; /* Index to current pattern in argv. */
981 int nread; /* number of bytes read from an entry. */
982 struct process_data procdata; /* Storage for data shared with visitors. */
983 int slocate_seclevel;
984 struct visitor* pvis; /* temp for determining past_pat_inspector. */
985 const char *format_name;
986 enum ExistenceCheckType do_check_existence;
989 /* We may turn on existence checking for a given database.
990 * We ensure that we can return to the previous behaviour
991 * by using two variables, do_check_existence (which we act on)
992 * and check_existence (whcih indicates the default before we
993 * adjust it on the bassis of what kind of database we;re using
995 do_check_existence = check_existence;
998 if (ignore_case)
999 regex_options |= RE_ICASE;
1001 procdata.len = procdata.count = 0;
1002 procdata.slocatedb_format = 0;
1003 procdata.itemcount = 0;
1005 procdata.dbfile = dbfile;
1006 procdata.fp = fp;
1008 /* Set up the inspection regime */
1009 inspectors = NULL;
1010 lastinspector = NULL;
1011 past_pat_inspector = NULL;
1012 results_were_filtered = false;
1014 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
1015 procdata.original_filename = xmalloc (procdata.pathsize);
1018 nread = fread (procdata.original_filename, 1, SLOCATE_DB_MAGIC_LEN,
1019 procdata.fp);
1020 slocate_seclevel = 0;
1021 if (looking_at_slocate_locatedb(procdata.dbfile,
1022 procdata.original_filename,
1023 nread,
1024 &slocate_seclevel))
1026 error(0, 0,
1027 _("%s is an slocate database. "
1028 "Support for these is new, expect problems for now."),
1029 quotearg_n_style(0, locale_quoting_style, procdata.dbfile));
1031 /* slocate also uses frcode, but with a different header.
1032 * We handle the header here and then work with the data
1033 * in the normal way.
1035 if (slocate_seclevel > 1)
1037 /* We don't know what those security levels mean,
1038 * so do nothing further
1040 error(0, 0,
1041 _("%s is an slocate database of unsupported security level %d; skipping it."),
1042 quotearg_n_style(0, locale_quoting_style, procdata.dbfile),
1043 slocate_seclevel);
1044 return 0;
1046 else if (slocate_seclevel > 0)
1048 /* Don't show the filenames to the user if they don't exist.
1049 * Showing stats is safe since filenames are only counted
1050 * after the existence check
1052 if (ACCEPT_NON_EXISTING == check_existence)
1054 /* Do not allow the user to see a list of filenames that they
1055 * cannot stat().
1057 error(0, 0,
1058 _("You specified the -E option, but that option "
1059 "cannot be used with slocate-format databases "
1060 "with a non-zero security level. No results will be "
1061 "generated for this database.\n"));
1062 return 0;
1064 if (ACCEPT_EXISTING != do_check_existence)
1066 if (enable_print || stats)
1068 error(0, 0,
1069 _("%s is an slocate database. "
1070 "Turning on the '-e' option."),
1071 quotearg_n_style(0, locale_quoting_style, procdata.dbfile));
1073 do_check_existence = ACCEPT_EXISTING;
1076 add_visitor(visit_locate02_format, NULL);
1077 format_name = "slocate";
1078 procdata.slocatedb_format = 1;
1080 else
1082 int nread2;
1084 procdata.slocatedb_format = 0;
1085 nread2 = fread (procdata.original_filename+nread, 1, sizeof (LOCATEDB_MAGIC)-nread,
1086 procdata.fp);
1087 if (looking_at_gnu_locatedb(procdata.original_filename, nread+nread2))
1089 add_visitor(visit_locate02_format, NULL);
1090 format_name = "GNU LOCATE02";
1092 else /* Use the old format */
1094 int i;
1096 nread += nread2;
1097 /* Read the list of the most common bigrams in the database. */
1098 if (nread < 256)
1100 int more_read = fread (procdata.original_filename + nread, 1,
1101 256 - nread, procdata.fp);
1102 if ( (more_read + nread) != 256 )
1104 error(1, 0,
1105 _("Old-format locate database %s is "
1106 "too short to be valid"),
1107 quotearg_n_style(0, locale_quoting_style, dbfile));
1112 for (i = 0; i < 128; i++)
1114 procdata.bigram1[i] = procdata.original_filename[i << 1];
1115 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
1117 format_name = "old";
1118 add_visitor(visit_old_format, NULL);
1122 if (basename_only)
1123 add_visitor(visit_basename, NULL);
1125 /* Add an inspector for each pattern we're looking for. */
1126 for ( argn = 0; argn < argc; argn++ )
1128 results_were_filtered = true;
1129 pathpart = argv[argn];
1130 if (regex)
1132 struct regular_expression *p = xmalloc(sizeof(*p));
1133 const char *error_message = NULL;
1135 memset (&p->regex, 0, sizeof (p->regex));
1137 re_set_syntax(regex_options);
1138 p->regex.allocated = 100;
1139 p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
1140 p->regex.fastmap = NULL;
1141 p->regex.syntax = regex_options;
1142 p->regex.translate = NULL;
1144 error_message = re_compile_pattern (pathpart, strlen (pathpart),
1145 &p->regex);
1146 if (error_message)
1148 error (1, 0, "%s", error_message);
1150 else
1152 add_visitor(visit_regex, p);
1155 else if (contains_metacharacter(pathpart))
1157 if (ignore_case)
1158 add_visitor(visit_globmatch_casefold, pathpart);
1159 else
1160 add_visitor(visit_globmatch_nofold, pathpart);
1162 else
1164 /* No glob characters used. Hence we match on
1165 * _any part_ of the filename, not just the
1166 * basename. This seems odd to me, but it is the
1167 * traditional behaviour.
1168 * James Youngman <jay@gnu.org>
1170 if (ignore_case)
1171 add_visitor(visit_substring_match_casefold, pathpart);
1172 else
1173 add_visitor(visit_substring_match_nocasefold, pathpart);
1177 pvis = lastinspector;
1179 /* We add visit_existing_*() as late as possible to reduce the
1180 * number of stat() calls.
1182 switch (do_check_existence)
1184 case ACCEPT_EXISTING:
1185 results_were_filtered = true;
1186 if (follow_symlinks) /* -L, default */
1187 add_visitor(visit_existing_follow, NULL);
1188 else /* -P */
1189 add_visitor(visit_existing_nofollow, NULL);
1190 break;
1192 case ACCEPT_NON_EXISTING:
1193 results_were_filtered = true;
1194 if (follow_symlinks) /* -L, default */
1195 add_visitor(visit_non_existing_follow, NULL);
1196 else /* -P */
1197 add_visitor(visit_non_existing_nofollow, NULL);
1198 break;
1200 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1201 /* do nothing; no extra processing. */
1202 break;
1205 /* Security issue: The stats visitor must be added immediately
1206 * before the print visitor, because otherwise the -S option would
1207 * leak information about files that the caller cannot see.
1209 if (stats)
1210 add_visitor(visit_stats, &statistics);
1212 if (enable_print)
1214 if (print_quoted_filename)
1215 add_visitor(visit_justprint_quoted, NULL);
1216 else
1217 add_visitor(visit_justprint_unquoted, NULL);
1221 if (use_limit)
1222 add_visitor(visit_limit, plimit);
1223 else
1224 add_visitor(visit_count, plimit);
1227 if (argc > 1)
1229 past_pat_inspector = pvis->next;
1230 if (op_and)
1231 mainprocessor = process_and;
1232 else
1233 mainprocessor = process_or;
1235 else
1236 mainprocessor = process_simple;
1238 if (stats)
1240 printf(_("Database %s is in the %s format.\n"),
1241 procdata.dbfile,
1242 format_name);
1246 procdata.c = getc (procdata.fp);
1247 /* If we are searching for filename patterns, the inspector list
1248 * will contain an entry for each pattern for which we are searching.
1250 while ( (procdata.c != EOF) &&
1251 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1253 /* Do nothing; all the work is done in the visitor functions. */
1256 if (stats)
1258 if (filesize)
1259 print_stats(argc, filesize);
1262 if (ferror (procdata.fp))
1264 error (0, errno, "%s",
1265 quotearg_n_style(0, locale_quoting_style, procdata.dbfile));
1266 return 0;
1268 return plimit->items_accepted;
/* Version text for this program.  Only declared here (extern); it is
 * printed by the --version option handled in dolocate(). */
1274 extern char *version_string;
1276 /* The name this program was run with. */
/* dolocate() sets this from argv[0]; usage() prints it. */
1277 char *program_name;
/* Write the (translated) command-line synopsis to STREAM, substituting
 * the global program_name for the %s placeholder, and follow it with
 * the bug-reporting address.  Nothing is returned.  Callers in this
 * file pass stdout for --help and stderr for usage errors. */
1279 static void
1280 usage (FILE *stream)
1282 fprintf (stream, _("\
1283 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1284 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1285 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1286 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1287 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1288 [--max-database-age D] [--version] [--help]\n\
1289 pattern...\n"),
1290 program_name);
/* The bug-report address goes to the same stream as the synopsis. */
1291 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Codes used in the longopts table for long options that have no
 * single-letter form.  They start at CHAR_MAX + 1 so that they cannot
 * collide with any option letter, which is a plain char value. */
1293 enum
1295 REGEXTYPE_OPTION = CHAR_MAX + 1, /* --regextype=TYPE */
1296 MAX_DB_AGE /* --max-database-age D */
/* Long-option table handed to getopt_long() in dolocate().  Each entry
 * is { name, argument requirement, flag pointer, value returned }.  The
 * flag pointer is NULL throughout, so getopt_long() returns the value
 * in the last column, which dolocate() dispatches on in its switch.
 * Note that 'h' (--help) and 'v' (--version) do not appear in the
 * short-option string used by dolocate(), so they are reachable only
 * through their long names.  The all-zero entry terminates the table. */
1300 static struct option const longopts[] =
1302 {"database", required_argument, NULL, 'd'},
1303 {"existing", no_argument, NULL, 'e'},
1304 {"non-existing", no_argument, NULL, 'E'},
1305 {"ignore-case", no_argument, NULL, 'i'},
1306 {"all", no_argument, NULL, 'A'},
1307 {"help", no_argument, NULL, 'h'},
1308 {"version", no_argument, NULL, 'v'},
1309 {"null", no_argument, NULL, '0'},
1310 {"count", no_argument, NULL, 'c'},
1311 {"wholename", no_argument, NULL, 'w'},
1312 {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
1313 {"basename", no_argument, NULL, 'b'},
1314 {"print", no_argument, NULL, 'p'},
1315 {"stdio", no_argument, NULL, 's'},
1316 {"mmap", no_argument, NULL, 'm'},
1317 {"limit", required_argument, NULL, 'l'},
1318 {"regex", no_argument, NULL, 'r'},
1319 {"regextype", required_argument, NULL, REGEXTYPE_OPTION},
1320 {"statistics", no_argument, NULL, 'S'},
1321 {"follow", no_argument, NULL, 'L'},
1322 {"nofollow", no_argument, NULL, 'P'},
1323 {"max-database-age", required_argument, NULL, MAX_DB_AGE},
1324 {NULL, no_argument, NULL, 0}
1328 static int
1329 drop_privs(void)
1331 const char * what = "failed";
1332 const uid_t orig_euid = geteuid();
1333 const uid_t uid = getuid();
1334 const gid_t gid = getgid();
1336 /* Use of setgroups() is restrcted to root only. */
1337 if (0 == orig_euid)
1339 /* UID != 0, but EUID == 0. We're running setuid-root. */
1340 gid_t groups[1];
1341 groups[1] = getgid();
1342 if (0 != setgroups(1, groups))
1344 what = _("failed to drop group privileges");
1345 goto fail;
1349 /* Drop any setuid privileges */
1350 if (uid != orig_euid)
1352 if (0 == uid)
1354 /* We're really root anyway, but are setuid to something else. Leave it. */
1356 else
1358 errno = 0;
1359 if (0 != setuid(getuid()))
1361 what = _("failed to drop setuid privileges");
1362 goto fail;
1365 /* Defend against the case where the attacker runs us with the
1366 * capability to call setuid() turned off, which on some systems
1367 * will cause the above attempt to drop privileges fail (leaving us
1368 * privileged).
1370 else
1372 /* Check that we can no longer switch bask to root */
1373 if (0 == setuid(0))
1375 what = _("Failed to fully drop privileges");
1376 /* The errno value here is not interesting (since
1377 * the system call we are complaining about
1378 * succeeded when we wanted it to fail). Arrange
1379 * for the call to error() not to print the errno
1380 * value by setting errno=0.
1382 errno = 0;
1383 goto fail;
1389 /* Drop any setgid privileges */
1390 errno = 0;
1391 if (0 != setgid(gid))
1393 what = _("failed to drop setgid privileges");
1394 goto fail;
1397 /* success. */
1398 return 0;
1400 fail:
1401 error(1, errno, "%s",
1402 quotearg_n_style(0, locale_quoting_style, what));
1403 abort();
1404 kill(0, SIGKILL);
1405 _exit(1);
1406 /*NOTREACHED*/
1407 /* ... we hope. */
1408 for (;;)
1410 /* deliberate infinite loop */
/* Open the database file NAME read-only (with O_LARGEFILE where that
 * flag exists) and mark the descriptor close-on-exec.
 *
 * Returns the open file descriptor, or -1 on failure.  On failure
 * errno holds the error from the call that failed (open or fcntl);
 * callers in this file pass errno straight to error().
 */
static int
opendb(const char *name)
{
  int fd = open(name, O_RDONLY
#if defined(O_LARGEFILE)
                |O_LARGEFILE
#endif
                );
  if (fd >= 0)
    {
      /* Make sure it won't survive an exec */
      if (0 != fcntl(fd, F_SETFD, FD_CLOEXEC))
        {
          /* close() is allowed to change errno; keep the value set by
           * fcntl() so the caller reports the real cause.
           */
          const int saved_errno = errno;
          close(fd);
          errno = saved_errno;
          fd = -1;
        }
    }
  return fd;
}
1435 dolocate (int argc, char **argv, int secure_db_fd)
1437 char *dbpath;
1438 unsigned long int found = 0uL;
1439 int optc;
1440 int ignore_case = 0;
1441 int print = 0;
1442 int just_count = 0;
1443 int basename_only = 0;
1444 int use_limit = 0;
1445 int regex = 0;
1446 int regex_options = RE_SYNTAX_EMACS;
1447 int stats = 0;
1448 int op_and = 0;
1449 const char *e;
1450 FILE *fp;
1451 int they_chose_db = 0;
1452 bool did_stdin = false; /* Set to prevent rereading stdin. */
1454 program_name = argv[0];
1456 #ifdef HAVE_SETLOCALE
1457 setlocale (LC_ALL, "");
1458 #endif
1459 bindtextdomain (PACKAGE, LOCALEDIR);
1460 textdomain (PACKAGE);
1461 atexit (close_stdout);
1463 limits.limit = 0;
1464 limits.items_accepted = 0;
1466 quote_opts = clone_quoting_options (NULL);
1467 print_quoted_filename = true;
1469 /* We cannot simultaneously trust $LOCATE_PATH and use the
1470 * setuid-access-controlled database,, since that could cause a leak
1471 * of private data.
1473 dbpath = getenv ("LOCATE_PATH");
1474 if (dbpath)
1476 they_chose_db = 1;
1479 check_existence = ACCEPT_EITHER;
1481 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1482 switch (optc)
1484 case '0':
1485 separator = 0;
1486 print_quoted_filename = false; /* print filename 'raw'. */
1487 break;
1489 case 'A':
1490 op_and = 1;
1491 break;
1493 case 'b':
1494 basename_only = 1;
1495 break;
1497 case 'c':
1498 just_count = 1;
1499 break;
1501 case 'd':
1502 dbpath = optarg;
1503 they_chose_db = 1;
1504 break;
1506 case 'e':
1507 check_existence = ACCEPT_EXISTING;
1508 break;
1510 case 'E':
1511 check_existence = ACCEPT_NON_EXISTING;
1512 break;
1514 case 'i':
1515 ignore_case = 1;
1516 break;
1518 case 'h':
1519 usage (stdout);
1520 return 0;
1522 case MAX_DB_AGE:
1523 /* XXX: nothing in the test suite for this option. */
1524 set_max_db_age(optarg);
1525 break;
1527 case 'p':
1528 print = 1;
1529 break;
1531 case 'v':
1532 printf (_("GNU locate version %s\n"), version_string);
1533 printf (_("Built using GNU gnulib version %s\n"), gnulib_version);
1534 return 0;
1536 case 'w':
1537 basename_only = 0;
1538 break;
1540 case 'r':
1541 regex = 1;
1542 break;
1544 case REGEXTYPE_OPTION:
1545 regex_options = get_regex_type(optarg);
1546 break;
1548 case 'S':
1549 stats = 1;
1550 break;
1552 case 'L':
1553 follow_symlinks = 1;
1554 break;
1556 /* In find, -P and -H differ in the way they handle paths
1557 * given on the command line. This is not relevant for
1558 * locate, but the -H option is supported because it is
1559 * probably more intuitive to do so.
1561 case 'P':
1562 case 'H':
1563 follow_symlinks = 0;
1564 break;
1566 case 'l':
1568 char *end = optarg;
1569 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1570 if (LONGINT_OK != err)
1572 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1574 use_limit = 1;
1576 break;
1578 case 's': /* use stdio */
1579 case 'm': /* use mmap */
1580 /* These options are implemented simply for
1581 * compatibility with FreeBSD
1583 break;
1585 default:
1586 usage (stderr);
1587 return 1;
1591 /* If the user gave the -d option or set LOCATE_PATH,
1592 * relinquish access to the secure database.
1594 if (they_chose_db)
1596 if (secure_db_fd >= 0)
1598 close(secure_db_fd);
1599 secure_db_fd = -1;
1603 if (!just_count && !stats)
1604 print = 1;
1606 if (stats)
1608 if (optind == argc)
1609 use_limit = 0;
1611 else
1613 if (!just_count && optind == argc)
1615 usage (stderr);
1616 return 1;
1621 if (1 == isatty(STDOUT_FILENO))
1622 stdout_is_a_tty = true;
1623 else
1624 stdout_is_a_tty = false;
1626 if (they_chose_db)
1627 next_element (dbpath, 0); /* Initialize. */
1629 /* Bail out early if limit already reached. */
1630 while (!use_limit || limits.limit > limits.items_accepted)
1632 struct stat st;
1633 int fd;
1634 off_t filesize;
1636 statistics.compressed_bytes =
1637 statistics.total_filename_count =
1638 statistics.total_filename_length =
1639 statistics.whitespace_count =
1640 statistics.newline_count =
1641 statistics.highbit_filename_count = 0u;
1643 if (they_chose_db)
1645 /* Take the next element from the list of databases */
1646 e = next_element ((char *) NULL, 0);
1647 if (NULL == e)
1648 break;
1650 if (0 == strcmp (e, "-"))
1652 if (did_stdin)
1654 error (0, 0,
1655 _("warning: the locate database can only be read from stdin once."));
1656 return 0;
1658 else
1660 e = "<stdin>";
1661 fd = 0;
1662 did_stdin = true;
1665 else
1667 if (0 == strlen(e) || 0 == strcmp(e, "."))
1669 e = LOCATE_DB;
1672 /* open the database */
1673 fd = opendb(e);
1674 if (fd < 0)
1676 error (0, errno, "%s",
1677 quotearg_n_style(0, locale_quoting_style, e));
1678 return 0;
1682 else
1684 if (-1 == secure_db_fd)
1686 /* Already searched the database, it's time to exit the loop */
1687 break;
1689 else
1691 e = selected_secure_db;
1692 fd = secure_db_fd;
1693 secure_db_fd = -1;
1697 /* Check the database to see if it is old. */
1698 if (fstat(fd, &st))
1700 error (0, errno, "%s",
1701 quotearg_n_style(0, locale_quoting_style, e));
1702 /* continue anyway */
1703 filesize = (off_t)0;
1705 else
1707 time_t now;
1709 filesize = st.st_size;
1711 if ((time_t)-1 == time(&now))
1713 /* If we can't tell the time, we don't know how old the
1714 * database is. But since the message is just advisory,
1715 * we continue anyway.
1717 error (0, errno, "time system call");
1719 else
1721 double age = difftime(now, st.st_mtime);
1722 double warn_seconds = SECONDS_PER_UNIT * warn_number_units;
1723 if (age > warn_seconds)
1725 /* For example:
1726 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1727 error (0, 0,
1728 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1729 quotearg_n_style(0, locale_quoting_style, e),
1730 warn_number_units, _(warn_name_units),
1731 (age/(double)SECONDS_PER_UNIT), _(warn_name_units));
1736 fp = fdopen(fd, "r");
1737 if (NULL == fp)
1739 error (0, errno, "%s",
1740 quotearg_n_style(0, locale_quoting_style, e));
1741 return 0;
1744 /* Search this database for all patterns simultaneously */
1745 found = search_one_database (argc - optind, &argv[optind],
1746 e, fp, filesize,
1747 ignore_case, print, basename_only,
1748 use_limit, &limits, stats,
1749 op_and, regex, regex_options);
1751 /* Close the databsase (even if it is stdin) */
1752 if (fclose (fp) == EOF)
1754 error (0, errno, "%s",
1755 quotearg_n_style(0, locale_quoting_style, e));
1756 return 0;
1760 if (just_count)
1762 printf("%ld\n", found);
1765 if (found || (use_limit && (limits.limit==0)) || stats )
1766 return 0;
1767 else
1768 return 1;
1771 #define ARRAYSIZE(a) (sizeof(a)/sizeof(a[0]))
1772 static int
1773 open_secure_db(void)
1775 int fd, i;
1777 const char * secure_db_list[] =
1779 LOCATE_DB,
1780 "/var/lib/slocate/slocate.db",
1781 NULL
1783 for (i=0; secure_db_list[i]; ++i)
1785 fd = opendb(secure_db_list[i]);
1786 if (fd >= 0)
1788 selected_secure_db = secure_db_list[i];
1789 return fd;
1792 return -1;
/* Program entry point.  The default database is opened first, before
 * drop_privs() is called, and the resulting descriptor (or -1) is
 * passed to dolocate(), whose return value becomes the exit status.
 */
int
main (int argc, char **argv)
{
  const int secure_fd = open_secure_db();

  drop_privs();

  return dolocate(argc, argv, secure_fd);
}