Fix Savannah bug# 19981: don't call setgroups if it is not available
[findutils.git] / locate / locate.c
blob1b4e6d482f8093e2dabba0814539de75efe21eff
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <signal.h>
66 #include <ctype.h>
67 #include <sys/types.h>
68 #include <grp.h> /* for setgroups() */
69 #include <sys/stat.h>
70 #include <time.h>
71 #include <fnmatch.h>
72 #include <getopt.h>
73 #include <xstrtol.h>
75 /* The presence of unistd.h is assumed by gnulib these days, so we
76 * might as well assume it too.
78 /* We need <unistd.h> for isatty(). */
79 #include <unistd.h>
81 #if HAVE_FCNTL_H
82 /* We use fcntl() */
83 #include <fcntl.h>
84 #endif
86 #define NDEBUG
87 #include <assert.h>
88 #include <string.h>
91 #ifdef STDC_HEADERS
92 #include <stdlib.h>
93 #endif
95 #ifdef HAVE_ERRNO_H
96 #include <errno.h>
97 #else
98 extern int errno;
99 #endif
101 #ifdef HAVE_LOCALE_H
102 #include <locale.h>
103 #endif
105 #if ENABLE_NLS
106 # include <libintl.h>
107 # define _(Text) gettext (Text)
108 #else
109 # define _(Text) Text
110 #define textdomain(Domain)
111 #define bindtextdomain(Package, Directory)
112 #endif
113 #ifdef gettext_noop
114 # define N_(String) gettext_noop (String)
115 #else
116 /* We used to use (String) instead of just String, but apparently ISO C
117 * doesn't allow this (at least, that's what HP said when someone reported
118 * this as a compiler bug). This is HP case number 1205608192. See
119 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
120 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
121 * like: static const char buf[] = ("string");
123 # define N_(String) String
124 #endif
126 #include "locatedb.h"
127 #include <getline.h>
128 #include "../gnulib/lib/xalloc.h"
129 #include "../gnulib/lib/error.h"
130 #include "../gnulib/lib/human.h"
131 #include "dirname.h"
132 #include "closeout.h"
133 #include "nextelem.h"
134 #include "regex.h"
135 #include "quote.h"
136 #include "quotearg.h"
137 #include "printquoted.h"
138 #include "regextype.h"
139 #include "gnulib-version.h"
141 /* Note that this evaluates Ch many times. */
142 #ifdef _LIBC
143 # define TOUPPER(Ch) toupper (Ch)
144 # define TOLOWER(Ch) tolower (Ch)
145 #else
146 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
147 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
148 #endif
150 /* typedef enum {false, true} boolean; */
152 /* Warn if a database is older than this. 8 days allows for a weekly
153 update that takes up to a day to perform. */
154 static unsigned int warn_number_units = 8;
156 /* Printable name of units used in WARN_SECONDS */
157 static const char warn_name_units[] = N_("days");
158 #define SECONDS_PER_UNIT (60 * 60 * 24)
/* Verdicts returned by visitor functions.  Each value is a distinct
   bit so that a result can be tested against a mask of outcomes.  */
enum visit_result
  {
    VISIT_CONTINUE = 1 << 0,	/* please call the next visitor */
    VISIT_ACCEPTED = 1 << 1,	/* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 1 << 2,	/* rejected, process next file. */
    VISIT_ABORT    = 1 << 3	/* rejected, process no more files. */
  };
/* Which database entries to keep, depending on whether the named file
   is present on disk at search time.  */
enum ExistenceCheckType
  {
    ACCEPT_EITHER = 0,		/* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING = 1,	/* Corresponds to option -e */
    ACCEPT_NON_EXISTING = 2	/* Corresponds to option -E */
  };
175 /* Check for existence of files before printing them out? */
176 enum ExistenceCheckType check_existence = ACCEPT_EITHER;
178 static int follow_symlinks = 1;
180 /* What to separate the results with. */
181 static int separator = '\n';
183 static struct quoting_options * quote_opts = NULL;
184 static bool stdout_is_a_tty;
185 static bool print_quoted_filename;
186 static bool results_were_filtered;
188 static const char *selected_secure_db = NULL;
191 /* Change the number of days old the database can be
192 * before we complain about it.
194 static void
195 set_max_db_age(const char *s)
197 char *end;
198 unsigned long int val;
199 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
201 if (0 == *s)
203 error(1, 0,
204 _("The argument argument for option --max-database-age must not be empty"));
208 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
209 * we would not be able to tell if that is the correct answer, or whether it
210 * signifies an error.
212 errno = 0;
213 val = strtoul(s, &end, 10);
215 /* Diagnose number too large, non-numbes and trailing junk. */
216 if ((ULONG_MAX == val && ERANGE == errno) ||
217 (0 == val && EINVAL == errno))
219 error(1, errno,
220 _("Invalid argument %s for option --max-database-age"),
221 quotearg_n_style(0, locale_quoting_style, s));
223 else if (*end)
225 /* errno wasn't set, don't print its message */
226 error(1, 0,
227 _("Invalid argument %s for option --max-database-age"),
228 quotearg_n_style(0, locale_quoting_style, s));
230 else
232 warn_number_units = val;
/* Read in a 16-bit int, high byte first (network byte order), and
   return it as a signed two's-complement value.

   End-of-file is not reported: if either fgetc() fails, the missing
   byte is treated as 0xff.  Callers that need to detect truncation
   should check feof()/ferror() on FP afterwards.

   The value is assembled in unsigned arithmetic, because left-shifting
   a negative signed value is undefined behaviour.  */
static short
get_short (FILE *fp)
{
  const unsigned int hi = (unsigned int) fgetc (fp) & 0xffu;
  const unsigned int lo = (unsigned int) fgetc (fp) & 0xffu;
  const long value = (long) ((hi << 8) | lo);

  /* Map 0x8000..0xffff onto -32768..-1. */
  return (short) (value >= 0x8000L ? value - 0x10000L : value);
}
/* The characters that make a pattern a shell glob. */
const char * const metacharacters = "*?[]\\";

/* Return 1 if S contains any shell glob character, otherwise 0. */
static int
contains_metacharacter (const char *s)
{
  return strpbrk (s, metacharacters) != NULL;
}
264 /* locate_read_str()
266 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
267 * until we reach DELIMITER or end-of-file. We reallocate the buffer
268 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
269 * is made regarding the content of the data (i.e. the implementation is
270 * 8-bit clean, the only delimiter is DELIMITER).
272 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
273 * has been removed from gnulib.
275 * We call the function locate_read_str() to avoid a name clash with the curses
276 * function getstr().
278 static int
279 locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
281 char * p = NULL;
282 size_t sz = 0;
283 int nread;
284 size_t needed;
286 nread = getdelim(&p, &sz, delimiter, fp);
287 if (nread >= 0)
289 assert(p != NULL);
291 needed = offs + nread + 1u;
292 if (needed > (*siz))
294 char *pnew = realloc(*buf, needed);
295 if (NULL == pnew)
297 return -1; /* FAIL */
299 else
301 *siz = needed;
302 *buf = pnew;
305 memcpy((*buf)+offs, p, nread);
306 free(p);
308 return nread;
/* Bookkeeping for --limit and for counting results. */
struct locate_limits
{
  uintmax_t limit;		/* visit_limit aborts once items_accepted reaches this */
  uintmax_t items_accepted;	/* incremented by visit_limit and visit_count */
};

static struct locate_limits limits;
/* Counters accumulated by visit_stats and reported by print_stats. */
struct locate_stats
{
  uintmax_t compressed_bytes;
  uintmax_t total_filename_count;	/* entries seen */
  uintmax_t total_filename_length;	/* sum of their strlen() */
  uintmax_t whitespace_count;		/* entries containing whitespace */
  uintmax_t newline_count;		/* entries containing '\n' */
  uintmax_t highbit_filename_count;	/* entries with a byte >= 0x80 */
};

static struct locate_stats statistics;
332 struct regular_expression
334 struct re_pattern_buffer regex; /* for --regex */
/* State shared between the database reader and the visitor functions
   while one database is being scanned.  */
struct process_data
{
  int c;			/* An input byte. */
  char itemcount;		/* Indicates we're at the beginning of an slocate db. */
  int count;			/* The length of the prefix shared with the previous database entry. */
  int len;			/* Set by the LOCATE02 decoder to count plus the bytes just read. */
  char *original_filename;	/* The current input database entry. */
  size_t pathsize;		/* Amount allocated for it. */
  char *munged_filename;	/* path or base_name(path) */
  FILE *fp;			/* The pathname database. */
  const char *dbfile;		/* Its name, or "<stdin>" */
  int slocatedb_format;		/* Allows us to cope with slocate's format variant */

  /* For the old database format, the first and second characters of
     the most common bigrams.  */
  char bigram1[128];
  char bigram2[128];
};
/* A visitor inspects the current entry in PROCDATA and returns one of
   the enum visit_result values.  CONTEXT is the pointer that was
   registered along with the function.  */
typedef int (*visitfunc) (struct process_data *procdata,
			  void *context);

/* One node of the singly-linked chain of visitors. */
struct visitor
{
  visitfunc inspector;
  void *context;
  struct visitor *next;
};

/* Head of the chain, its last node, and the first node that follows
   the per-pattern matchers.  */
static struct visitor *inspectors = NULL;
static struct visitor *lastinspector = NULL;
static struct visitor *past_pat_inspector = NULL;
372 static inline int visit(const struct visitor *p,
373 int accept_flags,
374 struct process_data *procdata,
375 const struct visitor * const stop)
377 register int result = accept_flags;
378 while ( (accept_flags & result) && (stop != p) )
380 result = (p->inspector)(procdata, p->context);
381 p = p->next;
383 return result;
386 /* 0 or 1 pattern(s) */
387 static int
388 process_simple(struct process_data *procdata)
390 return visit(inspectors, (VISIT_CONTINUE|VISIT_ACCEPTED), procdata, NULL);
393 /* Accept if any pattern matches. */
394 static int
395 process_or (struct process_data *procdata)
397 int result;
399 result = visit(inspectors, (VISIT_CONTINUE|VISIT_REJECTED), procdata, past_pat_inspector);
400 if (result == VISIT_CONTINUE)
401 result = VISIT_REJECTED;
402 if (result & (VISIT_ABORT | VISIT_REJECTED))
403 return result;
405 result = visit(past_pat_inspector, VISIT_CONTINUE, procdata, NULL);
406 if (VISIT_CONTINUE == result)
407 return VISIT_ACCEPTED;
408 else
409 return result;
412 /* Accept if all pattern match. */
413 static int
414 process_and (struct process_data *procdata)
416 int result;
418 result = visit(inspectors, (VISIT_CONTINUE|VISIT_ACCEPTED), procdata, past_pat_inspector);
419 if (result == VISIT_CONTINUE)
420 result = VISIT_REJECTED;
421 if (result & (VISIT_ABORT | VISIT_REJECTED))
422 return result;
424 result = visit(past_pat_inspector, VISIT_CONTINUE, procdata, NULL);
425 if (VISIT_CONTINUE == result)
426 return VISIT_ACCEPTED;
427 else
428 return result;
/* Signature shared by process_simple, process_or and process_and. */
typedef int (*processfunc) (struct process_data *procdata);

/* The processing strategy selected for the database being searched. */
static processfunc mainprocessor = NULL;
435 static void
436 add_visitor(visitfunc fn, void *context)
438 struct visitor *p = xmalloc(sizeof(struct visitor));
439 p->inspector = fn;
440 p->context = context;
441 p->next = NULL;
443 if (NULL == lastinspector)
445 lastinspector = inspectors = p;
447 else
449 lastinspector->next = p;
450 lastinspector = p;
456 static int
457 visit_justprint_quoted(struct process_data *procdata, void *context)
459 (void) context;
460 print_quoted (stdout, quote_opts, stdout_is_a_tty,
461 "%s",
462 procdata->original_filename);
463 putchar(separator);
464 return VISIT_CONTINUE;
467 static int
468 visit_justprint_unquoted(struct process_data *procdata, void *context)
470 (void) context;
471 fputs(procdata->original_filename, stdout);
472 putchar(separator);
473 return VISIT_CONTINUE;
476 static int
477 visit_old_format(struct process_data *procdata, void *context)
479 register char *s;
480 (void) context;
482 /* Get the offset in the path where this path info starts. */
483 if (procdata->c == LOCATEDB_OLD_ESCAPE)
484 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
485 else
486 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
488 /* Overlay the old path with the remainder of the new. */
489 for (s = procdata->original_filename + procdata->count;
490 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
491 if (procdata->c < 0200)
492 *s++ = procdata->c; /* An ordinary character. */
493 else
495 /* Bigram markers have the high bit set. */
496 procdata->c &= 0177;
497 *s++ = procdata->bigram1[procdata->c];
498 *s++ = procdata->bigram2[procdata->c];
500 *s-- = '\0';
502 procdata->munged_filename = procdata->original_filename;
504 return VISIT_CONTINUE;
508 static int
509 visit_locate02_format(struct process_data *procdata, void *context)
511 register char *s;
512 int nread;
513 (void) context;
515 if (procdata->slocatedb_format)
517 if (procdata->itemcount == 0)
519 ungetc(procdata->c, procdata->fp);
520 procdata->count = 0;
521 procdata->len = 0;
523 else if (procdata->itemcount == 1)
525 procdata->count = procdata->len-1;
527 else
529 if (procdata->c == LOCATEDB_ESCAPE)
530 procdata->count += (short)get_short (procdata->fp);
531 else if (procdata->c > 127)
532 procdata->count += procdata->c - 256;
533 else
534 procdata->count += procdata->c;
537 else
539 if (procdata->c == LOCATEDB_ESCAPE)
540 procdata->count += (short)get_short (procdata->fp);
541 else if (procdata->c > 127)
542 procdata->count += procdata->c - 256;
543 else
544 procdata->count += procdata->c;
547 if (procdata->count > procdata->len || procdata->count < 0)
549 /* This should not happen generally , but since we're
550 * reading in data which is outside our control, we
551 * cannot prevent it.
553 error(1, 0, _("locate database %s is corrupt or invalid"),
554 quotearg_n_style(0, locale_quoting_style, procdata->dbfile));
557 /* Overlay the old path with the remainder of the new. */
558 nread = locate_read_str (&procdata->original_filename,
559 &procdata->pathsize,
560 procdata->fp, 0, procdata->count);
561 if (nread < 0)
562 return VISIT_ABORT;
563 procdata->c = getc (procdata->fp);
564 procdata->len = procdata->count + nread;
565 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
566 assert (s[0] != '\0');
567 assert (s[1] == '\0'); /* Our terminator. */
568 assert (s[2] == '\0'); /* Added by locate_read_str. */
570 procdata->munged_filename = procdata->original_filename;
572 if (procdata->slocatedb_format)
574 /* Don't increment indefinitely, it might overflow. */
575 if (procdata->itemcount < 6)
577 ++(procdata->itemcount);
582 return VISIT_CONTINUE;
585 static int
586 visit_basename(struct process_data *procdata, void *context)
588 (void) context;
589 procdata->munged_filename = base_name(procdata->original_filename);
591 return VISIT_CONTINUE;
595 /* visit_existing_follow implements -L -e */
596 static int
597 visit_existing_follow(struct process_data *procdata, void *context)
599 struct stat st;
600 (void) context;
602 /* munged_filename has been converted in some way (to lower case,
603 * or is just the base name of the file), and original_filename has not.
604 * Hence only original_filename is still actually the name of the file
605 * whose existence we would need to check.
607 if (stat(procdata->original_filename, &st) != 0)
609 return VISIT_REJECTED;
611 else
613 return VISIT_CONTINUE;
617 /* visit_non_existing_follow implements -L -E */
618 static int
619 visit_non_existing_follow(struct process_data *procdata, void *context)
621 struct stat st;
622 (void) context;
624 /* munged_filename has been converted in some way (to lower case,
625 * or is just the base name of the file), and original_filename has not.
626 * Hence only original_filename is still actually the name of the file
627 * whose existence we would need to check.
629 if (stat(procdata->original_filename, &st) == 0)
631 return VISIT_REJECTED;
633 else
635 return VISIT_CONTINUE;
639 /* visit_existing_nofollow implements -P -e */
640 static int
641 visit_existing_nofollow(struct process_data *procdata, void *context)
643 struct stat st;
644 (void) context;
646 /* munged_filename has been converted in some way (to lower case,
647 * or is just the base name of the file), and original_filename has not.
648 * Hence only original_filename is still actually the name of the file
649 * whose existence we would need to check.
651 if (lstat(procdata->original_filename, &st) != 0)
653 return VISIT_REJECTED;
655 else
657 return VISIT_CONTINUE;
661 /* visit_non_existing_nofollow implements -P -E */
662 static int
663 visit_non_existing_nofollow(struct process_data *procdata, void *context)
665 struct stat st;
666 (void) context;
668 /* munged_filename has been converted in some way (to lower case,
669 * or is just the base name of the file), and original_filename has not.
670 * Hence only original_filename is still actually the name of the file
671 * whose existence we would need to check.
673 if (lstat(procdata->original_filename, &st) == 0)
675 return VISIT_REJECTED;
677 else
679 return VISIT_CONTINUE;
683 static int
684 visit_substring_match_nocasefold_wide(struct process_data *procdata, void *context)
686 const char *pattern = context;
688 if (NULL != mbsstr(procdata->munged_filename, pattern))
689 return VISIT_ACCEPTED;
690 else
691 return VISIT_REJECTED;
694 static int
695 visit_substring_match_nocasefold_narrow(struct process_data *procdata, void *context)
697 const char *pattern = context;
698 assert(MB_CUR_MAX == 1);
699 if (NULL != strstr(procdata->munged_filename, pattern))
700 return VISIT_ACCEPTED;
701 else
702 return VISIT_REJECTED;
705 static int
706 visit_substring_match_casefold_wide(struct process_data *procdata, void *context)
708 const char *pattern = context;
710 if (NULL != mbscasestr(procdata->munged_filename, pattern))
711 return VISIT_ACCEPTED;
712 else
713 return VISIT_REJECTED;
717 static int
718 visit_substring_match_casefold_narrow(struct process_data *procdata, void *context)
720 const char *pattern = context;
722 assert(MB_CUR_MAX == 1);
723 if (NULL != strcasestr(procdata->munged_filename, pattern))
724 return VISIT_ACCEPTED;
725 else
726 return VISIT_REJECTED;
730 static int
731 visit_globmatch_nofold(struct process_data *procdata, void *context)
733 const char *glob = context;
734 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
735 return VISIT_REJECTED;
736 else
737 return VISIT_ACCEPTED;
741 static int
742 visit_globmatch_casefold(struct process_data *procdata, void *context)
744 const char *glob = context;
745 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
746 return VISIT_REJECTED;
747 else
748 return VISIT_ACCEPTED;
752 static int
753 visit_regex(struct process_data *procdata, void *context)
755 struct regular_expression *p = context;
756 const size_t len = strlen(procdata->munged_filename);
758 int rv = re_search (&p->regex, procdata->munged_filename,
759 len, 0, len,
760 (struct re_registers *) NULL);
761 if (rv < 0)
763 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
765 else
767 return VISIT_ACCEPTED; /* match */
772 static int
773 visit_stats(struct process_data *procdata, void *context)
775 struct locate_stats *p = context;
776 size_t len = strlen(procdata->original_filename);
777 const char *s;
778 int highbit, whitespace, newline;
780 ++(p->total_filename_count);
781 p->total_filename_length += len;
783 highbit = whitespace = newline = 0;
784 for (s=procdata->original_filename; *s; ++s)
786 if ( (int)(*s) & 128 )
787 highbit = 1;
788 if ('\n' == *s)
790 newline = whitespace = 1;
792 else if (isspace((unsigned char)*s))
794 whitespace = 1;
798 if (highbit)
799 ++(p->highbit_filename_count);
800 if (whitespace)
801 ++(p->whitespace_count);
802 if (newline)
803 ++(p->newline_count);
805 return VISIT_CONTINUE;
809 static int
810 visit_limit(struct process_data *procdata, void *context)
812 struct locate_limits *p = context;
814 (void) procdata;
816 if (++p->items_accepted >= p->limit)
817 return VISIT_ABORT;
818 else
819 return VISIT_CONTINUE;
822 static int
823 visit_count(struct process_data *procdata, void *context)
825 struct locate_limits *p = context;
827 (void) procdata;
829 ++p->items_accepted;
830 return VISIT_CONTINUE;
833 /* Emit the statistics.
835 static void
836 print_stats(int argc, size_t database_file_size)
838 char hbuf[LONGEST_HUMAN_READABLE + 1];
840 printf(_("Locate database size: %s bytes\n"),
841 human_readable ((uintmax_t) database_file_size,
842 hbuf, human_ceiling, 1, 1));
844 printf( (results_were_filtered ?
845 _("Matching Filenames: %s ") :
846 _("All Filenames: %s ")),
847 human_readable (statistics.total_filename_count,
848 hbuf, human_ceiling, 1, 1));
849 printf(_("with a cumulative length of %s bytes"),
850 human_readable (statistics.total_filename_length,
851 hbuf, human_ceiling, 1, 1));
853 printf(_("\n\tof which %s contain whitespace, "),
854 human_readable (statistics.whitespace_count,
855 hbuf, human_ceiling, 1, 1));
856 printf(_("\n\t%s contain newline characters, "),
857 human_readable (statistics.newline_count,
858 hbuf, human_ceiling, 1, 1));
859 printf(_("\n\tand %s contain characters with the high bit set.\n"),
860 human_readable (statistics.highbit_filename_count,
861 hbuf, human_ceiling, 1, 1));
863 if (!argc)
865 if (results_were_filtered)
867 printf(_("Some filenames may have been filtered out, "
868 "so we cannot compute the compression ratio.\n"));
870 else
872 if (statistics.total_filename_length)
874 printf(_("Compression ratio %4.2f%% (higher is better)\n"),
875 100.0 * ((double)statistics.total_filename_length
876 - (double) database_file_size)
877 / (double) statistics.total_filename_length);
879 else
881 printf(_("Compression ratio is undefined\n"));
885 printf("\n");
889 * Return nonzero if the data we read in indicates that we are
890 * looking at a LOCATE02 locate database.
892 static int
893 looking_at_gnu_locatedb (const char *data, size_t len)
895 if (len < sizeof (LOCATEDB_MAGIC))
896 return 0;
897 else if (0 == memcmp (data, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
898 return 1; /* We saw the magic byte sequence */
899 else
900 return 0;
904 * Return nonzero if the data we read in indicates that we are
905 * looking at an slocate database.
907 static int
908 looking_at_slocate_locatedb (const char *filename,
909 const char *data,
910 size_t len,
911 int *seclevel)
913 assert(len <= 2);
915 if (len < 2)
917 return 0;
919 else
921 /* Check that the magic number is a one-byte string */
922 if (0 == data[1])
924 if (isdigit((unsigned char)data[0]))
926 /* looks promising. */
927 *seclevel = (data[0] - '0');
929 if (*seclevel > 1)
931 /* Hmm, well it's probably an slocate database
932 * of some awsomely huge security level, like 2.
933 * We don't know how to handle those.
935 error(0, 0,
936 _("locate database %s looks like an slocate "
937 "database but it seems to have security level %c, "
938 "which GNU findutils does not currently support"),
939 quotearg_n_style(0, locale_quoting_style, filename),
940 data[1]);
941 return 1;
943 else
945 return 1;
948 else
950 /* Not a digit. */
951 return 0;
954 else
956 /* Definitely not slocate. */
957 return 0;
962 /* Print or count the entries in DBFILE that match shell globbing patterns in
963 ARGV. Return the number of entries matched. */
965 static unsigned long
966 search_one_database (int argc,
967 char **argv,
968 const char *dbfile,
969 FILE *fp,
970 off_t filesize,
971 int ignore_case,
972 int enable_print,
973 int basename_only,
974 int use_limit,
975 struct locate_limits *plimit,
976 int stats,
977 int op_and,
978 int regex,
979 int regex_options)
981 char *pathpart; /* A pattern to consider. */
982 int argn; /* Index to current pattern in argv. */
983 int nread; /* number of bytes read from an entry. */
984 struct process_data procdata; /* Storage for data shared with visitors. */
985 int slocate_seclevel;
986 struct visitor* pvis; /* temp for determining past_pat_inspector. */
987 const char *format_name;
988 enum ExistenceCheckType do_check_existence;
991 /* We may turn on existence checking for a given database.
992 * We ensure that we can return to the previous behaviour
993 * by using two variables, do_check_existence (which we act on)
994 * and check_existence (whcih indicates the default before we
995 * adjust it on the bassis of what kind of database we;re using
997 do_check_existence = check_existence;
1000 if (ignore_case)
1001 regex_options |= RE_ICASE;
1003 procdata.len = procdata.count = 0;
1004 procdata.slocatedb_format = 0;
1005 procdata.itemcount = 0;
1007 procdata.dbfile = dbfile;
1008 procdata.fp = fp;
1010 /* Set up the inspection regime */
1011 inspectors = NULL;
1012 lastinspector = NULL;
1013 past_pat_inspector = NULL;
1014 results_were_filtered = false;
1015 #if 0
1016 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
1017 #else
1018 procdata.pathsize = 128; /* Increased as necessary by locate_read_str. */
1019 #endif
1020 procdata.original_filename = xmalloc (procdata.pathsize);
1023 nread = fread (procdata.original_filename, 1, SLOCATE_DB_MAGIC_LEN,
1024 procdata.fp);
1025 slocate_seclevel = 0;
1026 if (looking_at_slocate_locatedb(procdata.dbfile,
1027 procdata.original_filename,
1028 nread,
1029 &slocate_seclevel))
1031 error(0, 0,
1032 _("%s is an slocate database. "
1033 "Support for these is new, expect problems for now."),
1034 quotearg_n_style(0, locale_quoting_style, procdata.dbfile));
1036 /* slocate also uses frcode, but with a different header.
1037 * We handle the header here and then work with the data
1038 * in the normal way.
1040 if (slocate_seclevel > 1)
1042 /* We don't know what those security levels mean,
1043 * so do nothing further
1045 error(0, 0,
1046 _("%s is an slocate database of unsupported security level %d; skipping it."),
1047 quotearg_n_style(0, locale_quoting_style, procdata.dbfile),
1048 slocate_seclevel);
1049 return 0;
1051 else if (slocate_seclevel > 0)
1053 /* Don't show the filenames to the user if they don't exist.
1054 * Showing stats is safe since filenames are only counted
1055 * after the existence check
1057 if (ACCEPT_NON_EXISTING == check_existence)
1059 /* Do not allow the user to see a list of filenames that they
1060 * cannot stat().
1062 error(0, 0,
1063 _("You specified the -E option, but that option "
1064 "cannot be used with slocate-format databases "
1065 "with a non-zero security level. No results will be "
1066 "generated for this database.\n"));
1067 return 0;
1069 if (ACCEPT_EXISTING != do_check_existence)
1071 if (enable_print || stats)
1073 error(0, 0,
1074 _("%s is an slocate database. "
1075 "Turning on the '-e' option."),
1076 quotearg_n_style(0, locale_quoting_style, procdata.dbfile));
1078 do_check_existence = ACCEPT_EXISTING;
1081 add_visitor(visit_locate02_format, NULL);
1082 format_name = "slocate";
1083 procdata.slocatedb_format = 1;
1085 else
1087 int nread2;
1089 procdata.slocatedb_format = 0;
1090 nread2 = fread (procdata.original_filename+nread, 1, sizeof (LOCATEDB_MAGIC)-nread,
1091 procdata.fp);
1092 if (looking_at_gnu_locatedb(procdata.original_filename, nread+nread2))
1094 add_visitor(visit_locate02_format, NULL);
1095 format_name = "GNU LOCATE02";
1097 else /* Use the old format */
1099 int i;
1101 nread += nread2;
1102 /* Read the list of the most common bigrams in the database. */
1103 if (nread < 256)
1105 int more_read = fread (procdata.original_filename + nread, 1,
1106 256 - nread, procdata.fp);
1107 if ( (more_read + nread) != 256 )
1109 error(1, 0,
1110 _("Old-format locate database %s is "
1111 "too short to be valid"),
1112 quotearg_n_style(0, locale_quoting_style, dbfile));
1117 for (i = 0; i < 128; i++)
1119 procdata.bigram1[i] = procdata.original_filename[i << 1];
1120 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
1122 format_name = "old";
1123 add_visitor(visit_old_format, NULL);
1127 if (basename_only)
1128 add_visitor(visit_basename, NULL);
1130 /* Add an inspector for each pattern we're looking for. */
1131 for ( argn = 0; argn < argc; argn++ )
1133 results_were_filtered = true;
1134 pathpart = argv[argn];
1135 if (regex)
1137 struct regular_expression *p = xmalloc(sizeof(*p));
1138 const char *error_message = NULL;
1140 memset (&p->regex, 0, sizeof (p->regex));
1142 re_set_syntax(regex_options);
1143 p->regex.allocated = 100;
1144 p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
1145 p->regex.fastmap = NULL;
1146 p->regex.syntax = regex_options;
1147 p->regex.translate = NULL;
1149 error_message = re_compile_pattern (pathpart, strlen (pathpart),
1150 &p->regex);
1151 if (error_message)
1153 error (1, 0, "%s", error_message);
1155 else
1157 add_visitor(visit_regex, p);
1160 else if (contains_metacharacter(pathpart))
1162 if (ignore_case)
1163 add_visitor(visit_globmatch_casefold, pathpart);
1164 else
1165 add_visitor(visit_globmatch_nofold, pathpart);
1167 else
1169 /* No glob characters used. Hence we match on
1170 * _any part_ of the filename, not just the
1171 * basename. This seems odd to me, but it is the
1172 * traditional behaviour.
1173 * James Youngman <jay@gnu.org>
1175 visitfunc matcher;
1176 if (1 == MB_CUR_MAX)
1178 /* As an optimisation, use a strstr() matcher if we are
1179 * in a unibyte locale. This can give a x2 speedup in
1180 * the C locale. Some light testing reveals that
1181 * glibc's strstr() is somewhere around 40% faster than
1182 * gnulib's, so we just use strstr().
1184 matcher = ignore_case ?
1185 visit_substring_match_casefold_narrow :
1186 visit_substring_match_nocasefold_narrow;
1188 else
1190 matcher = ignore_case ?
1191 visit_substring_match_casefold_wide :
1192 visit_substring_match_nocasefold_wide;
1194 add_visitor(matcher, pathpart);
1198 pvis = lastinspector;
1200 /* We add visit_existing_*() as late as possible to reduce the
1201 * number of stat() calls.
1203 switch (do_check_existence)
1205 case ACCEPT_EXISTING:
1206 results_were_filtered = true;
1207 if (follow_symlinks) /* -L, default */
1208 add_visitor(visit_existing_follow, NULL);
1209 else /* -P */
1210 add_visitor(visit_existing_nofollow, NULL);
1211 break;
1213 case ACCEPT_NON_EXISTING:
1214 results_were_filtered = true;
1215 if (follow_symlinks) /* -L, default */
1216 add_visitor(visit_non_existing_follow, NULL);
1217 else /* -P */
1218 add_visitor(visit_non_existing_nofollow, NULL);
1219 break;
1221 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1222 /* do nothing; no extra processing. */
1223 break;
1226 /* Security issue: The stats visitor must be added immediately
1227 * before the print visitor, because otherwise the -S option would
1228 * leak information about files that the caller cannot see.
1230 if (stats)
1231 add_visitor(visit_stats, &statistics);
1233 if (enable_print)
1235 if (print_quoted_filename)
1236 add_visitor(visit_justprint_quoted, NULL);
1237 else
1238 add_visitor(visit_justprint_unquoted, NULL);
1242 if (use_limit)
1243 add_visitor(visit_limit, plimit);
1244 else
1245 add_visitor(visit_count, plimit);
1248 if (argc > 1)
1250 past_pat_inspector = pvis->next;
1251 if (op_and)
1252 mainprocessor = process_and;
1253 else
1254 mainprocessor = process_or;
1256 else
1257 mainprocessor = process_simple;
1259 if (stats)
1261 printf(_("Database %s is in the %s format.\n"),
1262 procdata.dbfile,
1263 format_name);
1267 procdata.c = getc (procdata.fp);
1268 /* If we are searching for filename patterns, the inspector list
1269 * will contain an entry for each pattern for which we are searching.
1271 while ( (procdata.c != EOF) &&
1272 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1274 /* Do nothing; all the work is done in the visitor functions. */
1277 if (stats)
1279 if (filesize)
1280 print_stats(argc, filesize);
1283 if (ferror (procdata.fp))
1285 error (0, errno, "%s",
1286 quotearg_n_style(0, locale_quoting_style, procdata.dbfile));
1287 return 0;
1289 return plimit->items_accepted;
/* Declared here, defined elsewhere; printed by the `v' (--version)
 * option handler in dolocate().
 */
1295 extern char *version_string;
1297 /* The name this program was run with; dolocate() sets it from argv[0]. */
1298 char *program_name;
/* Write the command-line synopsis to STREAM, followed by the
 * bug-reporting address.  The %s in the synopsis is filled in with
 * program_name.  Both messages are passed through _() before being
 * printed.  dolocate() calls this with stdout for the `h' option and
 * with stderr when the command line is rejected.
 */
1300 static void
1301 usage (FILE *stream)
1303 fprintf (stream, _("\
1304 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1305 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1306 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1307 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1308 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1309 [--max-database-age D] [--version] [--help]\n\
1310 pattern...\n"),
1311 program_name);
/* The bug-report address is a separate message from the synopsis. */
1312 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Values handed back by getopt_long() for the long options that have
 * no single-letter spelling.  Numbering starts just above CHAR_MAX so
 * that none of them can be mistaken for an option character.
 */
enum
  {
    REGEXTYPE_OPTION = CHAR_MAX + 1,        /* --regextype=TYPE */
    MAX_DB_AGE       = REGEXTYPE_OPTION + 1 /* --max-database-age D */
  };
1321 static struct option const longopts[] =
1323 {"database", required_argument, NULL, 'd'},
1324 {"existing", no_argument, NULL, 'e'},
1325 {"non-existing", no_argument, NULL, 'E'},
1326 {"ignore-case", no_argument, NULL, 'i'},
1327 {"all", no_argument, NULL, 'A'},
1328 {"help", no_argument, NULL, 'h'},
1329 {"version", no_argument, NULL, 'v'},
1330 {"null", no_argument, NULL, '0'},
1331 {"count", no_argument, NULL, 'c'},
1332 {"wholename", no_argument, NULL, 'w'},
1333 {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
1334 {"basename", no_argument, NULL, 'b'},
1335 {"print", no_argument, NULL, 'p'},
1336 {"stdio", no_argument, NULL, 's'},
1337 {"mmap", no_argument, NULL, 'm'},
1338 {"limit", required_argument, NULL, 'l'},
1339 {"regex", no_argument, NULL, 'r'},
1340 {"regextype", required_argument, NULL, REGEXTYPE_OPTION},
1341 {"statistics", no_argument, NULL, 'S'},
1342 {"follow", no_argument, NULL, 'L'},
1343 {"nofollow", no_argument, NULL, 'P'},
1344 {"max-database-age", required_argument, NULL, MAX_DB_AGE},
1345 {NULL, no_argument, NULL, 0}
1349 static int
1350 drop_privs(void)
1352 const char * what = "failed";
1353 const uid_t orig_euid = geteuid();
1354 const uid_t uid = getuid();
1355 const gid_t gid = getgid();
1357 #if HAVE_SETGROUPS
1358 /* Use of setgroups() is restricted to root only. */
1359 if (0 == orig_euid)
1361 /* We're either root or running setuid-root. */
1362 gid_t groups[1];
1363 groups[1] = gid;
1364 if (0 != setgroups(1u, groups))
1366 what = _("failed to drop group privileges");
1367 goto fail;
1370 #endif
1372 /* Drop any setuid privileges */
1373 if (uid != orig_euid)
1375 if (0 == uid)
1377 /* We're really root anyway, but are setuid to something else. Leave it. */
1379 else
1381 errno = 0;
1382 if (0 != setuid(getuid()))
1384 what = _("failed to drop setuid privileges");
1385 goto fail;
1388 /* Defend against the case where the attacker runs us with the
1389 * capability to call setuid() turned off, which on some systems
1390 * will cause the above attempt to drop privileges fail (leaving us
1391 * privileged).
1393 else
1395 /* Check that we can no longer switch bask to root */
1396 if (0 == setuid(0))
1398 what = _("Failed to fully drop privileges");
1399 /* The errno value here is not interesting (since
1400 * the system call we are complaining about
1401 * succeeded when we wanted it to fail). Arrange
1402 * for the call to error() not to print the errno
1403 * value by setting errno=0.
1405 errno = 0;
1406 goto fail;
1412 /* Drop any setgid privileges */
1413 errno = 0;
1414 if (0 != setgid(gid))
1416 what = _("failed to drop setgid privileges");
1417 goto fail;
1420 /* success. */
1421 return 0;
1423 fail:
1424 error(1, errno, "%s",
1425 quotearg_n_style(0, locale_quoting_style, what));
1426 abort();
1427 kill(0, SIGKILL);
1428 _exit(1);
1429 /*NOTREACHED*/
1430 /* ... we hope. */
1431 for (;;)
1433 /* deliberate infinite loop */
/* Open the database file NAME read-only (adding O_LARGEFILE where the
 * platform defines it) and mark the descriptor close-on-exec.
 *
 * Returns the descriptor, or a negative value if the file could not be
 * opened.  If the close-on-exec flag cannot be set the descriptor is
 * closed again and -1 is returned.
 */
static int
opendb(const char *name)
{
  int open_flags = O_RDONLY;
  int dbfd;

#if defined(O_LARGEFILE)
  open_flags |= O_LARGEFILE;
#endif

  dbfd = open(name, open_flags);
  if (dbfd < 0)
    return dbfd;

  /* Make sure it won't survive an exec */
  if (0 != fcntl(dbfd, F_SETFD, FD_CLOEXEC))
    {
      close(dbfd);
      return -1;
    }

  return dbfd;
}
1458 dolocate (int argc, char **argv, int secure_db_fd)
1460 char *dbpath;
1461 unsigned long int found = 0uL;
1462 int optc;
1463 int ignore_case = 0;
1464 int print = 0;
1465 int just_count = 0;
1466 int basename_only = 0;
1467 int use_limit = 0;
1468 int regex = 0;
1469 int regex_options = RE_SYNTAX_EMACS;
1470 int stats = 0;
1471 int op_and = 0;
1472 const char *e;
1473 FILE *fp;
1474 int they_chose_db = 0;
1475 bool did_stdin = false; /* Set to prevent rereading stdin. */
1477 program_name = argv[0];
1479 #ifdef HAVE_SETLOCALE
1480 setlocale (LC_ALL, "");
1481 #endif
1482 bindtextdomain (PACKAGE, LOCALEDIR);
1483 textdomain (PACKAGE);
1484 atexit (close_stdout);
1486 limits.limit = 0;
1487 limits.items_accepted = 0;
1489 quote_opts = clone_quoting_options (NULL);
1490 print_quoted_filename = true;
1492 /* We cannot simultaneously trust $LOCATE_PATH and use the
1493 * setuid-access-controlled database,, since that could cause a leak
1494 * of private data.
1496 dbpath = getenv ("LOCATE_PATH");
1497 if (dbpath)
1499 they_chose_db = 1;
1502 check_existence = ACCEPT_EITHER;
1504 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1505 switch (optc)
1507 case '0':
1508 separator = 0;
1509 print_quoted_filename = false; /* print filename 'raw'. */
1510 break;
1512 case 'A':
1513 op_and = 1;
1514 break;
1516 case 'b':
1517 basename_only = 1;
1518 break;
1520 case 'c':
1521 just_count = 1;
1522 break;
1524 case 'd':
1525 dbpath = optarg;
1526 they_chose_db = 1;
1527 break;
1529 case 'e':
1530 check_existence = ACCEPT_EXISTING;
1531 break;
1533 case 'E':
1534 check_existence = ACCEPT_NON_EXISTING;
1535 break;
1537 case 'i':
1538 ignore_case = 1;
1539 break;
1541 case 'h':
1542 usage (stdout);
1543 return 0;
1545 case MAX_DB_AGE:
1546 /* XXX: nothing in the test suite for this option. */
1547 set_max_db_age(optarg);
1548 break;
1550 case 'p':
1551 print = 1;
1552 break;
1554 case 'v':
1555 printf (_("GNU locate version %s\n"), version_string);
1556 printf (_("Built using GNU gnulib version %s\n"), gnulib_version);
1557 return 0;
1559 case 'w':
1560 basename_only = 0;
1561 break;
1563 case 'r':
1564 regex = 1;
1565 break;
1567 case REGEXTYPE_OPTION:
1568 regex_options = get_regex_type(optarg);
1569 break;
1571 case 'S':
1572 stats = 1;
1573 break;
1575 case 'L':
1576 follow_symlinks = 1;
1577 break;
1579 /* In find, -P and -H differ in the way they handle paths
1580 * given on the command line. This is not relevant for
1581 * locate, but the -H option is supported because it is
1582 * probably more intuitive to do so.
1584 case 'P':
1585 case 'H':
1586 follow_symlinks = 0;
1587 break;
1589 case 'l':
1591 char *end = optarg;
1592 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1593 if (LONGINT_OK != err)
1595 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1597 use_limit = 1;
1599 break;
1601 case 's': /* use stdio */
1602 case 'm': /* use mmap */
1603 /* These options are implemented simply for
1604 * compatibility with FreeBSD
1606 break;
1608 default:
1609 usage (stderr);
1610 return 1;
1614 /* If the user gave the -d option or set LOCATE_PATH,
1615 * relinquish access to the secure database.
1617 if (they_chose_db)
1619 if (secure_db_fd >= 0)
1621 close(secure_db_fd);
1622 secure_db_fd = -1;
1626 if (!just_count && !stats)
1627 print = 1;
1629 if (stats)
1631 if (optind == argc)
1632 use_limit = 0;
1634 else
1636 if (!just_count && optind == argc)
1638 usage (stderr);
1639 return 1;
1644 if (1 == isatty(STDOUT_FILENO))
1645 stdout_is_a_tty = true;
1646 else
1647 stdout_is_a_tty = false;
1649 if (they_chose_db)
1650 next_element (dbpath, 0); /* Initialize. */
1652 /* Bail out early if limit already reached. */
1653 while (!use_limit || limits.limit > limits.items_accepted)
1655 struct stat st;
1656 int fd;
1657 off_t filesize;
1659 statistics.compressed_bytes =
1660 statistics.total_filename_count =
1661 statistics.total_filename_length =
1662 statistics.whitespace_count =
1663 statistics.newline_count =
1664 statistics.highbit_filename_count = 0u;
1666 if (they_chose_db)
1668 /* Take the next element from the list of databases */
1669 e = next_element ((char *) NULL, 0);
1670 if (NULL == e)
1671 break;
1673 if (0 == strcmp (e, "-"))
1675 if (did_stdin)
1677 error (0, 0,
1678 _("warning: the locate database can only be read from stdin once."));
1679 return 0;
1681 else
1683 e = "<stdin>";
1684 fd = 0;
1685 did_stdin = true;
1688 else
1690 if (0 == strlen(e) || 0 == strcmp(e, "."))
1692 e = LOCATE_DB;
1695 /* open the database */
1696 fd = opendb(e);
1697 if (fd < 0)
1699 error (0, errno, "%s",
1700 quotearg_n_style(0, locale_quoting_style, e));
1701 return 0;
1705 else
1707 if (-1 == secure_db_fd)
1709 /* Already searched the database, it's time to exit the loop */
1710 break;
1712 else
1714 e = selected_secure_db;
1715 fd = secure_db_fd;
1716 secure_db_fd = -1;
1720 /* Check the database to see if it is old. */
1721 if (fstat(fd, &st))
1723 error (0, errno, "%s",
1724 quotearg_n_style(0, locale_quoting_style, e));
1725 /* continue anyway */
1726 filesize = (off_t)0;
1728 else
1730 time_t now;
1732 filesize = st.st_size;
1734 if ((time_t)-1 == time(&now))
1736 /* If we can't tell the time, we don't know how old the
1737 * database is. But since the message is just advisory,
1738 * we continue anyway.
1740 error (0, errno, "time system call");
1742 else
1744 double age = difftime(now, st.st_mtime);
1745 double warn_seconds = SECONDS_PER_UNIT * warn_number_units;
1746 if (age > warn_seconds)
1748 /* For example:
1749 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1750 error (0, 0,
1751 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1752 quotearg_n_style(0, locale_quoting_style, e),
1753 warn_number_units, _(warn_name_units),
1754 (age/(double)SECONDS_PER_UNIT), _(warn_name_units));
1759 fp = fdopen(fd, "r");
1760 if (NULL == fp)
1762 error (0, errno, "%s",
1763 quotearg_n_style(0, locale_quoting_style, e));
1764 return 0;
1767 /* Search this database for all patterns simultaneously */
1768 found = search_one_database (argc - optind, &argv[optind],
1769 e, fp, filesize,
1770 ignore_case, print, basename_only,
1771 use_limit, &limits, stats,
1772 op_and, regex, regex_options);
1774 /* Close the databsase (even if it is stdin) */
1775 if (fclose (fp) == EOF)
1777 error (0, errno, "%s",
1778 quotearg_n_style(0, locale_quoting_style, e));
1779 return 0;
1783 if (just_count)
1785 printf("%ld\n", found);
1788 if (found || (use_limit && (limits.limit==0)) || stats )
1789 return 0;
1790 else
1791 return 1;
/* Number of elements in the array A.  A must be a real array, not a
 * pointer.  The argument is parenthesised before it is subscripted so
 * that an arbitrary array-valued expression can be passed safely.
 */
#define ARRAYSIZE(a) (sizeof(a)/sizeof((a)[0]))
1795 static int
1796 open_secure_db(void)
1798 int fd, i;
1800 const char * secure_db_list[] =
1802 LOCATE_DB,
1803 "/var/lib/slocate/slocate.db",
1804 NULL
1806 for (i=0; secure_db_list[i]; ++i)
1808 fd = opendb(secure_db_list[i]);
1809 if (fd >= 0)
1811 selected_secure_db = secure_db_list[i];
1812 return fd;
1815 return -1;
/* Program entry point.
 *
 * The secure database is opened first, before drop_privs() is called,
 * and only afterwards is the command line processed by dolocate(),
 * whose return value becomes the exit status.
 */
int
main (int argc, char **argv)
{
  int secure_fd;

  secure_fd = open_secure_db();
  drop_privs();

  return dolocate(argc, argv, secure_fd);
}