cvsimport
[findutils.git] / locate / locate.c
blob38057d0645f0494eb482579fad7f31d95d266a74
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <signal.h>
66 #include <ctype.h>
67 #include <sys/types.h>
68 #include <grp.h> /* for setgroups() */
69 #include <sys/stat.h>
70 #include <time.h>
71 #include <fnmatch.h>
72 #include <getopt.h>
73 #include <xstrtol.h>
75 /* The presence of unistd.h is assumed by gnulib these days, so we
76 * might as well assume it too.
78 /* We need <unistd.h> for isatty(). */
79 #include <unistd.h>
81 #if HAVE_FCNTL_H
82 /* We use fcntl() */
83 #include <fcntl.h>
84 #endif
86 #define NDEBUG
87 #include <assert.h>
88 #include <string.h>
91 #ifdef STDC_HEADERS
92 #include <stdlib.h>
93 #endif
95 #ifdef HAVE_ERRNO_H
96 #include <errno.h>
97 #else
98 extern int errno;
99 #endif
101 #ifdef HAVE_LOCALE_H
102 #include <locale.h>
103 #endif
105 #if ENABLE_NLS
106 # include <libintl.h>
107 # define _(Text) gettext (Text)
108 #else
109 # define _(Text) Text
110 #define textdomain(Domain)
111 #define bindtextdomain(Package, Directory)
112 #endif
113 #ifdef gettext_noop
114 # define N_(String) gettext_noop (String)
115 #else
116 /* We used to use (String) instead of just String, but apparently ISO C
117 * doesn't allow this (at least, that's what HP said when someone reported
118 * this as a compiler bug). This is HP case number 1205608192. See
119 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
120 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
121 * like: static const char buf[] = ("string");
123 # define N_(String) String
124 #endif
126 #include "locatedb.h"
127 #include <getline.h>
128 #include "../gnulib/lib/xalloc.h"
129 #include "../gnulib/lib/error.h"
130 #include "../gnulib/lib/human.h"
131 #include "dirname.h"
132 #include "closeout.h"
133 #include "nextelem.h"
134 #include "regex.h"
135 #include "quote.h"
136 #include "quotearg.h"
137 #include "printquoted.h"
138 #include "regextype.h"
139 #include "gnulib-version.h"
141 /* Note that this evaluates Ch many times. */
142 #ifdef _LIBC
143 # define TOUPPER(Ch) toupper (Ch)
144 # define TOLOWER(Ch) tolower (Ch)
145 #else
146 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
147 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
148 #endif
150 /* typedef enum {false, true} boolean; */
152 /* Warn if a database is older than this. 8 days allows for a weekly
153 update that takes up to a day to perform. */
154 static unsigned int warn_number_units = 8;
156 /* Printable name of units used in WARN_SECONDS */
157 static const char warn_name_units[] = N_("days");
158 #define SECONDS_PER_UNIT (60 * 60 * 24)
/* Verdict returned by every visitor.  Each value is a distinct bit so
   that callers can test for several verdicts with one mask.  */
enum visit_result
  {
    VISIT_CONTINUE = 1 << 0,  /* please call the next visitor */
    VISIT_ACCEPTED = 1 << 1,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 1 << 2,  /* rejected, process next file */
    VISIT_ABORT    = 1 << 3   /* rejected, process no more files */
  };
/* Which database entries to keep, depending on whether the named file
   is present on disk.  */
enum ExistenceCheckType
  {
    ACCEPT_EITHER       = 0,  /* neither -e nor -E was given */
    ACCEPT_EXISTING     = 1,  /* option -e */
    ACCEPT_NON_EXISTING = 2   /* option -E */
  };
175 /* Check for existence of files before printing them out? */
176 enum ExistenceCheckType check_existence = ACCEPT_EITHER;
178 static int follow_symlinks = 1;
180 /* What to separate the results with. */
181 static int separator = '\n';
183 static struct quoting_options * quote_opts = NULL;
184 static bool stdout_is_a_tty;
185 static bool print_quoted_filename;
186 static bool results_were_filtered;
188 static const char *selected_secure_db = NULL;
191 /* Change the number of days old the database can be
192 * before we complain about it.
194 static void
195 set_max_db_age(const char *s)
197 char *end;
198 unsigned long int val;
199 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
201 if (0 == *s)
203 error(1, 0,
204 _("The argument argument for option --max-database-age must not be empty"));
208 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
209 * we would not be able to tell if that is the correct answer, or whether it
210 * signifies an error.
212 errno = 0;
213 val = strtoul(s, &end, 10);
215 /* Diagnose number too large, non-numbes and trailing junk. */
216 if ((ULONG_MAX == val && ERANGE == errno) ||
217 (0 == val && EINVAL == errno))
219 error(1, errno,
220 _("Invalid argument `%s' for option --max-database-age"),
223 else if (*end)
225 /* errno wasn't set, don't print its message */
226 error(1, 0,
227 _("Invalid argument `%s' for option --max-database-age"),
230 else
232 warn_number_units = val;
/* Read in a 16-bit int, high byte first (network byte order).
 *
 * Two bytes are taken from FP and interpreted as a two's-complement
 * signed 16-bit value.  A byte that cannot be read (fgetc() returned
 * EOF) is treated as 0xff, so two failed reads yield -1.
 *
 * The value is assembled from masked, non-negative bytes: shifting a
 * negative operand left is undefined behaviour in C.
 */
static short
get_short (FILE *fp)
{
  const int hi = fgetc (fp) & 0xff;
  const int lo = fgetc (fp) & 0xff;
  int value = (hi << 8) | lo;

  /* Sign-extend from 16 bits. */
  if (value > 0x7fff)
    value -= 0x10000;

  return (short) value;
}
/* The characters that make a pattern a shell glob rather than a plain
   substring.  */
const char * const metacharacters = "*?[]\\";

/* Return 1 if S contains at least one character from METACHARACTERS,
   and 0 otherwise.  */
static int
contains_metacharacter(const char *s)
{
  return strpbrk(s, metacharacters) != NULL;
}
264 /* locate_read_str()
266 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
267 * until we reach DELIMITER or end-of-file. We reallocate the buffer
268 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
269 * is made regarding the content of the data (i.e. the implementation is
270 * 8-bit clean, the only delimiter is DELIMITER).
272 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
273 * has been removed from gnulib.
275 * We call the function locate_read_str() to avoid a name clash with the curses
276 * function getstr().
278 static int
279 locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
281 char * p = NULL;
282 size_t sz = 0;
283 int nread;
284 size_t needed;
286 nread = getdelim(&p, &sz, delimiter, fp);
287 if (nread >= 0)
289 assert(p != NULL);
291 needed = offs + nread + 1u;
292 if (needed > (*siz))
294 char *pnew = realloc(*buf, needed);
295 if (NULL == pnew)
297 return -1; /* FAIL */
299 else
301 *siz = needed;
302 *buf = pnew;
305 memcpy((*buf)+offs, p, nread);
306 free(p);
308 return nread;
/* Counter shared by the limit/count visitors. */
struct locate_limits
{
  uintmax_t limit;            /* visit_limit aborts once items_accepted reaches this */
  uintmax_t items_accepted;   /* incremented once per entry that reaches the visitor */
};

static struct locate_limits limits;
/* Totals gathered by visit_stats() and reported by print_stats(). */
struct locate_stats
{
  uintmax_t compressed_bytes;        /* NOTE(review): not updated in the code reviewed here */
  uintmax_t total_filename_count;    /* entries seen */
  uintmax_t total_filename_length;   /* sum of strlen() over those entries */
  uintmax_t whitespace_count;        /* entries containing whitespace (newline included) */
  uintmax_t newline_count;           /* entries containing '\n' */
  uintmax_t highbit_filename_count;  /* entries containing a byte with bit 7 set */
};

static struct locate_stats statistics;
/* A buffer together with its allocated size.
   NOTE(review): neither the type nor casebuf is used in the code
   reviewed here; the role of preqlen is presumably "pointer to the
   required length" -- confirm against the users.  */
struct stringbuf
{
  char *buffer;
  size_t buffersize;
  size_t *preqlen;
};

static struct stringbuf casebuf;
/* Context object handed to visit_regex(): one compiled pattern. */
struct regular_expression
{
  struct re_pattern_buffer regex;  /* for --regex */
};
/* State shared between the database reader and every visitor while one
   database is being scanned.  */
struct process_data
{
  int c;                    /* An input byte. */
  char itemcount;           /* Indicates we're at the beginning of an slocate db. */
  int count;                /* The length of the prefix shared with the previous database entry. */
  int len;                  /* Length bookkeeping for the current entry. */
  char *original_filename;  /* The current input database entry. */
  size_t pathsize;          /* Amount allocated for it. */
  char *munged_filename;    /* path or base_name(path) */
  FILE *fp;                 /* The pathname database. */
  const char *dbfile;       /* Its name, or "<stdin>" */
  int slocatedb_format;     /* Allows us to cope with slocate's format variant */

  /* For the old database format, the first and second characters of
     the most common bigrams.  */
  char bigram1[128];
  char bigram2[128];
};
/* A visitor inspects (and may alter) PROCDATA and returns one of the
   enum visit_result values.  CONTEXT is whatever was registered with it. */
typedef int (*visitfunc)(struct process_data *procdata,
                         void *context);

/* One node of the singly linked visitor chain. */
struct visitor
{
  visitfunc inspector;   /* function to call */
  void *context;         /* passed back to INSPECTOR unchanged */
  struct visitor *next;  /* following node, or NULL at the end */
};

/* Head and tail of the chain, maintained by add_visitor(). */
static struct visitor *inspectors = NULL;
static struct visitor *lastinspector = NULL;

/* Where the pattern-matching part of the chain ends; process_or() and
   process_and() run the chain in two phases split at this node.  */
static struct visitor *past_pat_inspector = NULL;
381 /* 0 or 1 pattern(s) */
382 static int
383 process_simple(struct process_data *procdata)
385 int result = VISIT_CONTINUE;
386 const struct visitor *p = inspectors;
388 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
390 result = (p->inspector)(procdata, p->context);
391 p = p->next;
394 return result;
397 /* Accept if any pattern matches. */
398 static int
399 process_or (struct process_data *procdata)
401 int result = VISIT_CONTINUE;
402 const struct visitor *p = inspectors;
404 while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
406 result = (p->inspector)(procdata, p->context);
407 p = p->next;
410 if (result == VISIT_CONTINUE)
411 result = VISIT_REJECTED;
412 if (result & (VISIT_ABORT | VISIT_REJECTED))
413 return result;
415 p = past_pat_inspector;
416 result = VISIT_CONTINUE;
418 while ( (VISIT_CONTINUE == result) && (NULL != p) )
420 result = (p->inspector)(procdata, p->context);
421 p = p->next;
424 if (VISIT_CONTINUE == result)
425 return VISIT_ACCEPTED;
426 else
427 return result;
430 /* Accept if all pattern match. */
431 static int
432 process_and (struct process_data *procdata)
434 int result = VISIT_CONTINUE;
435 const struct visitor *p = inspectors;
437 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
439 result = (p->inspector)(procdata, p->context);
440 p = p->next;
443 if (result == VISIT_CONTINUE)
444 result = VISIT_REJECTED;
445 if (result & (VISIT_ABORT | VISIT_REJECTED))
446 return result;
448 p = past_pat_inspector;
449 result = VISIT_CONTINUE;
451 while ( (VISIT_CONTINUE == result) && (NULL != p) )
453 result = (p->inspector)(procdata, p->context);
454 p = p->next;
457 if (VISIT_CONTINUE == result)
458 return VISIT_ACCEPTED;
459 else
460 return result;
/* A processor drives the visitor chain for one database entry and
   returns the resulting enum visit_result value.  */
typedef int (*processfunc)(struct process_data *procdata);

/* The processor selected for the database currently being searched. */
static processfunc mainprocessor = NULL;
467 static void
468 add_visitor(visitfunc fn, void *context)
470 struct visitor *p = xmalloc(sizeof(struct visitor));
471 p->inspector = fn;
472 p->context = context;
473 p->next = NULL;
475 if (NULL == lastinspector)
477 lastinspector = inspectors = p;
479 else
481 lastinspector->next = p;
482 lastinspector = p;
488 static int
489 visit_justprint_quoted(struct process_data *procdata, void *context)
491 (void) context;
492 print_quoted (stdout, quote_opts, stdout_is_a_tty,
493 "%s",
494 procdata->original_filename);
495 putchar(separator);
496 return VISIT_CONTINUE;
499 static int
500 visit_justprint_unquoted(struct process_data *procdata, void *context)
502 (void) context;
503 fputs(procdata->original_filename, stdout);
504 putchar(separator);
505 return VISIT_CONTINUE;
508 static int
509 visit_old_format(struct process_data *procdata, void *context)
511 register char *s;
512 (void) context;
514 /* Get the offset in the path where this path info starts. */
515 if (procdata->c == LOCATEDB_OLD_ESCAPE)
516 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
517 else
518 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
520 /* Overlay the old path with the remainder of the new. */
521 for (s = procdata->original_filename + procdata->count;
522 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
523 if (procdata->c < 0200)
524 *s++ = procdata->c; /* An ordinary character. */
525 else
527 /* Bigram markers have the high bit set. */
528 procdata->c &= 0177;
529 *s++ = procdata->bigram1[procdata->c];
530 *s++ = procdata->bigram2[procdata->c];
532 *s-- = '\0';
534 procdata->munged_filename = procdata->original_filename;
536 return VISIT_CONTINUE;
540 static int
541 visit_locate02_format(struct process_data *procdata, void *context)
543 register char *s;
544 int nread;
545 (void) context;
547 if (procdata->slocatedb_format)
549 if (procdata->itemcount == 0)
551 ungetc(procdata->c, procdata->fp);
552 procdata->count = 0;
553 procdata->len = 0;
555 else if (procdata->itemcount == 1)
557 procdata->count = procdata->len-1;
559 else
561 if (procdata->c == LOCATEDB_ESCAPE)
562 procdata->count += (short)get_short (procdata->fp);
563 else if (procdata->c > 127)
564 procdata->count += procdata->c - 256;
565 else
566 procdata->count += procdata->c;
569 else
571 if (procdata->c == LOCATEDB_ESCAPE)
572 procdata->count += (short)get_short (procdata->fp);
573 else if (procdata->c > 127)
574 procdata->count += procdata->c - 256;
575 else
576 procdata->count += procdata->c;
579 if (procdata->count > procdata->len || procdata->count < 0)
581 /* This should not happen generally , but since we're
582 * reading in data which is outside our control, we
583 * cannot prevent it.
585 error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
588 /* Overlay the old path with the remainder of the new. */
589 nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
590 procdata->fp, 0, procdata->count);
591 if (nread < 0)
592 return VISIT_ABORT;
593 procdata->c = getc (procdata->fp);
594 procdata->len = procdata->count + nread;
595 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
596 assert (s[0] != '\0');
597 assert (s[1] == '\0'); /* Our terminator. */
598 assert (s[2] == '\0'); /* Added by locate_read_str. */
600 procdata->munged_filename = procdata->original_filename;
602 if (procdata->slocatedb_format)
604 /* Don't increment indefinitely, it might overflow. */
605 if (procdata->itemcount < 6)
607 ++(procdata->itemcount);
612 return VISIT_CONTINUE;
615 static int
616 visit_basename(struct process_data *procdata, void *context)
618 (void) context;
619 procdata->munged_filename = base_name(procdata->original_filename);
621 return VISIT_CONTINUE;
625 /* visit_existing_follow implements -L -e */
626 static int
627 visit_existing_follow(struct process_data *procdata, void *context)
629 struct stat st;
630 (void) context;
632 /* munged_filename has been converted in some way (to lower case,
633 * or is just the base name of the file), and original_filename has not.
634 * Hence only original_filename is still actually the name of the file
635 * whose existence we would need to check.
637 if (stat(procdata->original_filename, &st) != 0)
639 return VISIT_REJECTED;
641 else
643 return VISIT_CONTINUE;
647 /* visit_non_existing_follow implements -L -E */
648 static int
649 visit_non_existing_follow(struct process_data *procdata, void *context)
651 struct stat st;
652 (void) context;
654 /* munged_filename has been converted in some way (to lower case,
655 * or is just the base name of the file), and original_filename has not.
656 * Hence only original_filename is still actually the name of the file
657 * whose existence we would need to check.
659 if (stat(procdata->original_filename, &st) == 0)
661 return VISIT_REJECTED;
663 else
665 return VISIT_CONTINUE;
669 /* visit_existing_nofollow implements -P -e */
670 static int
671 visit_existing_nofollow(struct process_data *procdata, void *context)
673 struct stat st;
674 (void) context;
676 /* munged_filename has been converted in some way (to lower case,
677 * or is just the base name of the file), and original_filename has not.
678 * Hence only original_filename is still actually the name of the file
679 * whose existence we would need to check.
681 if (lstat(procdata->original_filename, &st) != 0)
683 return VISIT_REJECTED;
685 else
687 return VISIT_CONTINUE;
691 /* visit_non_existing_nofollow implements -P -E */
692 static int
693 visit_non_existing_nofollow(struct process_data *procdata, void *context)
695 struct stat st;
696 (void) context;
698 /* munged_filename has been converted in some way (to lower case,
699 * or is just the base name of the file), and original_filename has not.
700 * Hence only original_filename is still actually the name of the file
701 * whose existence we would need to check.
703 if (lstat(procdata->original_filename, &st) == 0)
705 return VISIT_REJECTED;
707 else
709 return VISIT_CONTINUE;
713 static int
714 visit_substring_match_nocasefold(struct process_data *procdata, void *context)
716 const char *pattern = context;
718 if (NULL != mbsstr(procdata->munged_filename, pattern))
719 return VISIT_ACCEPTED;
720 else
721 return VISIT_REJECTED;
724 static int
725 visit_substring_match_casefold(struct process_data *procdata, void *context)
727 const char *pattern = context;
729 if (NULL != mbscasestr(procdata->munged_filename, pattern))
730 return VISIT_ACCEPTED;
731 else
732 return VISIT_REJECTED;
736 static int
737 visit_globmatch_nofold(struct process_data *procdata, void *context)
739 const char *glob = context;
740 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
741 return VISIT_REJECTED;
742 else
743 return VISIT_ACCEPTED;
747 static int
748 visit_globmatch_casefold(struct process_data *procdata, void *context)
750 const char *glob = context;
751 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
752 return VISIT_REJECTED;
753 else
754 return VISIT_ACCEPTED;
758 static int
759 visit_regex(struct process_data *procdata, void *context)
761 struct regular_expression *p = context;
762 const size_t len = strlen(procdata->munged_filename);
764 int rv = re_search (&p->regex, procdata->munged_filename,
765 len, 0, len,
766 (struct re_registers *) NULL);
767 if (rv < 0)
769 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
771 else
773 return VISIT_ACCEPTED; /* match */
778 static int
779 visit_stats(struct process_data *procdata, void *context)
781 struct locate_stats *p = context;
782 size_t len = strlen(procdata->original_filename);
783 const char *s;
784 int highbit, whitespace, newline;
786 ++(p->total_filename_count);
787 p->total_filename_length += len;
789 highbit = whitespace = newline = 0;
790 for (s=procdata->original_filename; *s; ++s)
792 if ( (int)(*s) & 128 )
793 highbit = 1;
794 if ('\n' == *s)
796 newline = whitespace = 1;
798 else if (isspace((unsigned char)*s))
800 whitespace = 1;
804 if (highbit)
805 ++(p->highbit_filename_count);
806 if (whitespace)
807 ++(p->whitespace_count);
808 if (newline)
809 ++(p->newline_count);
811 return VISIT_CONTINUE;
815 static int
816 visit_limit(struct process_data *procdata, void *context)
818 struct locate_limits *p = context;
820 (void) procdata;
822 if (++p->items_accepted >= p->limit)
823 return VISIT_ABORT;
824 else
825 return VISIT_CONTINUE;
828 static int
829 visit_count(struct process_data *procdata, void *context)
831 struct locate_limits *p = context;
833 (void) procdata;
835 ++p->items_accepted;
836 return VISIT_CONTINUE;
839 /* Emit the statistics.
841 static void
842 print_stats(int argc, size_t database_file_size)
844 char hbuf[LONGEST_HUMAN_READABLE + 1];
846 printf(_("Locate database size: %s bytes\n"),
847 human_readable ((uintmax_t) database_file_size,
848 hbuf, human_ceiling, 1, 1));
850 printf( (results_were_filtered ?
851 _("Matching Filenames: %s ") :
852 _("All Filenames: %s ")),
853 human_readable (statistics.total_filename_count,
854 hbuf, human_ceiling, 1, 1));
855 printf(_("with a cumulative length of %s bytes"),
856 human_readable (statistics.total_filename_length,
857 hbuf, human_ceiling, 1, 1));
859 printf(_("\n\tof which %s contain whitespace, "),
860 human_readable (statistics.whitespace_count,
861 hbuf, human_ceiling, 1, 1));
862 printf(_("\n\t%s contain newline characters, "),
863 human_readable (statistics.newline_count,
864 hbuf, human_ceiling, 1, 1));
865 printf(_("\n\tand %s contain characters with the high bit set.\n"),
866 human_readable (statistics.highbit_filename_count,
867 hbuf, human_ceiling, 1, 1));
869 if (!argc)
871 if (results_were_filtered)
873 printf(_("Some filenames may have been filtered out, "
874 "so we cannot compute the compression ratio.\n"));
876 else
878 if (statistics.total_filename_length)
880 printf(_("Compression ratio %4.2f%%\n"),
881 100.0 * ((double)statistics.total_filename_length
882 - (double) database_file_size)
883 / (double) statistics.total_filename_length);
885 else
887 printf(_("Compression ratio is undefined\n"));
891 printf("\n");
895 * Return nonzero if the data we read in indicates that we are
896 * looking at a LOCATE02 locate database.
898 static int
899 looking_at_gnu_locatedb (const char *data, size_t len)
901 if (len < sizeof (LOCATEDB_MAGIC))
902 return 0;
903 else if (0 == memcmp (data, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
904 return 1; /* We saw the magic byte sequence */
905 else
906 return 0;
/*
 * Return nonzero if the data we read in indicates that we are
 * looking at an slocate database.
 *
 * DATA holds the first LEN bytes of the file FILENAME (at most 2 are
 * expected).  An slocate database starts with a one-character string
 * holding a decimal digit, the security level; that is, a digit
 * followed by a NUL byte.
 *
 * When the header matches, the level is stored in *SECLEVEL and 1 is
 * returned.  A level above 1 is additionally reported with a warning
 * naming the level digit.  Otherwise 0 is returned and *SECLEVEL is
 * left untouched.
 */
static int
looking_at_slocate_locatedb (const char *filename,
                             const char *data,
                             size_t len,
                             int *seclevel)
{
  assert(len <= 2);

  if (len < 2)
    return 0;

  /* Check that the magic number is a one-byte string */
  if (0 != data[1])
    return 0;                   /* Definitely not slocate. */

  if (!isdigit((unsigned char)data[0]))
    return 0;                   /* Not a digit. */

  /* looks promising. */
  *seclevel = (data[0] - '0');

  if (*seclevel > 1)
    {
      /* Hmm, well it's probably an slocate database
       * of some awesomely huge security level, like 2.
       * We don't know how to handle those.
       *
       * The level character is data[0]; data[1] is the NUL that
       * terminates it and must not be the one printed.
       */
      error(0, 0,
            _("locate database `%s' looks like an slocate "
              "database but it seems to have security level %c, "
              "which GNU findutils does not currently support"),
            filename, data[0]);
    }

  return 1;
}
967 /* Print or count the entries in DBFILE that match shell globbing patterns in
968 ARGV. Return the number of entries matched. */
970 static unsigned long
971 search_one_database (int argc,
972 char **argv,
973 const char *dbfile,
974 FILE *fp,
975 off_t filesize,
976 int ignore_case,
977 int enable_print,
978 int basename_only,
979 int use_limit,
980 struct locate_limits *plimit,
981 int stats,
982 int op_and,
983 int regex,
984 int regex_options)
986 char *pathpart; /* A pattern to consider. */
987 int argn; /* Index to current pattern in argv. */
988 int nread; /* number of bytes read from an entry. */
989 struct process_data procdata; /* Storage for data shared with visitors. */
990 int slocate_seclevel;
991 struct visitor* pvis; /* temp for determining past_pat_inspector. */
992 const char *format_name;
993 enum ExistenceCheckType do_check_existence;
996 /* We may turn on existence checking for a given database.
997 * We ensure that we can return to the previous behaviour
998 * by using two variables, do_check_existence (which we act on)
999 * and check_existence (whcih indicates the default before we
1000 * adjust it on the bassis of what kind of database we;re using
1002 do_check_existence = check_existence;
1005 if (ignore_case)
1006 regex_options |= RE_ICASE;
1008 procdata.len = procdata.count = 0;
1009 procdata.slocatedb_format = 0;
1010 procdata.itemcount = 0;
1012 procdata.dbfile = dbfile;
1013 procdata.fp = fp;
1015 /* Set up the inspection regime */
1016 inspectors = NULL;
1017 lastinspector = NULL;
1018 past_pat_inspector = NULL;
1019 results_were_filtered = false;
1021 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
1022 procdata.original_filename = xmalloc (procdata.pathsize);
1025 nread = fread (procdata.original_filename, 1, SLOCATE_DB_MAGIC_LEN,
1026 procdata.fp);
1027 slocate_seclevel = 0;
1028 if (looking_at_slocate_locatedb(procdata.dbfile,
1029 procdata.original_filename,
1030 nread,
1031 &slocate_seclevel))
1033 error(0, 0,
1034 _("`%s' is an slocate database. "
1035 "Support for these is new, expect problems for now."),
1036 procdata.dbfile);
1038 /* slocate also uses frcode, but with a different header.
1039 * We handle the header here and then work with the data
1040 * in the normal way.
1042 if (slocate_seclevel > 1)
1044 /* We don't know what those security levels mean,
1045 * so do nothing further
1047 error(0, 0,
1048 _("`%s' is an slocate database of unsupported security level %d; skipping it."),
1049 procdata.dbfile, slocate_seclevel);
1050 return 0;
1052 else if (slocate_seclevel > 0)
1054 /* Don't show the filenames to the user if they don't exist.
1055 * Showing stats is safe since filenames are only counted
1056 * after the existence check
1058 if (ACCEPT_NON_EXISTING == check_existence)
1060 /* Do not allow the user to see a list of filenames that they
1061 * cannot stat().
1063 error(0, 0,
1064 _("You specified the -E option, but that option "
1065 "cannot be used with slocate-format databases "
1066 "with a non-zero security level. No results will be "
1067 "generated for this database.\n"));
1068 return 0;
1070 if (ACCEPT_EXISTING != do_check_existence)
1072 if (enable_print || stats)
1074 error(0, 0,
1075 _("`%s' is an slocate database. "
1076 "Turning on the '-e' option."),
1077 procdata.dbfile);
1079 do_check_existence = ACCEPT_EXISTING;
1082 add_visitor(visit_locate02_format, NULL);
1083 format_name = "slocate";
1084 procdata.slocatedb_format = 1;
1086 else
1088 int nread2;
1090 procdata.slocatedb_format = 0;
1091 nread2 = fread (procdata.original_filename+nread, 1, sizeof (LOCATEDB_MAGIC)-nread,
1092 procdata.fp);
1093 if (looking_at_gnu_locatedb(procdata.original_filename, nread+nread2))
1095 add_visitor(visit_locate02_format, NULL);
1096 format_name = "GNU LOCATE02";
1098 else /* Use the old format */
1100 int i;
1102 nread += nread2;
1103 /* Read the list of the most common bigrams in the database. */
1104 if (nread < 256)
1106 int more_read = fread (procdata.original_filename + nread, 1,
1107 256 - nread, procdata.fp);
1108 /* XXX: check more_read+nread! */
1111 for (i = 0; i < 128; i++)
1113 procdata.bigram1[i] = procdata.original_filename[i << 1];
1114 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
1116 format_name = "old";
1117 add_visitor(visit_old_format, NULL);
1121 if (basename_only)
1122 add_visitor(visit_basename, NULL);
1124 /* Add an inspector for each pattern we're looking for. */
1125 for ( argn = 0; argn < argc; argn++ )
1127 results_were_filtered = true;
1128 pathpart = argv[argn];
1129 if (regex)
1131 struct regular_expression *p = xmalloc(sizeof(*p));
1132 const char *error_message = NULL;
1134 memset (&p->regex, 0, sizeof (p->regex));
1136 re_set_syntax(regex_options);
1137 p->regex.allocated = 100;
1138 p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
1139 p->regex.fastmap = NULL;
1140 p->regex.syntax = regex_options;
1141 p->regex.translate = NULL;
1143 error_message = re_compile_pattern (pathpart, strlen (pathpart),
1144 &p->regex);
1145 if (error_message)
1147 error (1, 0, "%s", error_message);
1149 else
1151 add_visitor(visit_regex, p);
1154 else if (contains_metacharacter(pathpart))
1156 if (ignore_case)
1157 add_visitor(visit_globmatch_casefold, pathpart);
1158 else
1159 add_visitor(visit_globmatch_nofold, pathpart);
1161 else
1163 /* No glob characters used. Hence we match on
1164 * _any part_ of the filename, not just the
1165 * basename. This seems odd to me, but it is the
1166 * traditional behaviour.
1167 * James Youngman <jay@gnu.org>
1169 if (ignore_case)
1170 add_visitor(visit_substring_match_casefold, pathpart);
1171 else
1172 add_visitor(visit_substring_match_nocasefold, pathpart);
1176 pvis = lastinspector;
1178 /* We add visit_existing_*() as late as possible to reduce the
1179 * number of stat() calls.
1181 switch (do_check_existence)
1183 case ACCEPT_EXISTING:
1184 results_were_filtered = true;
1185 if (follow_symlinks) /* -L, default */
1186 add_visitor(visit_existing_follow, NULL);
1187 else /* -P */
1188 add_visitor(visit_existing_nofollow, NULL);
1189 break;
1191 case ACCEPT_NON_EXISTING:
1192 results_were_filtered = true;
1193 if (follow_symlinks) /* -L, default */
1194 add_visitor(visit_non_existing_follow, NULL);
1195 else /* -P */
1196 add_visitor(visit_non_existing_nofollow, NULL);
1197 break;
1199 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1200 /* do nothing; no extra processing. */
1201 break;
1204 /* Security issue: The stats visitor must be added immediately
1205 * before the print visitor, because otherwise the -S option would
1206 * leak information about files that the caller cannot see.
1208 if (stats)
1209 add_visitor(visit_stats, &statistics);
1211 if (enable_print)
1213 if (print_quoted_filename)
1214 add_visitor(visit_justprint_quoted, NULL);
1215 else
1216 add_visitor(visit_justprint_unquoted, NULL);
1220 if (use_limit)
1221 add_visitor(visit_limit, plimit);
1222 else
1223 add_visitor(visit_count, plimit);
1226 if (argc > 1)
1228 past_pat_inspector = pvis->next;
1229 if (op_and)
1230 mainprocessor = process_and;
1231 else
1232 mainprocessor = process_or;
1234 else
1235 mainprocessor = process_simple;
1237 if (stats)
1239 printf(_("Database %s is in the %s format.\n"),
1240 procdata.dbfile,
1241 format_name);
1245 procdata.c = getc (procdata.fp);
1246 /* If we are searching for filename patterns, the inspector list
1247 * will contain an entry for each pattern for which we are searching.
1249 while ( (procdata.c != EOF) &&
1250 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1252 /* Do nothing; all the work is done in the visitor functions. */
1255 if (stats)
1257 if (filesize)
1258 print_stats(argc, filesize);
1261 if (ferror (procdata.fp))
1263 error (0, errno, "%s", procdata.dbfile);
1264 return 0;
1266 return plimit->items_accepted;
/* Version text.  Only declared here; no definition is visible in the
   part of the file reviewed.  */
extern char *version_string;

/* The name this program was run with. */
char *program_name;
/* Write the command-line synopsis, with program_name substituted for
   the %s, and then the bug-reporting address, to STREAM.  Both texts
   are translatable.  The synopsis literal is built with backslash-newline
   continuations, so whitespace at the start of each continued line is
   part of the output -- do not re-indent it.  */
1277 static void
1278 usage (FILE *stream)
1280 fprintf (stream, _("\
1281 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1282 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1283 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1284 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1285 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1286 [--max-database-age D] [--version] [--help]\n\
1287 pattern...\n"),
1288 program_name);
1289 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Codes returned by getopt_long() for the long options that have no
 * single-character equivalent.  They start just above CHAR_MAX so
 * that they can never coincide with an option character.
 */
enum
  {
    REGEXTYPE_OPTION = CHAR_MAX + 1,    /* --regextype=TYPE */
    MAX_DB_AGE       = CHAR_MAX + 2     /* --max-database-age D */
  };
1298 static struct option const longopts[] =
1300 {"database", required_argument, NULL, 'd'},
1301 {"existing", no_argument, NULL, 'e'},
1302 {"non-existing", no_argument, NULL, 'E'},
1303 {"ignore-case", no_argument, NULL, 'i'},
1304 {"all", no_argument, NULL, 'A'},
1305 {"help", no_argument, NULL, 'h'},
1306 {"version", no_argument, NULL, 'v'},
1307 {"null", no_argument, NULL, '0'},
1308 {"count", no_argument, NULL, 'c'},
1309 {"wholename", no_argument, NULL, 'w'},
1310 {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
1311 {"basename", no_argument, NULL, 'b'},
1312 {"print", no_argument, NULL, 'p'},
1313 {"stdio", no_argument, NULL, 's'},
1314 {"mmap", no_argument, NULL, 'm'},
1315 {"limit", required_argument, NULL, 'l'},
1316 {"regex", no_argument, NULL, 'r'},
1317 {"regextype", required_argument, NULL, REGEXTYPE_OPTION},
1318 {"statistics", no_argument, NULL, 'S'},
1319 {"follow", no_argument, NULL, 'L'},
1320 {"nofollow", no_argument, NULL, 'P'},
1321 {"max-database-age", required_argument, NULL, MAX_DB_AGE},
1322 {NULL, no_argument, NULL, 0}
/* Give up any privileges obtained through set-user-ID or set-group-ID
 * execution.
 *
 * Steps, in order:
 *   1. If the effective UID is 0, reduce the supplementary group list
 *      to just the real GID.
 *   2. If the real and effective UIDs differ and the real UID is not
 *      root, set the UID to the real UID, then verify that setuid(0)
 *      now fails.
 *   3. Set the GID to the real GID.
 *
 * Returns 0 on success.  On any failure the function does not return:
 * it reports the problem with error() using exit status 1, and the
 * statements that follow are fallbacks in case that call returns.
 */
static int
drop_privs(void)
{
  const char * what = "failed";
  const uid_t orig_euid = geteuid();
  const uid_t uid = getuid();
  const gid_t gid = getgid();

  /* Use of setgroups() is restricted to root only. */
  if (0 == orig_euid)
    {
      /* EUID == 0: we are either really root or running setuid-root.
       * The single slot of the array is index 0; writing to index 1
       * would overrun the array and leave the entry that setgroups()
       * actually reads uninitialised.
       */
      gid_t groups[1];
      groups[0] = gid;
      if (0 != setgroups(1, groups))
        {
          what = _("failed to drop group privileges");
          goto fail;
        }
    }

  /* Drop any setuid privileges */
  if (uid != orig_euid)
    {
      if (0 == uid)
        {
          /* We're really root anyway, but are setuid to something else. Leave it. */
        }
      else
        {
          errno = 0;
          if (0 != setuid(uid))
            {
              what = _("failed to drop setuid privileges");
              goto fail;
            }

          /* Defend against the case where the attacker runs us with the
           * capability to call setuid() turned off, which on some systems
           * will cause the above attempt to drop privileges fail (leaving us
           * privileged).
           *
           * Check that we can no longer switch back to root.
           */
          if (0 == setuid(0))
            {
              what = _("Failed to fully drop privileges");
              /* The errno value here is not interesting (since
               * the system call we are complaining about
               * succeeded when we wanted it to fail).  Arrange
               * for the call to error() not to print the errno
               * value by setting errno=0.
               */
              errno = 0;
              goto fail;
            }
        }
    }

  /* Drop any setgid privileges */
  errno = 0;
  if (0 != setgid(gid))
    {
      what = _("failed to drop setgid privileges");
      goto fail;
    }

  /* success. */
  return 0;

 fail:
  error(1, errno, "%s", what);
  /* error() was asked to exit; everything below is a fallback so that
   * we never continue while still holding privileges.
   */
  abort();
  kill(0, SIGKILL);
  _exit(1);
  /*NOTREACHED*/
  /* ... we hope. */
  for (;;)
    {
      /* deliberate infinite loop */
    }
}
/* Open the database file NAME read-only (with O_LARGEFILE where that
 * flag exists) and mark the descriptor close-on-exec.
 *
 * Returns the descriptor on success.  Returns a negative value if the
 * file cannot be opened, or -1 if the close-on-exec flag cannot be
 * set; in the latter case the descriptor is closed before returning.
 */
static int
opendb(const char *name)
{
  int flags = O_RDONLY;
  int fd;

#if defined(O_LARGEFILE)
  flags |= O_LARGEFILE;
#endif

  fd = open(name, flags);
  if (fd < 0)
    return fd;

  /* Make sure the descriptor won't survive an exec. */
  if (fcntl(fd, F_SETFD, FD_CLOEXEC) != 0)
    {
      close(fd);
      return -1;
    }

  return fd;
}
1432 dolocate (int argc, char **argv, int secure_db_fd)
1434 char *dbpath;
1435 unsigned long int found = 0uL;
1436 int optc;
1437 int ignore_case = 0;
1438 int print = 0;
1439 int just_count = 0;
1440 int basename_only = 0;
1441 int use_limit = 0;
1442 int regex = 0;
1443 int regex_options = RE_SYNTAX_EMACS;
1444 int stats = 0;
1445 int op_and = 0;
1446 const char *e;
1447 FILE *fp;
1448 int they_chose_db = 0;
1449 bool did_stdin = false; /* Set to prevent rereading stdin. */
1451 program_name = argv[0];
1453 #ifdef HAVE_SETLOCALE
1454 setlocale (LC_ALL, "");
1455 #endif
1456 bindtextdomain (PACKAGE, LOCALEDIR);
1457 textdomain (PACKAGE);
1458 atexit (close_stdout);
1460 limits.limit = 0;
1461 limits.items_accepted = 0;
1463 quote_opts = clone_quoting_options (NULL);
1464 print_quoted_filename = true;
1466 /* We cannot simultaneously trust $LOCATE_PATH and use the
1467 * setuid-access-controlled database,, since that could cause a leak
1468 * of private data.
1470 dbpath = getenv ("LOCATE_PATH");
1471 if (dbpath)
1473 they_chose_db = 1;
1476 check_existence = ACCEPT_EITHER;
1478 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1479 switch (optc)
1481 case '0':
1482 separator = 0;
1483 print_quoted_filename = false; /* print filename 'raw'. */
1484 break;
1486 case 'A':
1487 op_and = 1;
1488 break;
1490 case 'b':
1491 basename_only = 1;
1492 break;
1494 case 'c':
1495 just_count = 1;
1496 break;
1498 case 'd':
1499 dbpath = optarg;
1500 they_chose_db = 1;
1501 break;
1503 case 'e':
1504 check_existence = ACCEPT_EXISTING;
1505 break;
1507 case 'E':
1508 check_existence = ACCEPT_NON_EXISTING;
1509 break;
1511 case 'i':
1512 ignore_case = 1;
1513 break;
1515 case 'h':
1516 usage (stdout);
1517 return 0;
1519 case MAX_DB_AGE:
1520 /* XXX: nothing in the test suite for this option. */
1521 set_max_db_age(optarg);
1522 break;
1524 case 'p':
1525 print = 1;
1526 break;
1528 case 'v':
1529 printf (_("GNU locate version %s\n"), version_string);
1530 printf (_("Built using GNU gnulib version %s\n"), gnulib_version);
1531 return 0;
1533 case 'w':
1534 basename_only = 0;
1535 break;
1537 case 'r':
1538 regex = 1;
1539 break;
1541 case REGEXTYPE_OPTION:
1542 regex_options = get_regex_type(optarg);
1543 break;
1545 case 'S':
1546 stats = 1;
1547 break;
1549 case 'L':
1550 follow_symlinks = 1;
1551 break;
1553 /* In find, -P and -H differ in the way they handle paths
1554 * given on the command line. This is not relevant for
1555 * locate, but the -H option is supported because it is
1556 * probably more intuitive to do so.
1558 case 'P':
1559 case 'H':
1560 follow_symlinks = 0;
1561 break;
1563 case 'l':
1565 char *end = optarg;
1566 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1567 if (LONGINT_OK != err)
1569 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1571 use_limit = 1;
1573 break;
1575 case 's': /* use stdio */
1576 case 'm': /* use mmap */
1577 /* These options are implemented simply for
1578 * compatibility with FreeBSD
1580 break;
1582 default:
1583 usage (stderr);
1584 return 1;
1588 /* If the user gave the -d option or set LOCATE_PATH,
1589 * relinquish access to the secure database.
1591 if (they_chose_db)
1593 if (secure_db_fd >= 0)
1595 close(secure_db_fd);
1596 secure_db_fd = -1;
1600 if (!just_count && !stats)
1601 print = 1;
1603 if (stats)
1605 if (optind == argc)
1606 use_limit = 0;
1608 else
1610 if (!just_count && optind == argc)
1612 usage (stderr);
1613 return 1;
1618 if (1 == isatty(STDOUT_FILENO))
1619 stdout_is_a_tty = true;
1620 else
1621 stdout_is_a_tty = false;
1623 if (they_chose_db)
1624 next_element (dbpath, 0); /* Initialize. */
1626 /* Bail out early if limit already reached. */
1627 while (!use_limit || limits.limit > limits.items_accepted)
1629 struct stat st;
1630 int fd;
1631 off_t filesize;
1633 statistics.compressed_bytes =
1634 statistics.total_filename_count =
1635 statistics.total_filename_length =
1636 statistics.whitespace_count =
1637 statistics.newline_count =
1638 statistics.highbit_filename_count = 0u;
1640 if (they_chose_db)
1642 /* Take the next element from the list of databases */
1643 e = next_element ((char *) NULL, 0);
1644 if (NULL == e)
1645 break;
1647 if (0 == strcmp (e, "-"))
1649 if (did_stdin)
1651 error (0, 0,
1652 _("warning: the locate database can only be read from stdin once."));
1653 return 0;
1655 else
1657 e = "<stdin>";
1658 fd = 0;
1659 did_stdin = true;
1662 else
1664 if (0 == strlen(e) || 0 == strcmp(e, "."))
1666 e = LOCATE_DB;
1669 /* open the database */
1670 fd = opendb(e);
1671 if (fd < 0)
1673 error (0, errno, "%s", e);
1674 return 0;
1678 else
1680 if (-1 == secure_db_fd)
1682 /* Already searched the database, it's time to exit the loop */
1683 break;
1685 else
1687 e = selected_secure_db;
1688 fd = secure_db_fd;
1689 secure_db_fd = -1;
1693 /* Check the database to see if it is old. */
1694 if (fstat(fd, &st))
1696 error (0, errno, "%s", e);
1697 /* continue anyway */
1698 filesize = (off_t)0;
1700 else
1702 time_t now;
1704 filesize = st.st_size;
1706 if ((time_t)-1 == time(&now))
1708 /* If we can't tell the time, we don't know how old the
1709 * database is. But since the message is just advisory,
1710 * we continue anyway.
1712 error (0, errno, "time system call");
1714 else
1716 double age = difftime(now, st.st_mtime);
1717 double warn_seconds = SECONDS_PER_UNIT * warn_number_units;
1718 if (age > warn_seconds)
1720 /* For example:
1721 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1722 error (0, 0,
1723 _("warning: database `%s' is more than %d %s old (actual age is %.1f %s)"),
1725 warn_number_units, _(warn_name_units),
1726 (age/(double)SECONDS_PER_UNIT), _(warn_name_units));
1731 fp = fdopen(fd, "r");
1732 if (NULL == fp)
1734 error (0, errno, "%s", e);
1735 return 0;
1738 /* Search this database for all patterns simultaneously */
1739 found = search_one_database (argc - optind, &argv[optind],
1740 e, fp, filesize,
1741 ignore_case, print, basename_only,
1742 use_limit, &limits, stats,
1743 op_and, regex, regex_options);
1745 /* Close the databsase (even if it is stdin) */
1746 if (fclose (fp) == EOF)
1748 error (0, errno, "%s", e);
1749 return 0;
1753 if (just_count)
1755 printf("%ld\n", found);
1758 if (found || (use_limit && (limits.limit==0)) || stats )
1759 return 0;
1760 else
1761 return 1;
/* Number of elements in the array A.  A must be a true array, not a
 * pointer.  The argument is parenthesised so that any expression can
 * be passed safely.
 */
#define ARRAYSIZE(a) (sizeof(a)/sizeof((a)[0]))
1765 static int
1766 open_secure_db(void)
1768 int fd, i;
1770 const char * secure_db_list[] =
1772 LOCATE_DB,
1773 "/var/lib/slocate/slocate.db",
1774 NULL
1776 for (i=0; secure_db_list[i]; ++i)
1778 fd = opendb(secure_db_list[i]);
1779 if (fd >= 0)
1781 selected_secure_db = secure_db_list[i];
1782 return fd;
1785 return -1;
/* Program entry point.  The database is opened first, before
 * drop_privs() is called, and the resulting descriptor (or -1) is then
 * handed to dolocate(), whose return value becomes the exit status.
 */
int
main (int argc, char **argv)
{
  const int secure_fd = open_secure_db();

  /* drop_privs() returns only on success, so its result is unused. */
  (void) drop_privs();

  return dolocate(argc, argv, secure_fd);
}