did make dist
[findutils.git] / locate / locate.c
blobe32521918f43a87a8929a6f967b753cee4417ba5
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <signal.h>
66 #include <ctype.h>
67 #include <sys/types.h>
68 #include <sys/stat.h>
69 #include <time.h>
70 #include <fnmatch.h>
71 #include <getopt.h>
72 #include <xstrtol.h>
74 /* The presence of unistd.h is assumed by gnulib these days, so we
75 * might as well assume it too.
77 /* We need <unistd.h> for isatty(). */
78 #include <unistd.h>
80 #if HAVE_FCNTL_H
81 /* We use fcntl() */
82 #include <fcntl.h>
83 #endif
85 #define NDEBUG
86 #include <assert.h>
88 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
89 #include <string.h>
90 #else
91 #include <strings.h>
92 #define strchr index
93 #endif
95 #ifdef STDC_HEADERS
96 #include <stdlib.h>
97 #endif
99 #ifdef HAVE_ERRNO_H
100 #include <errno.h>
101 #else
102 extern int errno;
103 #endif
105 #ifdef HAVE_LOCALE_H
106 #include <locale.h>
107 #endif
109 #if ENABLE_NLS
110 # include <libintl.h>
111 # define _(Text) gettext (Text)
112 #else
113 # define _(Text) Text
114 #define textdomain(Domain)
115 #define bindtextdomain(Package, Directory)
116 #endif
117 #ifdef gettext_noop
118 # define N_(String) gettext_noop (String)
119 #else
120 /* We used to use (String) instead of just String, but apparently ISO C
121 * doesn't allow this (at least, that's what HP said when someone reported
122 * this as a compiler bug). This is HP case number 1205608192. See
123 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
124 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
125 * like: static const char buf[] = ("string");
127 # define N_(String) String
128 #endif
130 #include "locatedb.h"
131 #include <getline.h>
132 #include "../gnulib/lib/xalloc.h"
133 #include "../gnulib/lib/error.h"
134 #include "../gnulib/lib/human.h"
135 #include "dirname.h"
136 #include "closeout.h"
137 #include "nextelem.h"
138 #include "regex.h"
139 #include "quote.h"
140 #include "quotearg.h"
141 #include "printquoted.h"
142 #include "regextype.h"
145 /* Note that these macros may evaluate their argument Ch more than once. */
146 #ifdef _LIBC
147 # define TOUPPER(Ch) toupper (Ch)
148 # define TOLOWER(Ch) tolower (Ch)
149 #else
150 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
151 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
152 #endif
154 /* typedef enum {false, true} boolean; */
156 /* Warn if a database is older than this. 8 days allows for a weekly
157 update that takes up to a day to perform. */
158 #define WARN_NUMBER_UNITS (8)
159 /* Printable name of units used in WARN_SECONDS */
160 static const char warn_name_units[] = N_("days");
161 #define SECONDS_PER_UNIT (60 * 60 * 24)
163 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
/* Verdict handed back by every visitor callback.  The values are
   distinct bits so that a caller can test for several verdicts at
   once with a single mask. */
enum visit_result
  {
    VISIT_CONTINUE = 1,  /* please call the next visitor */
    VISIT_ACCEPTED = 2,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 4,  /* rejected, process next file. */
    VISIT_ABORT    = 8   /* rejected, process no more files. */
  };
/* Which database entries are acceptable with respect to whether the
   named file currently exists. */
enum ExistenceCheckType
  {
    ACCEPT_EITHER,        /* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING,      /* Corresponds to option -e */
    ACCEPT_NON_EXISTING   /* Corresponds to option -E */
  };

/* Check for existence of files before printing them out? */
enum ExistenceCheckType check_existence = ACCEPT_EITHER;

/* Nonzero selects the stat()-based existence visitors, zero the
   lstat()-based ones. */
static int follow_symlinks = 1;

/* What to separate the results with. */
static int separator = '\n';

/* State used when printing results. */
static struct quoting_options *quote_opts = NULL;
static bool stdout_is_a_tty;
static bool print_quoted_filename;

/* Set once any pattern or existence filter has been installed. */
static bool results_were_filtered;

static char *slocate_db_pathname = "/var/lib/slocate/slocate.db";

static const char *selected_secure_db = NULL;
/* Read in a 16-bit int, high byte first (network byte order), and
   return it as a signed value.

   The two bytes are combined in unsigned arithmetic and sign-extended
   afterwards; shifting a negative signed value left, as the previous
   implementation did, is undefined behaviour.  If fgetc() reports
   end-of-file the missing byte is treated as 0xff, so a read at EOF
   yields -1 exactly as before. */
static short
get_short (FILE *fp)
{
  /* Two separate statements so that the read order is well defined. */
  const int hi = fgetc (fp);
  const int lo = fgetc (fp);
  const unsigned int word =
    (((unsigned int) hi & 0xffu) << 8) | ((unsigned int) lo & 0xffu);

  if (word >= 0x8000u)
    return (short) ((int) word - 0x10000);   /* sign-extend */
  return (short) word;
}
/* The characters that make a pattern a shell glob rather than a
   plain substring. */
const char * const metacharacters = "*?[]\\";

/* Return 1 if S contains at least one of the characters listed in
   `metacharacters', otherwise 0. */
static int
contains_metacharacter(const char *s)
{
  return strpbrk(s, metacharacters) != NULL;
}
/* locate_read_str()
 *
 * Read bytes from FP into the buffer at offset OFFS in (*BUF), until
 * we reach DELIMITER or end-of-file.  We reallocate the buffer as
 * necessary, altering (*BUF) and (*SIZ) as appropriate.  No assumption
 * is made regarding the content of the data (i.e. the implementation
 * is 8-bit clean, the only delimiter is DELIMITER).
 *
 * Returns the number of bytes read (this count includes the delimiter
 * when one was found), or a negative value if nothing could be read
 * or the buffer could not be enlarged.  On success the byte following
 * the copied data is set to NUL, so the buffer is always terminated
 * even when the input ended without a delimiter.
 *
 * The temporary buffer obtained from getdelim() is released on every
 * path, including the failure paths.
 *
 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
 * has been removed from gnulib.
 *
 * We call the function locate_read_str() to avoid a name clash with
 * the curses function getstr().
 */
static int
locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char *p = NULL;
  size_t sz = 0;
  size_t needed;
  int nread;

  nread = getdelim(&p, &sz, delimiter, fp);
  if (nread < 0)
    {
      /* getdelim() may have allocated a buffer even though it failed. */
      free(p);
      return nread;
    }

  /* Room for the prefix, the new data and a terminating NUL. */
  needed = offs + nread + 1u;
  if (needed > (*siz))
    {
      char *pnew = realloc(*buf, needed);
      if (NULL == pnew)
        {
          free(p);
          return -1;		/* FAIL */
        }
      *siz = needed;
      *buf = pnew;
    }

  memcpy((*buf) + offs, p, nread);
  (*buf)[offs + nread] = '\0';
  free(p);
  return nread;
}
/* Copy the NUL-terminated string SRC to DEST, converting each
   character to lower case on the way.  DEST must have room for
   strlen(SRC)+1 bytes.  DEST may be the same pointer as SRC, since
   each byte is read before the byte at the same position is written.

   Each character is converted to unsigned char before being handed to
   tolower(); passing a negative plain char to the <ctype.h> functions
   is undefined behaviour. */
static void
lc_strcpy(char *dest, const char *src)
{
  while (*src)
    {
      *dest++ = (char) tolower ((unsigned char) *src);
      ++src;
    }
  *dest = 0;
}
/* Running result count, plus the cap compared against it by
   visit_limit(). */
struct locate_limits
{
  uintmax_t limit;            /* threshold tested by visit_limit() */
  uintmax_t items_accepted;   /* incremented by visit_limit()/visit_count() */
};

static struct locate_limits limits;
/* Counters accumulated by visit_stats() and reported by
   print_stats(). */
struct locate_stats
{
  uintmax_t compressed_bytes;        /* not referenced by the code visible here */
  uintmax_t total_filename_count;    /* names seen */
  uintmax_t total_filename_length;   /* sum of strlen() over those names */
  uintmax_t whitespace_count;        /* names containing whitespace */
  uintmax_t newline_count;           /* names containing a newline */
  uintmax_t highbit_filename_count;  /* names containing a byte with bit 7 set */
};

static struct locate_stats statistics;
/* A growable scratch buffer; visit_casefold() stores the lower-cased
   copy of the current filename in it. */
struct stringbuf
{
  char *buffer;        /* the storage itself */
  size_t buffersize;   /* bytes currently allocated for `buffer' */
  size_t *preqlen;     /* points at the size the buffer must be able to hold */
};

static struct stringbuf casebuf;

/* Context for visit_substring_match_casefold(): the pattern to look
   for and the buffer holding the case-folded filename. */
struct casefolder
{
  const char *pattern;
  struct stringbuf *pbuf;
};
318 struct regular_expression
320 struct re_pattern_buffer regex; /* for --regex */
/* State shared between the database reader and all the visitors
   while one database is being scanned. */
struct process_data
{
  int c;                    /* An input byte. */
  char itemcount;           /* Indicates we're at the beginning of an slocate db. */
  int count;                /* The length of the prefix shared with the previous database entry. */
  int len;
  char *original_filename;  /* The current input database entry. */
  size_t pathsize;          /* Amount allocated for it. */
  char *munged_filename;    /* path or base_name(path) */
  FILE *fp;                 /* The pathname database. */
  const char *dbfile;       /* Its name, or "<stdin>" */
  int slocatedb_format;     /* Allows us to cope with slocate's format variant */

  /* for the old database format,
     the first and second characters of the most common bigrams. */
  char bigram1[128];
  char bigram2[128];
};
/* A visitor callback: receives the shared scan state and its own
   CONTEXT pointer, and returns an `enum visit_result' value. */
typedef int (*visitfunc)(struct process_data *procdata,
                         void *context);

/* One element of the singly linked list of visitors. */
struct visitor
{
  visitfunc inspector;     /* the callback */
  void *context;           /* passed back to the callback unchanged */
  struct visitor *next;    /* following element, NULL at the end */
};

static struct visitor *inspectors = NULL;          /* head of the list */
static struct visitor *lastinspector = NULL;       /* tail, used by add_visitor() */
static struct visitor *past_pat_inspector = NULL;  /* where process_or()/process_and() end their first pass */
358 /* 0 or 1 pattern(s) */
359 static int
360 process_simple(struct process_data *procdata)
362 int result = VISIT_CONTINUE;
363 const struct visitor *p = inspectors;
365 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
367 result = (p->inspector)(procdata, p->context);
368 p = p->next;
371 return result;
374 /* Accept if any pattern matches. */
375 static int
376 process_or (struct process_data *procdata)
378 int result = VISIT_CONTINUE;
379 const struct visitor *p = inspectors;
381 while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
383 result = (p->inspector)(procdata, p->context);
384 p = p->next;
387 if (result == VISIT_CONTINUE)
388 result = VISIT_REJECTED;
389 if (result & (VISIT_ABORT | VISIT_REJECTED))
390 return result;
392 p = past_pat_inspector;
393 result = VISIT_CONTINUE;
395 while ( (VISIT_CONTINUE == result) && (NULL != p) )
397 result = (p->inspector)(procdata, p->context);
398 p = p->next;
401 if (VISIT_CONTINUE == result)
402 return VISIT_ACCEPTED;
403 else
404 return result;
407 /* Accept if all pattern match. */
408 static int
409 process_and (struct process_data *procdata)
411 int result = VISIT_CONTINUE;
412 const struct visitor *p = inspectors;
414 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
416 result = (p->inspector)(procdata, p->context);
417 p = p->next;
420 if (result == VISIT_CONTINUE)
421 result = VISIT_REJECTED;
422 if (result & (VISIT_ABORT | VISIT_REJECTED))
423 return result;
425 p = past_pat_inspector;
426 result = VISIT_CONTINUE;
428 while ( (VISIT_CONTINUE == result) && (NULL != p) )
430 result = (p->inspector)(procdata, p->context);
431 p = p->next;
434 if (VISIT_CONTINUE == result)
435 return VISIT_ACCEPTED;
436 else
437 return result;
/* Type of the per-entry driver (process_simple, process_or or
   process_and). */
typedef int (*processfunc)(struct process_data *procdata);

/* The driver chosen by search_one_database() for the current scan. */
static processfunc mainprocessor = NULL;
444 static void
445 add_visitor(visitfunc fn, void *context)
447 struct visitor *p = xmalloc(sizeof(struct visitor));
448 p->inspector = fn;
449 p->context = context;
450 p->next = NULL;
452 if (NULL == lastinspector)
454 lastinspector = inspectors = p;
456 else
458 lastinspector->next = p;
459 lastinspector = p;
465 static int
466 visit_justprint_quoted(struct process_data *procdata, void *context)
468 (void) context;
469 print_quoted (stdout, quote_opts, stdout_is_a_tty,
470 "%s",
471 procdata->original_filename);
472 putchar(separator);
473 return VISIT_CONTINUE;
476 static int
477 visit_justprint_unquoted(struct process_data *procdata, void *context)
479 (void) context;
480 fputs(procdata->original_filename, stdout);
481 putchar(separator);
482 return VISIT_CONTINUE;
485 static int
486 visit_old_format(struct process_data *procdata, void *context)
488 register char *s;
489 (void) context;
491 /* Get the offset in the path where this path info starts. */
492 if (procdata->c == LOCATEDB_OLD_ESCAPE)
493 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
494 else
495 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
497 /* Overlay the old path with the remainder of the new. */
498 for (s = procdata->original_filename + procdata->count;
499 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
500 if (procdata->c < 0200)
501 *s++ = procdata->c; /* An ordinary character. */
502 else
504 /* Bigram markers have the high bit set. */
505 procdata->c &= 0177;
506 *s++ = procdata->bigram1[procdata->c];
507 *s++ = procdata->bigram2[procdata->c];
509 *s-- = '\0';
511 procdata->munged_filename = procdata->original_filename;
513 return VISIT_CONTINUE;
517 static int
518 visit_locate02_format(struct process_data *procdata, void *context)
520 register char *s;
521 int nread;
522 (void) context;
524 if (procdata->slocatedb_format)
526 if (procdata->itemcount == 0)
528 ungetc(procdata->c, procdata->fp);
529 procdata->count = 0;
530 procdata->len = 0;
532 else if (procdata->itemcount == 1)
534 procdata->count = procdata->len-1;
536 else
538 if (procdata->c == LOCATEDB_ESCAPE)
539 procdata->count += (short)get_short (procdata->fp);
540 else if (procdata->c > 127)
541 procdata->count += procdata->c - 256;
542 else
543 procdata->count += procdata->c;
546 else
548 if (procdata->c == LOCATEDB_ESCAPE)
549 procdata->count += (short)get_short (procdata->fp);
550 else if (procdata->c > 127)
551 procdata->count += procdata->c - 256;
552 else
553 procdata->count += procdata->c;
556 if (procdata->count > procdata->len || procdata->count < 0)
558 /* This should not happen generally , but since we're
559 * reading in data which is outside our control, we
560 * cannot prevent it.
562 error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
565 /* Overlay the old path with the remainder of the new. */
566 nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
567 procdata->fp, 0, procdata->count);
568 if (nread < 0)
569 return VISIT_ABORT;
570 procdata->c = getc (procdata->fp);
571 procdata->len = procdata->count + nread;
572 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
573 assert (s[0] != '\0');
574 assert (s[1] == '\0'); /* Our terminator. */
575 assert (s[2] == '\0'); /* Added by locate_read_str. */
577 procdata->munged_filename = procdata->original_filename;
579 if (procdata->slocatedb_format)
581 /* Don't increment indefinitely, it might overflow. */
582 if (procdata->itemcount < 6)
584 ++(procdata->itemcount);
589 return VISIT_CONTINUE;
592 static int
593 visit_basename(struct process_data *procdata, void *context)
595 (void) context;
596 procdata->munged_filename = base_name(procdata->original_filename);
598 return VISIT_CONTINUE;
602 static int
603 visit_casefold(struct process_data *procdata, void *context)
605 struct stringbuf *b = context;
607 if (*b->preqlen+1 > b->buffersize)
609 b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
610 b->buffersize = *b->preqlen+1;
612 lc_strcpy(b->buffer, procdata->munged_filename);
614 return VISIT_CONTINUE;
617 /* visit_existing_follow implements -L -e */
618 static int
619 visit_existing_follow(struct process_data *procdata, void *context)
621 struct stat st;
622 (void) context;
624 /* munged_filename has been converted in some way (to lower case,
625 * or is just the base name of the file), and original_filename has not.
626 * Hence only original_filename is still actually the name of the file
627 * whose existence we would need to check.
629 if (stat(procdata->original_filename, &st) != 0)
631 return VISIT_REJECTED;
633 else
635 return VISIT_CONTINUE;
639 /* visit_non_existing_follow implements -L -E */
640 static int
641 visit_non_existing_follow(struct process_data *procdata, void *context)
643 struct stat st;
644 (void) context;
646 /* munged_filename has been converted in some way (to lower case,
647 * or is just the base name of the file), and original_filename has not.
648 * Hence only original_filename is still actually the name of the file
649 * whose existence we would need to check.
651 if (stat(procdata->original_filename, &st) == 0)
653 return VISIT_REJECTED;
655 else
657 return VISIT_CONTINUE;
661 /* visit_existing_nofollow implements -P -e */
662 static int
663 visit_existing_nofollow(struct process_data *procdata, void *context)
665 struct stat st;
666 (void) context;
668 /* munged_filename has been converted in some way (to lower case,
669 * or is just the base name of the file), and original_filename has not.
670 * Hence only original_filename is still actually the name of the file
671 * whose existence we would need to check.
673 if (lstat(procdata->original_filename, &st) != 0)
675 return VISIT_REJECTED;
677 else
679 return VISIT_CONTINUE;
683 /* visit_non_existing_nofollow implements -P -E */
684 static int
685 visit_non_existing_nofollow(struct process_data *procdata, void *context)
687 struct stat st;
688 (void) context;
690 /* munged_filename has been converted in some way (to lower case,
691 * or is just the base name of the file), and original_filename has not.
692 * Hence only original_filename is still actually the name of the file
693 * whose existence we would need to check.
695 if (lstat(procdata->original_filename, &st) == 0)
697 return VISIT_REJECTED;
699 else
701 return VISIT_CONTINUE;
705 static int
706 visit_substring_match_nocasefold(struct process_data *procdata, void *context)
708 const char *pattern = context;
710 if (NULL != strstr(procdata->munged_filename, pattern))
711 return VISIT_ACCEPTED;
712 else
713 return VISIT_REJECTED;
716 static int
717 visit_substring_match_casefold(struct process_data *procdata, void *context)
719 const struct casefolder * p = context;
720 const struct stringbuf * b = p->pbuf;
721 (void) procdata;
723 if (NULL != strstr(b->buffer, p->pattern))
724 return VISIT_ACCEPTED;
725 else
726 return VISIT_REJECTED;
730 static int
731 visit_globmatch_nofold(struct process_data *procdata, void *context)
733 const char *glob = context;
734 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
735 return VISIT_REJECTED;
736 else
737 return VISIT_ACCEPTED;
741 static int
742 visit_globmatch_casefold(struct process_data *procdata, void *context)
744 const char *glob = context;
745 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
746 return VISIT_REJECTED;
747 else
748 return VISIT_ACCEPTED;
752 static int
753 visit_regex(struct process_data *procdata, void *context)
755 struct regular_expression *p = context;
756 const size_t len = strlen(procdata->munged_filename);
758 int rv = re_search (&p->regex, procdata->munged_filename,
759 len, 0, len,
760 (struct re_registers *) NULL);
761 if (rv < 0)
763 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
765 else
767 return VISIT_ACCEPTED; /* match */
772 static int
773 visit_stats(struct process_data *procdata, void *context)
775 struct locate_stats *p = context;
776 size_t len = strlen(procdata->original_filename);
777 const char *s;
778 int highbit, whitespace, newline;
780 ++(p->total_filename_count);
781 p->total_filename_length += len;
783 highbit = whitespace = newline = 0;
784 for (s=procdata->original_filename; *s; ++s)
786 if ( (int)(*s) & 128 )
787 highbit = 1;
788 if ('\n' == *s)
790 newline = whitespace = 1;
792 else if (isspace((unsigned char)*s))
794 whitespace = 1;
798 if (highbit)
799 ++(p->highbit_filename_count);
800 if (whitespace)
801 ++(p->whitespace_count);
802 if (newline)
803 ++(p->newline_count);
805 return VISIT_CONTINUE;
809 static int
810 visit_limit(struct process_data *procdata, void *context)
812 struct locate_limits *p = context;
814 (void) procdata;
816 if (++p->items_accepted >= p->limit)
817 return VISIT_ABORT;
818 else
819 return VISIT_CONTINUE;
822 static int
823 visit_count(struct process_data *procdata, void *context)
825 struct locate_limits *p = context;
827 (void) procdata;
829 ++p->items_accepted;
830 return VISIT_CONTINUE;
833 /* Emit the statistics.
835 static void
836 print_stats(int argc, size_t database_file_size)
838 char hbuf[LONGEST_HUMAN_READABLE + 1];
840 printf(_("Locate database size: %s bytes\n"),
841 human_readable ((uintmax_t) database_file_size,
842 hbuf, human_ceiling, 1, 1));
844 printf( (results_were_filtered ?
845 _("Matching Filenames: %s ") :
846 _("All Filenames: %s ")),
847 human_readable (statistics.total_filename_count,
848 hbuf, human_ceiling, 1, 1));
849 printf(_("with a cumulative length of %s bytes"),
850 human_readable (statistics.total_filename_length,
851 hbuf, human_ceiling, 1, 1));
853 printf(_("\n\tof which %s contain whitespace, "),
854 human_readable (statistics.whitespace_count,
855 hbuf, human_ceiling, 1, 1));
856 printf(_("\n\t%s contain newline characters, "),
857 human_readable (statistics.newline_count,
858 hbuf, human_ceiling, 1, 1));
859 printf(_("\n\tand %s contain characters with the high bit set.\n"),
860 human_readable (statistics.highbit_filename_count,
861 hbuf, human_ceiling, 1, 1));
863 if (!argc)
865 if (results_were_filtered)
867 printf(_("Some filenames may have been filtered out, "
868 "so we cannot compute the compression ratio.\n"));
870 else
872 if (statistics.total_filename_length)
874 printf(_("Compression ratio %4.2f%%\n"),
875 100.0 * ((double)statistics.total_filename_length
876 - (double) database_file_size)
877 / (double) statistics.total_filename_length);
879 else
881 printf(_("Compression ratio is undefined\n"));
885 printf("\n");
889 * Return nonzero if the data we read in indicates that we are
890 * looking at a LOCATE02 locate database.
892 static int
893 looking_at_gnu_locatedb (const char *data, size_t len)
895 if (len < sizeof (LOCATEDB_MAGIC))
896 return 0;
897 else if (0 == memcmp (data, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
898 return 1; /* We saw the magic byte sequence */
899 else
900 return 0;
/*
 * Return nonzero if the data we read in indicates that we are
 * looking at an slocate database.
 *
 * DATA holds the first LEN bytes of the file called FILENAME.  The
 * header is recognised when it is a one-character string consisting
 * of a decimal digit; that digit is the security level and is stored
 * in *SECLEVEL.  *SECLEVEL is left untouched when 0 is returned.
 *
 * A level above 1 is not supported; a diagnostic naming the level is
 * issued, but the function still returns nonzero so that the caller
 * can decide what to do.
 */
static int
looking_at_slocate_locatedb (const char *filename,
                             const char *data,
                             size_t len,
                             int *seclevel)
{
  assert(len <= 2);

  if (len < 2)
    return 0;

  /* Check that the magic number is a one-byte string */
  if (0 != data[1])
    return 0;                   /* Definitely not slocate. */
  if (!isdigit((unsigned char)data[0]))
    return 0;                   /* Not a security-level digit. */

  /* looks promising. */
  *seclevel = (data[0] - '0');

  if (*seclevel > 1)
    {
      /* Hmm, well it's probably an slocate database
       * of some awesomely huge security level, like 2.
       * We don't know how to handle those.
       *
       * The level digit is data[0]; data[1] is the NUL terminator.
       */
      error(0, 0,
            _("locate database `%s' looks like an slocate "
              "database but it seems to have security level %c, "
              "which GNU findutils does not currently support"),
            filename, data[0]);
    }
  return 1;
}
958 /* Print or count the entries in DBFILE that match shell globbing patterns in
959 ARGV. Return the number of entries matched. */
961 static unsigned long
962 search_one_database (int argc,
963 char **argv,
964 const char *dbfile,
965 FILE *fp,
966 off_t filesize,
967 int ignore_case,
968 int enable_print,
969 int basename_only,
970 int use_limit,
971 struct locate_limits *plimit,
972 int stats,
973 int op_and,
974 int regex,
975 int regex_options)
977 char *pathpart; /* A pattern to consider. */
978 int argn; /* Index to current pattern in argv. */
979 int need_fold; /* Set when folding and any pattern is non-glob. */
980 int nread; /* number of bytes read from an entry. */
981 struct process_data procdata; /* Storage for data shared with visitors. */
982 int slocate_seclevel;
983 struct visitor* pvis; /* temp for determining past_pat_inspector. */
984 const char *format_name;
985 enum ExistenceCheckType do_check_existence;
988 /* We may turn on existence checking for a given database.
989 * We ensure that we can return to the previous behaviour
990 * by using two variables, do_check_existence (which we act on)
991 * and check_existence (whcih indicates the default before we
992 * adjust it on the bassis of what kind of database we;re using
994 do_check_existence = check_existence;
997 if (ignore_case)
998 regex_options |= RE_ICASE;
1000 procdata.len = procdata.count = 0;
1001 procdata.slocatedb_format = 0;
1002 procdata.itemcount = 0;
1004 procdata.dbfile = dbfile;
1005 procdata.fp = fp;
1007 /* Set up the inspection regime */
1008 inspectors = NULL;
1009 lastinspector = NULL;
1010 past_pat_inspector = NULL;
1011 results_were_filtered = false;
1013 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
1014 procdata.original_filename = xmalloc (procdata.pathsize);
1017 nread = fread (procdata.original_filename, 1, SLOCATE_DB_MAGIC_LEN,
1018 procdata.fp);
1019 if (looking_at_slocate_locatedb(procdata.dbfile,
1020 procdata.original_filename,
1021 nread,
1022 &slocate_seclevel))
1024 error(0, 0,
1025 _("`%s' is an slocate database. "
1026 "Support for these is new, expect problems for now "
1027 "(you are, after all, using the CVS code)."),
1028 procdata.dbfile);
1030 /* slocate also uses frcode, but with a different header.
1031 * We handle the header here and then work with the data
1032 * in the normal way.
1034 if (slocate_seclevel > 1)
1036 /* We don't know what those security levels mean,
1037 * so do nothing further
1039 return 0;
1041 else if (slocate_seclevel > 0)
1043 /* Don't show the filenames to the user if they don't exist.
1044 * Showing stats is safe since filenames are only counted
1045 * after the existence check
1047 if (ACCEPT_NON_EXISTING == check_existence)
1049 /* Do not allow the user to see a list of filenames that they
1050 * cannot stat().
1052 error(0, 0,
1053 _("You specified the -E option, but that option "
1054 "cannot be used with slocate-format databases "
1055 "with a non-zero security level. No results will be "
1056 "generated for this database.\n"));
1057 return 0;
1059 if (ACCEPT_EXISTING != do_check_existence)
1061 if (enable_print || stats)
1063 error(0, 0,
1064 _("`%s' is an slocate database. "
1065 "Turning on the '-e' option."),
1066 procdata.dbfile);
1068 do_check_existence = ACCEPT_EXISTING;
1071 add_visitor(visit_locate02_format, NULL);
1072 format_name = "slocate";
1073 procdata.slocatedb_format = 1;
1075 else
1077 int nread2;
1079 procdata.slocatedb_format = 0;
1080 nread2 = fread (procdata.original_filename+nread, 1, sizeof (LOCATEDB_MAGIC)-nread,
1081 procdata.fp);
1082 if (looking_at_gnu_locatedb(procdata.original_filename, nread+nread2))
1084 add_visitor(visit_locate02_format, NULL);
1085 format_name = "GNU LOCATE02";
1087 else /* Use the old format */
1089 int i;
1091 nread += nread2;
1092 /* Read the list of the most common bigrams in the database. */
1093 if (nread < 256)
1095 int more_read = fread (procdata.original_filename + nread, 1,
1096 256 - nread, procdata.fp);
1097 /* XXX: check more_read+nread! */
1100 for (i = 0; i < 128; i++)
1102 procdata.bigram1[i] = procdata.original_filename[i << 1];
1103 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
1105 format_name = "old";
1106 add_visitor(visit_old_format, NULL);
1110 if (basename_only)
1111 add_visitor(visit_basename, NULL);
1113 /* See if we need fold. */
1114 if (ignore_case && !regex)
1115 for ( argn = 0; argn < argc; argn++ )
1117 pathpart = argv[argn];
1118 if (!contains_metacharacter(pathpart))
1120 need_fold = 1;
1121 break;
1125 if (need_fold)
1127 add_visitor(visit_casefold, &casebuf);
1128 casebuf.preqlen = &procdata.pathsize;
1131 /* Add an inspector for each pattern we're looking for. */
1132 for ( argn = 0; argn < argc; argn++ )
1134 results_were_filtered = true;
1135 pathpart = argv[argn];
1136 if (regex)
1138 struct regular_expression *p = xmalloc(sizeof(*p));
1139 const char *error_message = NULL;
1141 memset (&p->regex, 0, sizeof (p->regex));
1143 re_set_syntax(regex_options);
1144 p->regex.allocated = 100;
1145 p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
1146 p->regex.fastmap = NULL;
1147 p->regex.syntax = regex_options;
1148 p->regex.translate = NULL;
1150 error_message = re_compile_pattern (pathpart, strlen (pathpart),
1151 &p->regex);
1152 if (error_message)
1154 error (1, 0, "%s", error_message);
1156 else
1158 add_visitor(visit_regex, p);
1161 else if (contains_metacharacter(pathpart))
1163 if (ignore_case)
1164 add_visitor(visit_globmatch_casefold, pathpart);
1165 else
1166 add_visitor(visit_globmatch_nofold, pathpart);
1168 else
1170 /* No glob characters used. Hence we match on
1171 * _any part_ of the filename, not just the
1172 * basename. This seems odd to me, but it is the
1173 * traditional behaviour.
1174 * James Youngman <jay@gnu.org>
1176 if (ignore_case)
1178 struct casefolder * cf = xmalloc(sizeof(*cf));
1179 cf->pattern = pathpart;
1180 cf->pbuf = &casebuf;
1181 add_visitor(visit_substring_match_casefold, cf);
1182 /* If we ignore case, convert it to lower now so we don't have to
1183 * do it every time
1185 lc_strcpy(pathpart, pathpart);
1187 else
1189 add_visitor(visit_substring_match_nocasefold, pathpart);
1194 pvis = lastinspector;
1196 /* We add visit_existing_*() as late as possible to reduce the
1197 * number of stat() calls.
1199 switch (do_check_existence)
1201 case ACCEPT_EXISTING:
1202 results_were_filtered = true;
1203 if (follow_symlinks) /* -L, default */
1204 add_visitor(visit_existing_follow, NULL);
1205 else /* -P */
1206 add_visitor(visit_existing_nofollow, NULL);
1207 break;
1209 case ACCEPT_NON_EXISTING:
1210 results_were_filtered = true;
1211 if (follow_symlinks) /* -L, default */
1212 add_visitor(visit_non_existing_follow, NULL);
1213 else /* -P */
1214 add_visitor(visit_non_existing_nofollow, NULL);
1215 break;
1217 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1218 /* do nothing; no extra processing. */
1219 break;
1222 /* Security issue: The stats visitor must be added immediately
1223 * before the print visitor, because otherwise the -S option would
1224 * leak information about files that the caller cannot see.
1226 if (stats)
1227 add_visitor(visit_stats, &statistics);
1229 if (enable_print)
1231 if (print_quoted_filename)
1232 add_visitor(visit_justprint_quoted, NULL);
1233 else
1234 add_visitor(visit_justprint_unquoted, NULL);
1238 if (use_limit)
1239 add_visitor(visit_limit, plimit);
1240 else
1241 add_visitor(visit_count, plimit);
1244 if (argc > 1)
1246 past_pat_inspector = pvis->next;
1247 if (op_and)
1248 mainprocessor = process_and;
1249 else
1250 mainprocessor = process_or;
1252 else
1253 mainprocessor = process_simple;
1255 if (stats)
1257 printf(_("Database %s is in the %s format.\n"),
1258 procdata.dbfile,
1259 format_name);
1263 procdata.c = getc (procdata.fp);
1264 /* If we are searching for filename patterns, the inspector list
1265 * will contain an entry for each pattern for which we are searching.
1267 while ( (procdata.c != EOF) &&
1268 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1270 /* Do nothing; all the work is done in the visitor functions. */
1273 if (stats)
1275 if (filesize)
1276 print_stats(argc, filesize);
1279 if (ferror (procdata.fp))
1281 error (0, errno, "%s", procdata.dbfile);
1282 return 0;
1284 return plimit->items_accepted;
/* Version text printed for --version.  Only declared here; the
   definition is outside this part of the file.  */
extern char *version_string;

/* Name under which this program was invoked; dolocate() stores
   argv[0] here and usage() prints it.  */
char *program_name;
1295 static void
1296 usage (FILE *stream)
1298 fprintf (stream, _("\
1299 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1300 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1301 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1302 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1303 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1304 [-version] [--help]\n\
1305 pattern...\n"),
1306 program_name);
1307 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Codes for options that exist only in long form.  They start just
   above CHAR_MAX so that they cannot be equal to any short option
   character.  */
enum { REGEXTYPE_OPTION = CHAR_MAX + 1 };
1315 static struct option const longopts[] =
1317 {"database", required_argument, NULL, 'd'},
1318 {"existing", no_argument, NULL, 'e'},
1319 {"non-existing", no_argument, NULL, 'E'},
1320 {"ignore-case", no_argument, NULL, 'i'},
1321 {"all", no_argument, NULL, 'A'},
1322 {"help", no_argument, NULL, 'h'},
1323 {"version", no_argument, NULL, 'v'},
1324 {"null", no_argument, NULL, '0'},
1325 {"count", no_argument, NULL, 'c'},
1326 {"wholename", no_argument, NULL, 'w'},
1327 {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
1328 {"basename", no_argument, NULL, 'b'},
1329 {"print", no_argument, NULL, 'p'},
1330 {"stdio", no_argument, NULL, 's'},
1331 {"mmap", no_argument, NULL, 'm'},
1332 {"limit", required_argument, NULL, 'l'},
1333 {"regex", no_argument, NULL, 'r'},
1334 {"regextype", required_argument, NULL, REGEXTYPE_OPTION},
1335 {"statistics", no_argument, NULL, 'S'},
1336 {"follow", no_argument, NULL, 'L'},
1337 {"nofollow", no_argument, NULL, 'P'},
1338 {NULL, no_argument, NULL, 0}
/* Permanently give up any set-user-ID privileges.

   When the effective user is root, the supplementary group list is
   first replaced by the single real group ID.  The user ID is then
   reset to the real user ID, and finally we verify that root cannot
   be regained.

   Returns 0 on success.  On failure a diagnostic is issued through
   error() with a nonzero exit status; the calls that follow it are a
   deliberate last line of defence in case that somehow returns.  */
static int
drop_privs(void)
{
  const char * what = "failed";
  const uid_t orig_euid = geteuid();
  const uid_t real_uid = getuid();

  /* Use of setgroups() is restricted to root only. */
  if (0 == orig_euid)
    {
      gid_t groups[1];
      /* The array has exactly one element, so the only valid index
       * is 0.
       */
      groups[0] = getgid();
      if (0 != setgroups(1, groups))
        {
          what = _("failed to drop group privileges");
          goto fail;
        }
    }

  if (0 != setuid(real_uid))
    {
      what = _("failed to drop setuid privileges");
      goto fail;
    }

  /* Defend against the case where the attacker runs us with the
   * capability to call setuid() turned off, which on some systems
   * will cause the above attempt to drop privileges fail (leaving us
   * privileged).
   *
   * The check is skipped when the real user is root: root is always
   * allowed to call setuid(0), so there is nothing to drop and the
   * test would reject every legitimate invocation by root.
   */
  if (0 != real_uid && 0 == setuid(0))
    {
      what = _("Failed to drop privileges");
      /* The system call we are complaining about succeeded, so errno
       * holds nothing relevant; clear it so that error() does not
       * print a misleading reason.
       */
      errno = 0;
      goto fail;
    }

  /* success. */
  return 0;

 fail:
  error(1, errno, "%s", what);
  abort();
  kill(0, SIGKILL);
  _exit(1);
  /*NOTREACHED*/
  /* ... we hope. */
  for (;;)
    {
      /* deliberate infinite loop */
    }
}
/* Open the database file NAME read-only (with O_LARGEFILE where the
   platform defines it) and mark the descriptor close-on-exec.

   Returns the open descriptor, or -1 on failure.  On failure errno
   describes the call that failed, so callers can pass it straight
   to error().  */
static int
opendb(const char *name)
{
  int fd = open(name, O_RDONLY
#if defined(O_LARGEFILE)
                |O_LARGEFILE
#endif
                );
  if (fd >= 0)
    {
      /* Make sure it won't survive an exec */
      if (0 != fcntl(fd, F_SETFD, FD_CLOEXEC))
        {
          /* close() may overwrite errno; keep the reason fcntl()
           * failed for the caller's diagnostic.
           */
          const int saved_errno = errno;
          close(fd);
          errno = saved_errno;
          fd = -1;
        }
    }
  return fd;
}
1414 dolocate (int argc, char **argv, int secure_db_fd)
1416 char *dbpath;
1417 unsigned long int found = 0uL;
1418 int optc;
1419 int ignore_case = 0;
1420 int print = 0;
1421 int just_count = 0;
1422 int basename_only = 0;
1423 int use_limit = 0;
1424 int regex = 0;
1425 int regex_options = RE_SYNTAX_EMACS;
1426 int stats = 0;
1427 int op_and = 0;
1428 const char *e;
1429 FILE *fp;
1430 int they_chose_db = 0;
1431 bool did_stdin = false; /* Set to prevent rereading stdin. */
1433 program_name = argv[0];
1435 #ifdef HAVE_SETLOCALE
1436 setlocale (LC_ALL, "");
1437 #endif
1438 bindtextdomain (PACKAGE, LOCALEDIR);
1439 textdomain (PACKAGE);
1440 atexit (close_stdout);
1442 limits.limit = 0;
1443 limits.items_accepted = 0;
1445 quote_opts = clone_quoting_options (NULL);
1446 print_quoted_filename = true;
1448 /* We cannot simultaneously trust $LOCATE_PATH and use the
1449 * setuid-access-controlled database,, since that could cause a leak
1450 * of private data.
1452 dbpath = getenv ("LOCATE_PATH");
1453 if (dbpath)
1455 they_chose_db = 1;
1458 check_existence = ACCEPT_EITHER;
1460 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1461 switch (optc)
1463 case '0':
1464 separator = 0;
1465 print_quoted_filename = false; /* print filename 'raw'. */
1466 break;
1468 case 'A':
1469 op_and = 1;
1470 break;
1472 case 'b':
1473 basename_only = 1;
1474 break;
1476 case 'c':
1477 just_count = 1;
1478 break;
1480 case 'd':
1481 dbpath = optarg;
1482 they_chose_db = 1;
1483 break;
1485 case 'e':
1486 check_existence = ACCEPT_EXISTING;
1487 break;
1489 case 'E':
1490 check_existence = ACCEPT_NON_EXISTING;
1491 break;
1493 case 'i':
1494 ignore_case = 1;
1495 break;
1497 case 'h':
1498 usage (stdout);
1499 return 0;
1501 case 'p':
1502 print = 1;
1503 break;
1505 case 'v':
1506 printf (_("GNU locate version %s\n"), version_string);
1507 return 0;
1509 case 'w':
1510 basename_only = 0;
1511 break;
1513 case 'r':
1514 regex = 1;
1515 break;
1517 case REGEXTYPE_OPTION:
1518 regex_options = get_regex_type(optarg);
1519 break;
1521 case 'S':
1522 stats = 1;
1523 break;
1525 case 'L':
1526 follow_symlinks = 1;
1527 break;
1529 /* In find, -P and -H differ in the way they handle paths
1530 * given on the command line. This is not relevant for
1531 * locate, but the -H option is supported because it is
1532 * probably more intuitive to do so.
1534 case 'P':
1535 case 'H':
1536 follow_symlinks = 0;
1537 break;
1539 case 'l':
1541 char *end = optarg;
1542 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1543 if (LONGINT_OK != err)
1545 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1547 use_limit = 1;
1549 break;
1551 case 's': /* use stdio */
1552 case 'm': /* use mmap */
1553 /* These options are implemented simply for
1554 * compatibility with FreeBSD
1556 break;
1558 default:
1559 usage (stderr);
1560 return 1;
1564 /* If the user gave the -d option or set LOCATE_PATH,
1565 * relinquish access to the secure database.
1567 if (they_chose_db)
1569 if (secure_db_fd >= 0)
1571 close(secure_db_fd);
1572 secure_db_fd = 0;
1576 if (!just_count && !stats)
1577 print = 1;
1579 if (stats)
1581 if (optind == argc)
1582 use_limit = 0;
1584 else
1586 if (!just_count && optind == argc)
1588 usage (stderr);
1589 return 1;
1594 if (1 == isatty(STDOUT_FILENO))
1595 stdout_is_a_tty = true;
1596 else
1597 stdout_is_a_tty = false;
1599 if (they_chose_db)
1600 next_element (dbpath, 0); /* Initialize. */
1602 /* Bail out early if limit already reached. */
1603 while (!use_limit || limits.limit > limits.items_accepted)
1605 struct stat st;
1606 int fd;
1607 off_t filesize;
1609 statistics.compressed_bytes =
1610 statistics.total_filename_count =
1611 statistics.total_filename_length =
1612 statistics.whitespace_count =
1613 statistics.newline_count =
1614 statistics.highbit_filename_count = 0u;
1616 if (they_chose_db)
1618 /* Take the next element from the list of databases */
1619 e = next_element ((char *) NULL, 0);
1620 if (NULL == e)
1621 break;
1623 if (0 == strcmp (e, "-"))
1625 if (did_stdin)
1627 error (0, 0,
1628 _("warning: the locate database can only be read from stdin once."));
1629 return 0;
1631 else
1633 e = "<stdin>";
1634 fd = 0;
1635 did_stdin = true;
1638 else
1640 if (0 == strlen(e) || 0 == strcmp(e, "."))
1642 e = LOCATE_DB;
1645 /* open the database */
1646 fd = opendb(e);
1647 if (fd < 0)
1649 error (0, errno, "%s", e);
1650 return 0;
1654 else
1656 if (-1 == secure_db_fd)
1658 /* Already searched the database, it's time to exit the loop */
1659 break;
1661 else
1663 e = selected_secure_db;
1664 fd = secure_db_fd;
1665 secure_db_fd = -1;
1669 /* Check the database to see if it is old. */
1670 if (fstat(fd, &st))
1672 error (0, errno, "%s", e);
1673 /* continue anyway */
1674 filesize = (off_t)0;
1676 else
1678 time_t now;
1680 filesize = st.st_size;
1682 if ((time_t)-1 == time(&now))
1684 /* If we can't tell the time, we don't know how old the
1685 * database is. But since the message is just advisory,
1686 * we continue anyway.
1688 error (0, errno, "time system call");
1690 else
1692 if (now - st.st_mtime > WARN_SECONDS)
1694 /* For example:
1695 warning: database `fred' is more than 8 days old */
1696 error (0, 0,
1697 _("warning: database `%s' is more than %d %s old"),
1698 e, WARN_NUMBER_UNITS, _(warn_name_units));
1703 fp = fdopen(fd, "r");
1704 if (NULL == fp)
1706 error (0, errno, "%s", e);
1707 return 0;
1710 /* Search this database for all patterns simultaneously */
1711 found = search_one_database (argc - optind, &argv[optind],
1712 e, fp, filesize,
1713 ignore_case, print, basename_only,
1714 use_limit, &limits, stats,
1715 op_and, regex, regex_options);
1717 /* Close the databsase (even if it is stdin) */
1718 if (fclose (fp) == EOF)
1720 error (0, errno, "%s", e);
1721 return 0;
1725 if (just_count)
1727 printf("%ld\n", found);
1730 if (found || (use_limit && (limits.limit==0)) || stats )
1731 return 0;
1732 else
1733 return 1;
/* Number of elements in the array A.  Only meaningful for a true
   array, not for a pointer.  The argument is parenthesised so that
   any expression naming an array can be passed safely.  */
#define ARRAYSIZE(a) (sizeof(a) / sizeof((a)[0]))
1737 static int
1738 open_secure_db(void)
1740 int fd, i;
1742 const char * secure_db_list[] =
1744 LOCATE_DB,
1745 "/var/lib/slocate/slocate.db",
1746 NULL
1748 for (i=0; secure_db_list[i]; ++i)
1750 fd = opendb(secure_db_list[i]);
1751 if (fd >= 0)
1753 selected_secure_db = secure_db_list[i];
1754 return fd;
1757 return -1;
/* Program entry point.  The default database is opened first, before
   drop_privs() gives up any set-user-ID privileges; only then is the
   command line handed to dolocate(), whose result becomes the exit
   status.  */
int
main (int argc, char **argv)
{
  const int secure_fd = open_secure_db();

  drop_privs();

  return dolocate(argc, argv, secure_fd);
}