Documented change to file_sparseness (for the case where st_blocks is missing).
[findutils.git] / locate / locate.c
blob965cadeb93c2cabe9c06c44290a0064d5df48a1b
/* locate -- search databases for filenames that match patterns
   Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
                 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
   USA.
*/
/* Usage: locate [options] pattern...

   Scan a pathname list for the full pathname of a file, given only
   a piece of the name (possibly containing shell globbing metacharacters).
   The list has been processed with front-compression, which reduces
   the list size by a factor of 4-5.
   Recognizes two database formats, old and new.  The old format is
   bigram coded, which reduces space by a further 20-25% and uses the
   following encoding of the database bytes:

   0-28		likeliest differential counts + offset (14) to make nonnegative
   30		escape code for out-of-range count to follow in next halfword
   128-255 	bigram codes (the 128 most common, as determined by `updatedb')
   32-127  	single character (printable) ASCII remainder

   Earlier versions of GNU locate used a novel two-tiered string
   search technique, which was described in Usenix ;login:, Vol
   8, No 1, February/March, 1983, p. 8.

   However, later code changes that provide additional functionality
   became difficult to make with the existing reading scheme, and so
   we no longer perform the matching as efficiently as we used to (that is,
   we no longer use the same algorithm).

   The old algorithm was:

      First, match a metacharacter-free subpattern and a partial
      pathname BACKWARDS to avoid full expansion of the pathname list.
      The time savings is 40-50% over forward matching, which cannot
      efficiently handle overlapped search patterns and compressed
      path remainders.

      Then, match the actual shell glob pattern (if in this form)
      against the candidate pathnames using the slower shell filename
      matching routines.

   Written by James A. Woods <jwoods@adobe.com>.
   Modified by David MacKenzie <djm@gnu.org>.
   Additional work by James Youngman and Bas van Gompel.
*/
63 #include <config.h>
64 #include <stdio.h>
65 #include <signal.h>
66 #include <ctype.h>
67 #include <sys/types.h>
68 #include <grp.h> /* for setgroups() */
69 #include <sys/stat.h>
70 #include <time.h>
71 #include <fnmatch.h>
72 #include <getopt.h>
73 #include <xstrtol.h>
/* The presence of unistd.h is assumed by gnulib these days, so we
 * might as well assume it too.
 */
78 /* We need <unistd.h> for isatty(). */
79 #include <unistd.h>
81 #if HAVE_FCNTL_H
82 /* We use fcntl() */
83 #include <fcntl.h>
84 #endif
86 #define NDEBUG
87 #include <assert.h>
88 #include <string.h>
91 #ifdef STDC_HEADERS
92 #include <stdlib.h>
93 #endif
95 #ifdef HAVE_ERRNO_H
96 #include <errno.h>
97 #else
98 extern int errno;
99 #endif
101 #ifdef HAVE_LOCALE_H
102 #include <locale.h>
103 #endif
105 #if ENABLE_NLS
106 # include <libintl.h>
107 # define _(Text) gettext (Text)
108 #else
109 # define _(Text) Text
110 #define textdomain(Domain)
111 #define bindtextdomain(Package, Directory)
112 #endif
113 #ifdef gettext_noop
114 # define N_(String) gettext_noop (String)
115 #else
/* We used to use (String) instead of just String, but apparently ISO C
 * doesn't allow this (at least, that's what HP said when someone reported
 * this as a compiler bug).  This is HP case number 1205608192.  See
 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
 * ANSI 3.5.7p14-15).  The Intel icc compiler also rejects constructs
 * like: static const char buf[] = ("string");
 */
123 # define N_(String) String
124 #endif
126 #include "locatedb.h"
127 #include <getline.h>
128 #include "../gnulib/lib/xalloc.h"
129 #include "../gnulib/lib/error.h"
130 #include "../gnulib/lib/human.h"
131 #include "dirname.h"
132 #include "closeout.h"
133 #include "nextelem.h"
134 #include "regex.h"
135 #include "quote.h"
136 #include "quotearg.h"
137 #include "printquoted.h"
138 #include "regextype.h"
139 #include "gnulib-version.h"
/* Case-conversion helpers.
   Outside libc these evaluate Ch more than once, so never pass an
   expression with side effects.  Ch must be a character value: it is
   converted to unsigned char before being handed to the <ctype.h>
   functions, because passing a negative plain char to them is undefined.  */
#ifdef _LIBC
# define TOUPPER(Ch) toupper (Ch)
# define TOLOWER(Ch) tolower (Ch)
#else
# define TOUPPER(Ch) (islower ((unsigned char) (Ch)) ? toupper ((unsigned char) (Ch)) : (Ch))
# define TOLOWER(Ch) (isupper ((unsigned char) (Ch)) ? tolower ((unsigned char) (Ch)) : (Ch))
#endif
150 /* typedef enum {false, true} boolean; */
152 /* Warn if a database is older than this. 8 days allows for a weekly
153 update that takes up to a day to perform. */
154 static unsigned int warn_number_units = 8;
156 /* Printable name of units used in WARN_SECONDS */
157 static const char warn_name_units[] = N_("days");
158 #define SECONDS_PER_UNIT (60 * 60 * 24)
/* Result of one visitor callback.  The values are distinct bits so that
   a caller can test a result against a mask of several of them.  */
enum visit_result
  {
    VISIT_CONTINUE = 1,  /* please call the next visitor */
    VISIT_ACCEPTED = 2,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 4,  /* rejected, process next file. */
    VISIT_ABORT    = 8   /* rejected, process no more files. */
  };
/* Which database entries are reported, depending on whether the named
   file is present at search time.  */
enum ExistenceCheckType
  {
    ACCEPT_EITHER,        /* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING,      /* Corresponds to option -e */
    ACCEPT_NON_EXISTING   /* Corresponds to option -E */
  };

/* Check for existence of files before printing them out? */
enum ExistenceCheckType check_existence = ACCEPT_EITHER;
/* Nonzero: existence checks use stat(); zero: they use lstat().  */
static int follow_symlinks = 1;

/* What to separate the results with. */
static int separator = '\n';

/* Settings consulted when result names are printed.  */
static struct quoting_options * quote_opts = NULL;
static bool stdout_is_a_tty;
static bool print_quoted_filename;
/* Set while a search is configured if patterns or existence checks
   narrow the results; print_stats() words its report accordingly.  */
static bool results_were_filtered;

static const char *selected_secure_db = NULL;
191 /* Change the number of days old the database can be
192 * before we complain about it.
194 static void
195 set_max_db_age(const char *s)
197 char *end;
198 unsigned long int val;
199 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
201 if (0 == *s)
203 error(1, 0,
204 _("The argument argument for option --max-database-age must not be empty"));
208 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
209 * we would not be able to tell if that is the correct answer, or whether it
210 * signifies an error.
212 errno = 0;
213 val = strtoul(s, &end, 10);
215 /* Diagnose number too large, non-numbes and trailing junk. */
216 if ((ULONG_MAX == val && ERANGE == errno) ||
217 (0 == val && EINVAL == errno))
219 error(1, errno,
220 _("Invalid argument %s for option --max-database-age"),
221 quotearg_n_style(0, locale_quoting_style, s));
223 else if (*end)
225 /* errno wasn't set, don't print its message */
226 error(1, 0,
227 _("Invalid argument %s for option --max-database-age"),
228 quotearg_n_style(0, locale_quoting_style, s));
230 else
232 warn_number_units = val;
/* Read in a 16-bit int, high byte first (network byte order).

   The two bytes are combined as unsigned values and only then mapped
   onto the signed range; shifting a negative signed char left, as was
   done before, is undefined behaviour.  A failed read is not reported
   separately: only the low eight bits of each fgetc() result are used,
   so where EOF is -1 a read at end of file yields -1.  */
static short
get_short (FILE *fp)
{
  const int high = fgetc (fp);
  const int low = fgetc (fp);
  const unsigned int bits = (((unsigned int) high & 0xffu) << 8)
			    | ((unsigned int) low & 0xffu);

  if (bits >= 0x8000u)
    return (short) ((int) bits - 0x10000);
  return (short) bits;
}
/* The characters that make a pattern a shell glob rather than a
   plain substring.  */
const char * const metacharacters = "*?[]\\";

/* Return nonzero (1) if S contains any shell glob characters,
   that is any character listed in `metacharacters'; 0 otherwise.  */
static int
contains_metacharacter (const char *s)
{
  return strpbrk (s, metacharacters) != NULL;
}
/* locate_read_str()
 *
 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
 * until we reach DELIMITER or end-of-file.   We reallocate the buffer
 * as necessary, altering (*BUF) and (*SIZ) as appropriate.  No assumption
 * is made regarding the content of the data (i.e. the implementation is
 * 8-bit clean, the only delimiter is DELIMITER).
 *
 * Returns the number of bytes read (the delimiter included when one was
 * seen), or a negative value when nothing could be read or the buffer
 * could not be enlarged.  After a successful call the byte following
 * the copied data is always '\0', even when end-of-file was reached
 * before any delimiter; the buffer is sized to hold it.
 *
 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
 * has been removed from gnulib.
 *
 * We call the function locate_read_str() to avoid a name clash with the curses
 * function getstr().
 */
static int
locate_read_str (char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char *p = NULL;
  size_t sz = 0;
  int nread;

  nread = getdelim (&p, &sz, delimiter, fp);
  if (nread >= 0)
    {
      const size_t needed = (size_t) offs + (size_t) nread + 1u;

      assert (p != NULL);

      if (needed > (*siz))
	{
	  char *pnew = realloc (*buf, needed);
	  if (NULL == pnew)
	    {
	      free (p);		/* don't leak getdelim's buffer */
	      return -1;	/* FAIL */
	    }
	  *siz = needed;
	  *buf = pnew;
	}
      /* getdelim() NUL-terminates what it read; copy that terminator
       * too, so that an entry cut short by end-of-file is still a
       * valid string.
       */
      memcpy ((*buf) + offs, p, (size_t) nread + 1u);
    }
  /* getdelim() may have allocated P even when it failed. */
  free (p);
  return nread;
}
/* Bookkeeping for --limit and --count.  */
struct locate_limits
{
  uintmax_t limit;           /* stop once this many items were accepted */
  uintmax_t items_accepted;  /* items accepted so far */
};
static struct locate_limits limits;
/* Counters gathered for the --statistics report.  */
struct locate_stats
{
  uintmax_t compressed_bytes;
  uintmax_t total_filename_count;    /* names seen */
  uintmax_t total_filename_length;   /* sum of their lengths, in bytes */
  uintmax_t whitespace_count;        /* names containing white space */
  uintmax_t newline_count;           /* names containing a newline */
  uintmax_t highbit_filename_count;  /* names with a byte >= 128 */
};
static struct locate_stats statistics;
332 struct regular_expression
334 struct re_pattern_buffer regex; /* for --regex */
/* State shared between the database reader and the visitors while one
   database is being scanned.  */
struct process_data
{
  int c;			/* An input byte.  */
  char itemcount;		/* Indicates we're at the beginning of an slocate db. */
  int count; /* The length of the prefix shared with the previous database entry.  */
  int len;
  char *original_filename;	/* The current input database entry. */
  size_t pathsize;		/* Amount allocated for it.  */
  char *munged_filename;	/* path or base_name(path) */
  FILE *fp;			/* The pathname database.  */
  const char *dbfile;		/* Its name, or "<stdin>" */
  int  slocatedb_format;	/* Allows us to cope with slocate's format variant */
  /* for the old database format,
     the first and second characters of the most common bigrams.  */
  char bigram1[128];
  char bigram2[128];
};
/* A visitor callback examines (and may update) PROCDATA and returns one
   of the enum visit_result values.  CONTEXT is the pointer that was
   registered together with the callback.  */
typedef int (*visitfunc)(struct process_data *procdata,
			 void *context);

/* One element of the singly linked list of visitors.  */
struct visitor
{
  visitfunc      inspector;
  void *         context;
  struct visitor *next;
};


/* Head and tail of the visitor list, and the first visitor that comes
   after the per-pattern matchers.  */
static struct visitor *inspectors = NULL;
static struct visitor *lastinspector = NULL;
static struct visitor *past_pat_inspector = NULL;
372 static inline int visit(const struct visitor *p,
373 int accept_flags,
374 struct process_data *procdata,
375 const struct visitor * const stop)
377 register int result = accept_flags;
378 while ( (accept_flags & result) && (stop != p) )
380 result = (p->inspector)(procdata, p->context);
381 p = p->next;
383 return result;
386 /* 0 or 1 pattern(s) */
387 static int
388 process_simple(struct process_data *procdata)
390 return visit(inspectors, (VISIT_CONTINUE|VISIT_ACCEPTED), procdata, NULL);
393 /* Accept if any pattern matches. */
394 static int
395 process_or (struct process_data *procdata)
397 int result;
399 result = visit(inspectors, (VISIT_CONTINUE|VISIT_REJECTED), procdata, past_pat_inspector);
400 if (result == VISIT_CONTINUE)
401 result = VISIT_REJECTED;
402 if (result & (VISIT_ABORT | VISIT_REJECTED))
403 return result;
405 result = visit(past_pat_inspector, VISIT_CONTINUE, procdata, NULL);
406 if (VISIT_CONTINUE == result)
407 return VISIT_ACCEPTED;
408 else
409 return result;
412 /* Accept if all pattern match. */
413 static int
414 process_and (struct process_data *procdata)
416 int result;
418 result = visit(inspectors, (VISIT_CONTINUE|VISIT_ACCEPTED), procdata, past_pat_inspector);
419 if (result == VISIT_CONTINUE)
420 result = VISIT_REJECTED;
421 if (result & (VISIT_ABORT | VISIT_REJECTED))
422 return result;
424 result = visit(past_pat_inspector, VISIT_CONTINUE, procdata, NULL);
425 if (VISIT_CONTINUE == result)
426 return VISIT_ACCEPTED;
427 else
428 return result;
/* Strategy applied to each database entry; one of the process_*()
   functions.  */
typedef int (*processfunc)(struct process_data *procdata);

static processfunc mainprocessor = NULL;
435 static void
436 add_visitor(visitfunc fn, void *context)
438 struct visitor *p = xmalloc(sizeof(struct visitor));
439 p->inspector = fn;
440 p->context = context;
441 p->next = NULL;
443 if (NULL == lastinspector)
445 lastinspector = inspectors = p;
447 else
449 lastinspector->next = p;
450 lastinspector = p;
456 static int
457 visit_justprint_quoted(struct process_data *procdata, void *context)
459 (void) context;
460 print_quoted (stdout, quote_opts, stdout_is_a_tty,
461 "%s",
462 procdata->original_filename);
463 putchar(separator);
464 return VISIT_CONTINUE;
467 static int
468 visit_justprint_unquoted(struct process_data *procdata, void *context)
470 (void) context;
471 fputs(procdata->original_filename, stdout);
472 putchar(separator);
473 return VISIT_CONTINUE;
476 static int
477 visit_old_format(struct process_data *procdata, void *context)
479 register char *s;
480 (void) context;
482 /* Get the offset in the path where this path info starts. */
483 if (procdata->c == LOCATEDB_OLD_ESCAPE)
484 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
485 else
486 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
488 /* Overlay the old path with the remainder of the new. */
489 for (s = procdata->original_filename + procdata->count;
490 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
491 if (procdata->c < 0200)
492 *s++ = procdata->c; /* An ordinary character. */
493 else
495 /* Bigram markers have the high bit set. */
496 procdata->c &= 0177;
497 *s++ = procdata->bigram1[procdata->c];
498 *s++ = procdata->bigram2[procdata->c];
500 *s-- = '\0';
502 procdata->munged_filename = procdata->original_filename;
504 return VISIT_CONTINUE;
508 static int
509 visit_locate02_format(struct process_data *procdata, void *context)
511 register char *s;
512 int nread;
513 (void) context;
515 if (procdata->slocatedb_format)
517 if (procdata->itemcount == 0)
519 ungetc(procdata->c, procdata->fp);
520 procdata->count = 0;
521 procdata->len = 0;
523 else if (procdata->itemcount == 1)
525 procdata->count = procdata->len-1;
527 else
529 if (procdata->c == LOCATEDB_ESCAPE)
530 procdata->count += (short)get_short (procdata->fp);
531 else if (procdata->c > 127)
532 procdata->count += procdata->c - 256;
533 else
534 procdata->count += procdata->c;
537 else
539 if (procdata->c == LOCATEDB_ESCAPE)
540 procdata->count += (short)get_short (procdata->fp);
541 else if (procdata->c > 127)
542 procdata->count += procdata->c - 256;
543 else
544 procdata->count += procdata->c;
547 if (procdata->count > procdata->len || procdata->count < 0)
549 /* This should not happen generally , but since we're
550 * reading in data which is outside our control, we
551 * cannot prevent it.
553 error(1, 0, _("locate database %s is corrupt or invalid"),
554 quotearg_n_style(0, locale_quoting_style, procdata->dbfile));
557 /* Overlay the old path with the remainder of the new. */
558 nread = locate_read_str (&procdata->original_filename,
559 &procdata->pathsize,
560 procdata->fp, 0, procdata->count);
561 if (nread < 0)
562 return VISIT_ABORT;
563 procdata->c = getc (procdata->fp);
564 procdata->len = procdata->count + nread;
565 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
566 assert (s[0] != '\0');
567 assert (s[1] == '\0'); /* Our terminator. */
568 assert (s[2] == '\0'); /* Added by locate_read_str. */
570 procdata->munged_filename = procdata->original_filename;
572 if (procdata->slocatedb_format)
574 /* Don't increment indefinitely, it might overflow. */
575 if (procdata->itemcount < 6)
577 ++(procdata->itemcount);
582 return VISIT_CONTINUE;
585 static int
586 visit_basename(struct process_data *procdata, void *context)
588 (void) context;
589 procdata->munged_filename = base_name(procdata->original_filename);
591 return VISIT_CONTINUE;
595 /* visit_existing_follow implements -L -e */
596 static int
597 visit_existing_follow(struct process_data *procdata, void *context)
599 struct stat st;
600 (void) context;
602 /* munged_filename has been converted in some way (to lower case,
603 * or is just the base name of the file), and original_filename has not.
604 * Hence only original_filename is still actually the name of the file
605 * whose existence we would need to check.
607 if (stat(procdata->original_filename, &st) != 0)
609 return VISIT_REJECTED;
611 else
613 return VISIT_CONTINUE;
617 /* visit_non_existing_follow implements -L -E */
618 static int
619 visit_non_existing_follow(struct process_data *procdata, void *context)
621 struct stat st;
622 (void) context;
624 /* munged_filename has been converted in some way (to lower case,
625 * or is just the base name of the file), and original_filename has not.
626 * Hence only original_filename is still actually the name of the file
627 * whose existence we would need to check.
629 if (stat(procdata->original_filename, &st) == 0)
631 return VISIT_REJECTED;
633 else
635 return VISIT_CONTINUE;
639 /* visit_existing_nofollow implements -P -e */
640 static int
641 visit_existing_nofollow(struct process_data *procdata, void *context)
643 struct stat st;
644 (void) context;
646 /* munged_filename has been converted in some way (to lower case,
647 * or is just the base name of the file), and original_filename has not.
648 * Hence only original_filename is still actually the name of the file
649 * whose existence we would need to check.
651 if (lstat(procdata->original_filename, &st) != 0)
653 return VISIT_REJECTED;
655 else
657 return VISIT_CONTINUE;
661 /* visit_non_existing_nofollow implements -P -E */
662 static int
663 visit_non_existing_nofollow(struct process_data *procdata, void *context)
665 struct stat st;
666 (void) context;
668 /* munged_filename has been converted in some way (to lower case,
669 * or is just the base name of the file), and original_filename has not.
670 * Hence only original_filename is still actually the name of the file
671 * whose existence we would need to check.
673 if (lstat(procdata->original_filename, &st) == 0)
675 return VISIT_REJECTED;
677 else
679 return VISIT_CONTINUE;
683 static int
684 visit_substring_match_nocasefold_wide(struct process_data *procdata, void *context)
686 const char *pattern = context;
688 if (NULL != mbsstr(procdata->munged_filename, pattern))
689 return VISIT_ACCEPTED;
690 else
691 return VISIT_REJECTED;
694 static int
695 visit_substring_match_nocasefold_narrow(struct process_data *procdata, void *context)
697 const char *pattern = context;
698 assert(MB_CUR_MAX == 1);
699 if (NULL != strstr(procdata->munged_filename, pattern))
700 return VISIT_ACCEPTED;
701 else
702 return VISIT_REJECTED;
705 static int
706 visit_substring_match_casefold_wide(struct process_data *procdata, void *context)
708 const char *pattern = context;
710 if (NULL != mbscasestr(procdata->munged_filename, pattern))
711 return VISIT_ACCEPTED;
712 else
713 return VISIT_REJECTED;
717 static int
718 visit_substring_match_casefold_narrow(struct process_data *procdata, void *context)
720 const char *pattern = context;
722 assert(MB_CUR_MAX == 1);
723 if (NULL != strcasestr(procdata->munged_filename, pattern))
724 return VISIT_ACCEPTED;
725 else
726 return VISIT_REJECTED;
730 static int
731 visit_globmatch_nofold(struct process_data *procdata, void *context)
733 const char *glob = context;
734 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
735 return VISIT_REJECTED;
736 else
737 return VISIT_ACCEPTED;
741 static int
742 visit_globmatch_casefold(struct process_data *procdata, void *context)
744 const char *glob = context;
745 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
746 return VISIT_REJECTED;
747 else
748 return VISIT_ACCEPTED;
752 static int
753 visit_regex(struct process_data *procdata, void *context)
755 struct regular_expression *p = context;
756 const size_t len = strlen(procdata->munged_filename);
758 int rv = re_search (&p->regex, procdata->munged_filename,
759 len, 0, len,
760 (struct re_registers *) NULL);
761 if (rv < 0)
763 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
765 else
767 return VISIT_ACCEPTED; /* match */
772 static int
773 visit_stats(struct process_data *procdata, void *context)
775 struct locate_stats *p = context;
776 size_t len = strlen(procdata->original_filename);
777 const char *s;
778 int highbit, whitespace, newline;
780 ++(p->total_filename_count);
781 p->total_filename_length += len;
783 highbit = whitespace = newline = 0;
784 for (s=procdata->original_filename; *s; ++s)
786 if ( (int)(*s) & 128 )
787 highbit = 1;
788 if ('\n' == *s)
790 newline = whitespace = 1;
792 else if (isspace((unsigned char)*s))
794 whitespace = 1;
798 if (highbit)
799 ++(p->highbit_filename_count);
800 if (whitespace)
801 ++(p->whitespace_count);
802 if (newline)
803 ++(p->newline_count);
805 return VISIT_CONTINUE;
809 static int
810 visit_limit(struct process_data *procdata, void *context)
812 struct locate_limits *p = context;
814 (void) procdata;
816 if (++p->items_accepted >= p->limit)
817 return VISIT_ABORT;
818 else
819 return VISIT_CONTINUE;
822 static int
823 visit_count(struct process_data *procdata, void *context)
825 struct locate_limits *p = context;
827 (void) procdata;
829 ++p->items_accepted;
830 return VISIT_CONTINUE;
833 /* Emit the statistics.
835 static void
836 print_stats(int argc, size_t database_file_size)
838 char hbuf[LONGEST_HUMAN_READABLE + 1];
840 printf(_("Locate database size: %s bytes\n"),
841 human_readable ((uintmax_t) database_file_size,
842 hbuf, human_ceiling, 1, 1));
844 printf( (results_were_filtered ?
845 _("Matching Filenames: %s ") :
846 _("All Filenames: %s ")),
847 human_readable (statistics.total_filename_count,
848 hbuf, human_ceiling, 1, 1));
849 printf(_("with a cumulative length of %s bytes"),
850 human_readable (statistics.total_filename_length,
851 hbuf, human_ceiling, 1, 1));
853 printf(_("\n\tof which %s contain whitespace, "),
854 human_readable (statistics.whitespace_count,
855 hbuf, human_ceiling, 1, 1));
856 printf(_("\n\t%s contain newline characters, "),
857 human_readable (statistics.newline_count,
858 hbuf, human_ceiling, 1, 1));
859 printf(_("\n\tand %s contain characters with the high bit set.\n"),
860 human_readable (statistics.highbit_filename_count,
861 hbuf, human_ceiling, 1, 1));
863 if (!argc)
865 if (results_were_filtered)
867 printf(_("Some filenames may have been filtered out, "
868 "so we cannot compute the compression ratio.\n"));
870 else
872 if (statistics.total_filename_length)
874 printf(_("Compression ratio %4.2f%% (higher is better)\n"),
875 100.0 * ((double)statistics.total_filename_length
876 - (double) database_file_size)
877 / (double) statistics.total_filename_length);
879 else
881 printf(_("Compression ratio is undefined\n"));
885 printf("\n");
889 * Return nonzero if the data we read in indicates that we are
890 * looking at a LOCATE02 locate database.
892 static int
893 looking_at_gnu_locatedb (const char *data, size_t len)
895 if (len < sizeof (LOCATEDB_MAGIC))
896 return 0;
897 else if (0 == memcmp (data, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
898 return 1; /* We saw the magic byte sequence */
899 else
900 return 0;
904 * Return nonzero if the data we read in indicates that we are
905 * looking at an slocate database.
907 static int
908 looking_at_slocate_locatedb (const char *filename,
909 const char *data,
910 size_t len,
911 int *seclevel)
913 assert(len <= 2);
915 if (len < 2)
917 return 0;
919 else
921 /* Check that the magic number is a one-byte string */
922 if (0 == data[1])
924 if (isdigit((unsigned char)data[0]))
926 /* looks promising. */
927 *seclevel = (data[0] - '0');
929 if (*seclevel > 1)
931 /* Hmm, well it's probably an slocate database
932 * of some awsomely huge security level, like 2.
933 * We don't know how to handle those.
935 error(0, 0,
936 _("locate database %s looks like an slocate "
937 "database but it seems to have security level %c, "
938 "which GNU findutils does not currently support"),
939 quotearg_n_style(0, locale_quoting_style, filename),
940 data[1]);
941 return 1;
943 else
945 return 1;
948 else
950 /* Not a digit. */
951 return 0;
954 else
956 /* Definitely not slocate. */
957 return 0;
962 /* Print or count the entries in DBFILE that match shell globbing patterns in
963 ARGV. Return the number of entries matched. */
965 static unsigned long
966 search_one_database (int argc,
967 char **argv,
968 const char *dbfile,
969 FILE *fp,
970 off_t filesize,
971 int ignore_case,
972 int enable_print,
973 int basename_only,
974 int use_limit,
975 struct locate_limits *plimit,
976 int stats,
977 int op_and,
978 int regex,
979 int regex_options)
981 char *pathpart; /* A pattern to consider. */
982 int argn; /* Index to current pattern in argv. */
983 int nread; /* number of bytes read from an entry. */
984 struct process_data procdata; /* Storage for data shared with visitors. */
985 int slocate_seclevel;
986 struct visitor* pvis; /* temp for determining past_pat_inspector. */
987 const char *format_name;
988 enum ExistenceCheckType do_check_existence;
991 /* We may turn on existence checking for a given database.
992 * We ensure that we can return to the previous behaviour
993 * by using two variables, do_check_existence (which we act on)
994 * and check_existence (whcih indicates the default before we
995 * adjust it on the bassis of what kind of database we;re using
997 do_check_existence = check_existence;
1000 if (ignore_case)
1001 regex_options |= RE_ICASE;
1003 procdata.len = procdata.count = 0;
1004 procdata.slocatedb_format = 0;
1005 procdata.itemcount = 0;
1007 procdata.dbfile = dbfile;
1008 procdata.fp = fp;
1010 /* Set up the inspection regime */
1011 inspectors = NULL;
1012 lastinspector = NULL;
1013 past_pat_inspector = NULL;
1014 results_were_filtered = false;
1015 #if 0
1016 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
1017 #else
1018 procdata.pathsize = 128; /* Increased as necessary by locate_read_str. */
1019 #endif
1020 procdata.original_filename = xmalloc (procdata.pathsize);
1023 nread = fread (procdata.original_filename, 1, SLOCATE_DB_MAGIC_LEN,
1024 procdata.fp);
1025 slocate_seclevel = 0;
1026 if (looking_at_slocate_locatedb(procdata.dbfile,
1027 procdata.original_filename,
1028 nread,
1029 &slocate_seclevel))
1031 error(0, 0,
1032 _("%s is an slocate database. "
1033 "Support for these is new, expect problems for now."),
1034 quotearg_n_style(0, locale_quoting_style, procdata.dbfile));
1036 /* slocate also uses frcode, but with a different header.
1037 * We handle the header here and then work with the data
1038 * in the normal way.
1040 if (slocate_seclevel > 1)
1042 /* We don't know what those security levels mean,
1043 * so do nothing further
1045 error(0, 0,
1046 _("%s is an slocate database of unsupported security level %d; skipping it."),
1047 quotearg_n_style(0, locale_quoting_style, procdata.dbfile),
1048 slocate_seclevel);
1049 return 0;
1051 else if (slocate_seclevel > 0)
1053 /* Don't show the filenames to the user if they don't exist.
1054 * Showing stats is safe since filenames are only counted
1055 * after the existence check
1057 if (ACCEPT_NON_EXISTING == check_existence)
1059 /* Do not allow the user to see a list of filenames that they
1060 * cannot stat().
1062 error(0, 0,
1063 _("You specified the -E option, but that option "
1064 "cannot be used with slocate-format databases "
1065 "with a non-zero security level. No results will be "
1066 "generated for this database.\n"));
1067 return 0;
1069 if (ACCEPT_EXISTING != do_check_existence)
1071 if (enable_print || stats)
1073 error(0, 0,
1074 _("%s is an slocate database. "
1075 "Turning on the '-e' option."),
1076 quotearg_n_style(0, locale_quoting_style, procdata.dbfile));
1078 do_check_existence = ACCEPT_EXISTING;
1081 add_visitor(visit_locate02_format, NULL);
1082 format_name = "slocate";
1083 procdata.slocatedb_format = 1;
1085 else
1087 int nread2;
1089 procdata.slocatedb_format = 0;
1090 nread2 = fread (procdata.original_filename+nread, 1, sizeof (LOCATEDB_MAGIC)-nread,
1091 procdata.fp);
1092 if (looking_at_gnu_locatedb(procdata.original_filename, nread+nread2))
1094 add_visitor(visit_locate02_format, NULL);
1095 format_name = "GNU LOCATE02";
1097 else /* Use the old format */
1099 int i;
1101 nread += nread2;
1102 /* Read the list of the most common bigrams in the database. */
1103 if (nread < 256)
1105 int more_read = fread (procdata.original_filename + nread, 1,
1106 256 - nread, procdata.fp);
1107 if ( (more_read + nread) != 256 )
1109 error(1, 0,
1110 _("Old-format locate database %s is "
1111 "too short to be valid"),
1112 quotearg_n_style(0, locale_quoting_style, dbfile));
1117 for (i = 0; i < 128; i++)
1119 procdata.bigram1[i] = procdata.original_filename[i << 1];
1120 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
1122 format_name = "old";
1123 add_visitor(visit_old_format, NULL);
1127 if (basename_only)
1128 add_visitor(visit_basename, NULL);
1130 /* Add an inspector for each pattern we're looking for. */
1131 for ( argn = 0; argn < argc; argn++ )
1133 results_were_filtered = true;
1134 pathpart = argv[argn];
1135 if (regex)
1137 struct regular_expression *p = xmalloc(sizeof(*p));
1138 const char *error_message = NULL;
1140 memset (&p->regex, 0, sizeof (p->regex));
1142 re_set_syntax(regex_options);
1143 p->regex.allocated = 100;
1144 p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
1145 p->regex.fastmap = NULL;
1146 p->regex.syntax = regex_options;
1147 p->regex.translate = NULL;
1149 error_message = re_compile_pattern (pathpart, strlen (pathpart),
1150 &p->regex);
1151 if (error_message)
1153 error (1, 0, "%s", error_message);
1155 else
1157 add_visitor(visit_regex, p);
1160 else if (contains_metacharacter(pathpart))
1162 if (ignore_case)
1163 add_visitor(visit_globmatch_casefold, pathpart);
1164 else
1165 add_visitor(visit_globmatch_nofold, pathpart);
1167 else
1169 /* No glob characters used. Hence we match on
1170 * _any part_ of the filename, not just the
1171 * basename. This seems odd to me, but it is the
1172 * traditional behaviour.
1173 * James Youngman <jay@gnu.org>
1175 visitfunc matcher;
1176 if (1 == MB_CUR_MAX)
1178 /* As an optimisation, use a strstr() matcher if we are
1179 * in a unibyte locale. This can give a x2 speedup in
1180 * the C locale. Some light testing reveals that
1181 * glibc's strstr() is somewhere around 40% faster than
1182 * gnulib's, so we just use strstr().
1184 matcher = ignore_case ?
1185 visit_substring_match_casefold_narrow :
1186 visit_substring_match_nocasefold_narrow;
1188 else
1190 matcher = ignore_case ?
1191 visit_substring_match_casefold_wide :
1192 visit_substring_match_nocasefold_wide;
1194 add_visitor(matcher, pathpart);
1198 pvis = lastinspector;
1200 /* We add visit_existing_*() as late as possible to reduce the
1201 * number of stat() calls.
1203 switch (do_check_existence)
1205 case ACCEPT_EXISTING:
1206 results_were_filtered = true;
1207 if (follow_symlinks) /* -L, default */
1208 add_visitor(visit_existing_follow, NULL);
1209 else /* -P */
1210 add_visitor(visit_existing_nofollow, NULL);
1211 break;
1213 case ACCEPT_NON_EXISTING:
1214 results_were_filtered = true;
1215 if (follow_symlinks) /* -L, default */
1216 add_visitor(visit_non_existing_follow, NULL);
1217 else /* -P */
1218 add_visitor(visit_non_existing_nofollow, NULL);
1219 break;
1221 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1222 /* do nothing; no extra processing. */
1223 break;
1226 /* Security issue: The stats visitor must be added immediately
1227 * before the print visitor, because otherwise the -S option would
1228 * leak information about files that the caller cannot see.
1230 if (stats)
1231 add_visitor(visit_stats, &statistics);
1233 if (enable_print)
1235 if (print_quoted_filename)
1236 add_visitor(visit_justprint_quoted, NULL);
1237 else
1238 add_visitor(visit_justprint_unquoted, NULL);
1242 if (use_limit)
1243 add_visitor(visit_limit, plimit);
1244 else
1245 add_visitor(visit_count, plimit);
1248 if (argc > 1)
1250 past_pat_inspector = pvis->next;
1251 if (op_and)
1252 mainprocessor = process_and;
1253 else
1254 mainprocessor = process_or;
1256 else
1257 mainprocessor = process_simple;
1259 if (stats)
1261 printf(_("Database %s is in the %s format.\n"),
1262 procdata.dbfile,
1263 format_name);
1267 procdata.c = getc (procdata.fp);
1268 /* If we are searching for filename patterns, the inspector list
1269 * will contain an entry for each pattern for which we are searching.
1271 while ( (procdata.c != EOF) &&
1272 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1274 /* Do nothing; all the work is done in the visitor functions. */
1277 if (stats)
1279 if (filesize)
1280 print_stats(argc, filesize);
1283 if (ferror (procdata.fp))
1285 error (0, errno, "%s",
1286 quotearg_n_style(0, locale_quoting_style, procdata.dbfile));
1287 return 0;
1289 return plimit->items_accepted;
1295 extern char *version_string;
1297 /* The name this program was run with. */
1298 char *program_name;
1300 static void
1301 usage (FILE *stream)
1303 fprintf (stream, _("\
1304 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1305 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1306 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1307 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1308 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1309 [--max-database-age D] [--version] [--help]\n\
1310 pattern...\n"),
1311 program_name);
1312 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Codes returned by getopt_long() for the long options that have no
 * single-character form.  They start just above CHAR_MAX so that they
 * cannot clash with any option letter.
 */
enum
  {
    REGEXTYPE_OPTION = CHAR_MAX + 1,
    MAX_DB_AGE       = CHAR_MAX + 2
  };

/* Long-option table handed to getopt_long().  Each entry maps a long
 * option name onto the value that the option-parsing switch acts on.
 */
static struct option const longopts[] =
{
  {"database",         required_argument, NULL, 'd'},
  {"existing",         no_argument,       NULL, 'e'},
  {"non-existing",     no_argument,       NULL, 'E'},
  {"ignore-case",      no_argument,       NULL, 'i'},
  {"all",              no_argument,       NULL, 'A'},
  {"help",             no_argument,       NULL, 'h'},
  {"version",          no_argument,       NULL, 'v'},
  {"null",             no_argument,       NULL, '0'},
  {"count",            no_argument,       NULL, 'c'},
  {"wholename",        no_argument,       NULL, 'w'},
  {"wholepath",        no_argument,       NULL, 'w'}, /* Synonym. */
  {"basename",         no_argument,       NULL, 'b'},
  {"print",            no_argument,       NULL, 'p'},
  {"stdio",            no_argument,       NULL, 's'},
  {"mmap",             no_argument,       NULL, 'm'},
  {"limit",            required_argument, NULL, 'l'},
  {"regex",            no_argument,       NULL, 'r'},
  {"regextype",        required_argument, NULL, REGEXTYPE_OPTION},
  {"statistics",       no_argument,       NULL, 'S'},
  {"follow",           no_argument,       NULL, 'L'},
  {"nofollow",         no_argument,       NULL, 'P'},
  {"max-database-age", required_argument, NULL, MAX_DB_AGE},
  /* End-of-table marker: a NULL name. */
  {NULL, 0, NULL, 0}
};
1349 static int
1350 drop_privs(void)
1352 const char * what = "failed";
1353 const uid_t orig_euid = geteuid();
1354 const uid_t uid = getuid();
1355 const gid_t gid = getgid();
1357 /* Use of setgroups() is restricted to root only. */
1358 if (0 == orig_euid)
1360 /* We're either root or running setuid-root. */
1361 gid_t groups[1];
1362 groups[1] = gid;
1363 if (0 != setgroups(1u, groups))
1365 what = _("failed to drop group privileges");
1366 goto fail;
1370 /* Drop any setuid privileges */
1371 if (uid != orig_euid)
1373 if (0 == uid)
1375 /* We're really root anyway, but are setuid to something else. Leave it. */
1377 else
1379 errno = 0;
1380 if (0 != setuid(getuid()))
1382 what = _("failed to drop setuid privileges");
1383 goto fail;
1386 /* Defend against the case where the attacker runs us with the
1387 * capability to call setuid() turned off, which on some systems
1388 * will cause the above attempt to drop privileges fail (leaving us
1389 * privileged).
1391 else
1393 /* Check that we can no longer switch bask to root */
1394 if (0 == setuid(0))
1396 what = _("Failed to fully drop privileges");
1397 /* The errno value here is not interesting (since
1398 * the system call we are complaining about
1399 * succeeded when we wanted it to fail). Arrange
1400 * for the call to error() not to print the errno
1401 * value by setting errno=0.
1403 errno = 0;
1404 goto fail;
1410 /* Drop any setgid privileges */
1411 errno = 0;
1412 if (0 != setgid(gid))
1414 what = _("failed to drop setgid privileges");
1415 goto fail;
1418 /* success. */
1419 return 0;
1421 fail:
1422 error(1, errno, "%s",
1423 quotearg_n_style(0, locale_quoting_style, what));
1424 abort();
1425 kill(0, SIGKILL);
1426 _exit(1);
1427 /*NOTREACHED*/
1428 /* ... we hope. */
1429 for (;;)
1431 /* deliberate infinite loop */
/* Open the file NAME read-only (adding O_LARGEFILE where that flag is
 * defined) and mark the descriptor close-on-exec.
 *
 * Returns the descriptor, or -1 if the file could not be opened or
 * the close-on-exec flag could not be set; in the latter case the
 * descriptor is closed before returning.
 */
static int
opendb(const char *name)
{
  int open_flags = O_RDONLY;
  int fd;

#if defined(O_LARGEFILE)
  open_flags |= O_LARGEFILE;
#endif

  fd = open(name, open_flags);
  if (fd < 0)
    return fd;

  /* Make sure it won't survive an exec */
  if (fcntl(fd, F_SETFD, FD_CLOEXEC) != 0)
    {
      close(fd);
      return -1;
    }

  return fd;
}
1456 dolocate (int argc, char **argv, int secure_db_fd)
1458 char *dbpath;
1459 unsigned long int found = 0uL;
1460 int optc;
1461 int ignore_case = 0;
1462 int print = 0;
1463 int just_count = 0;
1464 int basename_only = 0;
1465 int use_limit = 0;
1466 int regex = 0;
1467 int regex_options = RE_SYNTAX_EMACS;
1468 int stats = 0;
1469 int op_and = 0;
1470 const char *e;
1471 FILE *fp;
1472 int they_chose_db = 0;
1473 bool did_stdin = false; /* Set to prevent rereading stdin. */
1475 program_name = argv[0];
1477 #ifdef HAVE_SETLOCALE
1478 setlocale (LC_ALL, "");
1479 #endif
1480 bindtextdomain (PACKAGE, LOCALEDIR);
1481 textdomain (PACKAGE);
1482 atexit (close_stdout);
1484 limits.limit = 0;
1485 limits.items_accepted = 0;
1487 quote_opts = clone_quoting_options (NULL);
1488 print_quoted_filename = true;
1490 /* We cannot simultaneously trust $LOCATE_PATH and use the
1491 * setuid-access-controlled database,, since that could cause a leak
1492 * of private data.
1494 dbpath = getenv ("LOCATE_PATH");
1495 if (dbpath)
1497 they_chose_db = 1;
1500 check_existence = ACCEPT_EITHER;
1502 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1503 switch (optc)
1505 case '0':
1506 separator = 0;
1507 print_quoted_filename = false; /* print filename 'raw'. */
1508 break;
1510 case 'A':
1511 op_and = 1;
1512 break;
1514 case 'b':
1515 basename_only = 1;
1516 break;
1518 case 'c':
1519 just_count = 1;
1520 break;
1522 case 'd':
1523 dbpath = optarg;
1524 they_chose_db = 1;
1525 break;
1527 case 'e':
1528 check_existence = ACCEPT_EXISTING;
1529 break;
1531 case 'E':
1532 check_existence = ACCEPT_NON_EXISTING;
1533 break;
1535 case 'i':
1536 ignore_case = 1;
1537 break;
1539 case 'h':
1540 usage (stdout);
1541 return 0;
1543 case MAX_DB_AGE:
1544 /* XXX: nothing in the test suite for this option. */
1545 set_max_db_age(optarg);
1546 break;
1548 case 'p':
1549 print = 1;
1550 break;
1552 case 'v':
1553 printf (_("GNU locate version %s\n"), version_string);
1554 printf (_("Built using GNU gnulib version %s\n"), gnulib_version);
1555 return 0;
1557 case 'w':
1558 basename_only = 0;
1559 break;
1561 case 'r':
1562 regex = 1;
1563 break;
1565 case REGEXTYPE_OPTION:
1566 regex_options = get_regex_type(optarg);
1567 break;
1569 case 'S':
1570 stats = 1;
1571 break;
1573 case 'L':
1574 follow_symlinks = 1;
1575 break;
1577 /* In find, -P and -H differ in the way they handle paths
1578 * given on the command line. This is not relevant for
1579 * locate, but the -H option is supported because it is
1580 * probably more intuitive to do so.
1582 case 'P':
1583 case 'H':
1584 follow_symlinks = 0;
1585 break;
1587 case 'l':
1589 char *end = optarg;
1590 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1591 if (LONGINT_OK != err)
1593 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1595 use_limit = 1;
1597 break;
1599 case 's': /* use stdio */
1600 case 'm': /* use mmap */
1601 /* These options are implemented simply for
1602 * compatibility with FreeBSD
1604 break;
1606 default:
1607 usage (stderr);
1608 return 1;
1612 /* If the user gave the -d option or set LOCATE_PATH,
1613 * relinquish access to the secure database.
1615 if (they_chose_db)
1617 if (secure_db_fd >= 0)
1619 close(secure_db_fd);
1620 secure_db_fd = -1;
1624 if (!just_count && !stats)
1625 print = 1;
1627 if (stats)
1629 if (optind == argc)
1630 use_limit = 0;
1632 else
1634 if (!just_count && optind == argc)
1636 usage (stderr);
1637 return 1;
1642 if (1 == isatty(STDOUT_FILENO))
1643 stdout_is_a_tty = true;
1644 else
1645 stdout_is_a_tty = false;
1647 if (they_chose_db)
1648 next_element (dbpath, 0); /* Initialize. */
1650 /* Bail out early if limit already reached. */
1651 while (!use_limit || limits.limit > limits.items_accepted)
1653 struct stat st;
1654 int fd;
1655 off_t filesize;
1657 statistics.compressed_bytes =
1658 statistics.total_filename_count =
1659 statistics.total_filename_length =
1660 statistics.whitespace_count =
1661 statistics.newline_count =
1662 statistics.highbit_filename_count = 0u;
1664 if (they_chose_db)
1666 /* Take the next element from the list of databases */
1667 e = next_element ((char *) NULL, 0);
1668 if (NULL == e)
1669 break;
1671 if (0 == strcmp (e, "-"))
1673 if (did_stdin)
1675 error (0, 0,
1676 _("warning: the locate database can only be read from stdin once."));
1677 return 0;
1679 else
1681 e = "<stdin>";
1682 fd = 0;
1683 did_stdin = true;
1686 else
1688 if (0 == strlen(e) || 0 == strcmp(e, "."))
1690 e = LOCATE_DB;
1693 /* open the database */
1694 fd = opendb(e);
1695 if (fd < 0)
1697 error (0, errno, "%s",
1698 quotearg_n_style(0, locale_quoting_style, e));
1699 return 0;
1703 else
1705 if (-1 == secure_db_fd)
1707 /* Already searched the database, it's time to exit the loop */
1708 break;
1710 else
1712 e = selected_secure_db;
1713 fd = secure_db_fd;
1714 secure_db_fd = -1;
1718 /* Check the database to see if it is old. */
1719 if (fstat(fd, &st))
1721 error (0, errno, "%s",
1722 quotearg_n_style(0, locale_quoting_style, e));
1723 /* continue anyway */
1724 filesize = (off_t)0;
1726 else
1728 time_t now;
1730 filesize = st.st_size;
1732 if ((time_t)-1 == time(&now))
1734 /* If we can't tell the time, we don't know how old the
1735 * database is. But since the message is just advisory,
1736 * we continue anyway.
1738 error (0, errno, "time system call");
1740 else
1742 double age = difftime(now, st.st_mtime);
1743 double warn_seconds = SECONDS_PER_UNIT * warn_number_units;
1744 if (age > warn_seconds)
1746 /* For example:
1747 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1748 error (0, 0,
1749 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1750 quotearg_n_style(0, locale_quoting_style, e),
1751 warn_number_units, _(warn_name_units),
1752 (age/(double)SECONDS_PER_UNIT), _(warn_name_units));
1757 fp = fdopen(fd, "r");
1758 if (NULL == fp)
1760 error (0, errno, "%s",
1761 quotearg_n_style(0, locale_quoting_style, e));
1762 return 0;
1765 /* Search this database for all patterns simultaneously */
1766 found = search_one_database (argc - optind, &argv[optind],
1767 e, fp, filesize,
1768 ignore_case, print, basename_only,
1769 use_limit, &limits, stats,
1770 op_and, regex, regex_options);
1772 /* Close the databsase (even if it is stdin) */
1773 if (fclose (fp) == EOF)
1775 error (0, errno, "%s",
1776 quotearg_n_style(0, locale_quoting_style, e));
1777 return 0;
1781 if (just_count)
1783 printf("%ld\n", found);
1786 if (found || (use_limit && (limits.limit==0)) || stats )
1787 return 0;
1788 else
1789 return 1;
1792 #define ARRAYSIZE(a) (sizeof(a)/sizeof(a[0]))
1793 static int
1794 open_secure_db(void)
1796 int fd, i;
1798 const char * secure_db_list[] =
1800 LOCATE_DB,
1801 "/var/lib/slocate/slocate.db",
1802 NULL
1804 for (i=0; secure_db_list[i]; ++i)
1806 fd = opendb(secure_db_list[i]);
1807 if (fd >= 0)
1809 selected_secure_db = secure_db_list[i];
1810 return fd;
1813 return -1;
/* Program entry point.  The default database is opened first, then
 * drop_privs() is called, and only after that is the command line
 * handed to dolocate(), whose result becomes the exit status.
 */
int
main (int argc, char **argv)
{
  const int secure_fd = open_secure_db();

  drop_privs();

  return dolocate(argc, argv, secure_fd);
}