Eliminated a few compiler warnings
[findutils.git] / locate / locate.c
blob0249cde77a86aaa6a0b8aac0cc2e67f1b71967c0
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <signal.h>
66 #include <ctype.h>
67 #include <sys/types.h>
68 #include <grp.h> /* for setgroups() */
69 #include <sys/stat.h>
70 #include <time.h>
71 #include <fnmatch.h>
72 #include <getopt.h>
73 #include <xstrtol.h>
75 /* The presence of unistd.h is assumed by gnulib these days, so we
76 * might as well assume it too.
78 /* We need <unistd.h> for isatty(). */
79 #include <unistd.h>
81 #if HAVE_FCNTL_H
82 /* We use fcntl() */
83 #include <fcntl.h>
84 #endif
86 #define NDEBUG
87 #include <assert.h>
89 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
90 #include <string.h>
91 #else
92 #include <strings.h>
93 #define strchr index
94 #endif
96 #ifdef STDC_HEADERS
97 #include <stdlib.h>
98 #endif
100 #ifdef HAVE_ERRNO_H
101 #include <errno.h>
102 #else
103 extern int errno;
104 #endif
106 #ifdef HAVE_LOCALE_H
107 #include <locale.h>
108 #endif
110 #if ENABLE_NLS
111 # include <libintl.h>
112 # define _(Text) gettext (Text)
113 #else
114 # define _(Text) Text
115 #define textdomain(Domain)
116 #define bindtextdomain(Package, Directory)
117 #endif
118 #ifdef gettext_noop
119 # define N_(String) gettext_noop (String)
120 #else
121 /* We used to use (String) instead of just String, but apparently ISO C
122 * doesn't allow this (at least, that's what HP said when someone reported
123 * this as a compiler bug). This is HP case number 1205608192. See
124 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
125 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
126 * like: static const char buf[] = ("string");
128 # define N_(String) String
129 #endif
131 #include "locatedb.h"
132 #include <getline.h>
133 #include "../gnulib/lib/xalloc.h"
134 #include "../gnulib/lib/error.h"
135 #include "../gnulib/lib/human.h"
136 #include "dirname.h"
137 #include "closeout.h"
138 #include "nextelem.h"
139 #include "regex.h"
140 #include "quote.h"
141 #include "quotearg.h"
142 #include "printquoted.h"
143 #include "regextype.h"
146 /* Note that these macros may evaluate Ch more than once. */
147 #ifdef _LIBC
148 # define TOUPPER(Ch) toupper (Ch)
149 # define TOLOWER(Ch) tolower (Ch)
150 #else
151 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
152 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
153 #endif
155 /* typedef enum {false, true} boolean; */
157 /* Warn if a database is older than this. 8 days allows for a weekly
158 update that takes up to a day to perform. */
159 static unsigned int warn_number_units = 8;
161 /* Printable name of units used in WARN_SECONDS */
162 static const char warn_name_units[] = N_("days");
163 #define SECONDS_PER_UNIT (60 * 60 * 24)
/* Verdicts a visitor can return.  The values are distinct bits so
   that callers can test for several verdicts with a single mask. */
enum visit_result
  {
    VISIT_CONTINUE = 1,  /* please call the next visitor */
    VISIT_ACCEPTED = 2,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 4,  /* rejected, process next file. */
    VISIT_ABORT    = 8   /* rejected, process no more files. */
  };
/* How the -e / -E options constrain which entries are reported. */
enum ExistenceCheckType
  {
    ACCEPT_EITHER,        /* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING,      /* Corresponds to option -e */
    ACCEPT_NON_EXISTING   /* Corresponds to option -E */
  };

/* Check for existence of files before printing them out? */
enum ExistenceCheckType check_existence = ACCEPT_EITHER;
/* Nonzero selects the stat()-based existence visitors (-L, the
   default); zero selects the lstat()-based ones (-P). */
static int follow_symlinks = 1;

/* What to separate the results with. */
static int separator = '\n';

/* Quoting configuration handed to print_quoted(). */
static struct quoting_options * quote_opts = NULL;

/* Passed to print_quoted() together with quote_opts. */
static bool stdout_is_a_tty;

/* Chooses the quoting print visitor over the plain one. */
static bool print_quoted_filename;

/* Set while building the visitor list when patterns or existence
   checks restrict the output; consulted by print_stats(). */
static bool results_were_filtered;

/* static char* slocate_db_pathname = "/var/lib/slocate/slocate.db"; */

static const char *selected_secure_db = NULL;
198 /* Change the number of days old the database can be
199 * before we complain about it.
201 static void
202 set_max_db_age(const char *s)
204 char *end;
205 unsigned long int val;
206 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
208 if (0 == *s)
210 error(1, 0,
211 _("The argument argument for option --max-database-age must not be empty"));
215 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
216 * we would not be able to tell if that is the correct answer, or whether it
217 * signifies an error.
219 errno = 0;
220 val = strtoul(s, &end, 10);
222 /* Diagnose number too large, non-numbes and trailing junk. */
223 if ((ULONG_MAX == val && ERANGE == errno) ||
224 (0 == val && EINVAL == errno))
226 error(1, errno,
227 _("Invalid argument `%s' for option --max-database-age"),
230 else if (*end)
232 /* errno wasn't set, don't print its message */
233 error(1, 0,
234 _("Invalid argument `%s' for option --max-database-age"),
237 else
239 warn_number_units = val;
/* Read in a 16-bit int, high byte first (network byte order).
 *
 * The two bytes are combined as unsigned quantities and only then
 * mapped onto the signed range, so no negative value is ever shifted.
 * End-of-file is not reported separately: an EOF from fgetc()
 * contributes 0xff to the result, so EOF on both reads yields -1.
 */
static short
get_short (FILE *fp)
{
  const int hi = fgetc (fp);
  const int lo = fgetc (fp);
  const unsigned int word = (((unsigned int) hi & 0xffu) << 8)
                            | ((unsigned int) lo & 0xffu);

  /* Reinterpret the 16-bit pattern as a two's-complement value. */
  if (word >= 0x8000u)
    return (short) ((int) word - 0x10000);
  return (short) word;
}
/* The characters that make a pattern a shell glob. */
const char * const metacharacters = "*?[]\\";

/* Return 1 if S contains any shell glob characters, 0 otherwise. */
static int
contains_metacharacter(const char *s)
{
  return (strpbrk(s, metacharacters) != NULL) ? 1 : 0;
}
/* locate_read_str()
 *
 * Read bytes from FP into the buffer at offset OFFS in (*BUF),
 * until we reach DELIMITER or end-of-file.  We reallocate the buffer
 * as necessary, altering (*BUF) and (*SIZ) as appropriate.  No assumption
 * is made regarding the content of the data (i.e. the implementation is
 * 8-bit clean, the only delimiter is DELIMITER).
 *
 * The bytes stored at (*BUF)+OFFS are always followed by a NUL byte,
 * even when end-of-file was reached before DELIMITER was seen.
 *
 * Returns the number of bytes read (including the delimiter when one
 * was read), or -1 if getdelim() fails or the buffer cannot be grown.
 * In the failure cases (*BUF) and (*SIZ) are left unchanged.
 *
 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
 * has been removed from gnulib.
 *
 * We call the function locate_read_str() to avoid a name clash with the curses
 * function getstr().
 */
static int
locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char * p = NULL;
  size_t sz = 0;
  int nread;
  size_t needed;

  nread = getdelim(&p, &sz, delimiter, fp);
  if (nread >= 0)
    {
      assert(p != NULL);

      needed = offs + nread + 1u;
      if (needed > (*siz))
        {
          char *pnew = realloc(*buf, needed);
          if (NULL == pnew)
            {
              free(p);          /* don't leak the line we just read */
              return -1;        /* FAIL */
            }
          else
            {
              *siz = needed;
              *buf = pnew;
            }
        }
      /* Copy the terminating NUL that getdelim() appended as well;
       * NEEDED already accounts for it.
       */
      memcpy((*buf)+offs, p, nread + 1u);
    }

  /* getdelim() may have allocated a buffer even when it failed. */
  free(p);
  return nread;
}
/* Copy the NUL-terminated string SRC to DEST, converting upper-case
 * characters to lower case on the way.  DEST must have room for
 * strlen(SRC)+1 bytes.  DEST may be the same pointer as SRC, since
 * each byte is read before the corresponding byte is written.
 *
 * Each byte is converted to unsigned char before being classified,
 * because the <ctype.h> functions are not defined for negative
 * arguments other than EOF.
 */
static void
lc_strcpy(char *dest, const char *src)
{
  while (*src)
    {
      *dest++ = (char) tolower ((unsigned char) *src);
      ++src;
    }
  *dest = 0;
}
/* Bookkeeping for the result limit and the result counter. */
struct locate_limits
{
  uintmax_t limit;           /* visit_limit() aborts once this many items were accepted */
  uintmax_t items_accepted;  /* incremented by visit_limit() and visit_count() */
};

static struct locate_limits limits;
/* Counters accumulated by visit_stats() and reported by print_stats(). */
struct locate_stats
{
  uintmax_t compressed_bytes;        /* NOTE(review): not updated by the code visible here */
  uintmax_t total_filename_count;    /* number of entries seen */
  uintmax_t total_filename_length;   /* sum of strlen() over those entries */
  uintmax_t whitespace_count;        /* entries containing whitespace (newline included) */
  uintmax_t newline_count;           /* entries containing a newline */
  uintmax_t highbit_filename_count;  /* entries containing a byte with bit 7 set */
};

static struct locate_stats statistics;
/* A growable scratch buffer, used for the case-folded copy of a name. */
struct stringbuf
{
  char *buffer;        /* the storage itself */
  size_t buffersize;   /* number of bytes allocated at BUFFER */
  size_t *preqlen;     /* visit_casefold() makes BUFFER hold *preqlen + 1 bytes */
};

static struct stringbuf casebuf;
/* Context for visit_substring_match_casefold(). */
struct casefolder
{
  const char *pattern;     /* the substring searched for */
  struct stringbuf *pbuf;  /* buffer whose contents are searched */
};
365 struct regular_expression
367 struct re_pattern_buffer regex; /* for --regex */
/* State shared between the database reader and all visitors. */
struct process_data
{
  int c;                        /* An input byte. */
  char itemcount;               /* Indicates we're at the beginning of an slocate db. */
  int count;                    /* The length of the prefix shared with the previous database entry. */
  int len;                      /* Set to COUNT plus the bytes read for the current entry. */
  char *original_filename;      /* The current input database entry. */
  size_t pathsize;              /* Amount allocated for it. */
  char *munged_filename;        /* path or base_name(path) */
  FILE *fp;                     /* The pathname database. */
  const char *dbfile;           /* Its name, or "<stdin>" */
  int slocatedb_format;         /* Allows us to cope with slocate's format variant */
  /* for the old database format,
     the first and second characters of the most common bigrams.  */
  char bigram1[128];
  char bigram2[128];
};
/* Signature shared by all visitors: inspect or update PROCDATA using
   the visitor's private CONTEXT, and return an enum visit_result. */
typedef int (*visitfunc)(struct process_data *procdata,
                         void *context);

/* One node of the singly linked visitor list. */
struct visitor
{
  visitfunc      inspector;  /* the function to call */
  void *         context;    /* passed back to INSPECTOR unchanged */
  struct visitor *next;      /* following visitor, or NULL */
};

/* Head and tail of the visitor list built by add_visitor(). */
static struct visitor *inspectors = NULL;
static struct visitor *lastinspector = NULL;
/* Where process_or() and process_and() stop their first pass and
   start their second one. */
static struct visitor *past_pat_inspector = NULL;
405 /* 0 or 1 pattern(s) */
406 static int
407 process_simple(struct process_data *procdata)
409 int result = VISIT_CONTINUE;
410 const struct visitor *p = inspectors;
412 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
414 result = (p->inspector)(procdata, p->context);
415 p = p->next;
418 return result;
421 /* Accept if any pattern matches. */
422 static int
423 process_or (struct process_data *procdata)
425 int result = VISIT_CONTINUE;
426 const struct visitor *p = inspectors;
428 while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
430 result = (p->inspector)(procdata, p->context);
431 p = p->next;
434 if (result == VISIT_CONTINUE)
435 result = VISIT_REJECTED;
436 if (result & (VISIT_ABORT | VISIT_REJECTED))
437 return result;
439 p = past_pat_inspector;
440 result = VISIT_CONTINUE;
442 while ( (VISIT_CONTINUE == result) && (NULL != p) )
444 result = (p->inspector)(procdata, p->context);
445 p = p->next;
448 if (VISIT_CONTINUE == result)
449 return VISIT_ACCEPTED;
450 else
451 return result;
454 /* Accept if all pattern match. */
455 static int
456 process_and (struct process_data *procdata)
458 int result = VISIT_CONTINUE;
459 const struct visitor *p = inspectors;
461 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
463 result = (p->inspector)(procdata, p->context);
464 p = p->next;
467 if (result == VISIT_CONTINUE)
468 result = VISIT_REJECTED;
469 if (result & (VISIT_ABORT | VISIT_REJECTED))
470 return result;
472 p = past_pat_inspector;
473 result = VISIT_CONTINUE;
475 while ( (VISIT_CONTINUE == result) && (NULL != p) )
477 result = (p->inspector)(procdata, p->context);
478 p = p->next;
481 if (VISIT_CONTINUE == result)
482 return VISIT_ACCEPTED;
483 else
484 return result;
/* Signature of the per-entry driver (process_simple, process_or or
   process_and). */
typedef int (*processfunc)(struct process_data *procdata);

/* The driver selected for the database currently being searched. */
static processfunc mainprocessor = NULL;
491 static void
492 add_visitor(visitfunc fn, void *context)
494 struct visitor *p = xmalloc(sizeof(struct visitor));
495 p->inspector = fn;
496 p->context = context;
497 p->next = NULL;
499 if (NULL == lastinspector)
501 lastinspector = inspectors = p;
503 else
505 lastinspector->next = p;
506 lastinspector = p;
512 static int
513 visit_justprint_quoted(struct process_data *procdata, void *context)
515 (void) context;
516 print_quoted (stdout, quote_opts, stdout_is_a_tty,
517 "%s",
518 procdata->original_filename);
519 putchar(separator);
520 return VISIT_CONTINUE;
523 static int
524 visit_justprint_unquoted(struct process_data *procdata, void *context)
526 (void) context;
527 fputs(procdata->original_filename, stdout);
528 putchar(separator);
529 return VISIT_CONTINUE;
532 static int
533 visit_old_format(struct process_data *procdata, void *context)
535 register char *s;
536 (void) context;
538 /* Get the offset in the path where this path info starts. */
539 if (procdata->c == LOCATEDB_OLD_ESCAPE)
540 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
541 else
542 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
544 /* Overlay the old path with the remainder of the new. */
545 for (s = procdata->original_filename + procdata->count;
546 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
547 if (procdata->c < 0200)
548 *s++ = procdata->c; /* An ordinary character. */
549 else
551 /* Bigram markers have the high bit set. */
552 procdata->c &= 0177;
553 *s++ = procdata->bigram1[procdata->c];
554 *s++ = procdata->bigram2[procdata->c];
556 *s-- = '\0';
558 procdata->munged_filename = procdata->original_filename;
560 return VISIT_CONTINUE;
564 static int
565 visit_locate02_format(struct process_data *procdata, void *context)
567 register char *s;
568 int nread;
569 (void) context;
571 if (procdata->slocatedb_format)
573 if (procdata->itemcount == 0)
575 ungetc(procdata->c, procdata->fp);
576 procdata->count = 0;
577 procdata->len = 0;
579 else if (procdata->itemcount == 1)
581 procdata->count = procdata->len-1;
583 else
585 if (procdata->c == LOCATEDB_ESCAPE)
586 procdata->count += (short)get_short (procdata->fp);
587 else if (procdata->c > 127)
588 procdata->count += procdata->c - 256;
589 else
590 procdata->count += procdata->c;
593 else
595 if (procdata->c == LOCATEDB_ESCAPE)
596 procdata->count += (short)get_short (procdata->fp);
597 else if (procdata->c > 127)
598 procdata->count += procdata->c - 256;
599 else
600 procdata->count += procdata->c;
603 if (procdata->count > procdata->len || procdata->count < 0)
605 /* This should not happen generally , but since we're
606 * reading in data which is outside our control, we
607 * cannot prevent it.
609 error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
612 /* Overlay the old path with the remainder of the new. */
613 nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
614 procdata->fp, 0, procdata->count);
615 if (nread < 0)
616 return VISIT_ABORT;
617 procdata->c = getc (procdata->fp);
618 procdata->len = procdata->count + nread;
619 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
620 assert (s[0] != '\0');
621 assert (s[1] == '\0'); /* Our terminator. */
622 assert (s[2] == '\0'); /* Added by locate_read_str. */
624 procdata->munged_filename = procdata->original_filename;
626 if (procdata->slocatedb_format)
628 /* Don't increment indefinitely, it might overflow. */
629 if (procdata->itemcount < 6)
631 ++(procdata->itemcount);
636 return VISIT_CONTINUE;
639 static int
640 visit_basename(struct process_data *procdata, void *context)
642 (void) context;
643 procdata->munged_filename = base_name(procdata->original_filename);
645 return VISIT_CONTINUE;
649 static int
650 visit_casefold(struct process_data *procdata, void *context)
652 struct stringbuf *b = context;
654 if (*b->preqlen+1 > b->buffersize)
656 b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
657 b->buffersize = *b->preqlen+1;
659 lc_strcpy(b->buffer, procdata->munged_filename);
661 return VISIT_CONTINUE;
664 /* visit_existing_follow implements -L -e */
665 static int
666 visit_existing_follow(struct process_data *procdata, void *context)
668 struct stat st;
669 (void) context;
671 /* munged_filename has been converted in some way (to lower case,
672 * or is just the base name of the file), and original_filename has not.
673 * Hence only original_filename is still actually the name of the file
674 * whose existence we would need to check.
676 if (stat(procdata->original_filename, &st) != 0)
678 return VISIT_REJECTED;
680 else
682 return VISIT_CONTINUE;
686 /* visit_non_existing_follow implements -L -E */
687 static int
688 visit_non_existing_follow(struct process_data *procdata, void *context)
690 struct stat st;
691 (void) context;
693 /* munged_filename has been converted in some way (to lower case,
694 * or is just the base name of the file), and original_filename has not.
695 * Hence only original_filename is still actually the name of the file
696 * whose existence we would need to check.
698 if (stat(procdata->original_filename, &st) == 0)
700 return VISIT_REJECTED;
702 else
704 return VISIT_CONTINUE;
708 /* visit_existing_nofollow implements -P -e */
709 static int
710 visit_existing_nofollow(struct process_data *procdata, void *context)
712 struct stat st;
713 (void) context;
715 /* munged_filename has been converted in some way (to lower case,
716 * or is just the base name of the file), and original_filename has not.
717 * Hence only original_filename is still actually the name of the file
718 * whose existence we would need to check.
720 if (lstat(procdata->original_filename, &st) != 0)
722 return VISIT_REJECTED;
724 else
726 return VISIT_CONTINUE;
730 /* visit_non_existing_nofollow implements -P -E */
731 static int
732 visit_non_existing_nofollow(struct process_data *procdata, void *context)
734 struct stat st;
735 (void) context;
737 /* munged_filename has been converted in some way (to lower case,
738 * or is just the base name of the file), and original_filename has not.
739 * Hence only original_filename is still actually the name of the file
740 * whose existence we would need to check.
742 if (lstat(procdata->original_filename, &st) == 0)
744 return VISIT_REJECTED;
746 else
748 return VISIT_CONTINUE;
752 static int
753 visit_substring_match_nocasefold(struct process_data *procdata, void *context)
755 const char *pattern = context;
757 if (NULL != strstr(procdata->munged_filename, pattern))
758 return VISIT_ACCEPTED;
759 else
760 return VISIT_REJECTED;
763 static int
764 visit_substring_match_casefold(struct process_data *procdata, void *context)
766 const struct casefolder * p = context;
767 const struct stringbuf * b = p->pbuf;
768 (void) procdata;
770 if (NULL != strstr(b->buffer, p->pattern))
771 return VISIT_ACCEPTED;
772 else
773 return VISIT_REJECTED;
777 static int
778 visit_globmatch_nofold(struct process_data *procdata, void *context)
780 const char *glob = context;
781 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
782 return VISIT_REJECTED;
783 else
784 return VISIT_ACCEPTED;
788 static int
789 visit_globmatch_casefold(struct process_data *procdata, void *context)
791 const char *glob = context;
792 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
793 return VISIT_REJECTED;
794 else
795 return VISIT_ACCEPTED;
799 static int
800 visit_regex(struct process_data *procdata, void *context)
802 struct regular_expression *p = context;
803 const size_t len = strlen(procdata->munged_filename);
805 int rv = re_search (&p->regex, procdata->munged_filename,
806 len, 0, len,
807 (struct re_registers *) NULL);
808 if (rv < 0)
810 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
812 else
814 return VISIT_ACCEPTED; /* match */
819 static int
820 visit_stats(struct process_data *procdata, void *context)
822 struct locate_stats *p = context;
823 size_t len = strlen(procdata->original_filename);
824 const char *s;
825 int highbit, whitespace, newline;
827 ++(p->total_filename_count);
828 p->total_filename_length += len;
830 highbit = whitespace = newline = 0;
831 for (s=procdata->original_filename; *s; ++s)
833 if ( (int)(*s) & 128 )
834 highbit = 1;
835 if ('\n' == *s)
837 newline = whitespace = 1;
839 else if (isspace((unsigned char)*s))
841 whitespace = 1;
845 if (highbit)
846 ++(p->highbit_filename_count);
847 if (whitespace)
848 ++(p->whitespace_count);
849 if (newline)
850 ++(p->newline_count);
852 return VISIT_CONTINUE;
856 static int
857 visit_limit(struct process_data *procdata, void *context)
859 struct locate_limits *p = context;
861 (void) procdata;
863 if (++p->items_accepted >= p->limit)
864 return VISIT_ABORT;
865 else
866 return VISIT_CONTINUE;
869 static int
870 visit_count(struct process_data *procdata, void *context)
872 struct locate_limits *p = context;
874 (void) procdata;
876 ++p->items_accepted;
877 return VISIT_CONTINUE;
880 /* Emit the statistics.
882 static void
883 print_stats(int argc, size_t database_file_size)
885 char hbuf[LONGEST_HUMAN_READABLE + 1];
887 printf(_("Locate database size: %s bytes\n"),
888 human_readable ((uintmax_t) database_file_size,
889 hbuf, human_ceiling, 1, 1));
891 printf( (results_were_filtered ?
892 _("Matching Filenames: %s ") :
893 _("All Filenames: %s ")),
894 human_readable (statistics.total_filename_count,
895 hbuf, human_ceiling, 1, 1));
896 printf(_("with a cumulative length of %s bytes"),
897 human_readable (statistics.total_filename_length,
898 hbuf, human_ceiling, 1, 1));
900 printf(_("\n\tof which %s contain whitespace, "),
901 human_readable (statistics.whitespace_count,
902 hbuf, human_ceiling, 1, 1));
903 printf(_("\n\t%s contain newline characters, "),
904 human_readable (statistics.newline_count,
905 hbuf, human_ceiling, 1, 1));
906 printf(_("\n\tand %s contain characters with the high bit set.\n"),
907 human_readable (statistics.highbit_filename_count,
908 hbuf, human_ceiling, 1, 1));
910 if (!argc)
912 if (results_were_filtered)
914 printf(_("Some filenames may have been filtered out, "
915 "so we cannot compute the compression ratio.\n"));
917 else
919 if (statistics.total_filename_length)
921 printf(_("Compression ratio %4.2f%%\n"),
922 100.0 * ((double)statistics.total_filename_length
923 - (double) database_file_size)
924 / (double) statistics.total_filename_length);
926 else
928 printf(_("Compression ratio is undefined\n"));
932 printf("\n");
936 * Return nonzero if the data we read in indicates that we are
937 * looking at a LOCATE02 locate database.
939 static int
940 looking_at_gnu_locatedb (const char *data, size_t len)
942 if (len < sizeof (LOCATEDB_MAGIC))
943 return 0;
944 else if (0 == memcmp (data, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
945 return 1; /* We saw the magic byte sequence */
946 else
947 return 0;
/*
 * Return nonzero if the data we read in indicates that we are
 * looking at an slocate database.
 *
 * DATA holds the first LEN (at most 2) bytes of the database called
 * FILENAME.  An slocate database starts with a one-character string:
 * a decimal digit followed by a NUL byte.  When that is what we see,
 * the digit's value is stored in *SECLEVEL; otherwise *SECLEVEL is
 * left untouched.  A level above 1 still counts as an slocate
 * database, but a diagnostic is issued because we cannot handle it.
 */
static int
looking_at_slocate_locatedb (const char *filename,
                             const char *data,
                             size_t len,
                             int *seclevel)
{
  assert(len <= 2);

  if (len < 2)
    return 0;

  /* Check that the magic number is a one-byte string */
  if (0 != data[1])
    return 0;                   /* Definitely not slocate. */

  if (!isdigit((unsigned char)data[0]))
    return 0;                   /* Not a digit. */

  /* looks promising. */
  *seclevel = (data[0] - '0');

  if (*seclevel > 1)
    {
      /* Hmm, well it's probably an slocate database
       * of some awesomely huge security level, like 2.
       * We don't know how to handle those.
       *
       * Report the level digit itself, data[0]; data[1] is known
       * to be NUL at this point.
       */
      error(0, 0,
            _("locate database `%s' looks like an slocate "
              "database but it seems to have security level %c, "
              "which GNU findutils does not currently support"),
            filename, data[0]);
    }
  return 1;
}
1008 /* Print or count the entries in DBFILE that match shell globbing patterns in
1009 ARGV. Return the number of entries matched. */
1011 static unsigned long
1012 search_one_database (int argc,
1013 char **argv,
1014 const char *dbfile,
1015 FILE *fp,
1016 off_t filesize,
1017 int ignore_case,
1018 int enable_print,
1019 int basename_only,
1020 int use_limit,
1021 struct locate_limits *plimit,
1022 int stats,
1023 int op_and,
1024 int regex,
1025 int regex_options)
1027 char *pathpart; /* A pattern to consider. */
1028 int argn; /* Index to current pattern in argv. */
1029 int need_fold; /* Set when folding and any pattern is non-glob. */
1030 int nread; /* number of bytes read from an entry. */
1031 struct process_data procdata; /* Storage for data shared with visitors. */
1032 int slocate_seclevel;
1033 struct visitor* pvis; /* temp for determining past_pat_inspector. */
1034 const char *format_name;
1035 enum ExistenceCheckType do_check_existence;
1038 /* We may turn on existence checking for a given database.
1039 * We ensure that we can return to the previous behaviour
1040 * by using two variables, do_check_existence (which we act on)
1041 * and check_existence (whcih indicates the default before we
1042 * adjust it on the bassis of what kind of database we;re using
1044 do_check_existence = check_existence;
1047 if (ignore_case)
1048 regex_options |= RE_ICASE;
1050 procdata.len = procdata.count = 0;
1051 procdata.slocatedb_format = 0;
1052 procdata.itemcount = 0;
1054 procdata.dbfile = dbfile;
1055 procdata.fp = fp;
1057 /* Set up the inspection regime */
1058 inspectors = NULL;
1059 lastinspector = NULL;
1060 past_pat_inspector = NULL;
1061 results_were_filtered = false;
1063 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
1064 procdata.original_filename = xmalloc (procdata.pathsize);
1067 nread = fread (procdata.original_filename, 1, SLOCATE_DB_MAGIC_LEN,
1068 procdata.fp);
1069 slocate_seclevel = 0;
1070 if (looking_at_slocate_locatedb(procdata.dbfile,
1071 procdata.original_filename,
1072 nread,
1073 &slocate_seclevel))
1075 error(0, 0,
1076 _("`%s' is an slocate database. "
1077 "Support for these is new, expect problems for now "
1078 "(you are, after all, using the CVS code)."),
1079 procdata.dbfile);
1081 /* slocate also uses frcode, but with a different header.
1082 * We handle the header here and then work with the data
1083 * in the normal way.
1085 if (slocate_seclevel > 1)
1087 /* We don't know what those security levels mean,
1088 * so do nothing further
1090 return 0;
1092 else if (slocate_seclevel > 0)
1094 /* Don't show the filenames to the user if they don't exist.
1095 * Showing stats is safe since filenames are only counted
1096 * after the existence check
1098 if (ACCEPT_NON_EXISTING == check_existence)
1100 /* Do not allow the user to see a list of filenames that they
1101 * cannot stat().
1103 error(0, 0,
1104 _("You specified the -E option, but that option "
1105 "cannot be used with slocate-format databases "
1106 "with a non-zero security level. No results will be "
1107 "generated for this database.\n"));
1108 return 0;
1110 if (ACCEPT_EXISTING != do_check_existence)
1112 if (enable_print || stats)
1114 error(0, 0,
1115 _("`%s' is an slocate database. "
1116 "Turning on the '-e' option."),
1117 procdata.dbfile);
1119 do_check_existence = ACCEPT_EXISTING;
1122 add_visitor(visit_locate02_format, NULL);
1123 format_name = "slocate";
1124 procdata.slocatedb_format = 1;
1126 else
1128 int nread2;
1130 procdata.slocatedb_format = 0;
1131 nread2 = fread (procdata.original_filename+nread, 1, sizeof (LOCATEDB_MAGIC)-nread,
1132 procdata.fp);
1133 if (looking_at_gnu_locatedb(procdata.original_filename, nread+nread2))
1135 add_visitor(visit_locate02_format, NULL);
1136 format_name = "GNU LOCATE02";
1138 else /* Use the old format */
1140 int i;
1142 nread += nread2;
1143 /* Read the list of the most common bigrams in the database. */
1144 if (nread < 256)
1146 int more_read = fread (procdata.original_filename + nread, 1,
1147 256 - nread, procdata.fp);
1148 /* XXX: check more_read+nread! */
1151 for (i = 0; i < 128; i++)
1153 procdata.bigram1[i] = procdata.original_filename[i << 1];
1154 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
1156 format_name = "old";
1157 add_visitor(visit_old_format, NULL);
1161 if (basename_only)
1162 add_visitor(visit_basename, NULL);
1164 /* See if we need fold. */
1165 if (ignore_case && !regex)
1166 for ( argn = 0; argn < argc; argn++ )
1168 pathpart = argv[argn];
1169 if (!contains_metacharacter(pathpart))
1171 need_fold = 1;
1172 break;
1176 if (need_fold)
1178 add_visitor(visit_casefold, &casebuf);
1179 casebuf.preqlen = &procdata.pathsize;
1182 /* Add an inspector for each pattern we're looking for. */
1183 for ( argn = 0; argn < argc; argn++ )
1185 results_were_filtered = true;
1186 pathpart = argv[argn];
1187 if (regex)
1189 struct regular_expression *p = xmalloc(sizeof(*p));
1190 const char *error_message = NULL;
1192 memset (&p->regex, 0, sizeof (p->regex));
1194 re_set_syntax(regex_options);
1195 p->regex.allocated = 100;
1196 p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
1197 p->regex.fastmap = NULL;
1198 p->regex.syntax = regex_options;
1199 p->regex.translate = NULL;
1201 error_message = re_compile_pattern (pathpart, strlen (pathpart),
1202 &p->regex);
1203 if (error_message)
1205 error (1, 0, "%s", error_message);
1207 else
1209 add_visitor(visit_regex, p);
1212 else if (contains_metacharacter(pathpart))
1214 if (ignore_case)
1215 add_visitor(visit_globmatch_casefold, pathpart);
1216 else
1217 add_visitor(visit_globmatch_nofold, pathpart);
1219 else
1221 /* No glob characters used. Hence we match on
1222 * _any part_ of the filename, not just the
1223 * basename. This seems odd to me, but it is the
1224 * traditional behaviour.
1225 * James Youngman <jay@gnu.org>
1227 if (ignore_case)
1229 struct casefolder * cf = xmalloc(sizeof(*cf));
1230 cf->pattern = pathpart;
1231 cf->pbuf = &casebuf;
1232 add_visitor(visit_substring_match_casefold, cf);
1233 /* If we ignore case, convert it to lower now so we don't have to
1234 * do it every time
1236 lc_strcpy(pathpart, pathpart);
1238 else
1240 add_visitor(visit_substring_match_nocasefold, pathpart);
1245 pvis = lastinspector;
1247 /* We add visit_existing_*() as late as possible to reduce the
1248 * number of stat() calls.
1250 switch (do_check_existence)
1252 case ACCEPT_EXISTING:
1253 results_were_filtered = true;
1254 if (follow_symlinks) /* -L, default */
1255 add_visitor(visit_existing_follow, NULL);
1256 else /* -P */
1257 add_visitor(visit_existing_nofollow, NULL);
1258 break;
1260 case ACCEPT_NON_EXISTING:
1261 results_were_filtered = true;
1262 if (follow_symlinks) /* -L, default */
1263 add_visitor(visit_non_existing_follow, NULL);
1264 else /* -P */
1265 add_visitor(visit_non_existing_nofollow, NULL);
1266 break;
1268 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1269 /* do nothing; no extra processing. */
1270 break;
1273 /* Security issue: The stats visitor must be added immediately
1274 * before the print visitor, because otherwise the -S option would
1275 * leak information about files that the caller cannot see.
1277 if (stats)
1278 add_visitor(visit_stats, &statistics);
1280 if (enable_print)
1282 if (print_quoted_filename)
1283 add_visitor(visit_justprint_quoted, NULL);
1284 else
1285 add_visitor(visit_justprint_unquoted, NULL);
1289 if (use_limit)
1290 add_visitor(visit_limit, plimit);
1291 else
1292 add_visitor(visit_count, plimit);
1295 if (argc > 1)
1297 past_pat_inspector = pvis->next;
1298 if (op_and)
1299 mainprocessor = process_and;
1300 else
1301 mainprocessor = process_or;
1303 else
1304 mainprocessor = process_simple;
1306 if (stats)
1308 printf(_("Database %s is in the %s format.\n"),
1309 procdata.dbfile,
1310 format_name);
1314 procdata.c = getc (procdata.fp);
1315 /* If we are searching for filename patterns, the inspector list
1316 * will contain an entry for each pattern for which we are searching.
1318 while ( (procdata.c != EOF) &&
1319 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1321 /* Do nothing; all the work is done in the visitor functions. */
1324 if (stats)
1326 if (filesize)
1327 print_stats(argc, filesize);
1330 if (ferror (procdata.fp))
1332 error (0, errno, "%s", procdata.dbfile);
1333 return 0;
1335 return plimit->items_accepted;
/* Version text printed for --version; the definition lives in another
   translation unit.  */
extern char *version_string;

/* Name under which this program was invoked (set from argv[0]); used
   when printing the usage message.  */
char *program_name;
1346 static void
1347 usage (FILE *stream)
1349 fprintf (stream, _("\
1350 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1351 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1352 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1353 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1354 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1355 [--max-database-age D] [-version] [--help]\n\
1356 pattern...\n"),
1357 program_name);
1358 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Codes returned by getopt_long for long options that have no
   single-character equivalent.  They are numbered above CHAR_MAX so
   that they cannot coincide with any short option character.  */
enum
  {
    REGEXTYPE_OPTION = CHAR_MAX + 1,    /* --regextype */
    MAX_DB_AGE = CHAR_MAX + 2           /* --max-database-age */
  };
1367 static struct option const longopts[] =
1369 {"database", required_argument, NULL, 'd'},
1370 {"existing", no_argument, NULL, 'e'},
1371 {"non-existing", no_argument, NULL, 'E'},
1372 {"ignore-case", no_argument, NULL, 'i'},
1373 {"all", no_argument, NULL, 'A'},
1374 {"help", no_argument, NULL, 'h'},
1375 {"version", no_argument, NULL, 'v'},
1376 {"null", no_argument, NULL, '0'},
1377 {"count", no_argument, NULL, 'c'},
1378 {"wholename", no_argument, NULL, 'w'},
1379 {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
1380 {"basename", no_argument, NULL, 'b'},
1381 {"print", no_argument, NULL, 'p'},
1382 {"stdio", no_argument, NULL, 's'},
1383 {"mmap", no_argument, NULL, 'm'},
1384 {"limit", required_argument, NULL, 'l'},
1385 {"regex", no_argument, NULL, 'r'},
1386 {"regextype", required_argument, NULL, REGEXTYPE_OPTION},
1387 {"statistics", no_argument, NULL, 'S'},
1388 {"follow", no_argument, NULL, 'L'},
1389 {"nofollow", no_argument, NULL, 'P'},
1390 {"max-database-age", required_argument, NULL, MAX_DB_AGE},
1391 {NULL, no_argument, NULL, 0}
/* Give up any set-user-ID privileges this process was started with.

   Returns 0 on success.  On any failure the process is terminated; the
   function never returns a failure status.

   Steps:
     1. If the effective uid is root, reduce the supplementary group
        list to just the real gid (setgroups() is only permitted for
        root).
     2. Set the uid to the real uid.
     3. Unless the real user is root, verify that root privileges
        cannot be regained.  */
static int
drop_privs (void)
{
  const char *what = "failed";
  const uid_t orig_euid = geteuid ();
  const uid_t real_uid = getuid ();

  /* Use of setgroups() is restricted to root only. */
  if (0 == orig_euid)
    {
      gid_t groups[1];
      /* The array has exactly one element; index 0 is the only valid
       * slot.  (Writing to groups[1] would overflow the array and hand
       * setgroups() an uninitialised value.)
       */
      groups[0] = getgid ();
      if (0 != setgroups (1, groups))
        {
          what = _("failed to drop group privileges");
          goto fail;
        }
    }

  if (0 != setuid (real_uid))
    {
      what = _("failed to drop setuid privileges");
      goto fail;
    }

  /* Defend against the case where the attacker runs us with the
   * capability to call setuid() turned off, which on some systems
   * will cause the above attempt to drop privileges fail (leaving us
   * privileged).
   *
   * If the real user is root there is nothing to drop and setuid(0)
   * legitimately succeeds, so the check only applies to other users.
   */
  if (0 != real_uid && 0 == setuid (0))
    {
      what = _("Failed to drop privileges");
      /* The call we are complaining about succeeded, so errno holds
       * nothing relevant; clear it so that error() does not append a
       * misleading system error string.
       */
      errno = 0;
      goto fail;
    }

  /* success. */
  return 0;

 fail:
  /* Belt and braces: each call below should terminate the process;
   * the later ones only matter if an earlier one somehow returns.
   */
  error (1, errno, "%s", what);
  abort ();
  kill (0, SIGKILL);
  _exit (1);
  /*NOTREACHED*/
  /* ... we hope. */
  for (;;)
    {
      /* deliberate infinite loop */
    }
}
/* Open the database file NAME for reading and mark the descriptor
   close-on-exec.

   Returns the open file descriptor, or -1 on failure.  On failure
   errno describes the call that failed (open or fcntl), so callers
   can report it directly.  */
static int
opendb (const char *name)
{
  int fd = open (name, O_RDONLY
#if defined(O_LARGEFILE)
                 |O_LARGEFILE
#endif
                 );
  if (fd >= 0)
    {
      /* Make sure it won't survive an exec */
      if (0 != fcntl (fd, F_SETFD, FD_CLOEXEC))
        {
          /* close() may overwrite errno; preserve the fcntl() error,
           * because callers report errno after a failure here.
           */
          const int saved_errno = errno;
          close (fd);
          errno = saved_errno;
          fd = -1;
        }
    }
  return fd;
}
1467 dolocate (int argc, char **argv, int secure_db_fd)
1469 char *dbpath;
1470 unsigned long int found = 0uL;
1471 int optc;
1472 int ignore_case = 0;
1473 int print = 0;
1474 int just_count = 0;
1475 int basename_only = 0;
1476 int use_limit = 0;
1477 int regex = 0;
1478 int regex_options = RE_SYNTAX_EMACS;
1479 int stats = 0;
1480 int op_and = 0;
1481 const char *e;
1482 FILE *fp;
1483 int they_chose_db = 0;
1484 bool did_stdin = false; /* Set to prevent rereading stdin. */
1486 program_name = argv[0];
1488 #ifdef HAVE_SETLOCALE
1489 setlocale (LC_ALL, "");
1490 #endif
1491 bindtextdomain (PACKAGE, LOCALEDIR);
1492 textdomain (PACKAGE);
1493 atexit (close_stdout);
1495 limits.limit = 0;
1496 limits.items_accepted = 0;
1498 quote_opts = clone_quoting_options (NULL);
1499 print_quoted_filename = true;
1501 /* We cannot simultaneously trust $LOCATE_PATH and use the
1502 * setuid-access-controlled database,, since that could cause a leak
1503 * of private data.
1505 dbpath = getenv ("LOCATE_PATH");
1506 if (dbpath)
1508 they_chose_db = 1;
1511 check_existence = ACCEPT_EITHER;
1513 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1514 switch (optc)
1516 case '0':
1517 separator = 0;
1518 print_quoted_filename = false; /* print filename 'raw'. */
1519 break;
1521 case 'A':
1522 op_and = 1;
1523 break;
1525 case 'b':
1526 basename_only = 1;
1527 break;
1529 case 'c':
1530 just_count = 1;
1531 break;
1533 case 'd':
1534 dbpath = optarg;
1535 they_chose_db = 1;
1536 break;
1538 case 'e':
1539 check_existence = ACCEPT_EXISTING;
1540 break;
1542 case 'E':
1543 check_existence = ACCEPT_NON_EXISTING;
1544 break;
1546 case 'i':
1547 ignore_case = 1;
1548 break;
1550 case 'h':
1551 usage (stdout);
1552 return 0;
1554 case MAX_DB_AGE:
1555 /* XXX: nothing in the test suite for this option. */
1556 set_max_db_age(optarg);
1557 break;
1559 case 'p':
1560 print = 1;
1561 break;
1563 case 'v':
1564 printf (_("GNU locate version %s\n"), version_string);
1565 return 0;
1567 case 'w':
1568 basename_only = 0;
1569 break;
1571 case 'r':
1572 regex = 1;
1573 break;
1575 case REGEXTYPE_OPTION:
1576 regex_options = get_regex_type(optarg);
1577 break;
1579 case 'S':
1580 stats = 1;
1581 break;
1583 case 'L':
1584 follow_symlinks = 1;
1585 break;
1587 /* In find, -P and -H differ in the way they handle paths
1588 * given on the command line. This is not relevant for
1589 * locate, but the -H option is supported because it is
1590 * probably more intuitive to do so.
1592 case 'P':
1593 case 'H':
1594 follow_symlinks = 0;
1595 break;
1597 case 'l':
1599 char *end = optarg;
1600 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1601 if (LONGINT_OK != err)
1603 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1605 use_limit = 1;
1607 break;
1609 case 's': /* use stdio */
1610 case 'm': /* use mmap */
1611 /* These options are implemented simply for
1612 * compatibility with FreeBSD
1614 break;
1616 default:
1617 usage (stderr);
1618 return 1;
1622 /* If the user gave the -d option or set LOCATE_PATH,
1623 * relinquish access to the secure database.
1625 if (they_chose_db)
1627 if (secure_db_fd >= 0)
1629 close(secure_db_fd);
1630 secure_db_fd = 0;
1634 if (!just_count && !stats)
1635 print = 1;
1637 if (stats)
1639 if (optind == argc)
1640 use_limit = 0;
1642 else
1644 if (!just_count && optind == argc)
1646 usage (stderr);
1647 return 1;
1652 if (1 == isatty(STDOUT_FILENO))
1653 stdout_is_a_tty = true;
1654 else
1655 stdout_is_a_tty = false;
1657 if (they_chose_db)
1658 next_element (dbpath, 0); /* Initialize. */
1660 /* Bail out early if limit already reached. */
1661 while (!use_limit || limits.limit > limits.items_accepted)
1663 struct stat st;
1664 int fd;
1665 off_t filesize;
1667 statistics.compressed_bytes =
1668 statistics.total_filename_count =
1669 statistics.total_filename_length =
1670 statistics.whitespace_count =
1671 statistics.newline_count =
1672 statistics.highbit_filename_count = 0u;
1674 if (they_chose_db)
1676 /* Take the next element from the list of databases */
1677 e = next_element ((char *) NULL, 0);
1678 if (NULL == e)
1679 break;
1681 if (0 == strcmp (e, "-"))
1683 if (did_stdin)
1685 error (0, 0,
1686 _("warning: the locate database can only be read from stdin once."));
1687 return 0;
1689 else
1691 e = "<stdin>";
1692 fd = 0;
1693 did_stdin = true;
1696 else
1698 if (0 == strlen(e) || 0 == strcmp(e, "."))
1700 e = LOCATE_DB;
1703 /* open the database */
1704 fd = opendb(e);
1705 if (fd < 0)
1707 error (0, errno, "%s", e);
1708 return 0;
1712 else
1714 if (-1 == secure_db_fd)
1716 /* Already searched the database, it's time to exit the loop */
1717 break;
1719 else
1721 e = selected_secure_db;
1722 fd = secure_db_fd;
1723 secure_db_fd = -1;
1727 /* Check the database to see if it is old. */
1728 if (fstat(fd, &st))
1730 error (0, errno, "%s", e);
1731 /* continue anyway */
1732 filesize = (off_t)0;
1734 else
1736 time_t now;
1738 filesize = st.st_size;
1740 if ((time_t)-1 == time(&now))
1742 /* If we can't tell the time, we don't know how old the
1743 * database is. But since the message is just advisory,
1744 * we continue anyway.
1746 error (0, errno, "time system call");
1748 else
1750 double age = difftime(now, st.st_mtime);
1751 double warn_seconds = SECONDS_PER_UNIT * warn_number_units;
1752 if (age > warn_seconds)
1754 /* For example:
1755 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1756 error (0, 0,
1757 _("warning: database `%s' is more than %d %s old (actual age is %.1f %s)"),
1759 warn_number_units, _(warn_name_units),
1760 (age/(double)SECONDS_PER_UNIT), _(warn_name_units));
1765 fp = fdopen(fd, "r");
1766 if (NULL == fp)
1768 error (0, errno, "%s", e);
1769 return 0;
1772 /* Search this database for all patterns simultaneously */
1773 found = search_one_database (argc - optind, &argv[optind],
1774 e, fp, filesize,
1775 ignore_case, print, basename_only,
1776 use_limit, &limits, stats,
1777 op_and, regex, regex_options);
1779 /* Close the databsase (even if it is stdin) */
1780 if (fclose (fp) == EOF)
1782 error (0, errno, "%s", e);
1783 return 0;
1787 if (just_count)
1789 printf("%ld\n", found);
1792 if (found || (use_limit && (limits.limit==0)) || stats )
1793 return 0;
1794 else
1795 return 1;
/* Number of elements in the array A.  A must be a true array, not a
   pointer.  The argument is parenthesised wherever it is used so that
   any array-valued expression can be passed safely.  */
#define ARRAYSIZE(a) (sizeof(a)/sizeof((a)[0]))
1799 static int
1800 open_secure_db(void)
1802 int fd, i;
1804 const char * secure_db_list[] =
1806 LOCATE_DB,
1807 "/var/lib/slocate/slocate.db",
1808 NULL
1810 for (i=0; secure_db_list[i]; ++i)
1812 fd = opendb(secure_db_list[i]);
1813 if (fd >= 0)
1815 selected_secure_db = secure_db_list[i];
1816 return fd;
1819 return -1;
/* Program entry point.  The database is opened first, before
   drop_privs() is called; all remaining work, including command-line
   parsing, happens in dolocate() afterwards.  */
int
main (int argc, char **argv)
{
  const int secure_fd = open_secure_db ();

  drop_privs ();

  return dolocate (argc, argv, secure_fd);
}