1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
67 #include <sys/types.h>
68 #include <grp.h> /* for setgroups() */
75 /* The presence of unistd.h is assumed by gnulib these days, so we
76 * might as well assume it too.
78 /* We need <unistd.h> for isatty(). */
106 # include <libintl.h>
107 # define _(Text) gettext (Text)
109 # define _(Text) Text
110 #define textdomain(Domain)
111 #define bindtextdomain(Package, Directory)
114 # define N_(String) gettext_noop (String)
116 /* We used to use (String) instead of just String, but apparently ISO C
117 * doesn't allow this (at least, that's what HP said when someone reported
118 * this as a compiler bug). This is HP case number 1205608192. See
119 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
120 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
121 * like: static const char buf[] = ("string");
123 # define N_(String) String
126 #include "locatedb.h"
128 #include "../gnulib/lib/xalloc.h"
129 #include "../gnulib/lib/error.h"
130 #include "../gnulib/lib/human.h"
132 #include "closeout.h"
133 #include "nextelem.h"
136 #include "quotearg.h"
137 #include "printquoted.h"
138 #include "regextype.h"
139 #include "gnulib-version.h"
141 /* Note that this evaluates Ch many times. */
143 # define TOUPPER(Ch) toupper (Ch)
144 # define TOLOWER(Ch) tolower (Ch)
146 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
147 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
150 /* typedef enum {false, true} boolean; */
152 /* Warn if a database is older than this. 8 days allows for a weekly
153 update that takes up to a day to perform. */
154 static unsigned int warn_number_units
= 8;
156 /* Printable name of units used in WARN_SECONDS */
157 static const char warn_name_units
[] = N_("days");
158 #define SECONDS_PER_UNIT (60 * 60 * 24)
162 VISIT_CONTINUE
= 1, /* please call the next visitor */
163 VISIT_ACCEPTED
= 2, /* accepted, call no further callbacks for this file */
164 VISIT_REJECTED
= 4, /* rejected, process next file. */
165 VISIT_ABORT
= 8 /* rejected, process no more files. */
168 enum ExistenceCheckType
170 ACCEPT_EITHER
, /* Corresponds to lack of -E/-e option */
171 ACCEPT_EXISTING
, /* Corresponds to option -e */
172 ACCEPT_NON_EXISTING
/* Corresponds to option -E */
175 /* Check for existence of files before printing them out? */
176 enum ExistenceCheckType check_existence
= ACCEPT_EITHER
;
/* Nonzero selects the stat()-based existence visitors (-L, the default);
 * zero selects the lstat()-based ones. */
178 static int follow_symlinks
= 1;
180 /* What to separate the results with. */
181 static int separator
= '\n';
183 static struct quoting_options
* quote_opts
= NULL
;
/* Passed to print_quoted() along with quote_opts when printing entries. */
184 static bool stdout_is_a_tty
;
/* Selects visit_justprint_quoted over visit_justprint_unquoted. */
185 static bool print_quoted_filename
;
/* Set when a pattern or existence-check visitor is installed; print_stats()
 * then labels the counts "Matching Filenames" and skips the compression ratio. */
186 static bool results_were_filtered
;
188 static const char *selected_secure_db
= NULL
;
191 /* Change the number of days old the database can be
192 * before we complain about it.
195 set_max_db_age(const char *s
)
198 unsigned long int val
;
199 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
204 _("The argument argument for option --max-database-age must not be empty"));
208 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
209 * we would not be able to tell if that is the correct answer, or whether it
210 * signifies an error.
213 val
= strtoul(s
, &end
, 10);
215 /* Diagnose number too large, non-numbers and trailing junk. */
216 if ((ULONG_MAX
== val
&& ERANGE
== errno
) ||
217 (0 == val
&& EINVAL
== errno
))
220 _("Invalid argument %s for option --max-database-age"),
221 quotearg_n_style(0, locale_quoting_style
, s
));
225 /* errno wasn't set, don't print its message */
227 _("Invalid argument %s for option --max-database-age"),
228 quotearg_n_style(0, locale_quoting_style
, s
));
232 warn_number_units
= val
;
238 /* Read in a 16-bit int, high byte first (network byte order). */
246 x
= (signed char) fgetc (fp
) << 8;
247 x
|= (fgetc (fp
) & 0xff);
251 const char * const metacharacters
= "*?[]\\";
253 /* Return nonzero if S contains any shell glob characters.
256 contains_metacharacter(const char *s
)
258 if (NULL
== strpbrk(s
, metacharacters
))
266 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
267 * until we reach DELIMITER or end-of-file. We reallocate the buffer
268 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
269 * is made regarding the content of the data (i.e. the implementation is
270 * 8-bit clean, the only delimiter is DELIMITER).
272 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
273 * has been removed from gnulib.
275 * We call the function locate_read_str() to avoid a name clash with the curses
279 locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
286 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
291 needed
= offs
+ nread
+ 1u;
294 char *pnew
= realloc(*buf
, needed
);
297 return -1; /* FAIL */
305 memcpy((*buf
)+offs
, p
, nread
);
315 uintmax_t items_accepted
;
317 static struct locate_limits limits
;
322 uintmax_t compressed_bytes
;
323 uintmax_t total_filename_count
;
324 uintmax_t total_filename_length
;
325 uintmax_t whitespace_count
;
326 uintmax_t newline_count
;
327 uintmax_t highbit_filename_count
;
329 static struct locate_stats statistics
;
332 struct regular_expression
334 struct re_pattern_buffer regex
; /* for --regex */
340 int c
; /* An input byte. */
341 char itemcount
; /* Indicates we're at the beginning of an slocate db. */
342 int count
; /* The length of the prefix shared with the previous database entry. */
344 char *original_filename
; /* The current input database entry. */
345 size_t pathsize
; /* Amount allocated for it. */
346 char *munged_filename
; /* path or base_name(path) */
347 FILE *fp
; /* The pathname database. */
348 const char *dbfile
; /* Its name, or "<stdin>" */
349 int slocatedb_format
; /* Allows us to cope with slocate's format variant */
350 /* for the old database format,
351 the first and second characters of the most common bigrams. */
357 typedef int (*visitfunc
)(struct process_data
*procdata
,
364 struct visitor
*next
;
368 static struct visitor
*inspectors
= NULL
;
369 static struct visitor
*lastinspector
= NULL
;
370 static struct visitor
*past_pat_inspector
= NULL
;
372 static inline int visit(const struct visitor
*p
,
374 struct process_data
*procdata
,
375 const struct visitor
* const stop
)
377 register int result
= accept_flags
;
378 while ( (accept_flags
& result
) && (stop
!= p
) )
380 result
= (p
->inspector
)(procdata
, p
->context
);
386 /* 0 or 1 pattern(s) */
388 process_simple(struct process_data
*procdata
)
390 return visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, NULL
);
393 /* Accept if any pattern matches. */
395 process_or (struct process_data
*procdata
)
399 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_REJECTED
), procdata
, past_pat_inspector
);
400 if (result
== VISIT_CONTINUE
)
401 result
= VISIT_REJECTED
;
402 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
405 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
406 if (VISIT_CONTINUE
== result
)
407 return VISIT_ACCEPTED
;
412 /* Accept if all patterns match. */
414 process_and (struct process_data
*procdata
)
418 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, past_pat_inspector
);
419 if (result
== VISIT_CONTINUE
)
420 result
= VISIT_REJECTED
;
421 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
424 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
425 if (VISIT_CONTINUE
== result
)
426 return VISIT_ACCEPTED
;
431 typedef int (*processfunc
)(struct process_data
*procdata
);
433 static processfunc mainprocessor
= NULL
;
436 add_visitor(visitfunc fn
, void *context
)
438 struct visitor
*p
= xmalloc(sizeof(struct visitor
));
440 p
->context
= context
;
443 if (NULL
== lastinspector
)
445 lastinspector
= inspectors
= p
;
449 lastinspector
->next
= p
;
457 visit_justprint_quoted(struct process_data
*procdata
, void *context
)
460 print_quoted (stdout
, quote_opts
, stdout_is_a_tty
,
462 procdata
->original_filename
);
464 return VISIT_CONTINUE
;
468 visit_justprint_unquoted(struct process_data
*procdata
, void *context
)
471 fputs(procdata
->original_filename
, stdout
);
473 return VISIT_CONTINUE
;
477 visit_old_format(struct process_data
*procdata
, void *context
)
482 /* Get the offset in the path where this path info starts. */
483 if (procdata
->c
== LOCATEDB_OLD_ESCAPE
)
484 procdata
->count
+= getw (procdata
->fp
) - LOCATEDB_OLD_OFFSET
;
486 procdata
->count
+= procdata
->c
- LOCATEDB_OLD_OFFSET
;
488 /* Overlay the old path with the remainder of the new. */
489 for (s
= procdata
->original_filename
+ procdata
->count
;
490 (procdata
->c
= getc (procdata
->fp
)) > LOCATEDB_OLD_ESCAPE
;)
491 if (procdata
->c
< 0200)
492 *s
++ = procdata
->c
; /* An ordinary character. */
495 /* Bigram markers have the high bit set. */
497 *s
++ = procdata
->bigram1
[procdata
->c
];
498 *s
++ = procdata
->bigram2
[procdata
->c
];
502 procdata
->munged_filename
= procdata
->original_filename
;
504 return VISIT_CONTINUE
;
509 visit_locate02_format(struct process_data
*procdata
, void *context
)
515 if (procdata
->slocatedb_format
)
517 if (procdata
->itemcount
== 0)
519 ungetc(procdata
->c
, procdata
->fp
);
523 else if (procdata
->itemcount
== 1)
525 procdata
->count
= procdata
->len
-1;
529 if (procdata
->c
== LOCATEDB_ESCAPE
)
530 procdata
->count
+= (short)get_short (procdata
->fp
);
531 else if (procdata
->c
> 127)
532 procdata
->count
+= procdata
->c
- 256;
534 procdata
->count
+= procdata
->c
;
539 if (procdata
->c
== LOCATEDB_ESCAPE
)
540 procdata
->count
+= (short)get_short (procdata
->fp
);
541 else if (procdata
->c
> 127)
542 procdata
->count
+= procdata
->c
- 256;
544 procdata
->count
+= procdata
->c
;
547 if (procdata
->count
> procdata
->len
|| procdata
->count
< 0)
549 /* This should not happen generally, but since we're
550 * reading in data which is outside our control, we
553 error(1, 0, _("locate database %s is corrupt or invalid"),
554 quotearg_n_style(0, locale_quoting_style
, procdata
->dbfile
));
557 /* Overlay the old path with the remainder of the new. */
558 nread
= locate_read_str (&procdata
->original_filename
,
560 procdata
->fp
, 0, procdata
->count
);
563 procdata
->c
= getc (procdata
->fp
);
564 procdata
->len
= procdata
->count
+ nread
;
565 s
= procdata
->original_filename
+ procdata
->len
- 1; /* Move to the last char in path. */
566 assert (s
[0] != '\0');
567 assert (s
[1] == '\0'); /* Our terminator. */
568 assert (s
[2] == '\0'); /* Added by locate_read_str. */
570 procdata
->munged_filename
= procdata
->original_filename
;
572 if (procdata
->slocatedb_format
)
574 /* Don't increment indefinitely, it might overflow. */
575 if (procdata
->itemcount
< 6)
577 ++(procdata
->itemcount
);
582 return VISIT_CONTINUE
;
586 visit_basename(struct process_data
*procdata
, void *context
)
589 procdata
->munged_filename
= base_name(procdata
->original_filename
);
591 return VISIT_CONTINUE
;
595 /* visit_existing_follow implements -L -e */
597 visit_existing_follow(struct process_data
*procdata
, void *context
)
602 /* munged_filename has been converted in some way (to lower case,
603 * or is just the base name of the file), and original_filename has not.
604 * Hence only original_filename is still actually the name of the file
605 * whose existence we would need to check.
607 if (stat(procdata
->original_filename
, &st
) != 0)
609 return VISIT_REJECTED
;
613 return VISIT_CONTINUE
;
617 /* visit_non_existing_follow implements -L -E */
619 visit_non_existing_follow(struct process_data
*procdata
, void *context
)
624 /* munged_filename has been converted in some way (to lower case,
625 * or is just the base name of the file), and original_filename has not.
626 * Hence only original_filename is still actually the name of the file
627 * whose existence we would need to check.
629 if (stat(procdata
->original_filename
, &st
) == 0)
631 return VISIT_REJECTED
;
635 return VISIT_CONTINUE
;
639 /* visit_existing_nofollow implements -P -e */
641 visit_existing_nofollow(struct process_data
*procdata
, void *context
)
646 /* munged_filename has been converted in some way (to lower case,
647 * or is just the base name of the file), and original_filename has not.
648 * Hence only original_filename is still actually the name of the file
649 * whose existence we would need to check.
651 if (lstat(procdata
->original_filename
, &st
) != 0)
653 return VISIT_REJECTED
;
657 return VISIT_CONTINUE
;
661 /* visit_non_existing_nofollow implements -P -E */
663 visit_non_existing_nofollow(struct process_data
*procdata
, void *context
)
668 /* munged_filename has been converted in some way (to lower case,
669 * or is just the base name of the file), and original_filename has not.
670 * Hence only original_filename is still actually the name of the file
671 * whose existence we would need to check.
673 if (lstat(procdata
->original_filename
, &st
) == 0)
675 return VISIT_REJECTED
;
679 return VISIT_CONTINUE
;
684 visit_substring_match_nocasefold_wide(struct process_data
*procdata
, void *context
)
686 const char *pattern
= context
;
688 if (NULL
!= mbsstr(procdata
->munged_filename
, pattern
))
689 return VISIT_ACCEPTED
;
691 return VISIT_REJECTED
;
695 visit_substring_match_nocasefold_narrow(struct process_data
*procdata
, void *context
)
697 const char *pattern
= context
;
698 assert(MB_CUR_MAX
== 1);
699 if (NULL
!= strstr(procdata
->munged_filename
, pattern
))
700 return VISIT_ACCEPTED
;
702 return VISIT_REJECTED
;
706 visit_substring_match_casefold_wide(struct process_data
*procdata
, void *context
)
708 const char *pattern
= context
;
710 if (NULL
!= mbscasestr(procdata
->munged_filename
, pattern
))
711 return VISIT_ACCEPTED
;
713 return VISIT_REJECTED
;
718 visit_substring_match_casefold_narrow(struct process_data
*procdata
, void *context
)
720 const char *pattern
= context
;
722 assert(MB_CUR_MAX
== 1);
723 if (NULL
!= strcasestr(procdata
->munged_filename
, pattern
))
724 return VISIT_ACCEPTED
;
726 return VISIT_REJECTED
;
731 visit_globmatch_nofold(struct process_data
*procdata
, void *context
)
733 const char *glob
= context
;
734 if (fnmatch(glob
, procdata
->munged_filename
, 0) != 0)
735 return VISIT_REJECTED
;
737 return VISIT_ACCEPTED
;
742 visit_globmatch_casefold(struct process_data
*procdata
, void *context
)
744 const char *glob
= context
;
745 if (fnmatch(glob
, procdata
->munged_filename
, FNM_CASEFOLD
) != 0)
746 return VISIT_REJECTED
;
748 return VISIT_ACCEPTED
;
753 visit_regex(struct process_data
*procdata
, void *context
)
755 struct regular_expression
*p
= context
;
756 const size_t len
= strlen(procdata
->munged_filename
);
758 int rv
= re_search (&p
->regex
, procdata
->munged_filename
,
760 (struct re_registers
*) NULL
);
763 return VISIT_REJECTED
; /* no match (-1), or internal error (-2) */
767 return VISIT_ACCEPTED
; /* match */
773 visit_stats(struct process_data
*procdata
, void *context
)
775 struct locate_stats
*p
= context
;
776 size_t len
= strlen(procdata
->original_filename
);
778 int highbit
, whitespace
, newline
;
780 ++(p
->total_filename_count
);
781 p
->total_filename_length
+= len
;
783 highbit
= whitespace
= newline
= 0;
784 for (s
=procdata
->original_filename
; *s
; ++s
)
786 if ( (int)(*s
) & 128 )
790 newline
= whitespace
= 1;
792 else if (isspace((unsigned char)*s
))
799 ++(p
->highbit_filename_count
);
801 ++(p
->whitespace_count
);
803 ++(p
->newline_count
);
805 return VISIT_CONTINUE
;
810 visit_limit(struct process_data
*procdata
, void *context
)
812 struct locate_limits
*p
= context
;
816 if (++p
->items_accepted
>= p
->limit
)
819 return VISIT_CONTINUE
;
823 visit_count(struct process_data
*procdata
, void *context
)
825 struct locate_limits
*p
= context
;
830 return VISIT_CONTINUE
;
833 /* Emit the statistics.
836 print_stats(int argc
, size_t database_file_size
)
838 char hbuf
[LONGEST_HUMAN_READABLE
+ 1];
840 printf(_("Locate database size: %s bytes\n"),
841 human_readable ((uintmax_t) database_file_size
,
842 hbuf
, human_ceiling
, 1, 1));
844 printf( (results_were_filtered
?
845 _("Matching Filenames: %s ") :
846 _("All Filenames: %s ")),
847 human_readable (statistics
.total_filename_count
,
848 hbuf
, human_ceiling
, 1, 1));
849 printf(_("with a cumulative length of %s bytes"),
850 human_readable (statistics
.total_filename_length
,
851 hbuf
, human_ceiling
, 1, 1));
853 printf(_("\n\tof which %s contain whitespace, "),
854 human_readable (statistics
.whitespace_count
,
855 hbuf
, human_ceiling
, 1, 1));
856 printf(_("\n\t%s contain newline characters, "),
857 human_readable (statistics
.newline_count
,
858 hbuf
, human_ceiling
, 1, 1));
859 printf(_("\n\tand %s contain characters with the high bit set.\n"),
860 human_readable (statistics
.highbit_filename_count
,
861 hbuf
, human_ceiling
, 1, 1));
865 if (results_were_filtered
)
867 printf(_("Some filenames may have been filtered out, "
868 "so we cannot compute the compression ratio.\n"));
872 if (statistics
.total_filename_length
)
874 printf(_("Compression ratio %4.2f%% (higher is better)\n"),
875 100.0 * ((double)statistics
.total_filename_length
876 - (double) database_file_size
)
877 / (double) statistics
.total_filename_length
);
881 printf(_("Compression ratio is undefined\n"));
889 * Return nonzero if the data we read in indicates that we are
890 * looking at a LOCATE02 locate database.
893 looking_at_gnu_locatedb (const char *data
, size_t len
)
895 if (len
< sizeof (LOCATEDB_MAGIC
))
897 else if (0 == memcmp (data
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
898 return 1; /* We saw the magic byte sequence */
904 * Return nonzero if the data we read in indicates that we are
905 * looking at an slocate database.
908 looking_at_slocate_locatedb (const char *filename
,
921 /* Check that the magic number is a one-byte string */
924 if (isdigit((unsigned char)data
[0]))
926 /* looks promising. */
927 *seclevel
= (data
[0] - '0');
931 /* Hmm, well it's probably an slocate database
932 * of some awesomely huge security level, like 2.
933 * We don't know how to handle those.
936 _("locate database %s looks like an slocate "
937 "database but it seems to have security level %c, "
938 "which GNU findutils does not currently support"),
939 quotearg_n_style(0, locale_quoting_style
, filename
),
956 /* Definitely not slocate. */
962 /* Print or count the entries in DBFILE that match shell globbing patterns in
963 ARGV. Return the number of entries matched. */
966 search_one_database (int argc
,
975 struct locate_limits
*plimit
,
981 char *pathpart
; /* A pattern to consider. */
982 int argn
; /* Index to current pattern in argv. */
983 int nread
; /* number of bytes read from an entry. */
984 struct process_data procdata
; /* Storage for data shared with visitors. */
985 int slocate_seclevel
;
986 struct visitor
* pvis
; /* temp for determining past_pat_inspector. */
987 const char *format_name
;
988 enum ExistenceCheckType do_check_existence
;
991 /* We may turn on existence checking for a given database.
992 * We ensure that we can return to the previous behaviour
993 * by using two variables, do_check_existence (which we act on)
994 * and check_existence (which indicates the default before we
995 * adjust it on the basis of what kind of database we're using
997 do_check_existence
= check_existence
;
1001 regex_options
|= RE_ICASE
;
1003 procdata
.len
= procdata
.count
= 0;
1004 procdata
.slocatedb_format
= 0;
1005 procdata
.itemcount
= 0;
1007 procdata
.dbfile
= dbfile
;
1010 /* Set up the inspection regime */
1012 lastinspector
= NULL
;
1013 past_pat_inspector
= NULL
;
1014 results_were_filtered
= false;
1016 procdata
.pathsize
= 1026; /* Increased as necessary by locate_read_str. */
1018 procdata
.pathsize
= 128; /* Increased as necessary by locate_read_str. */
1020 procdata
.original_filename
= xmalloc (procdata
.pathsize
);
1023 nread
= fread (procdata
.original_filename
, 1, SLOCATE_DB_MAGIC_LEN
,
1025 slocate_seclevel
= 0;
1026 if (looking_at_slocate_locatedb(procdata
.dbfile
,
1027 procdata
.original_filename
,
1032 _("%s is an slocate database. "
1033 "Support for these is new, expect problems for now."),
1034 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1036 /* slocate also uses frcode, but with a different header.
1037 * We handle the header here and then work with the data
1038 * in the normal way.
1040 if (slocate_seclevel
> 1)
1042 /* We don't know what those security levels mean,
1043 * so do nothing further
1046 _("%s is an slocate database of unsupported security level %d; skipping it."),
1047 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
),
1051 else if (slocate_seclevel
> 0)
1053 /* Don't show the filenames to the user if they don't exist.
1054 * Showing stats is safe since filenames are only counted
1055 * after the existence check
1057 if (ACCEPT_NON_EXISTING
== check_existence
)
1059 /* Do not allow the user to see a list of filenames that they
1063 _("You specified the -E option, but that option "
1064 "cannot be used with slocate-format databases "
1065 "with a non-zero security level. No results will be "
1066 "generated for this database.\n"));
1069 if (ACCEPT_EXISTING
!= do_check_existence
)
1071 if (enable_print
|| stats
)
1074 _("%s is an slocate database. "
1075 "Turning on the '-e' option."),
1076 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1078 do_check_existence
= ACCEPT_EXISTING
;
1081 add_visitor(visit_locate02_format
, NULL
);
1082 format_name
= "slocate";
1083 procdata
.slocatedb_format
= 1;
1089 procdata
.slocatedb_format
= 0;
1090 nread2
= fread (procdata
.original_filename
+nread
, 1, sizeof (LOCATEDB_MAGIC
)-nread
,
1092 if (looking_at_gnu_locatedb(procdata
.original_filename
, nread
+nread2
))
1094 add_visitor(visit_locate02_format
, NULL
);
1095 format_name
= "GNU LOCATE02";
1097 else /* Use the old format */
1102 /* Read the list of the most common bigrams in the database. */
1105 int more_read
= fread (procdata
.original_filename
+ nread
, 1,
1106 256 - nread
, procdata
.fp
);
1107 if ( (more_read
+ nread
) != 256 )
1110 _("Old-format locate database %s is "
1111 "too short to be valid"),
1112 quotearg_n_style(0, locale_quoting_style
, dbfile
));
1117 for (i
= 0; i
< 128; i
++)
1119 procdata
.bigram1
[i
] = procdata
.original_filename
[i
<< 1];
1120 procdata
.bigram2
[i
] = procdata
.original_filename
[(i
<< 1) + 1];
1122 format_name
= "old";
1123 add_visitor(visit_old_format
, NULL
);
1128 add_visitor(visit_basename
, NULL
);
1130 /* Add an inspector for each pattern we're looking for. */
1131 for ( argn
= 0; argn
< argc
; argn
++ )
1133 results_were_filtered
= true;
1134 pathpart
= argv
[argn
];
1137 struct regular_expression
*p
= xmalloc(sizeof(*p
));
1138 const char *error_message
= NULL
;
1140 memset (&p
->regex
, 0, sizeof (p
->regex
));
1142 re_set_syntax(regex_options
);
1143 p
->regex
.allocated
= 100;
1144 p
->regex
.buffer
= (unsigned char *) xmalloc (p
->regex
.allocated
);
1145 p
->regex
.fastmap
= NULL
;
1146 p
->regex
.syntax
= regex_options
;
1147 p
->regex
.translate
= NULL
;
1149 error_message
= re_compile_pattern (pathpart
, strlen (pathpart
),
1153 error (1, 0, "%s", error_message
);
1157 add_visitor(visit_regex
, p
);
1160 else if (contains_metacharacter(pathpart
))
1163 add_visitor(visit_globmatch_casefold
, pathpart
);
1165 add_visitor(visit_globmatch_nofold
, pathpart
);
1169 /* No glob characters used. Hence we match on
1170 * _any part_ of the filename, not just the
1171 * basename. This seems odd to me, but it is the
1172 * traditional behaviour.
1173 * James Youngman <jay@gnu.org>
1176 if (1 == MB_CUR_MAX
)
1178 /* As an optimisation, use a strstr() matcher if we are
1179 * in a unibyte locale. This can give a x2 speedup in
1180 * the C locale. Some light testing reveals that
1181 * glibc's strstr() is somewhere around 40% faster than
1182 * gnulib's, so we just use strstr().
1184 matcher
= ignore_case
?
1185 visit_substring_match_casefold_narrow
:
1186 visit_substring_match_nocasefold_narrow
;
1190 matcher
= ignore_case
?
1191 visit_substring_match_casefold_wide
:
1192 visit_substring_match_nocasefold_wide
;
1194 add_visitor(matcher
, pathpart
);
1198 pvis
= lastinspector
;
1200 /* We add visit_existing_*() as late as possible to reduce the
1201 * number of stat() calls.
1203 switch (do_check_existence
)
1205 case ACCEPT_EXISTING
:
1206 results_were_filtered
= true;
1207 if (follow_symlinks
) /* -L, default */
1208 add_visitor(visit_existing_follow
, NULL
);
1210 add_visitor(visit_existing_nofollow
, NULL
);
1213 case ACCEPT_NON_EXISTING
:
1214 results_were_filtered
= true;
1215 if (follow_symlinks
) /* -L, default */
1216 add_visitor(visit_non_existing_follow
, NULL
);
1218 add_visitor(visit_non_existing_nofollow
, NULL
);
1221 case ACCEPT_EITHER
: /* Default, neither -E nor -e */
1222 /* do nothing; no extra processing. */
1226 /* Security issue: The stats visitor must be added immediately
1227 * before the print visitor, because otherwise the -S option would
1228 * leak information about files that the caller cannot see.
1231 add_visitor(visit_stats
, &statistics
);
1235 if (print_quoted_filename
)
1236 add_visitor(visit_justprint_quoted
, NULL
);
1238 add_visitor(visit_justprint_unquoted
, NULL
);
1243 add_visitor(visit_limit
, plimit
);
1245 add_visitor(visit_count
, plimit
);
1250 past_pat_inspector
= pvis
->next
;
1252 mainprocessor
= process_and
;
1254 mainprocessor
= process_or
;
1257 mainprocessor
= process_simple
;
1261 printf(_("Database %s is in the %s format.\n"),
1267 procdata
.c
= getc (procdata
.fp
);
1268 /* If we are searching for filename patterns, the inspector list
1269 * will contain an entry for each pattern for which we are searching.
1271 while ( (procdata
.c
!= EOF
) &&
1272 (VISIT_ABORT
!= (mainprocessor
)(&procdata
)) )
1274 /* Do nothing; all the work is done in the visitor functions. */
1280 print_stats(argc
, filesize
);
1283 if (ferror (procdata
.fp
))
1285 error (0, errno
, "%s",
1286 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1289 return plimit
->items_accepted
;
1295 extern char *version_string
;
1297 /* The name this program was run with. */
1301 usage (FILE *stream
)
1303 fprintf (stream
, _("\
1304 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1305 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1306 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1307 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1308 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1309 [--max-database-age D] [--version] [--help]\n\
1312 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
1316 REGEXTYPE_OPTION
= CHAR_MAX
+ 1,
/* Long-option table handed to getopt_long.  Every entry has a NULL
 * flag field, so getopt_long returns the entry's last field: most are
 * letter codes, while --regextype and --max-database-age return the
 * dedicated codes REGEXTYPE_OPTION and MAX_DB_AGE.  --wholepath is a
 * synonym for --wholename.  The final all-NULL/zero entry terminates
 * the table. */
1321 static struct option
const longopts
[] =
1323 {"database", required_argument
, NULL
, 'd'},
1324 {"existing", no_argument
, NULL
, 'e'},
1325 {"non-existing", no_argument
, NULL
, 'E'},
1326 {"ignore-case", no_argument
, NULL
, 'i'},
1327 {"all", no_argument
, NULL
, 'A'},
1328 {"help", no_argument
, NULL
, 'h'},
1329 {"version", no_argument
, NULL
, 'v'},
1330 {"null", no_argument
, NULL
, '0'},
1331 {"count", no_argument
, NULL
, 'c'},
1332 {"wholename", no_argument
, NULL
, 'w'},
1333 {"wholepath", no_argument
, NULL
, 'w'}, /* Synonym. */
1334 {"basename", no_argument
, NULL
, 'b'},
1335 {"print", no_argument
, NULL
, 'p'},
1336 {"stdio", no_argument
, NULL
, 's'},
1337 {"mmap", no_argument
, NULL
, 'm'},
1338 {"limit", required_argument
, NULL
, 'l'},
1339 {"regex", no_argument
, NULL
, 'r'},
1340 {"regextype", required_argument
, NULL
, REGEXTYPE_OPTION
},
1341 {"statistics", no_argument
, NULL
, 'S'},
1342 {"follow", no_argument
, NULL
, 'L'},
1343 {"nofollow", no_argument
, NULL
, 'P'},
1344 {"max-database-age", required_argument
, NULL
, MAX_DB_AGE
},
1345 {NULL
, no_argument
, NULL
, 0}
1352 const char * what
= "failed";
1353 const uid_t orig_euid
= geteuid();
1354 const uid_t uid
= getuid();
1355 const gid_t gid
= getgid();
1357 /* Use of setgroups() is restricted to root only. */
1360 /* We're either root or running setuid-root. */
1363 if (0 != setgroups(1u, groups
))
1365 what
= _("failed to drop group privileges");
1370 /* Drop any setuid privileges */
1371 if (uid
!= orig_euid
)
1375 /* We're really root anyway, but are setuid to something else. Leave it. */
1380 if (0 != setuid(getuid()))
1382 what
= _("failed to drop setuid privileges");
1386 /* Defend against the case where the attacker runs us with the
1387 * capability to call setuid() turned off, which on some systems
1388 * will cause the above attempt to drop privileges to fail (leaving us
1393 /* Check that we can no longer switch back to root */
1396 what
= _("Failed to fully drop privileges");
1397 /* The errno value here is not interesting (since
1398 * the system call we are complaining about
1399 * succeeded when we wanted it to fail). Arrange
1400 * for the call to error() not to print the errno
1401 * value by setting errno=0.
1410 /* Drop any setgid privileges */
1412 if (0 != setgid(gid
))
1414 what
= _("failed to drop setgid privileges");
1422 error(1, errno
, "%s",
1423 quotearg_n_style(0, locale_quoting_style
, what
));
1431 /* deliberate infinite loop */
1436 opendb(const char *name
)
/* Open the database file NAME read-only and mark the resulting
 * descriptor close-on-exec (fcntl F_SETFD / FD_CLOEXEC).  The open()
 * flags are extended under "#if defined(O_LARGEFILE)"; the flag line
 * itself is not visible in this excerpt.
 * NOTE(review): the handling of a failed open() or fcntl() and the
 * return statement are not visible here -- presumably the descriptor
 * (or -1 on failure) is returned; confirm. */
1438 int fd
= open(name
, O_RDONLY
1439 #if defined(O_LARGEFILE)
1445 /* Make sure it won't survive an exec */
1446 if (0 != fcntl(fd
, F_SETFD
, FD_CLOEXEC
))
1456 dolocate (int argc
, char **argv
, int secure_db_fd
)
/* Main driver.  Parses the command line with getopt_long, then walks
 * the database list with next_element and hands each opened database
 * to search_one_database together with the patterns argv[optind..].
 * SECURE_DB_FD is an already-open descriptor for the access-controlled
 * database, or negative if there is none; it is closed when the user
 * selects a database with -d or LOCATE_PATH.
 * NOTE(review): the return statements are not visible in this
 * excerpt; confirm the exit-status convention against the full file. */
1459 unsigned long int found
= 0uL;
1461 int ignore_case
= 0;
1464 int basename_only
= 0;
1467 int regex_options
= RE_SYNTAX_EMACS
;
1472 int they_chose_db
= 0;
1473 bool did_stdin
= false; /* Set to prevent rereading stdin. */
1475 program_name
= argv
[0];
1477 #ifdef HAVE_SETLOCALE
1478 setlocale (LC_ALL
, "");
1480 bindtextdomain (PACKAGE
, LOCALEDIR
);
1481 textdomain (PACKAGE
);
1482 atexit (close_stdout
);
1485 limits
.items_accepted
= 0;
1487 quote_opts
= clone_quoting_options (NULL
);
1488 print_quoted_filename
= true;
1490 /* We cannot simultaneously trust $LOCATE_PATH and use the
1491 * setuid-access-controlled database, since that could cause a leak
1494 dbpath
= getenv ("LOCATE_PATH");
1500 check_existence
= ACCEPT_EITHER
;
1502 while ((optc
= getopt_long (argc
, argv
, "Abcd:eEil:prsm0SwHPL", longopts
, (int *) 0)) != -1)
1507 print_quoted_filename
= false; /* print filename 'raw'. */
1528 check_existence
= ACCEPT_EXISTING
;
1532 check_existence
= ACCEPT_NON_EXISTING
;
1544 /* XXX: nothing in the test suite for this option. */
1545 set_max_db_age(optarg
);
1553 printf (_("GNU locate version %s\n"), version_string
);
1554 printf (_("Built using GNU gnulib version %s\n"), gnulib_version
);
1565 case REGEXTYPE_OPTION
:
1566 regex_options
= get_regex_type(optarg
);
1574 follow_symlinks
= 1;
1577 /* In find, -P and -H differ in the way they handle paths
1578 * given on the command line. This is not relevant for
1579 * locate, but the -H option is supported because it is
1580 * probably more intuitive to do so.
1584 follow_symlinks
= 0;
1590 strtol_error err
= xstrtoumax(optarg
, &end
, 10, &limits
.limit
, NULL
);
1591 if (LONGINT_OK
!= err
)
1593 STRTOL_FATAL_ERROR(optarg
, _("argument to --limit"), err
);
1599 case 's': /* use stdio */
1600 case 'm': /* use mmap */
1601 /* These options are implemented simply for
1602 * compatibility with FreeBSD
1612 /* If the user gave the -d option or set LOCATE_PATH,
1613 * relinquish access to the secure database.
1617 if (secure_db_fd
>= 0)
1619 close(secure_db_fd
);
1624 if (!just_count
&& !stats
)
1634 if (!just_count
&& optind
== argc
)
1642 if (1 == isatty(STDOUT_FILENO
))
1643 stdout_is_a_tty
= true;
1645 stdout_is_a_tty
= false;
1648 next_element (dbpath
, 0); /* Initialize. */
1650 /* Bail out early if limit already reached. */
1651 while (!use_limit
|| limits
.limit
> limits
.items_accepted
)
1657 statistics
.compressed_bytes
=
1658 statistics
.total_filename_count
=
1659 statistics
.total_filename_length
=
1660 statistics
.whitespace_count
=
1661 statistics
.newline_count
=
1662 statistics
.highbit_filename_count
= 0u;
1666 /* Take the next element from the list of databases */
1667 e
= next_element ((char *) NULL
, 0);
1671 if (0 == strcmp (e
, "-"))
1676 _("warning: the locate database can only be read from stdin once."));
1688 if (0 == strlen(e
) || 0 == strcmp(e
, "."))
1693 /* open the database */
1697 error (0, errno
, "%s",
1698 quotearg_n_style(0, locale_quoting_style
, e
));
1705 if (-1 == secure_db_fd
)
1707 /* Already searched the database, it's time to exit the loop */
1712 e
= selected_secure_db
;
1718 /* Check the database to see if it is old. */
1721 error (0, errno
, "%s",
1722 quotearg_n_style(0, locale_quoting_style
, e
));
1723 /* continue anyway */
1724 filesize
= (off_t
)0;
1730 filesize
= st
.st_size
;
1732 if ((time_t)-1 == time(&now
))
1734 /* If we can't tell the time, we don't know how old the
1735 * database is. But since the message is just advisory,
1736 * we continue anyway.
1738 error (0, errno
, "time system call");
1742 double age
= difftime(now
, st
.st_mtime
);
1743 double warn_seconds
= SECONDS_PER_UNIT
* warn_number_units
;
1744 if (age
> warn_seconds
)
1747 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1749 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1750 quotearg_n_style(0, locale_quoting_style
, e
),
1751 warn_number_units
, _(warn_name_units
),
1752 (age
/(double)SECONDS_PER_UNIT
), _(warn_name_units
));
1757 fp
= fdopen(fd
, "r");
1760 error (0, errno
, "%s",
1761 quotearg_n_style(0, locale_quoting_style
, e
));
1765 /* Search this database for all patterns simultaneously */
1766 found
= search_one_database (argc
- optind
, &argv
[optind
],
1768 ignore_case
, print
, basename_only
,
1769 use_limit
, &limits
, stats
,
1770 op_and
, regex
, regex_options
);
1772 /* Close the database (even if it is stdin) */
1773 if (fclose (fp
) == EOF
)
1775 error (0, errno
, "%s",
1776 quotearg_n_style(0, locale_quoting_style
, e
));
/* found is declared unsigned long int, so it must be printed with %lu;
 * %ld mismatches the argument type and misprints large counts. */
1783 printf("%lu\n", found
);
1786 if (found
|| (use_limit
&& (limits
.limit
==0)) || stats
)
/* Number of elements in the array A.  Only meaningful for true arrays,
 * not pointers.  The argument is parenthesised before indexing so that
 * an expression argument is subscripted as a whole. */
1792 #define ARRAYSIZE(a) (sizeof(a)/sizeof((a)[0]))
1794 open_secure_db(void)
/* Try each path in secure_db_list in order, opening it with opendb();
 * the loop stops at the first null entry in the list.  The path that
 * is chosen is recorded in selected_secure_db.
 * NOTE(review): the success test and the return statements are not
 * visible in this excerpt -- presumably the first descriptor >= 0 is
 * returned, and -1 if no candidate could be opened; confirm. */
1798 const char * secure_db_list
[] =
1801 "/var/lib/slocate/slocate.db",
1804 for (i
=0; secure_db_list
[i
]; ++i
)
1806 fd
= opendb(secure_db_list
[i
]);
1809 selected_secure_db
= secure_db_list
[i
];
1817 main (int argc
, char **argv
)
/* Entry point.  Opens the secure database first, then passes argc,
 * argv and that descriptor to dolocate, whose result becomes the
 * process exit status.
 * NOTE(review): the lines between these two statements are missing
 * from this excerpt -- presumably privileges are dropped there, after
 * the secure database has been opened; confirm. */
1819 int dbfd
= open_secure_db();
1822 return dolocate(argc
, argv
, dbfd
);