1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
67 #include <sys/types.h>
68 #include <grp.h> /* for setgroups() */
75 /* The presence of unistd.h is assumed by gnulib these days, so we
76 * might as well assume it too.
78 /* We need <unistd.h> for isatty(). */
102 # include <libintl.h>
103 # define _(Text) gettext (Text)
105 # define _(Text) Text
106 #define textdomain(Domain)
107 #define bindtextdomain(Package, Directory)
110 # define N_(String) gettext_noop (String)
112 /* We used to use (String) instead of just String, but apparently ISO C
113 * doesn't allow this (at least, that's what HP said when someone reported
114 * this as a compiler bug). This is HP case number 1205608192. See
115 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
116 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
117 * like: static const char buf[] = ("string");
119 # define N_(String) String
122 #include "locatedb.h"
128 #include "closeout.h"
129 #include "nextelem.h"
132 #include "quotearg.h"
133 #include "printquoted.h"
134 #include "regextype.h"
135 #include "gnulib-version.h"
137 /* Note that this evaluates Ch many times. */
139 # define TOUPPER(Ch) toupper (Ch)
140 # define TOLOWER(Ch) tolower (Ch)
142 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
143 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
146 /* typedef enum {false, true} boolean; */
148 /* Warn if a database is older than this. 8 days allows for a weekly
149 update that takes up to a day to perform. */
150 static unsigned int warn_number_units
= 8;
152 /* Printable name of units used in WARN_SECONDS */
153 static const char warn_name_units
[] = N_("days");
154 #define SECONDS_PER_UNIT (60 * 60 * 24)
/* Result codes returned by visitor callbacks.  The values are distinct
 * single bits (1, 2, 4, 8), so several codes can be OR-ed into a mask and
 * tested with `&', which is how visit() uses its accept_flags value. */
158 VISIT_CONTINUE
= 1, /* please call the next visitor */
159 VISIT_ACCEPTED
= 2, /* accepted, call no further callbacks for this file */
160 VISIT_REJECTED
= 4, /* rejected, process next file. */
161 VISIT_ABORT
= 8 /* rejected, process no more files. */
164 enum ExistenceCheckType
166 ACCEPT_EITHER
, /* Corresponds to lack of -E/-e option */
167 ACCEPT_EXISTING
, /* Corresponds to option -e */
168 ACCEPT_NON_EXISTING
/* Corresponds to option -E */
171 /* Check for existence of files before printing them out? */
172 enum ExistenceCheckType check_existence
= ACCEPT_EITHER
;
174 static int follow_symlinks
= 1;
176 /* What to separate the results with. */
177 static int separator
= '\n';
179 static struct quoting_options
* quote_opts
= NULL
;
180 static bool stdout_is_a_tty
;
181 static bool print_quoted_filename
;
182 static bool results_were_filtered
;
184 static const char *selected_secure_db
= NULL
;
187 /* Change the number of days old the database can be
188 * before we complain about it.
191 set_max_db_age(const char *s
)
194 unsigned long int val
;
195 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
200 _("The argument argument for option --max-database-age must not be empty"));
204 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
205 * we would not be able to tell if that is the correct answer, or whether it
206 * signifies an error.
209 val
= strtoul(s
, &end
, 10);
211 /* Diagnose number too large, non-numbers and trailing junk. */
212 if ((ULONG_MAX
== val
&& ERANGE
== errno
) ||
213 (0 == val
&& EINVAL
== errno
))
216 _("Invalid argument %s for option --max-database-age"),
217 quotearg_n_style(0, locale_quoting_style
, s
));
221 /* errno wasn't set, don't print its message */
223 _("Invalid argument %s for option --max-database-age"),
224 quotearg_n_style(0, locale_quoting_style
, s
));
228 warn_number_units
= val
;
234 /* Read in a 16-bit int, high byte first (network byte order). */
/* The first byte read is cast to signed char before being shifted into the
 * high half, so the assembled value carries that byte's sign; the second
 * byte is masked to its low 8 bits and OR-ed into the low half.
 * NOTE(review): left-shifting a negative value is undefined behaviour in
 * ISO C, and neither fgetc() result is compared against EOF in the lines
 * visible here -- confirm how a truncated database is handled. */
242 x
= (signed char) fgetc (fp
) << 8;
243 x
|= (fgetc (fp
) & 0xff);
247 const char * const metacharacters
= "*?[]\\";
249 /* Return nonzero if S contains any shell glob characters.
252 contains_metacharacter(const char *s
)
254 if (NULL
== strpbrk(s
, metacharacters
))
262 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
263 * until we reach DELIMITER or end-of-file. We reallocate the buffer
264 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
265 * is made regarding the content of the data (i.e. the implementation is
266 * 8-bit clean, the only delimiter is DELIMITER).
268 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
269 * has been removed from gnulib.
271 * We call the function locate_read_str() to avoid a name clash with the curses
275 locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
282 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
287 needed
= offs
+ nread
+ 1u;
290 char *pnew
= realloc(*buf
, needed
);
293 return -1; /* FAIL */
301 memcpy((*buf
)+offs
, p
, nread
);
311 uintmax_t items_accepted
;
313 static struct locate_limits limits
;
318 uintmax_t compressed_bytes
;
319 uintmax_t total_filename_count
;
320 uintmax_t total_filename_length
;
321 uintmax_t whitespace_count
;
322 uintmax_t newline_count
;
323 uintmax_t highbit_filename_count
;
325 static struct locate_stats statistics
;
328 struct regular_expression
330 struct re_pattern_buffer regex
; /* for --regex */
336 int c
; /* An input byte. */
337 char itemcount
; /* Indicates we're at the beginning of an slocate db. */
338 int count
; /* The length of the prefix shared with the previous database entry. */
340 char *original_filename
; /* The current input database entry. */
341 size_t pathsize
; /* Amount allocated for it. */
342 char *munged_filename
; /* path or base_name(path) */
343 FILE *fp
; /* The pathname database. */
344 const char *dbfile
; /* Its name, or "<stdin>" */
345 int slocatedb_format
; /* Allows us to cope with slocate's format variant */
346 GetwordEndianState endian_state
;
347 /* for the old database format,
348 the first and second characters of the most common bigrams. */
354 typedef int (*visitfunc
)(struct process_data
*procdata
,
361 struct visitor
*next
;
/* Head of the singly linked list of visitors; the process_*() functions
 * start their walk here. */
365 static struct visitor
*inspectors
= NULL
;
/* Last node of the list; add_visitor() links new nodes after it.  NULL
 * means the list is empty. */
366 static struct visitor
*lastinspector
= NULL
;
/* Boundary between the first and second walks made by process_or() and
 * process_and(): the first walk stops here, the second starts here.
 * search_one_database() sets it to the node following the last inspector
 * that existed once the per-pattern inspectors had been added. */
367 static struct visitor
*past_pat_inspector
= NULL
;
/* Run PROCDATA through the visitor chain that starts at P.
 * RESULT is seeded with accept_flags, and inspectors keep being invoked for
 * as long as the most recent result shares a bit with accept_flags and P is
 * not equal to STOP.  Each inspector is called with PROCDATA and the
 * CONTEXT pointer stored in its own list node.
 * NOTE(review): the step that advances P and the final return are not in
 * the lines shown here. */
369 static inline int visit(const struct visitor
*p
,
371 struct process_data
*procdata
,
372 const struct visitor
* const stop
)
374 register int result
= accept_flags
;
375 while ( (accept_flags
& result
) && (stop
!= p
) )
377 result
= (p
->inspector
)(procdata
, p
->context
);
383 /* 0 or 1 pattern(s) */
/* Walks the whole inspector list (the stop pointer is NULL) and returns
 * whatever visit() returns.  The accept mask is CONTINUE|ACCEPTED, so the
 * walk goes on while inspectors answer with either of those codes. */
385 process_simple(struct process_data
*procdata
)
387 return visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, NULL
);
390 /* Accept if any pattern matches. */
392 process_or (struct process_data
*procdata
)
/* First walk: from `inspectors' up to, but not including,
 * past_pat_inspector.  The accept mask is CONTINUE|REJECTED, so the walk
 * ends as soon as an inspector returns a code outside that mask
 * (VISIT_ACCEPTED or VISIT_ABORT). */
396 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_REJECTED
), procdata
, past_pat_inspector
);
/* A walk that finishes still holding VISIT_CONTINUE is treated as a
 * rejection. */
397 if (result
== VISIT_CONTINUE
)
398 result
= VISIT_REJECTED
;
/* NOTE(review): the statement guarded by the next test is not in the lines
 * shown; presumably it returns RESULT to the caller -- confirm. */
399 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
/* Second walk: the inspectors from past_pat_inspector to the end of the
 * list.  Only VISIT_CONTINUE keeps this walk going, and a final
 * VISIT_CONTINUE is reported to the caller as VISIT_ACCEPTED. */
402 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
403 if (VISIT_CONTINUE
== result
)
404 return VISIT_ACCEPTED
;
409 /* Accept if all patterns match. */
411 process_and (struct process_data
*procdata
)
/* First walk: from `inspectors' up to, but not including,
 * past_pat_inspector.  The accept mask is CONTINUE|ACCEPTED, so the walk
 * ends as soon as an inspector returns a code outside that mask
 * (VISIT_REJECTED or VISIT_ABORT). */
415 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, past_pat_inspector
);
/* A walk that finishes still holding VISIT_CONTINUE is treated as a
 * rejection. */
416 if (result
== VISIT_CONTINUE
)
417 result
= VISIT_REJECTED
;
/* NOTE(review): the statement guarded by the next test is not in the lines
 * shown; presumably it returns RESULT to the caller -- confirm. */
418 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
/* Second walk: the inspectors from past_pat_inspector to the end of the
 * list.  Only VISIT_CONTINUE keeps this walk going, and a final
 * VISIT_CONTINUE is reported to the caller as VISIT_ACCEPTED. */
421 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
422 if (VISIT_CONTINUE
== result
)
423 return VISIT_ACCEPTED
;
428 typedef int (*processfunc
)(struct process_data
*procdata
);
430 static processfunc mainprocessor
= NULL
;
/* Append a new visitor node, allocated with xmalloc(), to the inspector
 * list.  CONTEXT is stored in the node.  When the list is empty
 * (lastinspector is NULL) the node becomes both `inspectors' and
 * `lastinspector'; otherwise it is linked after the current last node.
 * NOTE(review): FN is presumably stored in the node's `inspector' member
 * (the one visit() calls), and `lastinspector' presumably advanced to the
 * new node -- neither statement is in the lines shown here. */
433 add_visitor(visitfunc fn
, void *context
)
435 struct visitor
*p
= xmalloc(sizeof(struct visitor
));
437 p
->context
= context
;
440 if (NULL
== lastinspector
)
442 lastinspector
= inspectors
= p
;
446 lastinspector
->next
= p
;
/* Visitor: write the entry's original filename to stdout through
 * print_quoted(), handing it the quoting options and the stdout-is-a-tty
 * flag.  Returns VISIT_CONTINUE. */
452 visit_justprint_quoted(struct process_data
*procdata
, void *context
)
455 print_quoted (stdout
, quote_opts
, stdout_is_a_tty
,
457 procdata
->original_filename
);
459 return VISIT_CONTINUE
;
/* Visitor: write the entry's original filename to stdout verbatim with
 * fputs(), without any quoting.  Returns VISIT_CONTINUE. */
463 visit_justprint_unquoted(struct process_data
*procdata
, void *context
)
466 fputs(procdata
->original_filename
, stdout
);
468 return VISIT_CONTINUE
;
/* Carries the diagnostic used when a database holds a filename longer than
 * locate can handle; the message names the database.
 * NOTE(review): the call that emits the message, and whether it terminates
 * the program, are not in the lines shown here -- confirm. */
472 toolong (struct process_data
*procdata
)
475 _("locate database %s contains a "
476 "filename longer than locate can handle"),
/* Make sure the filename buffer in PROCDATA is large enough for
 * SIZ1 + SIZ2 bytes.  The sum is first checked against SIZE_MAX so that it
 * cannot wrap.  If the recorded pathsize is smaller than the sum, pathsize
 * is raised to the sum and original_filename is reallocated through
 * x2nrealloc().
 * NOTE(review): what happens in the overflow branch is not in the lines
 * shown; presumably it reports the problem via toolong() -- confirm. */
481 extend (struct process_data
*procdata
, size_t siz1
, size_t siz2
)
483 /* Figure out if the addition operation is safe before performing it. */
484 if (SIZE_MAX
- siz1
< siz2
)
488 else if (procdata
->pathsize
< (siz1
+siz2
))
490 procdata
->pathsize
= siz1
+siz2
;
491 procdata
->original_filename
= x2nrealloc (procdata
->original_filename
,
498 visit_old_format(struct process_data
*procdata
, void *context
)
503 if (EOF
== procdata
->c
)
506 /* Get the offset in the path where this path info starts. */
507 if (procdata
->c
== LOCATEDB_OLD_ESCAPE
)
512 procdata
->count
-= LOCATEDB_OLD_OFFSET
;
513 minval
= (0 - procdata
->count
);
514 if (procdata
->count
>= 0)
515 maxval
= (procdata
->len
- procdata
->count
);
517 maxval
= (procdata
->len
- 0);
518 word
= getword(procdata
->fp
, procdata
->dbfile
,
519 minval
, maxval
, &procdata
->endian_state
);
520 procdata
->count
+= word
;
521 assert(procdata
->count
>= 0);
525 procdata
->count
+= (procdata
->c
- LOCATEDB_OLD_OFFSET
);
526 assert(procdata
->count
>= 0);
529 /* Overlay the old path with the remainder of the new. Read
530 * more data until we get to the next filename.
532 for (i
=procdata
->count
;
533 (procdata
->c
= getc (procdata
->fp
)) > LOCATEDB_OLD_ESCAPE
;)
535 if (EOF
== procdata
->c
)
538 if (procdata
->c
< 0200)
540 /* An ordinary character. */
541 extend (procdata
, i
, 1u);
542 procdata
->original_filename
[i
++] = procdata
->c
;
546 /* Bigram markers have the high bit set. */
547 extend (procdata
, i
, 2u);
549 procdata
->original_filename
[i
++] = procdata
->bigram1
[procdata
->c
];
550 procdata
->original_filename
[i
++] = procdata
->bigram2
[procdata
->c
];
554 /* Consider the case where we executed the loop body zero times; we
555 * still need space for the terminating null byte.
557 extend (procdata
, i
, 1u);
558 procdata
->original_filename
[i
] = 0;
560 procdata
->munged_filename
= procdata
->original_filename
;
562 return VISIT_CONTINUE
;
566 visit_locate02_format(struct process_data
*procdata
, void *context
)
572 if (procdata
->slocatedb_format
)
574 if (procdata
->itemcount
== 0)
576 ungetc(procdata
->c
, procdata
->fp
);
580 else if (procdata
->itemcount
== 1)
582 procdata
->count
= procdata
->len
-1;
586 if (procdata
->c
== LOCATEDB_ESCAPE
)
587 procdata
->count
+= (short)get_short (procdata
->fp
);
588 else if (procdata
->c
> 127)
589 procdata
->count
+= procdata
->c
- 256;
591 procdata
->count
+= procdata
->c
;
596 if (procdata
->c
== LOCATEDB_ESCAPE
)
597 procdata
->count
+= (short)get_short (procdata
->fp
);
598 else if (procdata
->c
> 127)
599 procdata
->count
+= procdata
->c
- 256;
601 procdata
->count
+= procdata
->c
;
604 if (procdata
->count
> procdata
->len
|| procdata
->count
< 0)
606 /* This should not happen generally, but since we're
607 * reading in data which is outside our control, we
610 error(1, 0, _("locate database %s is corrupt or invalid"),
611 quotearg_n_style(0, locale_quoting_style
, procdata
->dbfile
));
614 /* Overlay the old path with the remainder of the new. */
615 nread
= locate_read_str (&procdata
->original_filename
,
617 procdata
->fp
, 0, procdata
->count
);
620 procdata
->c
= getc (procdata
->fp
);
621 procdata
->len
= procdata
->count
+ nread
;
622 s
= procdata
->original_filename
+ procdata
->len
- 1; /* Move to the last char in path. */
623 assert (s
[0] != '\0');
624 assert (s
[1] == '\0'); /* Our terminator. */
625 assert (s
[2] == '\0'); /* Added by locate_read_str. */
627 procdata
->munged_filename
= procdata
->original_filename
;
629 if (procdata
->slocatedb_format
)
631 /* Don't increment indefinitely, it might overflow. */
632 if (procdata
->itemcount
< 6)
634 ++(procdata
->itemcount
);
639 return VISIT_CONTINUE
;
/* Visitor: point munged_filename at the result of base_name() applied to
 * the original filename, so that later inspectors reading munged_filename
 * see only that part.  original_filename itself is left as it was.
 * Returns VISIT_CONTINUE. */
643 visit_basename(struct process_data
*procdata
, void *context
)
646 procdata
->munged_filename
= base_name(procdata
->original_filename
);
648 return VISIT_CONTINUE
;
652 /* visit_existing_follow implements -L -e */
654 visit_existing_follow(struct process_data
*procdata
, void *context
)
659 /* munged_filename has been converted in some way (to lower case,
660 * or is just the base name of the file), and original_filename has not.
661 * Hence only original_filename is still actually the name of the file
662 * whose existence we would need to check.
664 if (stat(procdata
->original_filename
, &st
) != 0)
666 return VISIT_REJECTED
;
670 return VISIT_CONTINUE
;
674 /* visit_non_existing_follow implements -L -E */
676 visit_non_existing_follow(struct process_data
*procdata
, void *context
)
681 /* munged_filename has been converted in some way (to lower case,
682 * or is just the base name of the file), and original_filename has not.
683 * Hence only original_filename is still actually the name of the file
684 * whose existence we would need to check.
686 if (stat(procdata
->original_filename
, &st
) == 0)
688 return VISIT_REJECTED
;
692 return VISIT_CONTINUE
;
696 /* visit_existing_nofollow implements -P -e */
698 visit_existing_nofollow(struct process_data
*procdata
, void *context
)
703 /* munged_filename has been converted in some way (to lower case,
704 * or is just the base name of the file), and original_filename has not.
705 * Hence only original_filename is still actually the name of the file
706 * whose existence we would need to check.
708 if (lstat(procdata
->original_filename
, &st
) != 0)
710 return VISIT_REJECTED
;
714 return VISIT_CONTINUE
;
718 /* visit_non_existing_nofollow implements -P -E */
720 visit_non_existing_nofollow(struct process_data
*procdata
, void *context
)
725 /* munged_filename has been converted in some way (to lower case,
726 * or is just the base name of the file), and original_filename has not.
727 * Hence only original_filename is still actually the name of the file
728 * whose existence we would need to check.
730 if (lstat(procdata
->original_filename
, &st
) == 0)
732 return VISIT_REJECTED
;
736 return VISIT_CONTINUE
;
741 visit_substring_match_nocasefold_wide(struct process_data
*procdata
, void *context
)
743 const char *pattern
= context
;
745 if (NULL
!= mbsstr(procdata
->munged_filename
, pattern
))
746 return VISIT_ACCEPTED
;
748 return VISIT_REJECTED
;
752 visit_substring_match_nocasefold_narrow(struct process_data
*procdata
, void *context
)
754 const char *pattern
= context
;
755 assert(MB_CUR_MAX
== 1);
756 if (NULL
!= strstr(procdata
->munged_filename
, pattern
))
757 return VISIT_ACCEPTED
;
759 return VISIT_REJECTED
;
763 visit_substring_match_casefold_wide(struct process_data
*procdata
, void *context
)
765 const char *pattern
= context
;
767 if (NULL
!= mbscasestr(procdata
->munged_filename
, pattern
))
768 return VISIT_ACCEPTED
;
770 return VISIT_REJECTED
;
775 visit_substring_match_casefold_narrow(struct process_data
*procdata
, void *context
)
777 const char *pattern
= context
;
779 assert(MB_CUR_MAX
== 1);
780 if (NULL
!= strcasestr(procdata
->munged_filename
, pattern
))
781 return VISIT_ACCEPTED
;
783 return VISIT_REJECTED
;
788 visit_globmatch_nofold(struct process_data
*procdata
, void *context
)
790 const char *glob
= context
;
791 if (fnmatch(glob
, procdata
->munged_filename
, 0) != 0)
792 return VISIT_REJECTED
;
794 return VISIT_ACCEPTED
;
799 visit_globmatch_casefold(struct process_data
*procdata
, void *context
)
801 const char *glob
= context
;
802 if (fnmatch(glob
, procdata
->munged_filename
, FNM_CASEFOLD
) != 0)
803 return VISIT_REJECTED
;
805 return VISIT_ACCEPTED
;
810 visit_regex(struct process_data
*procdata
, void *context
)
812 struct regular_expression
*p
= context
;
813 const size_t len
= strlen(procdata
->munged_filename
);
815 int rv
= re_search (&p
->regex
, procdata
->munged_filename
,
817 (struct re_registers
*) NULL
);
820 return VISIT_REJECTED
; /* no match (-1), or internal error (-2) */
824 return VISIT_ACCEPTED
; /* match */
/* Visitor: accumulate statistics about the current entry into the
 * struct locate_stats passed as CONTEXT.  The filename is counted and its
 * strlen() added to the running total length.  The name is then scanned
 * byte by byte, looking for bytes with bit 7 set and for whitespace as
 * recognised by isspace(); one branch sets both the newline and the
 * whitespace flag.  Returns VISIT_CONTINUE.
 * NOTE(review): the conditions guarding the three per-category counter
 * increments are not in the lines shown; presumably each counter is bumped
 * at most once per filename, when its flag was set -- confirm. */
830 visit_stats(struct process_data
*procdata
, void *context
)
832 struct locate_stats
*p
= context
;
833 size_t len
= strlen(procdata
->original_filename
);
835 int highbit
, whitespace
, newline
;
837 ++(p
->total_filename_count
);
838 p
->total_filename_length
+= len
;
840 highbit
= whitespace
= newline
= 0;
841 for (s
=procdata
->original_filename
; *s
; ++s
)
843 if ( (int)(*s
) & 128 )
847 newline
= whitespace
= 1;
849 else if (isspace((unsigned char)*s
))
856 ++(p
->highbit_filename_count
);
858 ++(p
->whitespace_count
);
860 ++(p
->newline_count
);
862 return VISIT_CONTINUE
;
/* Visitor: count one more accepted item in the struct locate_limits passed
 * as CONTEXT and compare the new total against its limit.  Returns
 * VISIT_CONTINUE on the path visible here.
 * NOTE(review): the statement executed once the limit has been reached is
 * not in the lines shown; presumably it stops the scan -- confirm. */
867 visit_limit(struct process_data
*procdata
, void *context
)
869 struct locate_limits
*p
= context
;
873 if (++p
->items_accepted
>= p
->limit
)
876 return VISIT_CONTINUE
;
/* Visitor taking a struct locate_limits as CONTEXT; returns
 * VISIT_CONTINUE.
 * NOTE(review): presumably this increments the accepted-item counter
 * without enforcing a limit -- the statement doing so is not in the lines
 * shown here. */
880 visit_count(struct process_data
*procdata
, void *context
)
882 struct locate_limits
*p
= context
;
887 return VISIT_CONTINUE
;
890 /* Emit the statistics.
893 print_stats(int argc
, size_t database_file_size
)
895 char hbuf
[LONGEST_HUMAN_READABLE
+ 1];
897 printf(_("Locate database size: %s bytes\n"),
898 human_readable ((uintmax_t) database_file_size
,
899 hbuf
, human_ceiling
, 1, 1));
901 printf( (results_were_filtered
?
902 _("Matching Filenames: %s ") :
903 _("All Filenames: %s ")),
904 human_readable (statistics
.total_filename_count
,
905 hbuf
, human_ceiling
, 1, 1));
906 printf(_("with a cumulative length of %s bytes"),
907 human_readable (statistics
.total_filename_length
,
908 hbuf
, human_ceiling
, 1, 1));
910 printf(_("\n\tof which %s contain whitespace, "),
911 human_readable (statistics
.whitespace_count
,
912 hbuf
, human_ceiling
, 1, 1));
913 printf(_("\n\t%s contain newline characters, "),
914 human_readable (statistics
.newline_count
,
915 hbuf
, human_ceiling
, 1, 1));
916 printf(_("\n\tand %s contain characters with the high bit set.\n"),
917 human_readable (statistics
.highbit_filename_count
,
918 hbuf
, human_ceiling
, 1, 1));
922 if (results_were_filtered
)
924 printf(_("Some filenames may have been filtered out, "
925 "so we cannot compute the compression ratio.\n"));
929 if (statistics
.total_filename_length
)
931 /* A negative compression ratio just means that the
932 * compressed database is larger than the list of
933 * filenames. This can happen for example for
934 * old-format databases containing a small list of short
935 * filenames, because the bigram list is 256 bytes.
937 printf(_("Compression ratio %4.2f%% (higher is better)\n"),
938 100.0 * ((double)statistics
.total_filename_length
939 - (double) database_file_size
)
940 / (double) statistics
.total_filename_length
);
944 printf(_("Compression ratio is undefined\n"));
952 * Return nonzero if the data we read in indicates that we are
953 * looking at a LOCATE02 locate database.
956 looking_at_gnu_locatedb (const char *data
, size_t len
)
958 if (len
< sizeof (LOCATEDB_MAGIC
))
960 else if (0 == memcmp (data
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
961 return 1; /* We saw the magic byte sequence */
967 * Return nonzero if the data we read in indicates that we are
968 * looking at an slocate database.
971 looking_at_slocate_locatedb (const char *filename
,
984 /* Check that the magic number is a one-byte string */
987 if (isdigit((unsigned char)data
[0]))
989 /* looks promising. */
990 *seclevel
= (data
[0] - '0');
994 /* Hmm, well it's probably an slocate database
995 * of some awesomely huge security level, like 2.
996 * We don't know how to handle those.
999 _("locate database %s looks like an slocate "
1000 "database but it seems to have security level %c, "
1001 "which GNU findutils does not currently support"),
1002 quotearg_n_style(0, locale_quoting_style
, filename
),
1019 /* Definitely not slocate. */
/* Host byte-order probe; search_one_database() uses the result to choose
 * between the little-endian and big-endian wording when describing the
 * database encoding.  The visible part works on a four-byte array member
 * of `u' whose bytes 1..3 are cleared.
 * NOTE(review): presumably `u' is a union overlaying an integer, byte 0 is
 * set to 1 and the integer is then compared with 1 -- those lines are not
 * shown here. */
1027 i_am_little_endian(void)
1031 unsigned char uch
[4];
1036 u
.uch
[1] = u
.uch
[2] = u
.uch
[3] = 0;
1043 /* Print or count the entries in DBFILE that match shell globbing patterns in
1044 ARGV. Return the number of entries matched. */
1046 static unsigned long
1047 search_one_database (int argc
,
1056 struct locate_limits
*plimit
,
1062 char *pathpart
; /* A pattern to consider. */
1063 int argn
; /* Index to current pattern in argv. */
1064 int nread
; /* number of bytes read from an entry. */
1065 struct process_data procdata
; /* Storage for data shared with visitors. */
1066 int slocate_seclevel
;
1068 struct visitor
* pvis
; /* temp for determining past_pat_inspector. */
1069 const char *format_name
;
1070 enum ExistenceCheckType do_check_existence
;
1073 /* We may turn on existence checking for a given database.
1074 * We ensure that we can return to the previous behaviour
1075 * by using two variables, do_check_existence (which we act on)
1076 * and check_existence (which indicates the default before we
1077 * adjust it on the basis of what kind of database we're using
1079 do_check_existence
= check_existence
;
1083 regex_options
|= RE_ICASE
;
1086 procdata
.endian_state
= GetwordEndianStateInitial
;
1087 procdata
.len
= procdata
.count
= 0;
1088 procdata
.slocatedb_format
= 0;
1089 procdata
.itemcount
= 0;
1091 procdata
.dbfile
= dbfile
;
1094 /* Set up the inspection regime */
1096 lastinspector
= NULL
;
1097 past_pat_inspector
= NULL
;
1098 results_were_filtered
= false;
1100 procdata
.pathsize
= 1026; /* Increased as necessary by locate_read_str. */
1102 procdata
.pathsize
= 128; /* Increased as necessary by locate_read_str. */
1104 procdata
.original_filename
= xmalloc (procdata
.pathsize
);
1107 nread
= fread (procdata
.original_filename
, 1, SLOCATE_DB_MAGIC_LEN
,
1109 slocate_seclevel
= 0;
1110 if (looking_at_slocate_locatedb(procdata
.dbfile
,
1111 procdata
.original_filename
,
1116 _("%s is an slocate database. "
1117 "Support for these is new, expect problems for now."),
1118 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1120 /* slocate also uses frcode, but with a different header.
1121 * We handle the header here and then work with the data
1122 * in the normal way.
1124 if (slocate_seclevel
> 1)
1126 /* We don't know what those security levels mean,
1127 * so do nothing further
1130 _("%s is an slocate database of unsupported security level %d; skipping it."),
1131 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
),
1135 else if (slocate_seclevel
> 0)
1137 /* Don't show the filenames to the user if they don't exist.
1138 * Showing stats is safe since filenames are only counted
1139 * after the existence check
1141 if (ACCEPT_NON_EXISTING
== check_existence
)
1143 /* Do not allow the user to see a list of filenames that they
1147 _("You specified the -E option, but that option "
1148 "cannot be used with slocate-format databases "
1149 "with a non-zero security level. No results will be "
1150 "generated for this database.\n"));
1153 if (ACCEPT_EXISTING
!= do_check_existence
)
1155 if (enable_print
|| stats
)
1158 _("%s is an slocate database. "
1159 "Turning on the '-e' option."),
1160 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1162 do_check_existence
= ACCEPT_EXISTING
;
1165 add_visitor(visit_locate02_format
, NULL
);
1166 format_name
= "slocate";
1167 procdata
.slocatedb_format
= 1;
1173 procdata
.slocatedb_format
= 0;
1174 extend (&procdata
, sizeof(LOCATEDB_MAGIC
), 0u);
1175 nread2
= fread (procdata
.original_filename
+nread
, 1, sizeof (LOCATEDB_MAGIC
)-nread
,
1177 if (looking_at_gnu_locatedb(procdata
.original_filename
, nread
+nread2
))
1179 add_visitor(visit_locate02_format
, NULL
);
1180 format_name
= "GNU LOCATE02";
1182 else /* Use the old format */
1187 /* Read the list of the most common bigrams in the database. */
1190 extend (&procdata
, 256u, 0u);
1191 int more_read
= fread (procdata
.original_filename
+ nread
, 1,
1192 256 - nread
, procdata
.fp
);
1193 if ( (more_read
+ nread
) != 256 )
1196 _("Old-format locate database %s is "
1197 "too short to be valid"),
1198 quotearg_n_style(0, locale_quoting_style
, dbfile
));
1203 for (i
= 0; i
< 128; i
++)
1205 procdata
.bigram1
[i
] = procdata
.original_filename
[i
<< 1];
1206 procdata
.bigram2
[i
] = procdata
.original_filename
[(i
<< 1) + 1];
1208 format_name
= "old";
1210 add_visitor(visit_old_format
, NULL
);
1215 add_visitor(visit_basename
, NULL
);
1217 /* Add an inspector for each pattern we're looking for. */
1218 for ( argn
= 0; argn
< argc
; argn
++ )
1220 results_were_filtered
= true;
1221 pathpart
= argv
[argn
];
1224 struct regular_expression
*p
= xmalloc(sizeof(*p
));
1225 const char *error_message
= NULL
;
1227 memset (&p
->regex
, 0, sizeof (p
->regex
));
1229 re_set_syntax(regex_options
);
1230 p
->regex
.allocated
= 100;
1231 p
->regex
.buffer
= (unsigned char *) xmalloc (p
->regex
.allocated
);
1232 p
->regex
.fastmap
= NULL
;
1233 p
->regex
.syntax
= regex_options
;
1234 p
->regex
.translate
= NULL
;
1236 error_message
= re_compile_pattern (pathpart
, strlen (pathpart
),
1240 error (1, 0, "%s", error_message
);
1244 add_visitor(visit_regex
, p
);
1247 else if (contains_metacharacter(pathpart
))
1250 add_visitor(visit_globmatch_casefold
, pathpart
);
1252 add_visitor(visit_globmatch_nofold
, pathpart
);
1256 /* No glob characters used. Hence we match on
1257 * _any part_ of the filename, not just the
1258 * basename. This seems odd to me, but it is the
1259 * traditional behaviour.
1260 * James Youngman <jay@gnu.org>
1263 if (1 == MB_CUR_MAX
)
1265 /* As an optimisation, use a strstr() matcher if we are
1266 * in a unibyte locale. This can give a x2 speedup in
1267 * the C locale. Some light testing reveals that
1268 * glibc's strstr() is somewhere around 40% faster than
1269 * gnulib's, so we just use strstr().
1271 matcher
= ignore_case
?
1272 visit_substring_match_casefold_narrow
:
1273 visit_substring_match_nocasefold_narrow
;
1277 matcher
= ignore_case
?
1278 visit_substring_match_casefold_wide
:
1279 visit_substring_match_nocasefold_wide
;
1281 add_visitor(matcher
, pathpart
);
1285 pvis
= lastinspector
;
1287 /* We add visit_existing_*() as late as possible to reduce the
1288 * number of stat() calls.
1290 switch (do_check_existence
)
1292 case ACCEPT_EXISTING
:
1293 results_were_filtered
= true;
1294 if (follow_symlinks
) /* -L, default */
1295 add_visitor(visit_existing_follow
, NULL
);
1297 add_visitor(visit_existing_nofollow
, NULL
);
1300 case ACCEPT_NON_EXISTING
:
1301 results_were_filtered
= true;
1302 if (follow_symlinks
) /* -L, default */
1303 add_visitor(visit_non_existing_follow
, NULL
);
1305 add_visitor(visit_non_existing_nofollow
, NULL
);
1308 case ACCEPT_EITHER
: /* Default, neither -E nor -e */
1309 /* do nothing; no extra processing. */
1313 /* Security issue: The stats visitor must be added immediately
1314 * before the print visitor, because otherwise the -S option would
1315 * leak information about files that the caller cannot see.
1318 add_visitor(visit_stats
, &statistics
);
1322 if (print_quoted_filename
)
1323 add_visitor(visit_justprint_quoted
, NULL
);
1325 add_visitor(visit_justprint_unquoted
, NULL
);
1330 add_visitor(visit_limit
, plimit
);
1332 add_visitor(visit_count
, plimit
);
1337 past_pat_inspector
= pvis
->next
;
1339 mainprocessor
= process_and
;
1341 mainprocessor
= process_or
;
1344 mainprocessor
= process_simple
;
1348 printf(_("Database %s is in the %s format.\n"),
1354 procdata
.c
= getc (procdata
.fp
);
1355 /* If we are searching for filename patterns, the inspector list
1356 * will contain an entry for each pattern for which we are searching.
1358 while ( (procdata
.c
!= EOF
) &&
1359 (VISIT_ABORT
!= (mainprocessor
)(&procdata
)) )
1361 /* Do nothing; all the work is done in the visitor functions. */
1368 int host_little_endian
= i_am_little_endian();
1369 const char *little
= _("The database has little-endian "
1370 "machine-word encoding.\n");
1371 const char *big
= _("The database has big-endian "
1372 "machine-word encoding.\n");
1374 if (GetwordEndianStateNative
== procdata
.endian_state
)
1376 printf("%s", (host_little_endian
? little
: big
));
1378 else if (GetwordEndianStateSwab
== procdata
.endian_state
)
1380 printf("%s", (host_little_endian
? big
: little
));
1384 printf(_("The database machine-word encoding order "
1385 "is not obvious.\n"));
1389 print_stats(argc
, filesize
);
1392 if (ferror (procdata
.fp
))
1394 error (0, errno
, "%s",
1395 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1398 return plimit
->items_accepted
;
1404 extern char *version_string
;
1406 /* The name this program was run with. */
1410 usage (FILE *stream
)
1412 fprintf (stream
, _("\
1413 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1414 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1415 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1416 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1417 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1418 [--max-database-age D] [--version] [--help]\n\
1421 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
1425 REGEXTYPE_OPTION
= CHAR_MAX
+ 1,
/* Long-option table.  Each entry gives the option name, whether it takes
 * an argument, a NULL flag pointer, and the value that identifies the
 * option: either the matching short-option character or, for options with
 * no short form (--regextype, --max-database-age), a named constant.
 * "wholepath" is accepted as a synonym for "wholename".  The entry with a
 * NULL name terminates the table. */
1430 static struct option
const longopts
[] =
1432 {"database", required_argument
, NULL
, 'd'},
1433 {"existing", no_argument
, NULL
, 'e'},
1434 {"non-existing", no_argument
, NULL
, 'E'},
1435 {"ignore-case", no_argument
, NULL
, 'i'},
1436 {"all", no_argument
, NULL
, 'A'},
1437 {"help", no_argument
, NULL
, 'h'},
1438 {"version", no_argument
, NULL
, 'v'},
1439 {"null", no_argument
, NULL
, '0'},
1440 {"count", no_argument
, NULL
, 'c'},
1441 {"wholename", no_argument
, NULL
, 'w'},
1442 {"wholepath", no_argument
, NULL
, 'w'}, /* Synonym. */
1443 {"basename", no_argument
, NULL
, 'b'},
1444 {"print", no_argument
, NULL
, 'p'},
1445 {"stdio", no_argument
, NULL
, 's'},
1446 {"mmap", no_argument
, NULL
, 'm'},
1447 {"limit", required_argument
, NULL
, 'l'},
1448 {"regex", no_argument
, NULL
, 'r'},
1449 {"regextype", required_argument
, NULL
, REGEXTYPE_OPTION
},
1450 {"statistics", no_argument
, NULL
, 'S'},
1451 {"follow", no_argument
, NULL
, 'L'},
1452 {"nofollow", no_argument
, NULL
, 'P'},
1453 {"max-database-age", required_argument
, NULL
, MAX_DB_AGE
},
1454 {NULL
, no_argument
, NULL
, 0}
1461 const char * what
= "failed";
1462 const uid_t orig_euid
= geteuid();
1463 const uid_t uid
= getuid();
1464 const gid_t gid
= getgid();
1467 /* Use of setgroups() is restricted to root only. */
1470 /* We're either root or running setuid-root. */
1473 if (0 != setgroups(1u, groups
))
1475 what
= _("failed to drop group privileges");
1481 /* Drop any setuid privileges */
1482 if (uid
!= orig_euid
)
1486 /* We're really root anyway, but are setuid to something else. Leave it. */
1491 if (0 != setuid(getuid()))
1493 what
= _("failed to drop setuid privileges");
1497 /* Defend against the case where the attacker runs us with the
1498 * capability to call setuid() turned off, which on some systems
1499 * will cause the above attempt to drop privileges to fail (leaving us
1504 /* Check that we can no longer switch back to root */
1507 what
= _("Failed to fully drop privileges");
1508 /* The errno value here is not interesting (since
1509 * the system call we are complaining about
1510 * succeeded when we wanted it to fail). Arrange
1511 * for the call to error() not to print the errno
1512 * value by setting errno=0.
1521 /* Drop any setgid privileges */
1523 if (0 != setgid(gid
))
1525 what
= _("failed to drop setgid privileges");
1533 error(1, errno
, "%s",
1534 quotearg_n_style(0, locale_quoting_style
, what
));
1542 /* deliberate infinite loop */
1547 opendb(const char *name
)
1549 int fd
= open(name
, O_RDONLY
1550 #if defined(O_LARGEFILE)
1556 /* Make sure it won't survive an exec */
1557 if (0 != fcntl(fd
, F_SETFD
, FD_CLOEXEC
))
1567 dolocate (int argc
, char **argv
, int secure_db_fd
)
1570 unsigned long int found
= 0uL;
1572 int ignore_case
= 0;
1575 int basename_only
= 0;
1578 int regex_options
= RE_SYNTAX_EMACS
;
1583 int they_chose_db
= 0;
1584 bool did_stdin
= false; /* Set to prevent rereading stdin. */
1586 program_name
= argv
[0];
1588 #ifdef HAVE_SETLOCALE
1589 setlocale (LC_ALL
, "");
1591 bindtextdomain (PACKAGE
, LOCALEDIR
);
1592 textdomain (PACKAGE
);
1593 atexit (close_stdout
);
1596 limits
.items_accepted
= 0;
1598 quote_opts
= clone_quoting_options (NULL
);
1599 print_quoted_filename
= true;
1601 /* We cannot simultaneously trust $LOCATE_PATH and use the
1602 * setuid-access-controlled database, since that could cause a leak
1605 dbpath
= getenv ("LOCATE_PATH");
1611 check_existence
= ACCEPT_EITHER
;
1613 while ((optc
= getopt_long (argc
, argv
, "Abcd:eEil:prsm0SwHPL", longopts
, (int *) 0)) != -1)
1618 print_quoted_filename
= false; /* print filename 'raw'. */
1639 check_existence
= ACCEPT_EXISTING
;
1643 check_existence
= ACCEPT_NON_EXISTING
;
1655 /* XXX: nothing in the test suite for this option. */
1656 set_max_db_age(optarg
);
1664 printf (_("GNU locate version %s\n"), version_string
);
1665 printf (_("Built using GNU gnulib version %s\n"), gnulib_version
);
1676 case REGEXTYPE_OPTION
:
1677 regex_options
= get_regex_type(optarg
);
1685 follow_symlinks
= 1;
1688 /* In find, -P and -H differ in the way they handle paths
1689 * given on the command line. This is not relevant for
1690 * locate, but the -H option is supported because it is
1691 * probably more intuitive to do so.
1695 follow_symlinks
= 0;
1701 strtol_error err
= xstrtoumax(optarg
, &end
, 10, &limits
.limit
, NULL
);
1702 if (LONGINT_OK
!= err
)
1704 STRTOL_FATAL_ERROR(optarg
, _("argument to --limit"), err
);
1710 case 's': /* use stdio */
1711 case 'm': /* use mmap */
1712 /* These options are implemented simply for
1713 * compatibility with FreeBSD
1723 /* If the user gave the -d option or set LOCATE_PATH,
1724 * relinquish access to the secure database.
1728 if (secure_db_fd
>= 0)
1730 close(secure_db_fd
);
1735 if (!just_count
&& !stats
)
1745 if (!just_count
&& optind
== argc
)
1753 if (1 == isatty(STDOUT_FILENO
))
1754 stdout_is_a_tty
= true;
1756 stdout_is_a_tty
= false;
1759 next_element (dbpath
, 0); /* Initialize. */
1761 /* Bail out early if limit already reached. */
1762 while (!use_limit
|| limits
.limit
> limits
.items_accepted
)
1768 statistics
.compressed_bytes
=
1769 statistics
.total_filename_count
=
1770 statistics
.total_filename_length
=
1771 statistics
.whitespace_count
=
1772 statistics
.newline_count
=
1773 statistics
.highbit_filename_count
= 0u;
1777 /* Take the next element from the list of databases */
1778 e
= next_element ((char *) NULL
, 0);
1782 if (0 == strcmp (e
, "-"))
1787 _("warning: the locate database can only be read from stdin once."));
1799 if (0 == strlen(e
) || 0 == strcmp(e
, "."))
1804 /* open the database */
1808 error (0, errno
, "%s",
1809 quotearg_n_style(0, locale_quoting_style
, e
));
1816 if (-1 == secure_db_fd
)
1818 /* Already searched the database, it's time to exit the loop */
1823 e
= selected_secure_db
;
1829 /* Check the database to see if it is old. */
1832 error (0, errno
, "%s",
1833 quotearg_n_style(0, locale_quoting_style
, e
));
1834 /* continue anyway */
1835 filesize
= (off_t
)0;
1841 filesize
= st
.st_size
;
1843 if ((time_t)-1 == time(&now
))
1845 /* If we can't tell the time, we don't know how old the
1846 * database is. But since the message is just advisory,
1847 * we continue anyway.
1849 error (0, errno
, "time system call");
1853 double age
= difftime(now
, st
.st_mtime
);
1854 double warn_seconds
= SECONDS_PER_UNIT
* warn_number_units
;
1855 if (age
> warn_seconds
)
1858 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1860 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1861 quotearg_n_style(0, locale_quoting_style
, e
),
1862 warn_number_units
, _(warn_name_units
),
1863 (age
/(double)SECONDS_PER_UNIT
), _(warn_name_units
));
1868 fp
= fdopen(fd
, "r");
1871 error (0, errno
, "%s",
1872 quotearg_n_style(0, locale_quoting_style
, e
));
1876 /* Search this database for all patterns simultaneously */
1877 found
= search_one_database (argc
- optind
, &argv
[optind
],
1879 ignore_case
, print
, basename_only
,
1880 use_limit
, &limits
, stats
,
1881 op_and
, regex
, regex_options
);
1883 /* Close the database (even if it is stdin) */
1884 if (fclose (fp
) == EOF
)
1886 error (0, errno
, "%s",
1887 quotearg_n_style(0, locale_quoting_style
, e
));
1894 /* found is an unsigned long int, so it needs %lu rather than %ld. */ printf("%lu\n", found
);
1897 if (found
|| (use_limit
&& (limits
.limit
==0)) || stats
)
1903 #define ARRAYSIZE(a) (sizeof(a)/sizeof((a)[0])) /* element count of a true array (not a pointer); (a) is parenthesized so expression arguments index correctly */
1905 open_secure_db(void)
1909 const char * secure_db_list
[] =
1912 "/var/lib/slocate/slocate.db",
1915 for (i
=0; secure_db_list
[i
]; ++i
)
1917 fd
= opendb(secure_db_list
[i
]);
1920 selected_secure_db
= secure_db_list
[i
];
1928 main (int argc
, char **argv
)
1930 int dbfd
= open_secure_db();
1933 return dolocate(argc
, argv
, dbfd
);