1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 /* Usage: locate [options] pattern...
21 Scan a pathname list for the full pathname of a file, given only
22 a piece of the name (possibly containing shell globbing metacharacters).
23 The list has been processed with front-compression, which reduces
24 the list size by a factor of 4-5.
25 Recognizes two database formats, old and new. The old format is
26 bigram coded, which reduces space by a further 20-25% and uses the
27 following encoding of the database bytes:
29 0-28 likeliest differential counts + offset (14) to make nonnegative
30 30 escape code for out-of-range count to follow in next halfword
31 128-255 bigram codes (the 128 most common, as determined by `updatedb')
32 32-127 single character (printable) ASCII remainder
34 Earlier versions of GNU locate used to use a novel two-tiered
35 string search technique, which was described in Usenix ;login:, Vol
36 8, No 1, February/March, 1983, p. 8.
38 However, latterly code changes to provide additional functionality
39 became difficult to make with the existing reading scheme, and so
40 we no longer perform the matching as efficiently as we used to (that is,
41 we no longer use the same algorithm).
43 The old algorithm was:
45 First, match a metacharacter-free subpattern and a partial
46 pathname BACKWARDS to avoid full expansion of the pathname list.
47 The time savings is 40-50% over forward matching, which cannot
48 efficiently handle overlapped search patterns and compressed
51 Then, match the actual shell glob pattern (if in this form)
52 against the candidate pathnames using the slower shell filename
56 Written by James A. Woods <jwoods@adobe.com>.
57 Modified by David MacKenzie <djm@gnu.org>.
58 Additional work by James Youngman and Bas van Gompel.
66 #include <sys/types.h>
67 #include <grp.h> /* for setgroups() */
74 #include <stdbool.h> /* for bool/boolean */
76 /* The presence of unistd.h is assumed by gnulib these days, so we
77 * might as well assume it too.
79 /* We need <unistd.h> for isatty(). */
103 # include <libintl.h>
104 # define _(Text) gettext (Text)
106 # define _(Text) Text
107 #define textdomain(Domain)
108 #define bindtextdomain(Package, Directory)
111 # define N_(String) gettext_noop (String)
113 /* We used to use (String) instead of just String, but apparently ISO C
114 * doesn't allow this (at least, that's what HP said when someone reported
115 * this as a compiler bug). This is HP case number 1205608192. See
116 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
117 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
118 * like: static const char buf[] = ("string");
120 # define N_(String) String
123 #include "locatedb.h"
129 #include "closeout.h"
130 #include "nextelem.h"
133 #include "quotearg.h"
134 #include "printquoted.h"
135 #include "regextype.h"
136 #include "gnulib-version.h"
138 /* Note that this evaluates Ch many times. */
140 # define TOUPPER(Ch) toupper (Ch)
141 # define TOLOWER(Ch) tolower (Ch)
143 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
144 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
147 /* typedef enum {false, true} boolean; */
149 /* Warn if a database is older than this. 8 days allows for a weekly
150 update that takes up to a day to perform. */
151 static unsigned int warn_number_units
= 8;
153 /* Printable name of units used in WARN_SECONDS */
154 static const char warn_name_units
[] = N_("days");
155 #define SECONDS_PER_UNIT (60 * 60 * 24)
/* Result codes returned by visitor callbacks.  Each code is a distinct
   bit, so a set of codes can be tested with one bitwise AND, which is
   how visit() uses its accept_flags argument. */
159 VISIT_CONTINUE
= 1, /* please call the next visitor */
160 VISIT_ACCEPTED
= 2, /* accepted, call no further callbacks for this file */
161 VISIT_REJECTED
= 4, /* rejected, process next file. */
162 VISIT_ABORT
= 8 /* rejected, process no more files. */
/* Whether an entry's file must (or must not) exist for the entry to be
   kept.  search_one_database() switches on a value of this type to
   decide which existence-checking visitor, if any, to install. */
165 enum ExistenceCheckType
167 ACCEPT_EITHER
, /* Corresponds to lack of -E/-e option */
168 ACCEPT_EXISTING
, /* Corresponds to option -e */
169 ACCEPT_NON_EXISTING
/* Corresponds to option -E */
172 /* Check for existence of files before printing them out? */
173 enum ExistenceCheckType check_existence
= ACCEPT_EITHER
;
175 static int follow_symlinks
= 1;
177 /* What to separate the results with. */
178 static int separator
= '\n';
180 static struct quoting_options
* quote_opts
= NULL
;
181 static bool stdout_is_a_tty
;
182 static bool print_quoted_filename
;
183 static bool results_were_filtered
;
185 static const char *selected_secure_db
= NULL
;
188 /* Change the number of days old the database can be
189 * before we complain about it.
 * S is the text of the option argument.  It is converted with
 * strtoul() in base 10 and the result is stored in warn_number_units;
 * an empty or invalid argument produces one of the diagnostics below.
192 set_max_db_age(const char *s
)
195 unsigned long int val
;
196 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
201 _("The argument for option --max-database-age must not be empty"));
205 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
206 * we would not be able to tell if that is the correct answer, or whether it
207 * signifies an error.
210 val
= strtoul(s
, &end
, 10);
212 /* Diagnose number too large, non-numbers and trailing junk. */
213 if ((ULONG_MAX
== val
&& ERANGE
== errno
) ||
214 (0 == val
&& EINVAL
== errno
))
217 _("Invalid argument %s for option --max-database-age"),
218 quotearg_n_style(0, locale_quoting_style
, s
));
222 /* errno wasn't set, don't print its message */
224 _("Invalid argument %s for option --max-database-age"),
225 quotearg_n_style(0, locale_quoting_style
, s
));
229 warn_number_units
= val
;
235 /* Read in a 16-bit int, high byte first (network byte order).
   The first byte is cast to signed char before it is shifted, so the
   high byte supplies the sign of the result; the second byte is masked
   to 8 bits and OR-ed into the low half.
   NOTE(review): for a first byte of 0x80 or above the value being
   left-shifted is typically negative, which ISO C leaves undefined --
   confirm this is acceptable on the supported compilers.  No EOF check
   on either fgetc() call is visible in this excerpt. */
243 x
= (signed char) fgetc (fp
) << 8;
244 x
|= (fgetc (fp
) & 0xff);
/* The bytes that mark a pattern as a shell glob: asterisk, question
   mark, the two square brackets and backslash.  contains_metacharacter()
   looks for any of them with strpbrk(); search_one_database() uses that
   answer to choose between a glob matcher and a substring matcher. */
248 const char * const metacharacters
= "*?[]\\";
250 /* Return nonzero if S contains any shell glob characters.
253 contains_metacharacter(const char *s
)
255 if (NULL
== strpbrk(s
, metacharacters
))
263 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
264 * until we reach DELIMITER or end-of-file. We reallocate the buffer
265 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
266 * is made regarding the content of the data (i.e. the implementation is
267 * 8-bit clean, the only delimiter is DELIMITER).
269 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
270 * has been removed from gnulib.
272 * We call the function locate_read_str() to avoid a name clash with the curses
276 locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
283 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
288 needed
= offs
+ nread
+ 1u;
291 char *pnew
= realloc(*buf
, needed
);
294 return -1; /* FAIL: presumably taken when realloc() could not grow the buffer.  The caller's buffer pointer has not been reassigned at this point, because the realloc() result went into PNEW.  NOTE(review): the code that releases the getdelim() buffer P on this path is not visible in this excerpt -- confirm it is freed. */
/* Copy the NREAD bytes that getdelim() produced to offset OFFS of the
   caller's buffer. */
302 memcpy((*buf
)+offs
, p
, nread
);
/* Number of entries accepted so far.  visit_limit() increments it and
   compares it with the limit; search_one_database() returns it. */
312 uintmax_t items_accepted
;
314 static struct locate_limits limits
;
/* Counters behind the --statistics report.  visit_stats() updates the
   filename, whitespace, newline and high-bit counters and print_stats()
   prints them.  (No use of compressed_bytes is visible in this excerpt.) */
319 uintmax_t compressed_bytes
;
320 uintmax_t total_filename_count
;
321 uintmax_t total_filename_length
;
322 uintmax_t whitespace_count
;
323 uintmax_t newline_count
;
324 uintmax_t highbit_filename_count
;
/* The single instance handed to visit_stats() as its context. */
326 static struct locate_stats statistics
;
329 struct regular_expression
331 struct re_pattern_buffer regex
; /* for --regex */
337 int c
; /* An input byte. */
338 char itemcount
; /* Indicates we're at the beginning of an slocate db: counts the entries read so far (visit_locate02_format stops incrementing it at 6) so that entries 0 and 1 can be handled specially. */
339 int count
; /* The length of the prefix shared with the previous database entry. */
341 char *original_filename
; /* The current input database entry. */
342 size_t pathsize
; /* Amount allocated for it. */
343 char *munged_filename
; /* path or base_name(path) */
344 FILE *fp
; /* The pathname database. */
345 const char *dbfile
; /* Its name, or "<stdin>" */
346 int slocatedb_format
; /* Allows us to cope with slocate's format variant */
347 GetwordEndianState endian_state
; /* Byte-order state passed to getword() for old-format databases and reported after the scan. */
348 /* for the old database format,
349 the first and second characters of the most common bigrams. */
355 typedef int (*visitfunc
)(struct process_data
*procdata
,
362 struct visitor
*next
;
/* The visitor list that every database entry is run through.
   `inspectors' is the head and `lastinspector' the tail (add_visitor()
   appends there).  `past_pat_inspector' is the first visitor after the
   pattern matchers; process_and() and process_or() use it to split the
   list into a pattern phase and a post-pattern phase. */
366 static struct visitor
*inspectors
= NULL
;
367 static struct visitor
*lastinspector
= NULL
;
368 static struct visitor
*past_pat_inspector
= NULL
;
/* Run the inspectors in the list starting at P against PROCDATA.
   Each inspector is called with PROCDATA and its own context.  The loop
   keeps going while the latest result shares at least one bit with
   ACCEPT_FLAGS and P is not STOP, so STOP itself is never called.
   RESULT starts out equal to ACCEPT_FLAGS, which lets the loop be
   entered for any nonzero ACCEPT_FLAGS.
   NOTE(review): the list advance and the return statement are not
   visible in this excerpt; callers use the returned value as the last
   inspector result. */
370 static inline int visit(const struct visitor
*p
,
372 struct process_data
*procdata
,
373 const struct visitor
* const stop
)
375 register int result
= accept_flags
;
376 while ( (accept_flags
& result
) && (stop
!= p
) )
378 result
= (p
->inspector
)(procdata
, p
->context
);
384 /* 0 or 1 pattern(s): run the whole inspector list (no stop node), moving on to the next inspector while the result is VISIT_CONTINUE or VISIT_ACCEPTED, and return whatever visit() reports. */
386 process_simple(struct process_data
*procdata
)
388 return visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, NULL
);
391 /* Accept if any pattern matches. */
393 process_or (struct process_data
*procdata
)
/* Phase 1: run the inspectors that precede past_pat_inspector, moving
   on to the next one while the result is VISIT_CONTINUE or
   VISIT_REJECTED -- that is, until some inspector accepts or aborts. */
397 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_REJECTED
), procdata
, past_pat_inspector
);
/* Ending phase 1 on VISIT_CONTINUE means nothing accepted the entry,
   so treat it as a rejection. */
398 if (result
== VISIT_CONTINUE
)
399 result
= VISIT_REJECTED
;
400 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
/* Phase 2: run the remaining inspectors; every one of them has to
   return VISIT_CONTINUE for the entry to count as accepted. */
403 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
404 if (VISIT_CONTINUE
== result
)
405 return VISIT_ACCEPTED
;
410 /* Accept if all patterns match. */
412 process_and (struct process_data
*procdata
)
/* Phase 1: run the inspectors that precede past_pat_inspector, moving
   on to the next one while the result is VISIT_CONTINUE or
   VISIT_ACCEPTED -- that is, until some inspector rejects or aborts. */
416 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, past_pat_inspector
);
/* A final VISIT_CONTINUE from phase 1 is turned into a rejection;
   only an explicit VISIT_ACCEPTED lets the entry through to phase 2. */
417 if (result
== VISIT_CONTINUE
)
418 result
= VISIT_REJECTED
;
419 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
/* Phase 2: run the remaining inspectors; every one of them has to
   return VISIT_CONTINUE for the entry to count as accepted. */
422 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
423 if (VISIT_CONTINUE
== result
)
424 return VISIT_ACCEPTED
;
/* Signature shared by process_simple(), process_and() and process_or():
   handle one database entry and return a VISIT_* code. */
429 typedef int (*processfunc
)(struct process_data
*procdata
);
/* The per-entry strategy; search_one_database() sets it to one of the
   three functions above and calls it until it returns VISIT_ABORT or
   the input is exhausted. */
431 static processfunc mainprocessor
= NULL
;
/* Append a visitor for callback FN, carrying CONTEXT, to the inspector
   list.  The node comes from xmalloc().  If the list is empty
   (lastinspector is NULL) the node becomes both head and tail;
   otherwise it is linked after the current tail.
   NOTE(review): the statements that store FN, terminate the node's
   `next' link and advance lastinspector are not visible in this
   excerpt. */
434 add_visitor(visitfunc fn
, void *context
)
436 struct visitor
*p
= xmalloc(sizeof(struct visitor
));
438 p
->context
= context
;
441 if (NULL
== lastinspector
)
443 lastinspector
= inspectors
= p
;
447 lastinspector
->next
= p
;
/* Visitor that writes the entry's original_filename to stdout through
   print_quoted(), passing quote_opts and stdout_is_a_tty.  Returns
   VISIT_CONTINUE.  CONTEXT is not used in the visible code. */
453 visit_justprint_quoted(struct process_data
*procdata
, void *context
)
456 print_quoted (stdout
, quote_opts
, stdout_is_a_tty
,
458 procdata
->original_filename
);
460 return VISIT_CONTINUE
;
/* Visitor that writes the entry's original_filename to stdout verbatim
   with fputs().  Returns VISIT_CONTINUE.  CONTEXT is not used in the
   visible code. */
464 visit_justprint_unquoted(struct process_data
*procdata
, void *context
)
467 fputs(procdata
->original_filename
, stdout
);
469 return VISIT_CONTINUE
;
/* Report that the database holds a filename longer than locate can
   handle.  The call that emits the message, and its arguments, are not
   visible in this excerpt. */
473 toolong (struct process_data
*procdata
)
476 _("locate database %s contains a "
477 "filename longer than locate can handle"),
/* Make sure PROCDATA's filename buffer can hold SIZ1 + SIZ2 bytes.
   The sum is first checked for size_t overflow (the action taken in
   that case is not visible in this excerpt).  If the current pathsize
   is smaller than the sum, pathsize is set to the sum and
   original_filename is reallocated through x2nrealloc(). */
482 extend (struct process_data
*procdata
, size_t siz1
, size_t siz2
)
484 /* Figure out if the addition operation is safe before performing it. */
485 if (SIZE_MAX
- siz1
< siz2
)
489 else if (procdata
->pathsize
< (siz1
+siz2
))
491 procdata
->pathsize
= siz1
+siz2
;
492 procdata
->original_filename
= x2nrealloc (procdata
->original_filename
,
499 visit_old_format(struct process_data
*procdata
, void *context
)
504 if (EOF
== procdata
->c
)
507 /* Get the offset in the path where this path info starts. */
508 if (procdata
->c
== LOCATEDB_OLD_ESCAPE
)
513 procdata
->count
-= LOCATEDB_OLD_OFFSET
;
514 minval
= (0 - procdata
->count
);
515 if (procdata
->count
>= 0)
516 maxval
= (procdata
->len
- procdata
->count
);
518 maxval
= (procdata
->len
- 0);
519 word
= getword(procdata
->fp
, procdata
->dbfile
,
520 minval
, maxval
, &procdata
->endian_state
);
521 procdata
->count
+= word
;
522 assert(procdata
->count
>= 0);
526 procdata
->count
+= (procdata
->c
- LOCATEDB_OLD_OFFSET
);
527 assert(procdata
->count
>= 0);
530 /* Overlay the old path with the remainder of the new. Read
531 * more data until we get to the next filename.
533 for (i
=procdata
->count
;
534 (procdata
->c
= getc (procdata
->fp
)) > LOCATEDB_OLD_ESCAPE
;)
536 if (EOF
== procdata
->c
)
539 if (procdata
->c
< 0200)
541 /* An ordinary character. */
542 extend (procdata
, i
, 1u);
543 procdata
->original_filename
[i
++] = procdata
->c
;
547 /* Bigram markers have the high bit set. */
548 extend (procdata
, i
, 2u);
550 procdata
->original_filename
[i
++] = procdata
->bigram1
[procdata
->c
];
551 procdata
->original_filename
[i
++] = procdata
->bigram2
[procdata
->c
];
555 /* Consider the case where we executed the loop body zero times; we
556 * still need space for the terminating null byte.
558 extend (procdata
, i
, 1u);
559 procdata
->original_filename
[i
] = 0;
561 procdata
->munged_filename
= procdata
->original_filename
;
563 return VISIT_CONTINUE
;
567 visit_locate02_format(struct process_data
*procdata
, void *context
)
573 if (procdata
->slocatedb_format
)
575 if (procdata
->itemcount
== 0)
577 ungetc(procdata
->c
, procdata
->fp
);
581 else if (procdata
->itemcount
== 1)
583 procdata
->count
= procdata
->len
-1;
587 if (procdata
->c
== LOCATEDB_ESCAPE
)
588 procdata
->count
+= (short)get_short (procdata
->fp
);
589 else if (procdata
->c
> 127)
590 procdata
->count
+= procdata
->c
- 256;
592 procdata
->count
+= procdata
->c
;
597 if (procdata
->c
== LOCATEDB_ESCAPE
)
598 procdata
->count
+= (short)get_short (procdata
->fp
);
599 else if (procdata
->c
> 127)
600 procdata
->count
+= procdata
->c
- 256;
602 procdata
->count
+= procdata
->c
;
605 if (procdata
->count
> procdata
->len
|| procdata
->count
< 0)
607 /* This should not happen generally, but since we're
608 * reading in data which is outside our control, we
611 error(1, 0, _("locate database %s is corrupt or invalid"),
612 quotearg_n_style(0, locale_quoting_style
, procdata
->dbfile
));
615 /* Overlay the old path with the remainder of the new. */
616 nread
= locate_read_str (&procdata
->original_filename
,
618 procdata
->fp
, 0, procdata
->count
);
621 procdata
->c
= getc (procdata
->fp
);
622 procdata
->len
= procdata
->count
+ nread
;
623 s
= procdata
->original_filename
+ procdata
->len
- 1; /* Move to the last char in path. */
624 assert (s
[0] != '\0');
625 assert (s
[1] == '\0'); /* Our terminator. */
626 assert (s
[2] == '\0'); /* Added by locate_read_str. */
628 procdata
->munged_filename
= procdata
->original_filename
;
630 if (procdata
->slocatedb_format
)
632 /* Don't increment indefinitely, it might overflow. */
633 if (procdata
->itemcount
< 6)
635 ++(procdata
->itemcount
);
640 return VISIT_CONTINUE
;
/* Visitor that points munged_filename at base_name() of the entry's
   original_filename, so that later matchers, which test
   munged_filename, see only the last path component.  Returns
   VISIT_CONTINUE.  CONTEXT is not used in the visible code. */
644 visit_basename(struct process_data
*procdata
, void *context
)
647 procdata
->munged_filename
= base_name(procdata
->original_filename
);
649 return VISIT_CONTINUE
;
653 /* visit_existing_follow implements -L -e: the entry is rejected when stat() on its original_filename fails, and passed on with VISIT_CONTINUE otherwise. */
655 visit_existing_follow(struct process_data
*procdata
, void *context
)
660 /* munged_filename has been converted in some way (to lower case,
661 * or is just the base name of the file), and original_filename has not.
662 * Hence only original_filename is still actually the name of the file
663 * whose existence we would need to check.
665 if (stat(procdata
->original_filename
, &st
) != 0)
667 return VISIT_REJECTED
;
671 return VISIT_CONTINUE
;
675 /* visit_non_existing_follow implements -L -E: the entry is rejected when stat() on its original_filename succeeds, and passed on with VISIT_CONTINUE otherwise. */
677 visit_non_existing_follow(struct process_data
*procdata
, void *context
)
682 /* munged_filename has been converted in some way (to lower case,
683 * or is just the base name of the file), and original_filename has not.
684 * Hence only original_filename is still actually the name of the file
685 * whose existence we would need to check.
687 if (stat(procdata
->original_filename
, &st
) == 0)
689 return VISIT_REJECTED
;
693 return VISIT_CONTINUE
;
697 /* visit_existing_nofollow implements -P -e: the entry is rejected when lstat() on its original_filename fails, and passed on with VISIT_CONTINUE otherwise. */
699 visit_existing_nofollow(struct process_data
*procdata
, void *context
)
704 /* munged_filename has been converted in some way (to lower case,
705 * or is just the base name of the file), and original_filename has not.
706 * Hence only original_filename is still actually the name of the file
707 * whose existence we would need to check.
709 if (lstat(procdata
->original_filename
, &st
) != 0)
711 return VISIT_REJECTED
;
715 return VISIT_CONTINUE
;
719 /* visit_non_existing_nofollow implements -P -E: the entry is rejected when lstat() on its original_filename succeeds, and passed on with VISIT_CONTINUE otherwise. */
721 visit_non_existing_nofollow(struct process_data
*procdata
, void *context
)
726 /* munged_filename has been converted in some way (to lower case,
727 * or is just the base name of the file), and original_filename has not.
728 * Hence only original_filename is still actually the name of the file
729 * whose existence we would need to check.
731 if (lstat(procdata
->original_filename
, &st
) == 0)
733 return VISIT_REJECTED
;
737 return VISIT_CONTINUE
;
742 visit_substring_match_nocasefold_wide(struct process_data
*procdata
, void *context
)
744 const char *pattern
= context
;
746 if (NULL
!= mbsstr(procdata
->munged_filename
, pattern
))
747 return VISIT_ACCEPTED
;
749 return VISIT_REJECTED
;
753 visit_substring_match_nocasefold_narrow(struct process_data
*procdata
, void *context
)
755 const char *pattern
= context
;
756 assert(MB_CUR_MAX
== 1);
757 if (NULL
!= strstr(procdata
->munged_filename
, pattern
))
758 return VISIT_ACCEPTED
;
760 return VISIT_REJECTED
;
764 visit_substring_match_casefold_wide(struct process_data
*procdata
, void *context
)
766 const char *pattern
= context
;
768 if (NULL
!= mbscasestr(procdata
->munged_filename
, pattern
))
769 return VISIT_ACCEPTED
;
771 return VISIT_REJECTED
;
776 visit_substring_match_casefold_narrow(struct process_data
*procdata
, void *context
)
778 const char *pattern
= context
;
780 assert(MB_CUR_MAX
== 1);
781 if (NULL
!= strcasestr(procdata
->munged_filename
, pattern
))
782 return VISIT_ACCEPTED
;
784 return VISIT_REJECTED
;
789 visit_globmatch_nofold(struct process_data
*procdata
, void *context
)
791 const char *glob
= context
;
792 if (fnmatch(glob
, procdata
->munged_filename
, 0) != 0)
793 return VISIT_REJECTED
;
795 return VISIT_ACCEPTED
;
800 visit_globmatch_casefold(struct process_data
*procdata
, void *context
)
802 const char *glob
= context
;
803 if (fnmatch(glob
, procdata
->munged_filename
, FNM_CASEFOLD
) != 0)
804 return VISIT_REJECTED
;
806 return VISIT_ACCEPTED
;
811 visit_regex(struct process_data
*procdata
, void *context
)
813 struct regular_expression
*p
= context
;
814 const size_t len
= strlen(procdata
->munged_filename
);
816 int rv
= re_search (&p
->regex
, procdata
->munged_filename
,
818 (struct re_registers
*) NULL
);
821 return VISIT_REJECTED
; /* no match (-1), or internal error (-2) */
825 return VISIT_ACCEPTED
; /* match */
/* Visitor that updates the struct locate_stats passed as CONTEXT for
   the current entry.  It counts the entry and adds its length, then
   scans original_filename once, noting whether any byte has the high
   bit set, is whitespace, or is a newline.  Returns VISIT_CONTINUE. */
831 visit_stats(struct process_data
*procdata
, void *context
)
833 struct locate_stats
*p
= context
;
834 size_t len
= strlen(procdata
->original_filename
);
836 int highbit
, whitespace
, newline
;
/* Every entry that reaches this visitor is counted, along with the
   length of its name. */
838 ++(p
->total_filename_count
);
839 p
->total_filename_length
+= len
;
/* The three flags describe this one entry only, so reset them first. */
841 highbit
= whitespace
= newline
= 0;
842 for (s
=procdata
->original_filename
; *s
; ++s
)
844 if ( (int)(*s
) & 128 )
/* A newline is recorded as whitespace too. */
848 newline
= whitespace
= 1;
850 else if (isspace((unsigned char)*s
))
/* Per-entry totals.  NOTE(review): presumably each increment is guarded
   by the matching flag; the guards are not visible in this excerpt. */
857 ++(p
->highbit_filename_count
);
859 ++(p
->whitespace_count
);
861 ++(p
->newline_count
);
863 return VISIT_CONTINUE
;
/* Visitor that enforces the result limit.  CONTEXT is a struct
   locate_limits; items_accepted is incremented and then compared with
   limit.  The action taken once the limit is reached is not visible in
   this excerpt; otherwise the visitor returns VISIT_CONTINUE. */
868 visit_limit(struct process_data
*procdata
, void *context
)
870 struct locate_limits
*p
= context
;
874 if (++p
->items_accepted
>= p
->limit
)
877 return VISIT_CONTINUE
;
/* Visitor installed by search_one_database() as the alternative to
   visit_limit(); CONTEXT is the same struct locate_limits.  Returns
   VISIT_CONTINUE.
   NOTE(review): presumably it just counts the entry without enforcing
   a limit; the counting statement is not visible in this excerpt. */
881 visit_count(struct process_data
*procdata
, void *context
)
883 struct locate_limits
*p
= context
;
888 return VISIT_CONTINUE
;
891 /* Emit the statistics.
894 print_stats(int argc
, size_t database_file_size
)
896 char hbuf
[LONGEST_HUMAN_READABLE
+ 1];
898 printf(_("Locate database size: %s bytes\n"),
899 human_readable ((uintmax_t) database_file_size
,
900 hbuf
, human_ceiling
, 1, 1));
902 printf( (results_were_filtered
?
903 _("Matching Filenames: %s ") :
904 _("All Filenames: %s ")),
905 human_readable (statistics
.total_filename_count
,
906 hbuf
, human_ceiling
, 1, 1));
907 printf(_("with a cumulative length of %s bytes"),
908 human_readable (statistics
.total_filename_length
,
909 hbuf
, human_ceiling
, 1, 1));
911 printf(_("\n\tof which %s contain whitespace, "),
912 human_readable (statistics
.whitespace_count
,
913 hbuf
, human_ceiling
, 1, 1));
914 printf(_("\n\t%s contain newline characters, "),
915 human_readable (statistics
.newline_count
,
916 hbuf
, human_ceiling
, 1, 1));
917 printf(_("\n\tand %s contain characters with the high bit set.\n"),
918 human_readable (statistics
.highbit_filename_count
,
919 hbuf
, human_ceiling
, 1, 1));
923 if (results_were_filtered
)
925 printf(_("Some filenames may have been filtered out, "
926 "so we cannot compute the compression ratio.\n"));
930 if (statistics
.total_filename_length
)
932 /* A negative compression ratio just means that the
933 * compressed database is larger than the list of
934 * filenames. This can happen for example for
935 * old-format databases containing a small list of short
936 * filenames, because the bigram list is 256 bytes.
938 printf(_("Compression ratio %4.2f%% (higher is better)\n"),
939 100.0 * ((double)statistics
.total_filename_length
940 - (double) database_file_size
)
941 / (double) statistics
.total_filename_length
);
945 printf(_("Compression ratio is undefined\n"));
953 * Return nonzero if the data we read in indicates that we are
954 * looking at a LOCATE02 locate database.
957 looking_at_gnu_locatedb (const char *data
, size_t len
)
959 if (len
< sizeof (LOCATEDB_MAGIC
))
961 else if (0 == memcmp (data
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
962 return 1; /* We saw the magic byte sequence */
968 * Return nonzero if the data we read in indicates that we are
969 * looking at an slocate database.
972 looking_at_slocate_locatedb (const char *filename
,
985 /* Check that the magic number is a one-byte string */
988 if (isdigit((unsigned char)data
[0]))
990 /* looks promising: the digit's numeric value is stored through SECLEVEL as the security level. */
991 *seclevel
= (data
[0] - '0');
995 /* Hmm, well it's probably an slocate database
996 * of some awesomely huge security level, like 2.
997 * We don't know how to handle those.
1000 _("locate database %s looks like an slocate "
1001 "database but it seems to have security level %c, "
1002 "which GNU findutils does not currently support"),
1003 quotearg_n_style(0, locale_quoting_style
, filename
),
1020 /* Definitely not slocate. */
/* Tell whether the host stores the least significant byte first.
   search_one_database() uses the answer to word its report on the
   database's byte order.  The visible code clears bytes 1..3 of a
   4-byte array member of U.
   NOTE(review): presumably byte 0 is set to 1 and an overlaid integer
   is then compared with 1; those lines are not visible in this
   excerpt. */
1028 i_am_little_endian(void)
1032 unsigned char uch
[4];
1037 u
.uch
[1] = u
.uch
[2] = u
.uch
[3] = 0;
1044 /* Print or count the entries in DBFILE that match shell globbing patterns in
1045 ARGV. Return the number of entries matched. */
1047 static unsigned long
1048 search_one_database (int argc
,
1057 struct locate_limits
*plimit
,
1063 char *pathpart
; /* A pattern to consider. */
1064 int argn
; /* Index to current pattern in argv. */
1065 int nread
; /* number of bytes read from an entry. */
1066 struct process_data procdata
; /* Storage for data shared with visitors. */
1067 int slocate_seclevel
;
1069 struct visitor
* pvis
; /* temp for determining past_pat_inspector. */
1070 const char *format_name
;
1071 enum ExistenceCheckType do_check_existence
;
1074 /* We may turn on existence checking for a given database.
1075 * We ensure that we can return to the previous behaviour
1076 * by using two variables, do_check_existence (which we act on)
1077 * and check_existence (which indicates the default before we
1078 * adjust it on the basis of what kind of database we're using
1080 do_check_existence
= check_existence
;
1084 regex_options
|= RE_ICASE
;
1087 procdata
.endian_state
= GetwordEndianStateInitial
;
1088 procdata
.len
= procdata
.count
= 0;
1089 procdata
.slocatedb_format
= 0;
1090 procdata
.itemcount
= 0;
1092 procdata
.dbfile
= dbfile
;
1095 /* Set up the inspection regime */
1097 lastinspector
= NULL
;
1098 past_pat_inspector
= NULL
;
1099 results_were_filtered
= false;
1101 procdata
.pathsize
= 1026; /* Increased as necessary by locate_read_str. */
1103 procdata
.pathsize
= 128; /* Increased as necessary by locate_read_str. */
1105 procdata
.original_filename
= xmalloc (procdata
.pathsize
);
1108 nread
= fread (procdata
.original_filename
, 1, SLOCATE_DB_MAGIC_LEN
,
1110 slocate_seclevel
= 0;
1111 if (looking_at_slocate_locatedb(procdata
.dbfile
,
1112 procdata
.original_filename
,
1117 _("%s is an slocate database. "
1118 "Support for these is new, expect problems for now."),
1119 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1121 /* slocate also uses frcode, but with a different header.
1122 * We handle the header here and then work with the data
1123 * in the normal way.
1125 if (slocate_seclevel
> 1)
1127 /* We don't know what those security levels mean,
1128 * so do nothing further
1131 _("%s is an slocate database of unsupported security level %d; skipping it."),
1132 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
),
1136 else if (slocate_seclevel
> 0)
1138 /* Don't show the filenames to the user if they don't exist.
1139 * Showing stats is safe since filenames are only counted
1140 * after the existence check
1142 if (ACCEPT_NON_EXISTING
== check_existence
)
1144 /* Do not allow the user to see a list of filenames that they
1148 _("You specified the -E option, but that option "
1149 "cannot be used with slocate-format databases "
1150 "with a non-zero security level. No results will be "
1151 "generated for this database.\n"));
1154 if (ACCEPT_EXISTING
!= do_check_existence
)
1156 if (enable_print
|| stats
)
1159 _("%s is an slocate database. "
1160 "Turning on the '-e' option."),
1161 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1163 do_check_existence
= ACCEPT_EXISTING
;
1166 add_visitor(visit_locate02_format
, NULL
);
1167 format_name
= "slocate";
1168 procdata
.slocatedb_format
= 1;
1174 procdata
.slocatedb_format
= 0;
1175 extend (&procdata
, sizeof(LOCATEDB_MAGIC
), 0u);
1176 nread2
= fread (procdata
.original_filename
+nread
, 1, sizeof (LOCATEDB_MAGIC
)-nread
,
1178 if (looking_at_gnu_locatedb(procdata
.original_filename
, nread
+nread2
))
1180 add_visitor(visit_locate02_format
, NULL
);
1181 format_name
= "GNU LOCATE02";
1183 else /* Use the old format */
1188 extend (&procdata
, 256u, 0u);
1189 /* Read the list of the most common bigrams in the database. */
1192 int more_read
= fread (procdata
.original_filename
+ nread
, 1,
1193 256 - nread
, procdata
.fp
);
1194 if ( (more_read
+ nread
) != 256 )
1197 _("Old-format locate database %s is "
1198 "too short to be valid"),
1199 quotearg_n_style(0, locale_quoting_style
, dbfile
));
1204 for (i
= 0; i
< 128; i
++)
1206 procdata
.bigram1
[i
] = procdata
.original_filename
[i
<< 1];
1207 procdata
.bigram2
[i
] = procdata
.original_filename
[(i
<< 1) + 1];
1209 format_name
= "old";
1211 add_visitor(visit_old_format
, NULL
);
1216 add_visitor(visit_basename
, NULL
);
1218 /* Add an inspector for each pattern we're looking for. */
1219 for ( argn
= 0; argn
< argc
; argn
++ )
1221 results_were_filtered
= true;
1222 pathpart
= argv
[argn
];
1225 struct regular_expression
*p
= xmalloc(sizeof(*p
));
1226 const char *error_message
= NULL
;
1228 memset (&p
->regex
, 0, sizeof (p
->regex
));
1230 re_set_syntax(regex_options
);
1231 p
->regex
.allocated
= 100;
1232 p
->regex
.buffer
= xmalloc (p
->regex
.allocated
);
1233 p
->regex
.fastmap
= NULL
;
1234 p
->regex
.syntax
= regex_options
;
1235 p
->regex
.translate
= NULL
;
1237 error_message
= re_compile_pattern (pathpart
, strlen (pathpart
),
1241 error (1, 0, "%s", error_message
);
1245 add_visitor(visit_regex
, p
);
1248 else if (contains_metacharacter(pathpart
))
1251 add_visitor(visit_globmatch_casefold
, pathpart
);
1253 add_visitor(visit_globmatch_nofold
, pathpart
);
1257 /* No glob characters used. Hence we match on
1258 * _any part_ of the filename, not just the
1259 * basename. This seems odd to me, but it is the
1260 * traditional behaviour.
1261 * James Youngman <jay@gnu.org>
1264 if (1 == MB_CUR_MAX
)
1266 /* As an optimisation, use a strstr() matcher if we are
1267 * in a unibyte locale. This can give a x2 speedup in
1268 * the C locale. Some light testing reveals that
1269 * glibc's strstr() is somewhere around 40% faster than
1270 * gnulib's, so we just use strstr().
1272 matcher
= ignore_case
?
1273 visit_substring_match_casefold_narrow
:
1274 visit_substring_match_nocasefold_narrow
;
1278 matcher
= ignore_case
?
1279 visit_substring_match_casefold_wide
:
1280 visit_substring_match_nocasefold_wide
;
1282 add_visitor(matcher
, pathpart
);
1286 pvis
= lastinspector
;
1288 /* We add visit_existing_*() as late as possible to reduce the
1289 * number of stat() calls.
1291 switch (do_check_existence
)
1293 case ACCEPT_EXISTING
:
1294 results_were_filtered
= true;
1295 if (follow_symlinks
) /* -L, default */
1296 add_visitor(visit_existing_follow
, NULL
);
1298 add_visitor(visit_existing_nofollow
, NULL
);
1301 case ACCEPT_NON_EXISTING
:
1302 results_were_filtered
= true;
1303 if (follow_symlinks
) /* -L, default */
1304 add_visitor(visit_non_existing_follow
, NULL
);
1306 add_visitor(visit_non_existing_nofollow
, NULL
);
1309 case ACCEPT_EITHER
: /* Default, neither -E nor -e */
1310 /* do nothing; no extra processing. */
1314 /* Security issue: The stats visitor must be added immediately
1315 * before the print visitor, because otherwise the -S option would
1316 * leak information about files that the caller cannot see.
1319 add_visitor(visit_stats
, &statistics
);
1323 if (print_quoted_filename
)
1324 add_visitor(visit_justprint_quoted
, NULL
);
1326 add_visitor(visit_justprint_unquoted
, NULL
);
1331 add_visitor(visit_limit
, plimit
);
1333 add_visitor(visit_count
, plimit
);
1338 past_pat_inspector
= pvis
->next
;
1340 mainprocessor
= process_and
;
1342 mainprocessor
= process_or
;
1345 mainprocessor
= process_simple
;
1349 printf(_("Database %s is in the %s format.\n"),
1355 procdata
.c
= getc (procdata
.fp
);
1356 /* If we are searching for filename patterns, the inspector list
1357 * will contain an entry for each pattern for which we are searching.
1359 while ( (procdata
.c
!= EOF
) &&
1360 (VISIT_ABORT
!= (mainprocessor
)(&procdata
)) )
1362 /* Do nothing; all the work is done in the visitor functions. */
1369 int host_little_endian
= i_am_little_endian();
1370 const char *little
= _("The database has little-endian "
1371 "machine-word encoding.\n");
1372 const char *big
= _("The database has big-endian "
1373 "machine-word encoding.\n");
1375 if (GetwordEndianStateNative
== procdata
.endian_state
)
1377 printf("%s", (host_little_endian
? little
: big
));
1379 else if (GetwordEndianStateSwab
== procdata
.endian_state
)
1381 printf("%s", (host_little_endian
? big
: little
));
1385 printf(_("The database machine-word encoding order "
1386 "is not obvious.\n"));
1390 print_stats(argc
, filesize
);
1393 if (ferror (procdata
.fp
))
1395 error (0, errno
, "%s",
1396 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1399 return plimit
->items_accepted
;
1405 extern char *version_string
;
1407 /* The name this program was run with. */
1411 usage (FILE *stream
)
1413 fprintf (stream
, _("\
1414 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1415 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1416 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1417 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1418 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1419 [--max-database-age D] [--version] [--help]\n\
1422 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
1426 REGEXTYPE_OPTION
= CHAR_MAX
+ 1,
1431 static struct option
const longopts
[] =
1433 {"database", required_argument
, NULL
, 'd'},
1434 {"existing", no_argument
, NULL
, 'e'},
1435 {"non-existing", no_argument
, NULL
, 'E'},
1436 {"ignore-case", no_argument
, NULL
, 'i'},
1437 {"all", no_argument
, NULL
, 'A'},
1438 {"help", no_argument
, NULL
, 'h'},
1439 {"version", no_argument
, NULL
, 'v'},
1440 {"null", no_argument
, NULL
, '0'},
1441 {"count", no_argument
, NULL
, 'c'},
1442 {"wholename", no_argument
, NULL
, 'w'},
1443 {"wholepath", no_argument
, NULL
, 'w'}, /* Synonym. */
1444 {"basename", no_argument
, NULL
, 'b'},
1445 {"print", no_argument
, NULL
, 'p'},
1446 {"stdio", no_argument
, NULL
, 's'},
1447 {"mmap", no_argument
, NULL
, 'm'},
1448 {"limit", required_argument
, NULL
, 'l'},
1449 {"regex", no_argument
, NULL
, 'r'},
1450 {"regextype", required_argument
, NULL
, REGEXTYPE_OPTION
},
1451 {"statistics", no_argument
, NULL
, 'S'},
1452 {"follow", no_argument
, NULL
, 'L'},
1453 {"nofollow", no_argument
, NULL
, 'P'},
1454 {"max-database-age", required_argument
, NULL
, MAX_DB_AGE
},
1455 {NULL
, no_argument
, NULL
, 0}
1462 const char * what
= "failed";
1463 const uid_t orig_euid
= geteuid();
1464 const uid_t uid
= getuid();
1465 const gid_t gid
= getgid();
1468 /* Use of setgroups() is restricted to root only. */
1471 /* We're either root or running setuid-root. */
1474 if (0 != setgroups(1u, groups
))
1476 what
= _("failed to drop group privileges");
1482 /* Drop any setuid privileges */
1483 if (uid
!= orig_euid
)
1487 /* We're really root anyway, but are setuid to something else. Leave it. */
1492 if (0 != setuid(getuid()))
1494 what
= _("failed to drop setuid privileges");
1498 /* Defend against the case where the attacker runs us with the
1499 * capability to call setuid() turned off, which on some systems
1500 * will cause the above attempt to drop privileges to fail (leaving us
1505 /* Check that we can no longer switch back to root */
1508 what
= _("Failed to fully drop privileges");
1509 /* The errno value here is not interesting (since
1510 * the system call we are complaining about
1511 * succeeded when we wanted it to fail). Arrange
1512 * for the call to error() not to print the errno
1513 * value by setting errno=0.
1522 /* Drop any setgid privileges */
1524 if (0 != setgid(gid
))
1526 what
= _("failed to drop setgid privileges");
1534 error(1, errno
, "%s",
1535 quotearg_n_style(0, locale_quoting_style
, what
));
1543 /* deliberate infinite loop */
1548 opendb(const char *name
)
1550 int fd
= open(name
, O_RDONLY
1551 #if defined O_LARGEFILE
1557 /* Make sure it won't survive an exec */
1558 if (0 != fcntl(fd
, F_SETFD
, FD_CLOEXEC
))
1568 dolocate (int argc
, char **argv
, int secure_db_fd
)
1571 unsigned long int found
= 0uL;
1573 int ignore_case
= 0;
1576 int basename_only
= 0;
1579 int regex_options
= RE_SYNTAX_EMACS
;
1584 int they_chose_db
= 0;
1585 bool did_stdin
= false; /* Set to prevent rereading stdin. */
1587 program_name
= argv
[0];
1589 #ifdef HAVE_SETLOCALE
1590 setlocale (LC_ALL
, "");
1592 bindtextdomain (PACKAGE
, LOCALEDIR
);
1593 textdomain (PACKAGE
);
1594 atexit (close_stdout
);
1597 limits
.items_accepted
= 0;
1599 quote_opts
= clone_quoting_options (NULL
);
1600 print_quoted_filename
= true;
1602 /* We cannot simultaneously trust $LOCATE_PATH and use the
1603 * setuid-access-controlled database, since that could cause a leak
1606 dbpath
= getenv ("LOCATE_PATH");
1612 check_existence
= ACCEPT_EITHER
;
1614 while ((optc
= getopt_long (argc
, argv
, "Abcd:eEil:prsm0SwHPL", longopts
, (int *) 0)) != -1)
1619 print_quoted_filename
= false; /* print filename 'raw'. */
1640 check_existence
= ACCEPT_EXISTING
;
1644 check_existence
= ACCEPT_NON_EXISTING
;
1656 /* XXX: nothing in the test suite for this option. */
1657 set_max_db_age(optarg
);
1665 printf (_("GNU locate version %s\n"), version_string
);
1666 printf (_("Built using GNU gnulib version %s\n"), gnulib_version
);
1677 case REGEXTYPE_OPTION
:
1678 regex_options
= get_regex_type(optarg
);
1686 follow_symlinks
= 1;
1689 /* In find, -P and -H differ in the way they handle paths
1690 * given on the command line. This is not relevant for
1691 * locate, but the -H option is supported because it is
1692 * probably more intuitive to do so.
1696 follow_symlinks
= 0;
1702 strtol_error err
= xstrtoumax(optarg
, &end
, 10, &limits
.limit
, NULL
);
1703 if (LONGINT_OK
!= err
)
1705 STRTOL_FATAL_ERROR(optarg
, _("argument to --limit"), err
);
1711 case 's': /* use stdio */
1712 case 'm': /* use mmap */
1713 /* These options are implemented simply for
1714 * compatibility with FreeBSD
1724 /* If the user gave the -d option or set LOCATE_PATH,
1725 * relinquish access to the secure database.
1729 if (secure_db_fd
>= 0)
1731 close(secure_db_fd
);
1736 if (!just_count
&& !stats
)
1746 if (!just_count
&& optind
== argc
)
1754 if (1 == isatty(STDOUT_FILENO
))
1755 stdout_is_a_tty
= true;
1757 stdout_is_a_tty
= false;
1760 next_element (dbpath
, 0); /* Initialize. */
1762 /* Bail out early if limit already reached. */
1763 while (!use_limit
|| limits
.limit
> limits
.items_accepted
)
1769 statistics
.compressed_bytes
=
1770 statistics
.total_filename_count
=
1771 statistics
.total_filename_length
=
1772 statistics
.whitespace_count
=
1773 statistics
.newline_count
=
1774 statistics
.highbit_filename_count
= 0u;
1778 /* Take the next element from the list of databases */
1779 e
= next_element ((char *) NULL
, 0);
1783 if (0 == strcmp (e
, "-"))
1788 _("warning: the locate database can only be read from stdin once."));
1800 if (0 == strlen(e
) || 0 == strcmp(e
, "."))
1805 /* open the database */
1809 error (0, errno
, "%s",
1810 quotearg_n_style(0, locale_quoting_style
, e
));
1817 if (-1 == secure_db_fd
)
1819 /* Already searched the database, it's time to exit the loop */
1824 e
= selected_secure_db
;
1830 /* Check the database to see if it is old. */
1833 error (0, errno
, "%s",
1834 quotearg_n_style(0, locale_quoting_style
, e
));
1835 /* continue anyway */
1836 filesize
= (off_t
)0;
1842 filesize
= st
.st_size
;
1844 if ((time_t)-1 == time(&now
))
1846 /* If we can't tell the time, we don't know how old the
1847 * database is. But since the message is just advisory,
1848 * we continue anyway.
1850 error (0, errno
, _("time system call failed"));
1854 double age
= difftime(now
, st
.st_mtime
);
1855 double warn_seconds
= SECONDS_PER_UNIT
* warn_number_units
;
1856 if (age
> warn_seconds
)
1859 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1861 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1862 quotearg_n_style(0, locale_quoting_style
, e
),
1863 warn_number_units
, _(warn_name_units
),
1864 (age
/(double)SECONDS_PER_UNIT
), _(warn_name_units
));
1869 fp
= fdopen(fd
, "r");
1872 error (0, errno
, "%s",
1873 quotearg_n_style(0, locale_quoting_style
, e
));
1877 /* Search this database for all patterns simultaneously */
1878 found
= search_one_database (argc
- optind
, &argv
[optind
],
1880 ignore_case
, print
, basename_only
,
1881 use_limit
, &limits
, stats
,
1882 op_and
, regex
, regex_options
);
1884 /* Close the database (even if it is stdin) */
1885 if (fclose (fp
) == EOF
)
1887 error (0, errno
, "%s",
1888 quotearg_n_style(0, locale_quoting_style
, e
));
1895 printf("%ld\n", found
);
1898 if (found
|| (use_limit
&& (limits
.limit
==0)) || stats
)
1904 #define ARRAYSIZE(a) (sizeof(a)/sizeof(a[0]))
1906 open_secure_db(void)
1910 const char * secure_db_list
[] =
1913 "/var/lib/slocate/slocate.db",
1916 for (i
=0; secure_db_list
[i
]; ++i
)
1918 fd
= opendb(secure_db_list
[i
]);
1921 selected_secure_db
= secure_db_list
[i
];
1929 main (int argc
, char **argv
)
1931 int dbfd
= open_secure_db();
1934 return dolocate(argc
, argv
, dbfd
);