1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 /* Usage: locate [options] pattern...
21 Scan a pathname list for the full pathname of a file, given only
22 a piece of the name (possibly containing shell globbing metacharacters).
23 The list has been processed with front-compression, which reduces
24 the list size by a factor of 4-5.
25 Recognizes two database formats, old and new. The old format is
26 bigram coded, which reduces space by a further 20-25% and uses the
27 following encoding of the database bytes:
29 0-28 likeliest differential counts + offset (14) to make nonnegative
30 30 escape code for out-of-range count to follow in next halfword
31 128-255 bigram codes (the 128 most common, as determined by `updatedb')
32 32-127 single character (printable) ASCII remainder
34 Earlier versions of GNU locate used to use a novel two-tiered
35 string search technique, which was described in Usenix ;login:, Vol
36 8, No 1, February/March, 1983, p. 8.
38 However, latterly code changes to provide additional functionality
39 became difficult to make with the existing reading scheme, and so
40 we no longer perform the matching as efficiently as we used to (that is,
41 we no longer use the same algorithm).
43 The old algorithm was:
45 First, match a metacharacter-free subpattern and a partial
46 pathname BACKWARDS to avoid full expansion of the pathname list.
47 The time savings is 40-50% over forward matching, which cannot
48 efficiently handle overlapped search patterns and compressed
51 Then, match the actual shell glob pattern (if in this form)
52 against the candidate pathnames using the slower shell filename
56 Written by James A. Woods <jwoods@adobe.com>.
57 Modified by David MacKenzie <djm@gnu.org>.
58 Additional work by James Youngman and Bas van Gompel.
66 #include <sys/types.h>
67 #include <grp.h> /* for setgroups() */
74 #include <stdbool.h> /* for bool/boolean */
76 /* The presence of unistd.h is assumed by gnulib these days, so we
77 * might as well assume it too.
79 /* We need <unistd.h> for isatty(). */
100 # include <libintl.h>
101 # define _(Text) gettext (Text)
103 # define _(Text) Text
104 #define textdomain(Domain)
105 #define bindtextdomain(Package, Directory)
108 # define N_(String) gettext_noop (String)
110 /* We used to use (String) instead of just String, but apparently ISO C
111 * doesn't allow this (at least, that's what HP said when someone reported
112 * this as a compiler bug). This is HP case number 1205608192. See
113 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
114 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
115 * like: static const char buf[] = ("string");
117 # define N_(String) String
120 #include "locatedb.h"
125 #include "closeout.h"
126 #include "nextelem.h"
129 #include "quotearg.h"
130 #include "printquoted.h"
131 #include "regextype.h"
132 #include "findutils-version.h"
134 /* Note that this evaluates Ch many times. */
136 # define TOUPPER(Ch) toupper (Ch)
137 # define TOLOWER(Ch) tolower (Ch)
139 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
140 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
143 /* typedef enum {false, true} boolean; */
145 /* Warn if a database is older than this. 8 days allows for a weekly
146 update that takes up to a day to perform. */
147 static unsigned int warn_number_units
= 8;
149 /* Printable name of units used in WARN_SECONDS */
150 static const char warn_name_units
[] = N_("days");
151 #define SECONDS_PER_UNIT (60 * 60 * 24)
155 VISIT_CONTINUE
= 1, /* please call the next visitor */
156 VISIT_ACCEPTED
= 2, /* accepted, call no further callbacks for this file */
157 VISIT_REJECTED
= 4, /* rejected, process next file. */
158 VISIT_ABORT
= 8 /* rejected, process no more files. */
161 enum ExistenceCheckType
163 ACCEPT_EITHER
, /* Corresponds to lack of -E/-e option */
164 ACCEPT_EXISTING
, /* Corresponds to option -e */
165 ACCEPT_NON_EXISTING
/* Corresponds to option -E */
168 /* Check for existence of files before printing them out? */
169 enum ExistenceCheckType check_existence
= ACCEPT_EITHER
;
171 static int follow_symlinks
= 1;
173 /* What to separate the results with. */
174 static int separator
= '\n';
176 static struct quoting_options
* quote_opts
= NULL
;
177 static bool stdout_is_a_tty
;
178 static bool print_quoted_filename
;
179 static bool results_were_filtered
;
181 static const char *selected_secure_db
= NULL
;
184 /* Change the number of days old the database can be
185 * before we complain about it.
188 set_max_db_age(const char *s
)
191 unsigned long int val
;
192 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
197 _("The argument for option --max-database-age must not be empty"));
201 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
202 * we would not be able to tell if that is the correct answer, or whether it
203 * signifies an error.
206 val
= strtoul(s
, &end
, 10);
208 /* Diagnose number too large, non-numbers and trailing junk. */
209 if ((ULONG_MAX
== val
&& ERANGE
== errno
) ||
210 (0 == val
&& EINVAL
== errno
))
213 _("Invalid argument %s for option --max-database-age"),
214 quotearg_n_style(0, locale_quoting_style
, s
));
218 /* errno wasn't set, don't print its message */
220 _("Invalid argument %s for option --max-database-age"),
221 quotearg_n_style(0, locale_quoting_style
, s
));
225 warn_number_units
= val
;
231 /* Read in a 16-bit int, high byte first (network byte order). */
239 x
= (signed char) fgetc (fp
) << 8;
240 x
|= (fgetc (fp
) & 0xff);
244 const char * const metacharacters
= "*?[]\\";
246 /* Return nonzero if S contains any shell glob characters.
249 contains_metacharacter(const char *s
)
251 if (NULL
== strpbrk(s
, metacharacters
))
259 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
260 * until we reach DELIMITER or end-of-file. We reallocate the buffer
261 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
262 * is made regarding the content of the data (i.e. the implementation is
263 * 8-bit clean, the only delimiter is DELIMITER).
265 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
266 * has been removed from gnulib.
268 * We call the function locate_read_str() to avoid a name clash with the curses
272 locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
279 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
284 needed
= offs
+ nread
+ 1u;
287 char *pnew
= realloc(*buf
, needed
);
290 return -1; /* FAIL */
298 memcpy((*buf
)+offs
, p
, nread
);
308 uintmax_t items_accepted
;
310 static struct locate_limits limits
;
315 uintmax_t compressed_bytes
;
316 uintmax_t total_filename_count
;
317 uintmax_t total_filename_length
;
318 uintmax_t whitespace_count
;
319 uintmax_t newline_count
;
320 uintmax_t highbit_filename_count
;
322 static struct locate_stats statistics
;
325 struct regular_expression
327 struct re_pattern_buffer regex
; /* for --regex */
333 int c
; /* An input byte. */
334 char itemcount
; /* Indicates we're at the beginning of an slocate db. */
335 int count
; /* The length of the prefix shared with the previous database entry. */
337 char *original_filename
; /* The current input database entry. */
338 size_t pathsize
; /* Amount allocated for it. */
339 char *munged_filename
; /* path or basename(path) */
340 FILE *fp
; /* The pathname database. */
341 const char *dbfile
; /* Its name, or "<stdin>" */
342 int slocatedb_format
; /* Allows us to cope with slocate's format variant */
343 GetwordEndianState endian_state
;
344 /* for the old database format,
345 the first and second characters of the most common bigrams. */
351 typedef int (*visitfunc
)(struct process_data
*procdata
,
358 struct visitor
*next
;
362 static struct visitor
*inspectors
= NULL
;
363 static struct visitor
*lastinspector
= NULL
;
364 static struct visitor
*past_pat_inspector
= NULL
;
366 static inline int visit(const struct visitor
*p
,
368 struct process_data
*procdata
,
369 const struct visitor
* const stop
)
371 register int result
= accept_flags
;
372 while ( (accept_flags
& result
) && (stop
!= p
) )
374 result
= (p
->inspector
)(procdata
, p
->context
);
380 /* 0 or 1 pattern(s) */
382 process_simple(struct process_data
*procdata
)
384 return visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, NULL
);
387 /* Accept if any pattern matches. */
389 process_or (struct process_data
*procdata
)
393 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_REJECTED
), procdata
, past_pat_inspector
);
394 if (result
== VISIT_CONTINUE
)
395 result
= VISIT_REJECTED
;
396 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
399 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
400 if (VISIT_CONTINUE
== result
)
401 return VISIT_ACCEPTED
;
406 /* Accept if all patterns match. */
408 process_and (struct process_data
*procdata
)
412 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, past_pat_inspector
);
413 if (result
== VISIT_CONTINUE
)
414 result
= VISIT_REJECTED
;
415 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
418 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
419 if (VISIT_CONTINUE
== result
)
420 return VISIT_ACCEPTED
;
425 typedef int (*processfunc
)(struct process_data
*procdata
);
427 static processfunc mainprocessor
= NULL
;
430 add_visitor(visitfunc fn
, void *context
)
432 struct visitor
*p
= xmalloc(sizeof(struct visitor
));
434 p
->context
= context
;
437 if (NULL
== lastinspector
)
439 lastinspector
= inspectors
= p
;
443 lastinspector
->next
= p
;
449 visit_justprint_quoted(struct process_data
*procdata
, void *context
)
452 print_quoted (stdout
, quote_opts
, stdout_is_a_tty
,
454 procdata
->original_filename
);
456 return VISIT_CONTINUE
;
460 visit_justprint_unquoted(struct process_data
*procdata
, void *context
)
463 fputs(procdata
->original_filename
, stdout
);
465 return VISIT_CONTINUE
;
469 toolong (struct process_data
*procdata
)
472 _("locate database %s contains a "
473 "filename longer than locate can handle"),
478 extend (struct process_data
*procdata
, size_t siz1
, size_t siz2
)
480 /* Figure out if the addition operation is safe before performing it. */
481 if (SIZE_MAX
- siz1
< siz2
)
485 else if (procdata
->pathsize
< (siz1
+siz2
))
487 procdata
->pathsize
= siz1
+siz2
;
488 procdata
->original_filename
= x2nrealloc (procdata
->original_filename
,
495 visit_old_format(struct process_data
*procdata
, void *context
)
500 if (EOF
== procdata
->c
)
503 /* Get the offset in the path where this path info starts. */
504 if (procdata
->c
== LOCATEDB_OLD_ESCAPE
)
509 procdata
->count
-= LOCATEDB_OLD_OFFSET
;
510 minval
= (0 - procdata
->count
);
511 if (procdata
->count
>= 0)
512 maxval
= (procdata
->len
- procdata
->count
);
514 maxval
= (procdata
->len
- 0);
515 word
= getword(procdata
->fp
, procdata
->dbfile
,
516 minval
, maxval
, &procdata
->endian_state
);
517 procdata
->count
+= word
;
518 assert(procdata
->count
>= 0);
522 procdata
->count
+= (procdata
->c
- LOCATEDB_OLD_OFFSET
);
523 assert(procdata
->count
>= 0);
526 /* Overlay the old path with the remainder of the new. Read
527 * more data until we get to the next filename.
529 for (i
=procdata
->count
;
530 (procdata
->c
= getc (procdata
->fp
)) > LOCATEDB_OLD_ESCAPE
;)
532 if (EOF
== procdata
->c
)
535 if (procdata
->c
< 0200)
537 /* An ordinary character. */
538 extend (procdata
, i
, 1u);
539 procdata
->original_filename
[i
++] = procdata
->c
;
543 /* Bigram markers have the high bit set. */
544 extend (procdata
, i
, 2u);
546 procdata
->original_filename
[i
++] = procdata
->bigram1
[procdata
->c
];
547 procdata
->original_filename
[i
++] = procdata
->bigram2
[procdata
->c
];
551 /* Consider the case where we executed the loop body zero times; we
552 * still need space for the terminating null byte.
554 extend (procdata
, i
, 1u);
555 procdata
->original_filename
[i
] = 0;
557 procdata
->munged_filename
= procdata
->original_filename
;
559 return VISIT_CONTINUE
;
563 visit_locate02_format(struct process_data
*procdata
, void *context
)
569 if (procdata
->slocatedb_format
)
571 if (procdata
->itemcount
== 0)
573 ungetc(procdata
->c
, procdata
->fp
);
577 else if (procdata
->itemcount
== 1)
579 procdata
->count
= procdata
->len
-1;
583 if (procdata
->c
== LOCATEDB_ESCAPE
)
584 procdata
->count
+= (short)get_short (procdata
->fp
);
585 else if (procdata
->c
> 127)
586 procdata
->count
+= procdata
->c
- 256;
588 procdata
->count
+= procdata
->c
;
593 if (procdata
->c
== LOCATEDB_ESCAPE
)
594 procdata
->count
+= (short)get_short (procdata
->fp
);
595 else if (procdata
->c
> 127)
596 procdata
->count
+= procdata
->c
- 256;
598 procdata
->count
+= procdata
->c
;
601 if (procdata
->count
> procdata
->len
|| procdata
->count
< 0)
603 /* This should not happen generally, but since we're
604 * reading in data which is outside our control, we
607 error(1, 0, _("locate database %s is corrupt or invalid"),
608 quotearg_n_style(0, locale_quoting_style
, procdata
->dbfile
));
611 /* Overlay the old path with the remainder of the new. */
612 nread
= locate_read_str (&procdata
->original_filename
,
614 procdata
->fp
, 0, procdata
->count
);
617 procdata
->c
= getc (procdata
->fp
);
618 procdata
->len
= procdata
->count
+ nread
;
619 s
= procdata
->original_filename
+ procdata
->len
- 1; /* Move to the last char in path. */
620 assert (s
[0] != '\0');
621 assert (s
[1] == '\0'); /* Our terminator. */
622 assert (s
[2] == '\0'); /* Added by locate_read_str. */
624 procdata
->munged_filename
= procdata
->original_filename
;
626 if (procdata
->slocatedb_format
)
628 /* Don't increment indefinitely, it might overflow. */
629 if (procdata
->itemcount
< 6)
631 ++(procdata
->itemcount
);
636 return VISIT_CONTINUE
;
640 visit_basename(struct process_data
*procdata
, void *context
)
643 procdata
->munged_filename
= last_component (procdata
->original_filename
);
645 return VISIT_CONTINUE
;
649 /* visit_existing_follow implements -L -e */
651 visit_existing_follow(struct process_data
*procdata
, void *context
)
656 /* munged_filename has been converted in some way (to lower case,
657 * or is just the base name of the file), and original_filename has not.
658 * Hence only original_filename is still actually the name of the file
659 * whose existence we would need to check.
661 if (stat(procdata
->original_filename
, &st
) != 0)
663 return VISIT_REJECTED
;
667 return VISIT_CONTINUE
;
671 /* visit_non_existing_follow implements -L -E */
673 visit_non_existing_follow(struct process_data
*procdata
, void *context
)
678 /* munged_filename has been converted in some way (to lower case,
679 * or is just the base name of the file), and original_filename has not.
680 * Hence only original_filename is still actually the name of the file
681 * whose existence we would need to check.
683 if (stat(procdata
->original_filename
, &st
) == 0)
685 return VISIT_REJECTED
;
689 return VISIT_CONTINUE
;
693 /* visit_existing_nofollow implements -P -e */
695 visit_existing_nofollow(struct process_data
*procdata
, void *context
)
700 /* munged_filename has been converted in some way (to lower case,
701 * or is just the base name of the file), and original_filename has not.
702 * Hence only original_filename is still actually the name of the file
703 * whose existence we would need to check.
705 if (lstat(procdata
->original_filename
, &st
) != 0)
707 return VISIT_REJECTED
;
711 return VISIT_CONTINUE
;
715 /* visit_non_existing_nofollow implements -P -E */
717 visit_non_existing_nofollow(struct process_data
*procdata
, void *context
)
722 /* munged_filename has been converted in some way (to lower case,
723 * or is just the base name of the file), and original_filename has not.
724 * Hence only original_filename is still actually the name of the file
725 * whose existence we would need to check.
727 if (lstat(procdata
->original_filename
, &st
) == 0)
729 return VISIT_REJECTED
;
733 return VISIT_CONTINUE
;
738 visit_substring_match_nocasefold_wide(struct process_data
*procdata
, void *context
)
740 const char *pattern
= context
;
742 if (NULL
!= mbsstr(procdata
->munged_filename
, pattern
))
743 return VISIT_ACCEPTED
;
745 return VISIT_REJECTED
;
749 visit_substring_match_nocasefold_narrow(struct process_data
*procdata
, void *context
)
751 const char *pattern
= context
;
752 assert(MB_CUR_MAX
== 1);
753 if (NULL
!= strstr(procdata
->munged_filename
, pattern
))
754 return VISIT_ACCEPTED
;
756 return VISIT_REJECTED
;
760 visit_substring_match_casefold_wide(struct process_data
*procdata
, void *context
)
762 const char *pattern
= context
;
764 if (NULL
!= mbscasestr(procdata
->munged_filename
, pattern
))
765 return VISIT_ACCEPTED
;
767 return VISIT_REJECTED
;
772 visit_substring_match_casefold_narrow(struct process_data
*procdata
, void *context
)
774 const char *pattern
= context
;
776 assert(MB_CUR_MAX
== 1);
777 if (NULL
!= strcasestr(procdata
->munged_filename
, pattern
))
778 return VISIT_ACCEPTED
;
780 return VISIT_REJECTED
;
785 visit_globmatch_nofold(struct process_data
*procdata
, void *context
)
787 const char *glob
= context
;
788 if (fnmatch(glob
, procdata
->munged_filename
, 0) != 0)
789 return VISIT_REJECTED
;
791 return VISIT_ACCEPTED
;
796 visit_globmatch_casefold(struct process_data
*procdata
, void *context
)
798 const char *glob
= context
;
799 if (fnmatch(glob
, procdata
->munged_filename
, FNM_CASEFOLD
) != 0)
800 return VISIT_REJECTED
;
802 return VISIT_ACCEPTED
;
807 visit_regex(struct process_data
*procdata
, void *context
)
809 struct regular_expression
*p
= context
;
810 const size_t len
= strlen(procdata
->munged_filename
);
812 int rv
= re_search (&p
->regex
, procdata
->munged_filename
,
814 (struct re_registers
*) NULL
);
817 return VISIT_REJECTED
; /* no match (-1), or internal error (-2) */
821 return VISIT_ACCEPTED
; /* match */
827 visit_stats(struct process_data
*procdata
, void *context
)
829 struct locate_stats
*p
= context
;
830 size_t len
= strlen(procdata
->original_filename
);
832 int highbit
, whitespace
, newline
;
834 ++(p
->total_filename_count
);
835 p
->total_filename_length
+= len
;
837 highbit
= whitespace
= newline
= 0;
838 for (s
=procdata
->original_filename
; *s
; ++s
)
840 if ( (int)(*s
) & 128 )
844 newline
= whitespace
= 1;
846 else if (isspace((unsigned char)*s
))
853 ++(p
->highbit_filename_count
);
855 ++(p
->whitespace_count
);
857 ++(p
->newline_count
);
859 return VISIT_CONTINUE
;
864 visit_limit(struct process_data
*procdata
, void *context
)
866 struct locate_limits
*p
= context
;
870 if (++p
->items_accepted
>= p
->limit
)
873 return VISIT_CONTINUE
;
877 visit_count(struct process_data
*procdata
, void *context
)
879 struct locate_limits
*p
= context
;
884 return VISIT_CONTINUE
;
887 /* Emit the statistics.
890 print_stats(int argc
, size_t database_file_size
)
892 char hbuf
[LONGEST_HUMAN_READABLE
+ 1];
894 printf(_("Locate database size: %s bytes\n"),
895 human_readable ((uintmax_t) database_file_size
,
896 hbuf
, human_ceiling
, 1, 1));
898 printf( (results_were_filtered
?
899 _("Matching Filenames: %s ") :
900 _("All Filenames: %s ")),
901 human_readable (statistics
.total_filename_count
,
902 hbuf
, human_ceiling
, 1, 1));
903 printf(_("with a cumulative length of %s bytes"),
904 human_readable (statistics
.total_filename_length
,
905 hbuf
, human_ceiling
, 1, 1));
907 printf(_("\n\tof which %s contain whitespace, "),
908 human_readable (statistics
.whitespace_count
,
909 hbuf
, human_ceiling
, 1, 1));
910 printf(_("\n\t%s contain newline characters, "),
911 human_readable (statistics
.newline_count
,
912 hbuf
, human_ceiling
, 1, 1));
913 printf(_("\n\tand %s contain characters with the high bit set.\n"),
914 human_readable (statistics
.highbit_filename_count
,
915 hbuf
, human_ceiling
, 1, 1));
919 if (results_were_filtered
)
921 printf(_("Some filenames may have been filtered out, "
922 "so we cannot compute the compression ratio.\n"));
926 if (statistics
.total_filename_length
)
928 /* A negative compression ratio just means that the
929 * compressed database is larger than the list of
930 * filenames. This can happen for example for
931 * old-format databases containing a small list of short
932 * filenames, because the bigram list is 256 bytes.
934 printf(_("Compression ratio %4.2f%% (higher is better)\n"),
935 100.0 * ((double)statistics
.total_filename_length
936 - (double) database_file_size
)
937 / (double) statistics
.total_filename_length
);
941 printf(_("Compression ratio is undefined\n"));
949 * Return nonzero if the data we read in indicates that we are
950 * looking at a LOCATE02 locate database.
953 looking_at_gnu_locatedb (const char *data
, size_t len
)
955 if (len
< sizeof (LOCATEDB_MAGIC
))
957 else if (0 == memcmp (data
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
958 return 1; /* We saw the magic byte sequence */
964 * Return nonzero if the data we read in indicates that we are
965 * looking at an slocate database.
968 looking_at_slocate_locatedb (const char *filename
,
981 /* Check that the magic number is a one-byte string */
984 if (isdigit((unsigned char)data
[0]))
986 /* looks promising. */
987 *seclevel
= (data
[0] - '0');
991 /* Hmm, well it's probably an slocate database
992 * of some awesomely huge security level, like 2.
993 * We don't know how to handle those.
996 _("locate database %s looks like an slocate "
997 "database but it seems to have security level %c, "
998 "which GNU findutils does not currently support"),
999 quotearg_n_style(0, locale_quoting_style
, filename
),
1016 /* Definitely not slocate. */
1024 i_am_little_endian(void)
1028 unsigned char uch
[4];
1033 u
.uch
[1] = u
.uch
[2] = u
.uch
[3] = 0;
1040 /* Print or count the entries in DBFILE that match shell globbing patterns in
1041 ARGV. Return the number of entries matched. */
1043 static unsigned long
1044 search_one_database (int argc
,
1053 struct locate_limits
*plimit
,
1059 char *pathpart
; /* A pattern to consider. */
1060 int argn
; /* Index to current pattern in argv. */
1061 int nread
; /* number of bytes read from an entry. */
1062 struct process_data procdata
; /* Storage for data shared with visitors. */
1063 int slocate_seclevel
;
1065 struct visitor
* pvis
; /* temp for determining past_pat_inspector. */
1066 const char *format_name
;
1067 enum ExistenceCheckType do_check_existence
;
1070 /* We may turn on existence checking for a given database.
1071 * We ensure that we can return to the previous behaviour
1072 * by using two variables, do_check_existence (which we act on)
1073 * and check_existence (which indicates the default before we
1074 * adjust it on the basis of what kind of database we're using
1076 do_check_existence
= check_existence
;
1080 regex_options
|= RE_ICASE
;
1083 procdata
.endian_state
= GetwordEndianStateInitial
;
1084 procdata
.len
= procdata
.count
= 0;
1085 procdata
.slocatedb_format
= 0;
1086 procdata
.itemcount
= 0;
1088 procdata
.dbfile
= dbfile
;
1091 /* Set up the inspection regime */
1093 lastinspector
= NULL
;
1094 past_pat_inspector
= NULL
;
1095 results_were_filtered
= false;
1097 procdata
.pathsize
= 1026; /* Increased as necessary by locate_read_str. */
1099 procdata
.pathsize
= 128; /* Increased as necessary by locate_read_str. */
1101 procdata
.original_filename
= xmalloc (procdata
.pathsize
);
1104 nread
= fread (procdata
.original_filename
, 1, SLOCATE_DB_MAGIC_LEN
,
1106 slocate_seclevel
= 0;
1107 if (looking_at_slocate_locatedb(procdata
.dbfile
,
1108 procdata
.original_filename
,
1113 _("%s is an slocate database. "
1114 "Support for these is new, expect problems for now."),
1115 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1117 /* slocate also uses frcode, but with a different header.
1118 * We handle the header here and then work with the data
1119 * in the normal way.
1121 if (slocate_seclevel
> 1)
1123 /* We don't know what those security levels mean,
1124 * so do nothing further
1127 _("%s is an slocate database of unsupported security level %d; skipping it."),
1128 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
),
1132 else if (slocate_seclevel
> 0)
1134 /* Don't show the filenames to the user if they don't exist.
1135 * Showing stats is safe since filenames are only counted
1136 * after the existence check
1138 if (ACCEPT_NON_EXISTING
== check_existence
)
1140 /* Do not allow the user to see a list of filenames that they
1144 _("You specified the -E option, but that option "
1145 "cannot be used with slocate-format databases "
1146 "with a non-zero security level. No results will be "
1147 "generated for this database.\n"));
1150 if (ACCEPT_EXISTING
!= do_check_existence
)
1152 if (enable_print
|| stats
)
1155 _("%s is an slocate database. "
1156 "Turning on the '-e' option."),
1157 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1159 do_check_existence
= ACCEPT_EXISTING
;
1162 add_visitor(visit_locate02_format
, NULL
);
1163 format_name
= "slocate";
1164 procdata
.slocatedb_format
= 1;
1170 procdata
.slocatedb_format
= 0;
1171 extend (&procdata
, sizeof(LOCATEDB_MAGIC
), 0u);
1172 nread2
= fread (procdata
.original_filename
+nread
, 1, sizeof (LOCATEDB_MAGIC
)-nread
,
1174 if (looking_at_gnu_locatedb(procdata
.original_filename
, nread
+nread2
))
1176 add_visitor(visit_locate02_format
, NULL
);
1177 format_name
= "GNU LOCATE02";
1179 else /* Use the old format */
1184 extend (&procdata
, 256u, 0u);
1185 /* Read the list of the most common bigrams in the database. */
1188 int more_read
= fread (procdata
.original_filename
+ nread
, 1,
1189 256 - nread
, procdata
.fp
);
1190 if ( (more_read
+ nread
) != 256 )
1193 _("Old-format locate database %s is "
1194 "too short to be valid"),
1195 quotearg_n_style(0, locale_quoting_style
, dbfile
));
1200 for (i
= 0; i
< 128; i
++)
1202 procdata
.bigram1
[i
] = procdata
.original_filename
[i
<< 1];
1203 procdata
.bigram2
[i
] = procdata
.original_filename
[(i
<< 1) + 1];
1205 format_name
= "old";
1207 add_visitor(visit_old_format
, NULL
);
1212 add_visitor(visit_basename
, NULL
);
1214 /* Add an inspector for each pattern we're looking for. */
1215 for ( argn
= 0; argn
< argc
; argn
++ )
1217 results_were_filtered
= true;
1218 pathpart
= argv
[argn
];
1221 struct regular_expression
*p
= xmalloc(sizeof(*p
));
1222 const char *error_message
= NULL
;
1224 memset (&p
->regex
, 0, sizeof (p
->regex
));
1226 re_set_syntax(regex_options
);
1227 p
->regex
.allocated
= 100;
1228 p
->regex
.buffer
= xmalloc (p
->regex
.allocated
);
1229 p
->regex
.fastmap
= NULL
;
1230 p
->regex
.syntax
= regex_options
;
1231 p
->regex
.translate
= NULL
;
1233 error_message
= re_compile_pattern (pathpart
, strlen (pathpart
),
1237 error (1, 0, "%s", error_message
);
1241 add_visitor(visit_regex
, p
);
1244 else if (contains_metacharacter(pathpart
))
1247 add_visitor(visit_globmatch_casefold
, pathpart
);
1249 add_visitor(visit_globmatch_nofold
, pathpart
);
1253 /* No glob characters used. Hence we match on
1254 * _any part_ of the filename, not just the
1255 * basename. This seems odd to me, but it is the
1256 * traditional behaviour.
1257 * James Youngman <jay@gnu.org>
1260 if (1 == MB_CUR_MAX
)
1262 /* As an optimisation, use a strstr() matcher if we are
1263 * in a unibyte locale. This can give a x2 speedup in
1264 * the C locale. Some light testing reveals that
1265 * glibc's strstr() is somewhere around 40% faster than
1266 * gnulib's, so we just use strstr().
1268 matcher
= ignore_case
?
1269 visit_substring_match_casefold_narrow
:
1270 visit_substring_match_nocasefold_narrow
;
1274 matcher
= ignore_case
?
1275 visit_substring_match_casefold_wide
:
1276 visit_substring_match_nocasefold_wide
;
1278 add_visitor(matcher
, pathpart
);
1282 pvis
= lastinspector
;
1284 /* We add visit_existing_*() as late as possible to reduce the
1285 * number of stat() calls.
1287 switch (do_check_existence
)
1289 case ACCEPT_EXISTING
:
1290 results_were_filtered
= true;
1291 if (follow_symlinks
) /* -L, default */
1292 add_visitor(visit_existing_follow
, NULL
);
1294 add_visitor(visit_existing_nofollow
, NULL
);
1297 case ACCEPT_NON_EXISTING
:
1298 results_were_filtered
= true;
1299 if (follow_symlinks
) /* -L, default */
1300 add_visitor(visit_non_existing_follow
, NULL
);
1302 add_visitor(visit_non_existing_nofollow
, NULL
);
1305 case ACCEPT_EITHER
: /* Default, neither -E nor -e */
1306 /* do nothing; no extra processing. */
1310 /* Security issue: The stats visitor must be added immediately
1311 * before the print visitor, because otherwise the -S option would
1312 * leak information about files that the caller cannot see.
1315 add_visitor(visit_stats
, &statistics
);
1319 if (print_quoted_filename
)
1320 add_visitor(visit_justprint_quoted
, NULL
);
1322 add_visitor(visit_justprint_unquoted
, NULL
);
1327 add_visitor(visit_limit
, plimit
);
1329 add_visitor(visit_count
, plimit
);
1334 past_pat_inspector
= pvis
->next
;
1336 mainprocessor
= process_and
;
1338 mainprocessor
= process_or
;
1341 mainprocessor
= process_simple
;
1345 printf(_("Database %s is in the %s format.\n"),
1351 procdata
.c
= getc (procdata
.fp
);
1352 /* If we are searching for filename patterns, the inspector list
1353 * will contain an entry for each pattern for which we are searching.
1355 while ( (procdata
.c
!= EOF
) &&
1356 (VISIT_ABORT
!= (mainprocessor
)(&procdata
)) )
1358 /* Do nothing; all the work is done in the visitor functions. */
1365 int host_little_endian
= i_am_little_endian();
1366 const char *little
= _("The database has little-endian "
1367 "machine-word encoding.\n");
1368 const char *big
= _("The database has big-endian "
1369 "machine-word encoding.\n");
1371 if (GetwordEndianStateNative
== procdata
.endian_state
)
1373 printf("%s", (host_little_endian
? little
: big
));
1375 else if (GetwordEndianStateSwab
== procdata
.endian_state
)
1377 printf("%s", (host_little_endian
? big
: little
));
1381 printf(_("The database machine-word encoding order "
1382 "is not obvious.\n"));
1386 print_stats(argc
, filesize
);
1389 if (ferror (procdata
.fp
))
1391 error (0, errno
, "%s",
1392 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1395 return plimit
->items_accepted
;
1401 extern char *version_string
;
1403 /* The name this program was run with. */
1407 usage (FILE *stream
)
1409 fprintf (stream
, _("\
1410 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1411 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1412 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1413 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1414 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1415 [--max-database-age D] [--version] [--help]\n\
1418 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
1422 REGEXTYPE_OPTION
= CHAR_MAX
+ 1,
1427 static struct option
const longopts
[] =
1429 {"database", required_argument
, NULL
, 'd'},
1430 {"existing", no_argument
, NULL
, 'e'},
1431 {"non-existing", no_argument
, NULL
, 'E'},
1432 {"ignore-case", no_argument
, NULL
, 'i'},
1433 {"all", no_argument
, NULL
, 'A'},
1434 {"help", no_argument
, NULL
, 'h'},
1435 {"version", no_argument
, NULL
, 'v'},
1436 {"null", no_argument
, NULL
, '0'},
1437 {"count", no_argument
, NULL
, 'c'},
1438 {"wholename", no_argument
, NULL
, 'w'},
1439 {"wholepath", no_argument
, NULL
, 'w'}, /* Synonym. */
1440 {"basename", no_argument
, NULL
, 'b'},
1441 {"print", no_argument
, NULL
, 'p'},
1442 {"stdio", no_argument
, NULL
, 's'},
1443 {"mmap", no_argument
, NULL
, 'm'},
1444 {"limit", required_argument
, NULL
, 'l'},
1445 {"regex", no_argument
, NULL
, 'r'},
1446 {"regextype", required_argument
, NULL
, REGEXTYPE_OPTION
},
1447 {"statistics", no_argument
, NULL
, 'S'},
1448 {"follow", no_argument
, NULL
, 'L'},
1449 {"nofollow", no_argument
, NULL
, 'P'},
1450 {"max-database-age", required_argument
, NULL
, MAX_DB_AGE
},
1451 {NULL
, no_argument
, NULL
, 0}
1458 const char * what
= "failed";
1459 const uid_t orig_euid
= geteuid();
1460 const uid_t uid
= getuid();
1461 const gid_t gid
= getgid();
1464 /* Use of setgroups() is restricted to root only. */
1467 /* We're either root or running setuid-root. */
1470 if (0 != setgroups(1u, groups
))
1472 what
= _("failed to drop group privileges");
1478 /* Drop any setuid privileges */
1479 if (uid
!= orig_euid
)
1483 /* We're really root anyway, but are setuid to something else. Leave it. */
1488 if (0 != setuid(getuid()))
1490 what
= _("failed to drop setuid privileges");
1494 /* Defend against the case where the attacker runs us with the
1495 * capability to call setuid() turned off, which on some systems
1496 * will cause the above attempt to drop privileges to fail (leaving us
1501 /* Check that we can no longer switch back to root */
1504 what
= _("Failed to fully drop privileges");
1505 /* The errno value here is not interesting (since
1506 * the system call we are complaining about
1507 * succeeded when we wanted it to fail). Arrange
1508 * for the call to error() not to print the errno
1509 * value by setting errno=0.
1518 /* Drop any setgid privileges */
1520 if (0 != setgid(gid
))
1522 what
= _("failed to drop setgid privileges");
1530 error(1, errno
, "%s",
1531 quotearg_n_style(0, locale_quoting_style
, what
));
1539 /* deliberate infinite loop */
1544 opendb(const char *name
)
1546 int fd
= open(name
, O_RDONLY
1547 #if defined O_LARGEFILE
1553 /* Make sure it won't survive an exec */
1554 if (0 != fcntl(fd
, F_SETFD
, FD_CLOEXEC
))
1564 dolocate (int argc
, char **argv
, int secure_db_fd
)
1567 unsigned long int found
= 0uL;
1568 int ignore_case
= 0;
1571 int basename_only
= 0;
1574 int regex_options
= RE_SYNTAX_EMACS
;
1579 int they_chose_db
= 0;
1580 bool did_stdin
= false; /* Set to prevent rereading stdin. */
1582 program_name
= argv
[0];
1584 #ifdef HAVE_SETLOCALE
1585 setlocale (LC_ALL
, "");
1587 bindtextdomain (PACKAGE
, LOCALEDIR
);
1588 textdomain (PACKAGE
);
1589 atexit (close_stdout
);
1592 limits
.items_accepted
= 0;
1594 quote_opts
= clone_quoting_options (NULL
);
1595 print_quoted_filename
= true;
1597 /* We cannot simultaneously trust $LOCATE_PATH and use the
1598 * setuid-access-controlled database, since that could cause a leak
1601 dbpath
= getenv ("LOCATE_PATH");
1607 check_existence
= ACCEPT_EITHER
;
1612 int optc
= getopt_long (argc
, argv
, "Abcd:eEil:prsm0SwHPL", longopts
,
1621 print_quoted_filename
= false; /* print filename 'raw'. */
1642 check_existence
= ACCEPT_EXISTING
;
1646 check_existence
= ACCEPT_NON_EXISTING
;
1658 /* XXX: nothing in the test suite for this option. */
1659 set_max_db_age (optarg
);
1667 display_findutils_version ("locate");
1678 case REGEXTYPE_OPTION
:
1679 regex_options
= get_regex_type (optarg
);
1687 follow_symlinks
= 1;
1690 /* In find, -P and -H differ in the way they handle paths
1691 * given on the command line. This is not relevant for
1692 * locate, but the -H option is supported because it is
1693 * probably more intuitive to do so.
1697 follow_symlinks
= 0;
1703 strtol_error err
= xstrtoumax (optarg
, &end
, 10, &limits
.limit
,
1705 if (LONGINT_OK
!= err
)
1706 xstrtol_fatal (err
, opti
, optc
, longopts
, optarg
);
1711 case 's': /* use stdio */
1712 case 'm': /* use mmap */
1713 /* These options are implemented simply for
1714 * compatibility with FreeBSD
1725 /* If the user gave the -d option or set LOCATE_PATH,
1726 * relinquish access to the secure database.
1730 if (secure_db_fd
>= 0)
1732 close(secure_db_fd
);
1737 if (!just_count
&& !stats
)
1747 if (!just_count
&& optind
== argc
)
1755 if (1 == isatty(STDOUT_FILENO
))
1756 stdout_is_a_tty
= true;
1758 stdout_is_a_tty
= false;
1761 next_element (dbpath
, 0); /* Initialize. */
1763 /* Bail out early if limit already reached. */
1764 while (!use_limit
|| limits
.limit
> limits
.items_accepted
)
1770 statistics
.compressed_bytes
=
1771 statistics
.total_filename_count
=
1772 statistics
.total_filename_length
=
1773 statistics
.whitespace_count
=
1774 statistics
.newline_count
=
1775 statistics
.highbit_filename_count
= 0u;
1779 /* Take the next element from the list of databases */
1780 e
= next_element ((char *) NULL
, 0);
1784 if (0 == strcmp (e
, "-"))
1789 _("warning: the locate database can only be read from stdin once."));
1801 if (0 == strlen(e
) || 0 == strcmp(e
, "."))
1806 /* open the database */
1810 error (0, errno
, "%s",
1811 quotearg_n_style(0, locale_quoting_style
, e
));
1818 if (-1 == secure_db_fd
)
1820 /* Already searched the database, it's time to exit the loop */
1825 e
= selected_secure_db
;
1831 /* Check the database to see if it is old. */
1834 error (0, errno
, "%s",
1835 quotearg_n_style(0, locale_quoting_style
, e
));
1836 /* continue anyway */
1837 filesize
= (off_t
)0;
1843 filesize
= st
.st_size
;
1845 if ((time_t)-1 == time(&now
))
1847 /* If we can't tell the time, we don't know how old the
1848 * database is. But since the message is just advisory,
1849 * we continue anyway.
1851 error (0, errno
, _("time system call failed"));
1855 double age
= difftime(now
, st
.st_mtime
);
1856 double warn_seconds
= SECONDS_PER_UNIT
* warn_number_units
;
1857 if (age
> warn_seconds
)
1860 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1862 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1863 quotearg_n_style(0, locale_quoting_style
, e
),
1864 warn_number_units
, _(warn_name_units
),
1865 (age
/(double)SECONDS_PER_UNIT
), _(warn_name_units
));
1870 fp
= fdopen(fd
, "r");
1873 error (0, errno
, "%s",
1874 quotearg_n_style(0, locale_quoting_style
, e
));
1878 /* Search this database for all patterns simultaneously */
1879 found
= search_one_database (argc
- optind
, &argv
[optind
],
1881 ignore_case
, print
, basename_only
,
1882 use_limit
, &limits
, stats
,
1883 op_and
, regex
, regex_options
);
1885 /* Close the database (even if it is stdin) */
1886 if (fclose (fp
) == EOF
)
1888 error (0, errno
, "%s",
1889 quotearg_n_style(0, locale_quoting_style
, e
));
1896 printf("%ld\n", found
);
1899 if (found
|| (use_limit
&& (limits
.limit
==0)) || stats
)
1905 #define ARRAYSIZE(a) (sizeof(a)/sizeof(a[0]))
1907 open_secure_db(void)
1911 const char * secure_db_list
[] =
1914 "/var/lib/slocate/slocate.db",
1917 for (i
=0; secure_db_list
[i
]; ++i
)
1919 fd
= opendb(secure_db_list
[i
]);
1922 selected_secure_db
= secure_db_list
[i
];
1930 main (int argc
, char **argv
)
1932 int dbfd
= open_secure_db();
1935 return dolocate(argc
, argv
, dbfd
);