1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
68 #include <sys/types.h>
69 #include <grp.h> /* for setgroups() */
76 #include <stdbool.h> /* for bool/boolean */
78 /* The presence of unistd.h is assumed by gnulib these days, so we
79 * might as well assume it too.
81 /* We need <unistd.h> for isatty(). */
105 # include <libintl.h>
106 # define _(Text) gettext (Text)
108 # define _(Text) Text
109 #define textdomain(Domain)
110 #define bindtextdomain(Package, Directory)
113 # define N_(String) gettext_noop (String)
115 /* We used to use (String) instead of just String, but apparently ISO C
116 * doesn't allow this (at least, that's what HP said when someone reported
117 * this as a compiler bug). This is HP case number 1205608192. See
118 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
119 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
120 * like: static const char buf[] = ("string");
122 # define N_(String) String
125 #include "locatedb.h"
131 #include "closeout.h"
132 #include "nextelem.h"
135 #include "quotearg.h"
136 #include "printquoted.h"
137 #include "regextype.h"
138 #include "gnulib-version.h"
140 /* Note that this evaluates Ch many times. */
142 # define TOUPPER(Ch) toupper (Ch)
143 # define TOLOWER(Ch) tolower (Ch)
145 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
146 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
149 /* typedef enum {false, true} boolean; */
151 /* Warn if a database is older than this. 8 days allows for a weekly
152 update that takes up to a day to perform. */
153 static unsigned int warn_number_units
= 8;
155 /* Printable name of the units in which warn_number_units is expressed */
156 static const char warn_name_units
[] = N_("days");
157 #define SECONDS_PER_UNIT (60 * 60 * 24)
161 VISIT_CONTINUE
= 1, /* please call the next visitor */
162 VISIT_ACCEPTED
= 2, /* accepted, call no further callbacks for this file */
163 VISIT_REJECTED
= 4, /* rejected, process next file. */
164 VISIT_ABORT
= 8 /* rejected, process no more files. */
167 enum ExistenceCheckType
169 ACCEPT_EITHER
, /* Corresponds to lack of -E/-e option */
170 ACCEPT_EXISTING
, /* Corresponds to option -e */
171 ACCEPT_NON_EXISTING
/* Corresponds to option -E */
174 /* Check for existence of files before printing them out? */
175 enum ExistenceCheckType check_existence
= ACCEPT_EITHER
;
177 static int follow_symlinks
= 1;
179 /* What to separate the results with. */
180 static int separator
= '\n';
182 static struct quoting_options
* quote_opts
= NULL
;
183 static bool stdout_is_a_tty
;
184 static bool print_quoted_filename
;
185 static bool results_were_filtered
;
187 static const char *selected_secure_db
= NULL
;
190 /* Change the number of days old the database can be
191 * before we complain about it.
194 set_max_db_age(const char *s
)
197 unsigned long int val
;
198 /* XXX: we ignore the case where the input is negative, which is allowed(!). */
203 _("The argument for option --max-database-age must not be empty"));
207 /* We have to set errno here, otherwise when the function returns ULONG_MAX,
208 * we would not be able to tell if that is the correct answer, or whether it
209 * signifies an error.
212 val
= strtoul(s
, &end
, 10);
214 /* Diagnose number too large, non-numbers and trailing junk. */
215 if ((ULONG_MAX
== val
&& ERANGE
== errno
) ||
216 (0 == val
&& EINVAL
== errno
))
219 _("Invalid argument %s for option --max-database-age"),
220 quotearg_n_style(0, locale_quoting_style
, s
));
224 /* errno wasn't set, don't print its message */
226 _("Invalid argument %s for option --max-database-age"),
227 quotearg_n_style(0, locale_quoting_style
, s
));
231 warn_number_units
= val
;
237 /* Read in a 16-bit int, high byte first (network byte order). */
245 x
= (signed char) fgetc (fp
) << 8;
246 x
|= (fgetc (fp
) & 0xff);
250 const char * const metacharacters
= "*?[]\\";
252 /* Return nonzero if S contains any shell glob characters.
255 contains_metacharacter(const char *s
)
257 if (NULL
== strpbrk(s
, metacharacters
))
265 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
266 * until we reach DELIMITER or end-of-file. We reallocate the buffer
267 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
268 * is made regarding the content of the data (i.e. the implementation is
269 * 8-bit clean, the only delimiter is DELIMITER).
271 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
272 * has been removed from gnulib.
274 * We call the function locate_read_str() to avoid a name clash with the curses
278 locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
285 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
290 needed
= offs
+ nread
+ 1u;
293 char *pnew
= realloc(*buf
, needed
);
296 return -1; /* FAIL */
304 memcpy((*buf
)+offs
, p
, nread
);
314 uintmax_t items_accepted
;
316 static struct locate_limits limits
;
321 uintmax_t compressed_bytes
;
322 uintmax_t total_filename_count
;
323 uintmax_t total_filename_length
;
324 uintmax_t whitespace_count
;
325 uintmax_t newline_count
;
326 uintmax_t highbit_filename_count
;
328 static struct locate_stats statistics
;
331 struct regular_expression
333 struct re_pattern_buffer regex
; /* for --regex */
339 int c
; /* An input byte. */
340 char itemcount
; /* Indicates we're at the beginning of an slocate db. */
341 int count
; /* The length of the prefix shared with the previous database entry. */
343 char *original_filename
; /* The current input database entry. */
344 size_t pathsize
; /* Amount allocated for it. */
345 char *munged_filename
; /* path or base_name(path) */
346 FILE *fp
; /* The pathname database. */
347 const char *dbfile
; /* Its name, or "<stdin>" */
348 int slocatedb_format
; /* Allows us to cope with slocate's format variant */
349 GetwordEndianState endian_state
;
350 /* for the old database format,
351 the first and second characters of the most common bigrams. */
357 typedef int (*visitfunc
)(struct process_data
*procdata
,
364 struct visitor
*next
;
368 static struct visitor
*inspectors
= NULL
;
369 static struct visitor
*lastinspector
= NULL
;
370 static struct visitor
*past_pat_inspector
= NULL
;
372 static inline int visit(const struct visitor
*p
,
374 struct process_data
*procdata
,
375 const struct visitor
* const stop
)
377 register int result
= accept_flags
;
378 while ( (accept_flags
& result
) && (stop
!= p
) )
380 result
= (p
->inspector
)(procdata
, p
->context
);
386 /* 0 or 1 pattern(s) */
388 process_simple(struct process_data
*procdata
)
390 return visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, NULL
);
393 /* Accept if any pattern matches. */
395 process_or (struct process_data
*procdata
)
399 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_REJECTED
), procdata
, past_pat_inspector
);
400 if (result
== VISIT_CONTINUE
)
401 result
= VISIT_REJECTED
;
402 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
405 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
406 if (VISIT_CONTINUE
== result
)
407 return VISIT_ACCEPTED
;
412 /* Accept if all patterns match. */
414 process_and (struct process_data
*procdata
)
418 result
= visit(inspectors
, (VISIT_CONTINUE
|VISIT_ACCEPTED
), procdata
, past_pat_inspector
);
419 if (result
== VISIT_CONTINUE
)
420 result
= VISIT_REJECTED
;
421 if (result
& (VISIT_ABORT
| VISIT_REJECTED
))
424 result
= visit(past_pat_inspector
, VISIT_CONTINUE
, procdata
, NULL
);
425 if (VISIT_CONTINUE
== result
)
426 return VISIT_ACCEPTED
;
431 typedef int (*processfunc
)(struct process_data
*procdata
);
433 static processfunc mainprocessor
= NULL
;
436 add_visitor(visitfunc fn
, void *context
)
438 struct visitor
*p
= xmalloc(sizeof(struct visitor
));
440 p
->context
= context
;
443 if (NULL
== lastinspector
)
445 lastinspector
= inspectors
= p
;
449 lastinspector
->next
= p
;
455 visit_justprint_quoted(struct process_data
*procdata
, void *context
)
458 print_quoted (stdout
, quote_opts
, stdout_is_a_tty
,
460 procdata
->original_filename
);
462 return VISIT_CONTINUE
;
466 visit_justprint_unquoted(struct process_data
*procdata
, void *context
)
469 fputs(procdata
->original_filename
, stdout
);
471 return VISIT_CONTINUE
;
475 toolong (struct process_data
*procdata
)
478 _("locate database %s contains a "
479 "filename longer than locate can handle"),
484 extend (struct process_data
*procdata
, size_t siz1
, size_t siz2
)
486 /* Figure out if the addition operation is safe before performing it. */
487 if (SIZE_MAX
- siz1
< siz2
)
491 else if (procdata
->pathsize
< (siz1
+siz2
))
493 procdata
->pathsize
= siz1
+siz2
;
494 procdata
->original_filename
= x2nrealloc (procdata
->original_filename
,
501 visit_old_format(struct process_data
*procdata
, void *context
)
506 if (EOF
== procdata
->c
)
509 /* Get the offset in the path where this path info starts. */
510 if (procdata
->c
== LOCATEDB_OLD_ESCAPE
)
515 procdata
->count
-= LOCATEDB_OLD_OFFSET
;
516 minval
= (0 - procdata
->count
);
517 if (procdata
->count
>= 0)
518 maxval
= (procdata
->len
- procdata
->count
);
520 maxval
= (procdata
->len
- 0);
521 word
= getword(procdata
->fp
, procdata
->dbfile
,
522 minval
, maxval
, &procdata
->endian_state
);
523 procdata
->count
+= word
;
524 assert(procdata
->count
>= 0);
528 procdata
->count
+= (procdata
->c
- LOCATEDB_OLD_OFFSET
);
529 assert(procdata
->count
>= 0);
532 /* Overlay the old path with the remainder of the new. Read
533 * more data until we get to the next filename.
535 for (i
=procdata
->count
;
536 (procdata
->c
= getc (procdata
->fp
)) > LOCATEDB_OLD_ESCAPE
;)
538 if (EOF
== procdata
->c
)
541 if (procdata
->c
< 0200)
543 /* An ordinary character. */
544 extend (procdata
, i
, 1u);
545 procdata
->original_filename
[i
++] = procdata
->c
;
549 /* Bigram markers have the high bit set. */
550 extend (procdata
, i
, 2u);
552 procdata
->original_filename
[i
++] = procdata
->bigram1
[procdata
->c
];
553 procdata
->original_filename
[i
++] = procdata
->bigram2
[procdata
->c
];
557 /* Consider the case where we executed the loop body zero times; we
558 * still need space for the terminating null byte.
560 extend (procdata
, i
, 1u);
561 procdata
->original_filename
[i
] = 0;
563 procdata
->munged_filename
= procdata
->original_filename
;
565 return VISIT_CONTINUE
;
569 visit_locate02_format(struct process_data
*procdata
, void *context
)
575 if (procdata
->slocatedb_format
)
577 if (procdata
->itemcount
== 0)
579 ungetc(procdata
->c
, procdata
->fp
);
583 else if (procdata
->itemcount
== 1)
585 procdata
->count
= procdata
->len
-1;
589 if (procdata
->c
== LOCATEDB_ESCAPE
)
590 procdata
->count
+= (short)get_short (procdata
->fp
);
591 else if (procdata
->c
> 127)
592 procdata
->count
+= procdata
->c
- 256;
594 procdata
->count
+= procdata
->c
;
599 if (procdata
->c
== LOCATEDB_ESCAPE
)
600 procdata
->count
+= (short)get_short (procdata
->fp
);
601 else if (procdata
->c
> 127)
602 procdata
->count
+= procdata
->c
- 256;
604 procdata
->count
+= procdata
->c
;
607 if (procdata
->count
> procdata
->len
|| procdata
->count
< 0)
609 /* This should not happen generally, but since we're
610 * reading in data which is outside our control, we
613 error(1, 0, _("locate database %s is corrupt or invalid"),
614 quotearg_n_style(0, locale_quoting_style
, procdata
->dbfile
));
617 /* Overlay the old path with the remainder of the new. */
618 nread
= locate_read_str (&procdata
->original_filename
,
620 procdata
->fp
, 0, procdata
->count
);
623 procdata
->c
= getc (procdata
->fp
);
624 procdata
->len
= procdata
->count
+ nread
;
625 s
= procdata
->original_filename
+ procdata
->len
- 1; /* Move to the last char in path. */
626 assert (s
[0] != '\0');
627 assert (s
[1] == '\0'); /* Our terminator. */
628 assert (s
[2] == '\0'); /* Added by locate_read_str. */
630 procdata
->munged_filename
= procdata
->original_filename
;
632 if (procdata
->slocatedb_format
)
634 /* Don't increment indefinitely, it might overflow. */
635 if (procdata
->itemcount
< 6)
637 ++(procdata
->itemcount
);
642 return VISIT_CONTINUE
;
646 visit_basename(struct process_data
*procdata
, void *context
)
649 procdata
->munged_filename
= base_name(procdata
->original_filename
);
651 return VISIT_CONTINUE
;
655 /* visit_existing_follow implements -L -e */
657 visit_existing_follow(struct process_data
*procdata
, void *context
)
662 /* munged_filename has been converted in some way (to lower case,
663 * or is just the base name of the file), and original_filename has not.
664 * Hence only original_filename is still actually the name of the file
665 * whose existence we would need to check.
667 if (stat(procdata
->original_filename
, &st
) != 0)
669 return VISIT_REJECTED
;
673 return VISIT_CONTINUE
;
677 /* visit_non_existing_follow implements -L -E */
679 visit_non_existing_follow(struct process_data
*procdata
, void *context
)
684 /* munged_filename has been converted in some way (to lower case,
685 * or is just the base name of the file), and original_filename has not.
686 * Hence only original_filename is still actually the name of the file
687 * whose existence we would need to check.
689 if (stat(procdata
->original_filename
, &st
) == 0)
691 return VISIT_REJECTED
;
695 return VISIT_CONTINUE
;
699 /* visit_existing_nofollow implements -P -e */
701 visit_existing_nofollow(struct process_data
*procdata
, void *context
)
706 /* munged_filename has been converted in some way (to lower case,
707 * or is just the base name of the file), and original_filename has not.
708 * Hence only original_filename is still actually the name of the file
709 * whose existence we would need to check.
711 if (lstat(procdata
->original_filename
, &st
) != 0)
713 return VISIT_REJECTED
;
717 return VISIT_CONTINUE
;
721 /* visit_non_existing_nofollow implements -P -E */
723 visit_non_existing_nofollow(struct process_data
*procdata
, void *context
)
728 /* munged_filename has been converted in some way (to lower case,
729 * or is just the base name of the file), and original_filename has not.
730 * Hence only original_filename is still actually the name of the file
731 * whose existence we would need to check.
733 if (lstat(procdata
->original_filename
, &st
) == 0)
735 return VISIT_REJECTED
;
739 return VISIT_CONTINUE
;
744 visit_substring_match_nocasefold_wide(struct process_data
*procdata
, void *context
)
746 const char *pattern
= context
;
748 if (NULL
!= mbsstr(procdata
->munged_filename
, pattern
))
749 return VISIT_ACCEPTED
;
751 return VISIT_REJECTED
;
755 visit_substring_match_nocasefold_narrow(struct process_data
*procdata
, void *context
)
757 const char *pattern
= context
;
758 assert(MB_CUR_MAX
== 1);
759 if (NULL
!= strstr(procdata
->munged_filename
, pattern
))
760 return VISIT_ACCEPTED
;
762 return VISIT_REJECTED
;
766 visit_substring_match_casefold_wide(struct process_data
*procdata
, void *context
)
768 const char *pattern
= context
;
770 if (NULL
!= mbscasestr(procdata
->munged_filename
, pattern
))
771 return VISIT_ACCEPTED
;
773 return VISIT_REJECTED
;
778 visit_substring_match_casefold_narrow(struct process_data
*procdata
, void *context
)
780 const char *pattern
= context
;
782 assert(MB_CUR_MAX
== 1);
783 if (NULL
!= strcasestr(procdata
->munged_filename
, pattern
))
784 return VISIT_ACCEPTED
;
786 return VISIT_REJECTED
;
791 visit_globmatch_nofold(struct process_data
*procdata
, void *context
)
793 const char *glob
= context
;
794 if (fnmatch(glob
, procdata
->munged_filename
, 0) != 0)
795 return VISIT_REJECTED
;
797 return VISIT_ACCEPTED
;
802 visit_globmatch_casefold(struct process_data
*procdata
, void *context
)
804 const char *glob
= context
;
805 if (fnmatch(glob
, procdata
->munged_filename
, FNM_CASEFOLD
) != 0)
806 return VISIT_REJECTED
;
808 return VISIT_ACCEPTED
;
813 visit_regex(struct process_data
*procdata
, void *context
)
815 struct regular_expression
*p
= context
;
816 const size_t len
= strlen(procdata
->munged_filename
);
818 int rv
= re_search (&p
->regex
, procdata
->munged_filename
,
820 (struct re_registers
*) NULL
);
823 return VISIT_REJECTED
; /* no match (-1), or internal error (-2) */
827 return VISIT_ACCEPTED
; /* match */
833 visit_stats(struct process_data
*procdata
, void *context
)
835 struct locate_stats
*p
= context
;
836 size_t len
= strlen(procdata
->original_filename
);
838 int highbit
, whitespace
, newline
;
840 ++(p
->total_filename_count
);
841 p
->total_filename_length
+= len
;
843 highbit
= whitespace
= newline
= 0;
844 for (s
=procdata
->original_filename
; *s
; ++s
)
846 if ( (int)(*s
) & 128 )
850 newline
= whitespace
= 1;
852 else if (isspace((unsigned char)*s
))
859 ++(p
->highbit_filename_count
);
861 ++(p
->whitespace_count
);
863 ++(p
->newline_count
);
865 return VISIT_CONTINUE
;
870 visit_limit(struct process_data
*procdata
, void *context
)
872 struct locate_limits
*p
= context
;
876 if (++p
->items_accepted
>= p
->limit
)
879 return VISIT_CONTINUE
;
883 visit_count(struct process_data
*procdata
, void *context
)
885 struct locate_limits
*p
= context
;
890 return VISIT_CONTINUE
;
893 /* Emit the statistics.
896 print_stats(int argc
, size_t database_file_size
)
898 char hbuf
[LONGEST_HUMAN_READABLE
+ 1];
900 printf(_("Locate database size: %s bytes\n"),
901 human_readable ((uintmax_t) database_file_size
,
902 hbuf
, human_ceiling
, 1, 1));
904 printf( (results_were_filtered
?
905 _("Matching Filenames: %s ") :
906 _("All Filenames: %s ")),
907 human_readable (statistics
.total_filename_count
,
908 hbuf
, human_ceiling
, 1, 1));
909 printf(_("with a cumulative length of %s bytes"),
910 human_readable (statistics
.total_filename_length
,
911 hbuf
, human_ceiling
, 1, 1));
913 printf(_("\n\tof which %s contain whitespace, "),
914 human_readable (statistics
.whitespace_count
,
915 hbuf
, human_ceiling
, 1, 1));
916 printf(_("\n\t%s contain newline characters, "),
917 human_readable (statistics
.newline_count
,
918 hbuf
, human_ceiling
, 1, 1));
919 printf(_("\n\tand %s contain characters with the high bit set.\n"),
920 human_readable (statistics
.highbit_filename_count
,
921 hbuf
, human_ceiling
, 1, 1));
925 if (results_were_filtered
)
927 printf(_("Some filenames may have been filtered out, "
928 "so we cannot compute the compression ratio.\n"));
932 if (statistics
.total_filename_length
)
934 /* A negative compression ratio just means that the
935 * compressed database is larger than the list of
936 * filenames. This can happen for example for
937 * old-format databases containing a small list of short
938 * filenames, because the bigram list is 256 bytes.
940 printf(_("Compression ratio %4.2f%% (higher is better)\n"),
941 100.0 * ((double)statistics
.total_filename_length
942 - (double) database_file_size
)
943 / (double) statistics
.total_filename_length
);
947 printf(_("Compression ratio is undefined\n"));
955 * Return nonzero if the data we read in indicates that we are
956 * looking at a LOCATE02 locate database.
959 looking_at_gnu_locatedb (const char *data
, size_t len
)
961 if (len
< sizeof (LOCATEDB_MAGIC
))
963 else if (0 == memcmp (data
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
964 return 1; /* We saw the magic byte sequence */
970 * Return nonzero if the data we read in indicates that we are
971 * looking at an slocate database.
974 looking_at_slocate_locatedb (const char *filename
,
987 /* Check that the magic number is a one-byte string */
990 if (isdigit((unsigned char)data
[0]))
992 /* looks promising. */
993 *seclevel
= (data
[0] - '0');
997 /* Hmm, well it's probably an slocate database
998 * of some awesomely huge security level, like 2.
999 * We don't know how to handle those.
1002 _("locate database %s looks like an slocate "
1003 "database but it seems to have security level %c, "
1004 "which GNU findutils does not currently support"),
1005 quotearg_n_style(0, locale_quoting_style
, filename
),
1022 /* Definitely not slocate. */
1030 i_am_little_endian(void)
1034 unsigned char uch
[4];
1039 u
.uch
[1] = u
.uch
[2] = u
.uch
[3] = 0;
1046 /* Print or count the entries in DBFILE that match shell globbing patterns in
1047 ARGV. Return the number of entries matched. */
1049 static unsigned long
1050 search_one_database (int argc
,
1059 struct locate_limits
*plimit
,
1065 char *pathpart
; /* A pattern to consider. */
1066 int argn
; /* Index to current pattern in argv. */
1067 int nread
; /* number of bytes read from an entry. */
1068 struct process_data procdata
; /* Storage for data shared with visitors. */
1069 int slocate_seclevel
;
1071 struct visitor
* pvis
; /* temp for determining past_pat_inspector. */
1072 const char *format_name
;
1073 enum ExistenceCheckType do_check_existence
;
1076 /* We may turn on existence checking for a given database.
1077 * We ensure that we can return to the previous behaviour
1078 * by using two variables, do_check_existence (which we act on)
1079 * and check_existence (which indicates the default before we
1080 * adjust it on the basis of what kind of database we're using
1082 do_check_existence
= check_existence
;
1086 regex_options
|= RE_ICASE
;
1089 procdata
.endian_state
= GetwordEndianStateInitial
;
1090 procdata
.len
= procdata
.count
= 0;
1091 procdata
.slocatedb_format
= 0;
1092 procdata
.itemcount
= 0;
1094 procdata
.dbfile
= dbfile
;
1097 /* Set up the inspection regime */
1099 lastinspector
= NULL
;
1100 past_pat_inspector
= NULL
;
1101 results_were_filtered
= false;
1103 procdata
.pathsize
= 1026; /* Increased as necessary by locate_read_str. */
1105 procdata
.pathsize
= 128; /* Increased as necessary by locate_read_str. */
1107 procdata
.original_filename
= xmalloc (procdata
.pathsize
);
1110 nread
= fread (procdata
.original_filename
, 1, SLOCATE_DB_MAGIC_LEN
,
1112 slocate_seclevel
= 0;
1113 if (looking_at_slocate_locatedb(procdata
.dbfile
,
1114 procdata
.original_filename
,
1119 _("%s is an slocate database. "
1120 "Support for these is new, expect problems for now."),
1121 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1123 /* slocate also uses frcode, but with a different header.
1124 * We handle the header here and then work with the data
1125 * in the normal way.
1127 if (slocate_seclevel
> 1)
1129 /* We don't know what those security levels mean,
1130 * so do nothing further
1133 _("%s is an slocate database of unsupported security level %d; skipping it."),
1134 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
),
1138 else if (slocate_seclevel
> 0)
1140 /* Don't show the filenames to the user if they don't exist.
1141 * Showing stats is safe since filenames are only counted
1142 * after the existence check
1144 if (ACCEPT_NON_EXISTING
== check_existence
)
1146 /* Do not allow the user to see a list of filenames that they
1150 _("You specified the -E option, but that option "
1151 "cannot be used with slocate-format databases "
1152 "with a non-zero security level. No results will be "
1153 "generated for this database.\n"));
1156 if (ACCEPT_EXISTING
!= do_check_existence
)
1158 if (enable_print
|| stats
)
1161 _("%s is an slocate database. "
1162 "Turning on the '-e' option."),
1163 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1165 do_check_existence
= ACCEPT_EXISTING
;
1168 add_visitor(visit_locate02_format
, NULL
);
1169 format_name
= "slocate";
1170 procdata
.slocatedb_format
= 1;
1176 procdata
.slocatedb_format
= 0;
1177 extend (&procdata
, sizeof(LOCATEDB_MAGIC
), 0u);
1178 nread2
= fread (procdata
.original_filename
+nread
, 1, sizeof (LOCATEDB_MAGIC
)-nread
,
1180 if (looking_at_gnu_locatedb(procdata
.original_filename
, nread
+nread2
))
1182 add_visitor(visit_locate02_format
, NULL
);
1183 format_name
= "GNU LOCATE02";
1185 else /* Use the old format */
1190 /* Read the list of the most common bigrams in the database. */
1193 extend (&procdata
, 256u, 0u);
1194 int more_read
= fread (procdata
.original_filename
+ nread
, 1,
1195 256 - nread
, procdata
.fp
);
1196 if ( (more_read
+ nread
) != 256 )
1199 _("Old-format locate database %s is "
1200 "too short to be valid"),
1201 quotearg_n_style(0, locale_quoting_style
, dbfile
));
1206 for (i
= 0; i
< 128; i
++)
1208 procdata
.bigram1
[i
] = procdata
.original_filename
[i
<< 1];
1209 procdata
.bigram2
[i
] = procdata
.original_filename
[(i
<< 1) + 1];
1211 format_name
= "old";
1213 add_visitor(visit_old_format
, NULL
);
1218 add_visitor(visit_basename
, NULL
);
1220 /* Add an inspector for each pattern we're looking for. */
1221 for ( argn
= 0; argn
< argc
; argn
++ )
1223 results_were_filtered
= true;
1224 pathpart
= argv
[argn
];
1227 struct regular_expression
*p
= xmalloc(sizeof(*p
));
1228 const char *error_message
= NULL
;
1230 memset (&p
->regex
, 0, sizeof (p
->regex
));
1232 re_set_syntax(regex_options
);
1233 p
->regex
.allocated
= 100;
1234 p
->regex
.buffer
= (unsigned char *) xmalloc (p
->regex
.allocated
);
1235 p
->regex
.fastmap
= NULL
;
1236 p
->regex
.syntax
= regex_options
;
1237 p
->regex
.translate
= NULL
;
1239 error_message
= re_compile_pattern (pathpart
, strlen (pathpart
),
1243 error (1, 0, "%s", error_message
);
1247 add_visitor(visit_regex
, p
);
1250 else if (contains_metacharacter(pathpart
))
1253 add_visitor(visit_globmatch_casefold
, pathpart
);
1255 add_visitor(visit_globmatch_nofold
, pathpart
);
1259 /* No glob characters used. Hence we match on
1260 * _any part_ of the filename, not just the
1261 * basename. This seems odd to me, but it is the
1262 * traditional behaviour.
1263 * James Youngman <jay@gnu.org>
1266 if (1 == MB_CUR_MAX
)
1268 /* As an optimisation, use a strstr() matcher if we are
1269 * in a unibyte locale. This can give a x2 speedup in
1270 * the C locale. Some light testing reveals that
1271 * glibc's strstr() is somewhere around 40% faster than
1272 * gnulib's, so we just use strstr().
1274 matcher
= ignore_case
?
1275 visit_substring_match_casefold_narrow
:
1276 visit_substring_match_nocasefold_narrow
;
1280 matcher
= ignore_case
?
1281 visit_substring_match_casefold_wide
:
1282 visit_substring_match_nocasefold_wide
;
1284 add_visitor(matcher
, pathpart
);
1288 pvis
= lastinspector
;
1290 /* We add visit_existing_*() as late as possible to reduce the
1291 * number of stat() calls.
1293 switch (do_check_existence
)
1295 case ACCEPT_EXISTING
:
1296 results_were_filtered
= true;
1297 if (follow_symlinks
) /* -L, default */
1298 add_visitor(visit_existing_follow
, NULL
);
1300 add_visitor(visit_existing_nofollow
, NULL
);
1303 case ACCEPT_NON_EXISTING
:
1304 results_were_filtered
= true;
1305 if (follow_symlinks
) /* -L, default */
1306 add_visitor(visit_non_existing_follow
, NULL
);
1308 add_visitor(visit_non_existing_nofollow
, NULL
);
1311 case ACCEPT_EITHER
: /* Default, neither -E nor -e */
1312 /* do nothing; no extra processing. */
1316 /* Security issue: The stats visitor must be added immediately
1317 * before the print visitor, because otherwise the -S option would
1318 * leak information about files that the caller cannot see.
1321 add_visitor(visit_stats
, &statistics
);
1325 if (print_quoted_filename
)
1326 add_visitor(visit_justprint_quoted
, NULL
);
1328 add_visitor(visit_justprint_unquoted
, NULL
);
1333 add_visitor(visit_limit
, plimit
);
1335 add_visitor(visit_count
, plimit
);
1340 past_pat_inspector
= pvis
->next
;
1342 mainprocessor
= process_and
;
1344 mainprocessor
= process_or
;
1347 mainprocessor
= process_simple
;
1351 printf(_("Database %s is in the %s format.\n"),
1357 procdata
.c
= getc (procdata
.fp
);
1358 /* If we are searching for filename patterns, the inspector list
1359 * will contain an entry for each pattern for which we are searching.
1361 while ( (procdata
.c
!= EOF
) &&
1362 (VISIT_ABORT
!= (mainprocessor
)(&procdata
)) )
1364 /* Do nothing; all the work is done in the visitor functions. */
1371 int host_little_endian
= i_am_little_endian();
1372 const char *little
= _("The database has little-endian "
1373 "machine-word encoding.\n");
1374 const char *big
= _("The database has big-endian "
1375 "machine-word encoding.\n");
1377 if (GetwordEndianStateNative
== procdata
.endian_state
)
1379 printf("%s", (host_little_endian
? little
: big
));
1381 else if (GetwordEndianStateSwab
== procdata
.endian_state
)
1383 printf("%s", (host_little_endian
? big
: little
));
1387 printf(_("The database machine-word encoding order "
1388 "is not obvious.\n"));
1392 print_stats(argc
, filesize
);
1395 if (ferror (procdata
.fp
))
1397 error (0, errno
, "%s",
1398 quotearg_n_style(0, locale_quoting_style
, procdata
.dbfile
));
1401 return plimit
->items_accepted
;
1407 extern char *version_string
;
1409 /* The name this program was run with. */
1413 usage (FILE *stream
)
1415 fprintf (stream
, _("\
1416 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1417 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1418 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1419 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1420 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1421 [--max-database-age D] [--version] [--help]\n\
1424 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
1428 REGEXTYPE_OPTION
= CHAR_MAX
+ 1,
1433 static struct option
const longopts
[] =
1435 {"database", required_argument
, NULL
, 'd'},
1436 {"existing", no_argument
, NULL
, 'e'},
1437 {"non-existing", no_argument
, NULL
, 'E'},
1438 {"ignore-case", no_argument
, NULL
, 'i'},
1439 {"all", no_argument
, NULL
, 'A'},
1440 {"help", no_argument
, NULL
, 'h'},
1441 {"version", no_argument
, NULL
, 'v'},
1442 {"null", no_argument
, NULL
, '0'},
1443 {"count", no_argument
, NULL
, 'c'},
1444 {"wholename", no_argument
, NULL
, 'w'},
1445 {"wholepath", no_argument
, NULL
, 'w'}, /* Synonym. */
1446 {"basename", no_argument
, NULL
, 'b'},
1447 {"print", no_argument
, NULL
, 'p'},
1448 {"stdio", no_argument
, NULL
, 's'},
1449 {"mmap", no_argument
, NULL
, 'm'},
1450 {"limit", required_argument
, NULL
, 'l'},
1451 {"regex", no_argument
, NULL
, 'r'},
1452 {"regextype", required_argument
, NULL
, REGEXTYPE_OPTION
},
1453 {"statistics", no_argument
, NULL
, 'S'},
1454 {"follow", no_argument
, NULL
, 'L'},
1455 {"nofollow", no_argument
, NULL
, 'P'},
1456 {"max-database-age", required_argument
, NULL
, MAX_DB_AGE
},
1457 {NULL
, no_argument
, NULL
, 0}
1464 const char * what
= "failed";
1465 const uid_t orig_euid
= geteuid();
1466 const uid_t uid
= getuid();
1467 const gid_t gid
= getgid();
1470 /* Use of setgroups() is restricted to root only. */
1473 /* We're either root or running setuid-root. */
1476 if (0 != setgroups(1u, groups
))
1478 what
= _("failed to drop group privileges");
1484 /* Drop any setuid privileges */
1485 if (uid
!= orig_euid
)
1489 /* We're really root anyway, but are setuid to something else. Leave it. */
1494 if (0 != setuid(getuid()))
1496 what
= _("failed to drop setuid privileges");
1500 /* Defend against the case where the attacker runs us with the
1501 * capability to call setuid() turned off, which on some systems
1502 * will cause the above attempt to drop privileges to fail (leaving us
1507 /* Check that we can no longer switch back to root */
1510 what
= _("Failed to fully drop privileges");
1511 /* The errno value here is not interesting (since
1512 * the system call we are complaining about
1513 * succeeded when we wanted it to fail). Arrange
1514 * for the call to error() not to print the errno
1515 * value by setting errno=0.
1524 /* Drop any setgid privileges */
1526 if (0 != setgid(gid
))
1528 what
= _("failed to drop setgid privileges");
1536 error(1, errno
, "%s",
1537 quotearg_n_style(0, locale_quoting_style
, what
));
1545 /* deliberate infinite loop */
1550 opendb(const char *name
)
1552 int fd
= open(name
, O_RDONLY
1553 #if defined(O_LARGEFILE)
1559 /* Make sure it won't survive an exec */
1560 if (0 != fcntl(fd
, F_SETFD
, FD_CLOEXEC
))
1570 dolocate (int argc
, char **argv
, int secure_db_fd
)
1573 unsigned long int found
= 0uL;
1575 int ignore_case
= 0;
1578 int basename_only
= 0;
1581 int regex_options
= RE_SYNTAX_EMACS
;
1586 int they_chose_db
= 0;
1587 bool did_stdin
= false; /* Set to prevent rereading stdin. */
1589 program_name
= argv
[0];
1591 #ifdef HAVE_SETLOCALE
1592 setlocale (LC_ALL
, "");
1594 bindtextdomain (PACKAGE
, LOCALEDIR
);
1595 textdomain (PACKAGE
);
1596 atexit (close_stdout
);
1599 limits
.items_accepted
= 0;
1601 quote_opts
= clone_quoting_options (NULL
);
1602 print_quoted_filename
= true;
1604 /* We cannot simultaneously trust $LOCATE_PATH and use the
1605 * setuid-access-controlled database, since that could cause a leak
1608 dbpath
= getenv ("LOCATE_PATH");
1614 check_existence
= ACCEPT_EITHER
;
1616 while ((optc
= getopt_long (argc
, argv
, "Abcd:eEil:prsm0SwHPL", longopts
, (int *) 0)) != -1)
1621 print_quoted_filename
= false; /* print filename 'raw'. */
1642 check_existence
= ACCEPT_EXISTING
;
1646 check_existence
= ACCEPT_NON_EXISTING
;
1658 /* XXX: nothing in the test suite for this option. */
1659 set_max_db_age(optarg
);
1667 printf (_("GNU locate version %s\n"), version_string
);
1668 printf (_("Built using GNU gnulib version %s\n"), gnulib_version
);
1679 case REGEXTYPE_OPTION
:
1680 regex_options
= get_regex_type(optarg
);
1688 follow_symlinks
= 1;
1691 /* In find, -P and -H differ in the way they handle paths
1692 * given on the command line. This is not relevant for
1693 * locate, but the -H option is supported because it is
1694 * probably more intuitive to do so.
1698 follow_symlinks
= 0;
1704 strtol_error err
= xstrtoumax(optarg
, &end
, 10, &limits
.limit
, NULL
);
1705 if (LONGINT_OK
!= err
)
1707 STRTOL_FATAL_ERROR(optarg
, _("argument to --limit"), err
);
1713 case 's': /* use stdio */
1714 case 'm': /* use mmap */
1715 /* These options are implemented simply for
1716 * compatibility with FreeBSD
1726 /* If the user gave the -d option or set LOCATE_PATH,
1727 * relinquish access to the secure database.
1731 if (secure_db_fd
>= 0)
1733 close(secure_db_fd
);
1738 if (!just_count
&& !stats
)
1748 if (!just_count
&& optind
== argc
)
1756 if (1 == isatty(STDOUT_FILENO
))
1757 stdout_is_a_tty
= true;
1759 stdout_is_a_tty
= false;
1762 next_element (dbpath
, 0); /* Initialize. */
1764 /* Bail out early if limit already reached. */
1765 while (!use_limit
|| limits
.limit
> limits
.items_accepted
)
1771 statistics
.compressed_bytes
=
1772 statistics
.total_filename_count
=
1773 statistics
.total_filename_length
=
1774 statistics
.whitespace_count
=
1775 statistics
.newline_count
=
1776 statistics
.highbit_filename_count
= 0u;
1780 /* Take the next element from the list of databases */
1781 e
= next_element ((char *) NULL
, 0);
1785 if (0 == strcmp (e
, "-"))
1790 _("warning: the locate database can only be read from stdin once."));
1802 if (0 == strlen(e
) || 0 == strcmp(e
, "."))
1807 /* open the database */
1811 error (0, errno
, "%s",
1812 quotearg_n_style(0, locale_quoting_style
, e
));
1819 if (-1 == secure_db_fd
)
1821 /* Already searched the database, it's time to exit the loop */
1826 e
= selected_secure_db
;
1832 /* Check the database to see if it is old. */
1835 error (0, errno
, "%s",
1836 quotearg_n_style(0, locale_quoting_style
, e
));
1837 /* continue anyway */
1838 filesize
= (off_t
)0;
1844 filesize
= st
.st_size
;
1846 if ((time_t)-1 == time(&now
))
1848 /* If we can't tell the time, we don't know how old the
1849 * database is. But since the message is just advisory,
1850 * we continue anyway.
1852 error (0, errno
, "time system call");
1856 double age
= difftime(now
, st
.st_mtime
);
1857 double warn_seconds
= SECONDS_PER_UNIT
* warn_number_units
;
1858 if (age
> warn_seconds
)
1861 warning: database `fred' is more than 8 days old (actual age is 10 days)*/
1863 _("warning: database %s is more than %d %s old (actual age is %.1f %s)"),
1864 quotearg_n_style(0, locale_quoting_style
, e
),
1865 warn_number_units
, _(warn_name_units
),
1866 (age
/(double)SECONDS_PER_UNIT
), _(warn_name_units
));
1871 fp
= fdopen(fd
, "r");
1874 error (0, errno
, "%s",
1875 quotearg_n_style(0, locale_quoting_style
, e
));
1879 /* Search this database for all patterns simultaneously */
1880 found
= search_one_database (argc
- optind
, &argv
[optind
],
1882 ignore_case
, print
, basename_only
,
1883 use_limit
, &limits
, stats
,
1884 op_and
, regex
, regex_options
);
1886 /* Close the database (even if it is stdin) */
1887 if (fclose (fp
) == EOF
)
1889 error (0, errno
, "%s",
1890 quotearg_n_style(0, locale_quoting_style
, e
));
1897 printf("%lu\n", found
); /* found is declared unsigned long int, so it needs %lu, not %ld */
1900 if (found
|| (use_limit
&& (limits
.limit
==0)) || stats
)
1906 #define ARRAYSIZE(a) (sizeof(a)/sizeof(a[0]))
1908 open_secure_db(void)
1912 const char * secure_db_list
[] =
1915 "/var/lib/slocate/slocate.db",
1918 for (i
=0; secure_db_list
[i
]; ++i
)
1920 fd
= opendb(secure_db_list
[i
]);
1923 selected_secure_db
= secure_db_list
[i
];
1931 main (int argc
, char **argv
)
1933 int dbfd
= open_secure_db();
1936 return dolocate(argc
, argv
, dbfd
);