1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 96, 98, 99, 2000, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 /* Usage: locate [options] pattern...
22 Scan a pathname list for the full pathname of a file, given only
23 a piece of the name (possibly containing shell globbing metacharacters).
24 The list has been processed with front-compression, which reduces
25 the list size by a factor of 4-5.
26 Recognizes two database formats, old and new. The old format is
27 bigram coded, which reduces space by a further 20-25% and uses the
28 following encoding of the database bytes:
30 0-28 likeliest differential counts + offset (14) to make nonnegative
31 30 escape code for out-of-range count to follow in next halfword
32 128-255 bigram codes (the 128 most common, as determined by `updatedb')
33 32-127 single character (printable) ASCII remainder
35 Uses a novel two-tiered string search technique:
37 First, match a metacharacter-free subpattern and a partial pathname
38 BACKWARDS to avoid full expansion of the pathname list.
39 The time savings is 40-50% over forward matching, which cannot efficiently
40 handle overlapped search patterns and compressed path remainders.
42 Then, match the actual shell glob-style regular expression (if in this form)
43 against the candidate pathnames using the slower shell filename
46 Described more fully in Usenix ;login:, Vol 8, No 1,
47 February/March, 1983, p. 8.
49 Written by James A. Woods <jwoods@adobe.com>.
50 Modified by David MacKenzie <djm@gnu.org>. */
55 #include <sys/types.h>
65 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
88 # define _(Text) gettext (Text)
91 #define textdomain(Domain)
92 #define bindtextdomain(Package, Directory)
95 # define N_(String) gettext_noop (String)
97 /* We used to use (String) instead of just String, but apparently ISO C
98 * doesn't allow this (at least, that's what HP said when someone reported
99 * this as a compiler bug). This is HP case number 1205608192. See
100 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
101 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
102 * like: static const char buf[] = ("string");
104 # define N_(String) String
107 #include "locatedb.h"
109 #include "../gnulib/lib/xalloc.h"
110 #include "../gnulib/lib/error.h"
111 #include "../gnulib/lib/human.h"
113 #include "closeout.h"
114 #include "nextelem.h"
118 /* Note that these macros may evaluate Ch more than once. */
120 # define TOUPPER(Ch) toupper (Ch)
121 # define TOLOWER(Ch) tolower (Ch)
123 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
124 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
127 /* typedef enum {false, true} boolean; */
129 /* Warn if a database is older than this. 8 days allows for a weekly
130 update that takes up to a day to perform. */
131 #define WARN_NUMBER_UNITS (8)
132 /* Printable name of the units in which WARN_NUMBER_UNITS is counted. */
133 static const char warn_name_units
[] = N_("days");
134 #define SECONDS_PER_UNIT (60 * 60 * 24)
136 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
138 /* Check for existence of files before printing them out? */
139 static int check_existence
= 0;
141 static int follow_symlinks
= 1;
143 /* What to separate the results with. */
144 static int separator
= '\n';
149 /* Read in a 16-bit int, high byte first (network byte order). */
158 x
= (signed char) fgetc (fp
) << 8;
159 x
|= (fgetc (fp
) & 0xff);
163 const char * const metacharacters
= "*?[]\\";
165 /* Return nonzero if S contains any shell glob characters.
168 contains_metacharacter(const char *s
)
170 if (NULL
== strpbrk(s
, metacharacters
))
178 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
179 * until we reach DELIMITER or end-of-file. We reallocate the buffer
180 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
181 * is made regarding the content of the data (i.e. the implementation is
182 * 8-bit clean, the only delimiter is DELIMITER).
184 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
185 * has been removed from gnulib.
187 * We call the function locate_read_str() to avoid a name clash with the curses
191 locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
197 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
202 needed
= offs
+ nread
+ 1;
205 char *pnew
= realloc(*buf
, needed
);
208 return -1; /* FAIL */
216 memcpy((*buf
)+offs
, p
, nread
);
224 lc_strcpy(char *dest
, const char *src
)
228 *dest
++ = TOLOWER(*src
);
236 VISIT_CONTINUE
= 1, /* please call the next visitor */
237 VISIT_ACCEPTED
= 2, /* accepted, call no further callbacks for this file */
238 VISIT_REJECTED
= 4, /* rejected, process next file. */
239 VISIT_ABORT
= 8 /* rejected, process no more files. */
245 uintmax_t compressed_bytes
;
246 uintmax_t total_filename_count
;
247 uintmax_t total_filename_length
;
248 uintmax_t whitespace_count
;
249 uintmax_t newline_count
;
250 uintmax_t highbit_filename_count
;
252 static struct locate_stats statistics
;
262 struct regular_expression
269 typedef int (*visitfunc
)(const char *munged_filename
,
270 const char *original_filename
,
277 struct visitor
*next
;
281 static struct visitor
*inspectors
= NULL
;
282 static struct visitor
*lastinspector
= NULL
;
285 process_filename(const char *munged_filename
, const char *original_filename
)
287 int result
= VISIT_CONTINUE
;
288 const struct visitor
*p
= inspectors
;
290 while ( (VISIT_CONTINUE
== result
) && (NULL
!= p
) )
292 result
= (p
->inspector
)(munged_filename
, original_filename
, p
->context
);
296 if (VISIT_CONTINUE
== result
)
297 return VISIT_ACCEPTED
;
303 add_visitor(visitfunc fn
, void *context
)
305 struct visitor
*p
= xmalloc(sizeof(struct visitor
));
307 p
->context
= context
;
310 if (NULL
== lastinspector
)
312 lastinspector
= inspectors
= p
;
316 lastinspector
->next
= p
;
324 visit_justprint(const char *munged_filename
, const char *original_filename
, void *context
)
327 (void) munged_filename
;
328 fputs(original_filename
, stdout
);
330 return VISIT_CONTINUE
;
334 visit_exists_follow(const char *munged_filename
,
335 const char *original_filename
, void *context
)
339 (void) munged_filename
;
341 /* munged_filename has been converted in some way (to lower case,
342 * or is just the base name of the file), and original_filename has not.
343 * Hence only original_filename is still actually the name of the file
344 * whose existence we would need to check.
346 if (stat(original_filename
, &st
) != 0)
348 return VISIT_REJECTED
;
352 return VISIT_CONTINUE
;
357 visit_exists_nofollow(const char *munged_filename
,
358 const char *original_filename
, void *context
)
362 (void) munged_filename
;
364 /* munged_filename has been converted in some way (to lower case,
365 * or is just the base name of the file), and original_filename has not.
366 * Hence only original_filename is still actually the name of the file
367 * whose existence we would need to check.
369 if (lstat(original_filename
, &st
) != 0)
371 return VISIT_REJECTED
;
375 return VISIT_CONTINUE
;
380 visit_substring_match_nocasefold(const char *munged_filename
, const char *original_filename
, void *context
)
382 const char *pattern
= context
;
383 (void) original_filename
;
385 if (NULL
!= strstr(munged_filename
, pattern
))
386 return VISIT_CONTINUE
;
388 return VISIT_REJECTED
;
392 visit_substring_match_casefold(const char *munged_filename
, const char *original_filename
, void *context
)
394 struct casefolder
* p
= context
;
395 size_t len
= strlen(munged_filename
);
397 (void) original_filename
;
398 if (len
+1 > p
->buffersize
)
400 p
->buffer
= xrealloc(p
->buffer
, len
+1); /* XXX: consider using extendbuf(). */
401 p
->buffersize
= len
+1;
403 lc_strcpy(p
->buffer
, munged_filename
);
406 if (NULL
!= strstr(p
->buffer
, p
->pattern
))
407 return VISIT_CONTINUE
;
409 return VISIT_REJECTED
;
414 visit_globmatch_nofold(const char *munged_filename
, const char *original_filename
, void *context
)
416 const char *glob
= context
;
417 (void) original_filename
;
418 if (fnmatch(glob
, munged_filename
, 0) != 0)
419 return VISIT_REJECTED
;
421 return VISIT_CONTINUE
;
426 visit_globmatch_casefold(const char *munged_filename
, const char *original_filename
, void *context
)
428 const char *glob
= context
;
429 (void) original_filename
;
430 if (fnmatch(glob
, munged_filename
, FNM_CASEFOLD
) != 0)
431 return VISIT_REJECTED
;
433 return VISIT_CONTINUE
;
438 visit_regex(const char *munged_filename
, const char *original_filename
, void *context
)
440 struct regular_expression
*p
= context
;
441 (void) original_filename
;
443 if (0 == regexec(&p
->re
, munged_filename
, 0u, NULL
, 0))
444 return VISIT_CONTINUE
; /* match */
446 return VISIT_REJECTED
; /* no match */
451 visit_stats(const char *munged_filename
, const char *original_filename
, void *context
)
453 struct locate_stats
*p
= context
;
454 size_t len
= strlen(original_filename
);
456 int highbit
, whitespace
, newline
;
457 (void) munged_filename
;
459 ++(p
->total_filename_count
);
460 p
->total_filename_length
+= len
;
462 highbit
= whitespace
= newline
= 0;
463 for (s
=original_filename
; *s
; ++s
)
465 if ( (int)(*s
) & 128 )
469 newline
= whitespace
= 1;
471 else if (isspace((unsigned char)*s
))
478 ++(p
->highbit_filename_count
);
480 ++(p
->whitespace_count
);
482 ++(p
->newline_count
);
484 return VISIT_CONTINUE
;
488 /* Emit the statistics.
491 print_stats(size_t database_file_size
)
493 char hbuf
[LONGEST_HUMAN_READABLE
+ 1];
495 printf(_("Locate database size: %s bytes\n"),
496 human_readable ((uintmax_t) database_file_size
,
497 hbuf
, human_ceiling
, 1, 1));
499 printf(_("Filenames: %s "),
500 human_readable (statistics
.total_filename_count
,
501 hbuf
, human_ceiling
, 1, 1));
502 printf(_("with a cumulative length of %s bytes"),
503 human_readable (statistics
.total_filename_length
,
504 hbuf
, human_ceiling
, 1, 1));
506 printf(_("\n\tof which %s contain whitespace, "),
507 human_readable (statistics
.whitespace_count
,
508 hbuf
, human_ceiling
, 1, 1));
509 printf(_("\n\t%s contain newline characters, "),
510 human_readable (statistics
.newline_count
,
511 hbuf
, human_ceiling
, 1, 1));
512 printf(_("\n\tand %s contain characters with the high bit set.\n"),
513 human_readable (statistics
.highbit_filename_count
,
514 hbuf
, human_ceiling
, 1, 1));
516 printf(_("Compression ratio %4.2f%%\n"),
517 100.0 * ((double)statistics
.total_filename_length
518 - (double) database_file_size
)
519 / (double) statistics
.total_filename_length
);
524 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
525 Return the number of entries printed. */
528 new_locate (char *pathpart
,
538 FILE *fp
; /* The pathname database. */
539 int c
; /* An input byte. */
540 int nread
; /* number of bytes read from an entry. */
541 char *path
; /* The current input database entry. */
542 const char *testpath
;
543 size_t pathsize
; /* Amount allocated for it. */
544 int count
= 0; /* The length of the prefix shared with the previous database entry. */
546 int old_format
= 0; /* true if reading a bigram-encoded database. */
548 /* for the old database format,
549 the first and second characters of the most common bigrams. */
550 char bigram1
[128], bigram2
[128];
552 /* number of items accepted (i.e. printed) */
553 unsigned long int items_accepted
= 0uL;
555 /* To check the age of the database. */
559 /* Set up the inspection regime */
561 lastinspector
= NULL
;
566 add_visitor(visit_stats
, &statistics
);
572 struct regular_expression
*p
= xmalloc(sizeof(*p
));
573 int cflags
= REG_EXTENDED
| REG_NOSUB
574 | (ignore_case
? REG_ICASE
: 0);
576 if (0 == regcomp(&p
->re
, pathpart
, cflags
))
578 add_visitor(visit_regex
, p
);
582 error (1, errno
, "Invalid regular expression; %s", pathpart
);
585 else if (contains_metacharacter(pathpart
))
588 add_visitor(visit_globmatch_casefold
, pathpart
);
590 add_visitor(visit_globmatch_nofold
, pathpart
);
594 /* No glob characters used. Hence we match on
595 * _any part_ of the filename, not just the
596 * basename. This seems odd to me, but it is the
597 * traditional behaviour.
598 * James Youngman <jay@gnu.org>
602 struct casefolder
* cf
= xmalloc(sizeof(*cf
));
603 cf
->pattern
= pathpart
;
606 add_visitor(visit_substring_match_casefold
, cf
);
610 add_visitor(visit_substring_match_nocasefold
, pathpart
);
614 /* We add visit_exists_*() as late as possible to reduce the
615 * number of stat() calls.
621 f
= visit_exists_follow
;
623 f
= visit_exists_nofollow
;
625 add_visitor(f
, NULL
);
630 add_visitor(visit_justprint
, NULL
);
634 if (stat (dbfile
, &st
) || (fp
= fopen (dbfile
, "r")) == NULL
)
636 error (0, errno
, "%s", dbfile
);
640 if (now
- st
.st_mtime
> WARN_SECONDS
)
643 warning: database `fred' is more than 8 days old */
644 error (0, 0, _("warning: database `%s' is more than %d %s old"),
645 dbfile
, WARN_NUMBER_UNITS
, _(warn_name_units
));
648 pathsize
= 1026; /* Increased as necessary by locate_read_str. */
649 path
= xmalloc (pathsize
);
651 nread
= fread (path
, 1, sizeof (LOCATEDB_MAGIC
), fp
);
652 if (nread
!= sizeof (LOCATEDB_MAGIC
)
653 || memcmp (path
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
656 /* Read the list of the most common bigrams in the database. */
658 for (i
= 0; i
< 128; i
++)
660 bigram1
[i
] = getc (fp
);
661 bigram2
[i
] = getc (fp
);
668 printf(_("Database %s is in the %s format.\n"),
670 old_format
? _("old") : "LOCATE02");
673 /* If we ignore case, convert it to lower first so we don't have to
676 if (!stats
&& ignore_case
)
678 lc_strcpy(pathpart
, pathpart
);
684 while ( (c
!= EOF
) && (!use_limit
|| (limit
> 0)) )
686 register char *s
; /* Scan the path we read in. */
690 /* Get the offset in the path where this path info starts. */
691 if (c
== LOCATEDB_OLD_ESCAPE
)
692 count
+= getw (fp
) - LOCATEDB_OLD_OFFSET
;
694 count
+= c
- LOCATEDB_OLD_OFFSET
;
696 /* Overlay the old path with the remainder of the new. */
697 for (s
= path
+ count
; (c
= getc (fp
)) > LOCATEDB_OLD_ESCAPE
;)
699 *s
++ = c
; /* An ordinary character. */
702 /* Bigram markers have the high bit set. */
711 if (c
== LOCATEDB_ESCAPE
)
712 count
+= (short)get_short (fp
);
718 if (count
> strlen(path
))
720 /* This should not happen generally, but since we're
721 * reading in data which is outside our control, we
724 error(1, 0, _("locate database `%s' is corrupt or invalid"), dbfile
);
727 /* Overlay the old path with the remainder of the new. */
728 nread
= locate_read_str (&path
, &pathsize
, fp
, 0, count
);
732 s
= path
+ count
+ nread
- 1; /* Move to the last char in path. */
733 assert (s
[0] != '\0');
734 assert (s
[1] == '\0'); /* Our terminator. */
735 assert (s
[2] == '\0'); /* Added by locate_read_str. */
738 testpath
= basename_only
? base_name(path
) : path
;
739 if (VISIT_ACCEPTED
== process_filename(testpath
, path
))
741 if ((++items_accepted
>= limit
) && use_limit
)
751 print_stats(st
.st_size
);
756 error (0, errno
, "%s", dbfile
);
759 if (fclose (fp
) == EOF
)
761 error (0, errno
, "%s", dbfile
);
765 return items_accepted
;
771 extern char *version_string
;
773 /* The name this program was run with. */
780 fprintf (stream
, _("\
781 Usage: %s [-d path | --database=path] [-e | --existing]\n\
782 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
783 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
784 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
785 [-r | --regex ] [--version] [--help] pattern...\n"),
787 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
790 static struct option
const longopts
[] =
792 {"database", required_argument
, NULL
, 'd'},
793 {"existing", no_argument
, NULL
, 'e'},
794 {"ignore-case", no_argument
, NULL
, 'i'},
795 {"help", no_argument
, NULL
, 'h'},
796 {"version", no_argument
, NULL
, 'v'},
797 {"null", no_argument
, NULL
, '0'},
798 {"count", no_argument
, NULL
, 'c'},
799 {"wholename", no_argument
, NULL
, 'w'},
800 {"wholepath", no_argument
, NULL
, 'w'}, /* Synonym. */
801 {"basename", no_argument
, NULL
, 'b'},
802 {"stdio", no_argument
, NULL
, 's'},
803 {"mmap", no_argument
, NULL
, 'm'},
804 {"limit", required_argument
, NULL
, 'l'},
805 {"regex", no_argument
, NULL
, 'r'},
806 {"statistics", no_argument
, NULL
, 'S'},
807 {"follow", no_argument
, NULL
, 'L'},
808 {"nofollow", no_argument
, NULL
, 'P'},
809 {NULL
, no_argument
, NULL
, 0}
818 unsigned long int found
= 0uL;
823 int basename_only
= 0;
829 program_name
= argv
[0];
831 #ifdef HAVE_SETLOCALE
832 setlocale (LC_ALL
, "");
834 bindtextdomain (PACKAGE
, LOCALEDIR
);
835 textdomain (PACKAGE
);
836 atexit (close_stdout
);
838 dbpath
= getenv ("LOCATE_PATH");
844 while ((optc
= getopt_long (argc
, argv
, "bcd:eil:rsm0SwHPL", longopts
, (int *) 0)) != -1)
877 printf (_("GNU locate version %s\n"), version_string
);
896 /* In find, -P and -H differ in the way they handle paths
897 * given on the command line. This is not relevant for
898 * locate, but the -H option is supported because it is
899 * probably more intuitive to do so.
909 strtol_error err
= xstrtoumax(optarg
, &end
, 10, &limit
, NULL
);
910 if (LONGINT_OK
!= err
)
912 STRTOL_FATAL_ERROR(optarg
, _("argument to --limit"), err
);
918 case 's': /* use stdio */
919 case 'm': /* use mmap */
920 /* These options are implemented simply for
921 * compatibility with FreeBSD
944 for (; stats
|| optind
< argc
; optind
++)
948 next_element (dbpath
, 0); /* Initialize. */
949 needle
= stats
? NULL
: argv
[optind
];
950 while ((e
= next_element ((char *) NULL
, 0)) != NULL
)
952 statistics
.compressed_bytes
=
953 statistics
.total_filename_count
=
954 statistics
.total_filename_length
=
955 statistics
.whitespace_count
=
956 statistics
.newline_count
=
957 statistics
.highbit_filename_count
= 0u;
959 if (0 == strlen(e
) || 0 == strcmp(e
, "."))
961 /* Use the default database name instead (note: we
962 * don't use 'dbpath' since that might itself contain a
963 * colon-separated list.
968 found
+= new_locate (needle
, e
, ignore_case
, print
, basename_only
, use_limit
, limit
, stats
, regex
);
976 printf("%ld\n", found
);
979 if (found
|| (use_limit
&& (limit
==0)) || stats
)