1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 96, 98, 99, 2000, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 /* Usage: locate [options] pattern...
22 Scan a pathname list for the full pathname of a file, given only
23 a piece of the name (possibly containing shell globbing metacharacters).
24 The list has been processed with front-compression, which reduces
25 the list size by a factor of 4-5.
26 Recognizes two database formats, old and new. The old format is
27 bigram coded, which reduces space by a further 20-25% and uses the
28 following encoding of the database bytes:
30 0-28 likeliest differential counts + offset (14) to make nonnegative
31 30 escape code for out-of-range count to follow in next halfword
32 128-255 bigram codes (the 128 most common, as determined by `updatedb')
33 32-127 single character (printable) ASCII remainder
35 Uses a novel two-tiered string search technique:
37 First, match a metacharacter-free subpattern and a partial pathname
38 BACKWARDS to avoid full expansion of the pathname list.
39 The time savings is 40-50% over forward matching, which cannot efficiently
40 handle overlapped search patterns and compressed path remainders.
42 Then, match the actual shell glob-style regular expression (if in this form)
43 against the candidate pathnames using the slower shell filename
46 Described more fully in Usenix ;login:, Vol 8, No 1,
47 February/March, 1983, p. 8.
49 Written by James A. Woods <jwoods@adobe.com>.
50 Modified by David MacKenzie <djm@gnu.org>. */
55 #include <sys/types.h>
65 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
88 # define _(Text) gettext (Text)
91 #define textdomain(Domain)
92 #define bindtextdomain(Package, Directory)
95 # define N_(String) gettext_noop (String)
97 /* We used to use (String) instead of just String, but apparentl;y ISO C
98 * doesn't allow this (at least, that's what HP said when someone reported
99 * this as a compiler bug). This is HP case number 1205608192. See
100 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
101 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
102 * like: static const char buf[] = ("string");
104 # define N_(String) String
107 #include "locatedb.h"
109 #include "../gnulib/lib/xalloc.h"
110 #include "../gnulib/lib/error.h"
111 #include "../gnulib/lib/human.h"
113 #include "closeout.h"
115 /* Note that this evaluates C many times. */
117 # define TOUPPER(Ch) toupper (Ch)
118 # define TOLOWER(Ch) tolower (Ch)
120 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
121 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
124 /* typedef enum {false, true} boolean; */
126 /* Warn if a database is older than this. 8 days allows for a weekly
127 update that takes up to a day to perform. */
128 #define WARN_NUMBER_UNITS (8)
129 /* Printable name of units used in WARN_SECONDS */
130 static const char warn_name_units
[] = N_("days");
131 #define SECONDS_PER_UNIT (60 * 60 * 24)
133 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
135 /* Check for existence of files before printing them out? */
136 int check_existence
= 0;
138 /* What to separate the results with. */
139 static int separator
= '\n';
141 char *next_element (char *path
, int curdir_ok
);
144 /* Read in a 16-bit int, high byte first (network byte order). */
153 x
= (signed char) fgetc (fp
) << 8;
154 x
|= (fgetc (fp
) & 0xff);
158 const char * const metacharacters
= "*?[]\\";
160 /* Return nonzero if S contains any shell glob characters.
163 contains_metacharacter(const char *s
)
165 if (NULL
== strpbrk(s
, metacharacters
))
173 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
174 * until we reach DELIMITER or end-of-file. We reallocate the buffer
175 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
176 * is made regarding the content of the data (i.e. the implementation is
177 * 8-bit clean, the only delimiter is DELIMITER).
179 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
180 * has been removed from gnulib.
182 * We call the function locate_read_str() to avoid a name clash with the curses
186 locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
192 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
197 needed
= offs
+ nread
+ 1;
200 char *pnew
= realloc(*buf
, needed
);
203 return -1; /* FAIL */
211 memcpy((*buf
)+offs
, p
, nread
);
219 lc_strcpy(char *dest
, const char *src
)
223 *dest
++ = TOLOWER(*src
);
231 VISIT_CONTINUE
= 1, /* please call the next visitor */
232 VISIT_ACCEPTED
= 2, /* accepted, call no futher callbacks for this file */
233 VISIT_REJECTED
= 4, /* rejected, process next file. */
234 VISIT_ABORT
= 8 /* rejected, process no more files. */
240 uintmax_t compressed_bytes
;
241 uintmax_t total_filename_count
;
242 uintmax_t total_filename_length
;
243 uintmax_t whitespace_count
;
244 uintmax_t newline_count
;
245 uintmax_t highbit_filename_count
;
247 static struct locate_stats statistics
;
259 typedef int (*visitfunc
)(const char *tested_filename
,
260 const char *printed_filename
,
267 struct visitor
*next
;
271 static struct visitor
*inspectors
= NULL
;
272 static struct visitor
*lastinspector
= NULL
;
275 process_filename(const char *tested_filename
, const char *printed_filename
)
277 int result
= VISIT_CONTINUE
;
278 const struct visitor
*p
= inspectors
;
280 while ( (VISIT_CONTINUE
== result
) && (NULL
!= p
) )
282 result
= (p
->inspector
)(tested_filename
, printed_filename
, p
->context
);
286 if (VISIT_CONTINUE
== result
)
287 return VISIT_ACCEPTED
;
293 add_visitor(visitfunc fn
, void *context
)
295 struct visitor
*p
= xmalloc(sizeof(struct visitor
));
297 p
->context
= context
;
300 if (NULL
== lastinspector
)
302 lastinspector
= inspectors
= p
;
306 lastinspector
->next
= p
;
314 visit_justprint(const char *testname
, const char *printname
, void *context
)
318 fputs(printname
, stdout
);
320 return VISIT_CONTINUE
;
324 visit_exists(const char *testname
, const char *printname
, void *context
)
330 /* testname has been converted in some way (to lower case,
331 * or is just the basename of the file), and printname has not.
332 * Hence only printname is still actually the name of the file
333 * whose existence we would need to check.
335 if (stat(printname
, &st
) != 0)
337 return VISIT_REJECTED
;
341 return VISIT_CONTINUE
;
346 visit_substring_match_nocasefold(const char *testname
, const char *printname
, void *context
)
348 const char *pattern
= context
;
351 if (NULL
!= strstr(testname
, pattern
))
352 return VISIT_CONTINUE
;
354 return VISIT_REJECTED
;
358 visit_substring_match_casefold(const char *testname
, const char *printname
, void *context
)
360 struct casefolder
* p
= context
;
361 size_t len
= strlen(testname
);
364 if (len
+1 > p
->buffersize
)
366 p
->buffer
= xrealloc(p
->buffer
, len
+1); /* XXX: consider using extendbuf(). */
367 p
->buffersize
= len
+1;
369 lc_strcpy(p
->buffer
, testname
);
372 if (NULL
!= strstr(p
->buffer
, p
->pattern
))
373 return VISIT_CONTINUE
;
375 return VISIT_REJECTED
;
380 visit_globmatch_nofold(const char *testname
, const char *printname
, void *context
)
382 const char *glob
= context
;
384 if (fnmatch(glob
, testname
, 0) != 0)
385 return VISIT_REJECTED
;
387 return VISIT_CONTINUE
;
392 visit_globmatch_casefold(const char *testname
, const char *printname
, void *context
)
394 const char *glob
= context
;
396 if (fnmatch(glob
, testname
, FNM_CASEFOLD
) != 0)
397 return VISIT_REJECTED
;
399 return VISIT_CONTINUE
;
403 visit_stats(const char *testname
, const char *printname
, void *context
)
405 struct locate_stats
*p
= context
;
406 size_t len
= strlen(printname
);
408 int highbit
, whitespace
, newline
;
410 ++(p
->total_filename_count
);
411 p
->total_filename_length
+= len
;
413 highbit
= whitespace
= newline
= 0;
414 for (s
=printname
; *s
; ++s
)
416 if ( (int)(*s
) & 128 )
420 newline
= whitespace
= 1;
422 else if (isspace((unsigned char)*s
))
429 ++(p
->highbit_filename_count
);
431 ++(p
->whitespace_count
);
433 ++(p
->newline_count
);
435 return VISIT_CONTINUE
;
440 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
441 Return the number of entries printed. */
444 new_locate (char *pathpart
,
453 char hbuf
[LONGEST_HUMAN_READABLE
+ 1];
454 FILE *fp
; /* The pathname database. */
455 int c
; /* An input byte. */
456 int nread
; /* number of bytes read from an entry. */
457 char *path
; /* The current input database entry. */
458 const char *testpath
;
459 size_t pathsize
; /* Amount allocated for it. */
460 int count
= 0; /* The length of the prefix shared with the previous database entry. */
462 int old_format
= 0; /* true if reading a bigram-encoded database. */
466 /* for the old database format,
467 the first and second characters of the most common bigrams. */
468 char bigram1
[128], bigram2
[128];
470 /* number of items accepted (i.e. printed) */
471 unsigned long int items_accepted
= 0uL;
473 /* To check the age of the database. */
477 /* Set up the inspection regime */
479 lastinspector
= NULL
;
484 add_visitor(visit_stats
, &statistics
);
488 if (contains_metacharacter(pathpart
))
491 add_visitor(visit_globmatch_casefold
, pathpart
);
493 add_visitor(visit_globmatch_nofold
, pathpart
);
497 /* No glob characters used. Hence we match on
498 * _any part_ of the filename, not just the
499 * basename. This seems odd to me, but it is the
500 * traditional behaviour.
501 * James Youngman <jay@gnu.org>
505 struct casefolder
* cf
= xmalloc(sizeof(*cf
));
506 cf
->pattern
= pathpart
;
509 add_visitor(visit_substring_match_casefold
, cf
);
513 add_visitor(visit_substring_match_nocasefold
, pathpart
);
517 /* We add visit_exists() as late as possible to reduce the
518 * number of stat() calls. (Idea by Bas van Gompel).
521 add_visitor(visit_exists
, NULL
);
524 add_visitor(visit_justprint
, NULL
);
528 if (stat (dbfile
, &st
) || (fp
= fopen (dbfile
, "r")) == NULL
)
530 error (0, errno
, "%s", dbfile
);
534 if (now
- st
.st_mtime
> WARN_SECONDS
)
537 warning: database `fred' is more than 8 days old */
538 error (0, 0, _("warning: database `%s' is more than %d %s old"),
539 dbfile
, WARN_NUMBER_UNITS
, _(warn_name_units
));
542 pathsize
= 1026; /* Increased as necessary by locate_read_str. */
543 path
= xmalloc (pathsize
);
545 nread
= fread (path
, 1, sizeof (LOCATEDB_MAGIC
), fp
);
546 if (nread
!= sizeof (LOCATEDB_MAGIC
)
547 || memcmp (path
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
550 /* Read the list of the most common bigrams in the database. */
552 for (i
= 0; i
< 128; i
++)
554 bigram1
[i
] = getc (fp
);
555 bigram2
[i
] = getc (fp
);
562 printf(_("Database %s is in the %s format.\n"),
564 old_format
? _("old") : "LOCATE02");
567 /* If we ignore case, convert it to lower first so we don't have to
570 if (!stats
&& ignore_case
)
572 lc_strcpy(pathpart
, pathpart
);
578 while ( (c
!= EOF
) && (!use_limit
|| (limit
> 0)) )
580 register char *s
; /* Scan the path we read in. */
584 /* Get the offset in the path where this path info starts. */
585 if (c
== LOCATEDB_OLD_ESCAPE
)
586 count
+= getw (fp
) - LOCATEDB_OLD_OFFSET
;
588 count
+= c
- LOCATEDB_OLD_OFFSET
;
590 /* Overlay the old path with the remainder of the new. */
591 for (s
= path
+ count
; (c
= getc (fp
)) > LOCATEDB_OLD_ESCAPE
;)
593 *s
++ = c
; /* An ordinary character. */
596 /* Bigram markers have the high bit set. */
605 if (c
== LOCATEDB_ESCAPE
)
606 count
+= (short)get_short (fp
);
612 if (count
> strlen(path
))
614 /* This should not happen generally , but since we're
615 * reading in data which is outside our control, we
618 error(1, 0, _("locate database `%s' is corrupt or invalid"), dbfile
);
621 /* Overlay the old path with the remainder of the new. */
622 nread
= locate_read_str (&path
, &pathsize
, fp
, 0, count
);
626 s
= path
+ count
+ nread
- 1; /* Move to the last char in path. */
627 assert (s
[0] != '\0');
628 assert (s
[1] == '\0'); /* Our terminator. */
629 assert (s
[2] == '\0'); /* Added by locate_read_str. */
632 testpath
= basename_only
? base_name(path
) : path
;
633 if (VISIT_ACCEPTED
== process_filename(testpath
, path
))
635 if ((++items_accepted
>= limit
) && use_limit
)
645 printf(_("Locate database size: %s bytes\n"),
646 human_readable ((uintmax_t) st
.st_size
,
647 hbuf
, human_ceiling
, 1, 1));
649 printf(_("Filenames: %s "),
650 human_readable (statistics
.total_filename_count
,
651 hbuf
, human_ceiling
, 1, 1));
652 printf(_("with a cumulative length of %s bytes"),
653 human_readable (statistics
.total_filename_length
,
654 hbuf
, human_ceiling
, 1, 1));
656 printf(_("\n\tof which %s contain whitespace, "),
657 human_readable (statistics
.whitespace_count
,
658 hbuf
, human_ceiling
, 1, 1));
659 printf(_("\n\t%s contain newline characters, "),
660 human_readable (statistics
.newline_count
,
661 hbuf
, human_ceiling
, 1, 1));
662 printf(_("\n\tand %s contain characters with the high bit set.\n"),
663 human_readable (statistics
.highbit_filename_count
,
664 hbuf
, human_ceiling
, 1, 1));
666 printf(_("Compression ratio %4.2f%%\n"),
667 100.0 * ((double)statistics
.total_filename_length
668 - (double) st
.st_size
)
669 / (double) statistics
.total_filename_length
);
675 error (0, errno
, "%s", dbfile
);
678 if (fclose (fp
) == EOF
)
680 error (0, errno
, "%s", dbfile
);
684 return items_accepted
;
690 extern char *version_string
;
692 /* The name this program was run with. */
699 fprintf (stream
, _("\
700 Usage: %s [-d path | --database=path] [-e | --existing]\n\
701 [-i | --ignore-case] [--wholepath] [--basename] [--limit=N | -l N]\n\
702 [--version] [--help] pattern...\n"),
704 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
707 static struct option
const longopts
[] =
709 {"database", required_argument
, NULL
, 'd'},
710 {"existing", no_argument
, NULL
, 'e'},
711 {"ignore-case", no_argument
, NULL
, 'i'},
712 {"help", no_argument
, NULL
, 'h'},
713 {"version", no_argument
, NULL
, 'v'},
714 {"null", no_argument
, NULL
, '0'},
715 {"count", no_argument
, NULL
, 'c'},
716 {"wholename", no_argument
, NULL
, 'w'},
717 {"basename", no_argument
, NULL
, 'b'},
718 {"stdio", no_argument
, NULL
, 's'},
719 {"mmap", no_argument
, NULL
, 'm'},
720 {"limit", required_argument
, NULL
, 'l'},
721 {"statistics", no_argument
, NULL
, 'S'},
722 {NULL
, no_argument
, NULL
, 0}
731 unsigned long int found
= 0uL;
736 int basename_only
= 0;
741 program_name
= argv
[0];
743 #ifdef HAVE_SETLOCALE
744 setlocale (LC_ALL
, "");
746 bindtextdomain (PACKAGE
, LOCALEDIR
);
747 textdomain (PACKAGE
);
748 atexit (close_stdout
);
750 dbpath
= getenv ("LOCATE_PATH");
756 while ((optc
= getopt_long (argc
, argv
, "bcd:eil:sm0S", longopts
, (int *) 0)) != -1)
789 printf (_("GNU locate version %s\n"), version_string
);
803 strtol_error err
= xstrtoumax(optarg
, &end
, 10, &limit
, NULL
);
804 if (LONGINT_OK
!= err
)
806 STRTOL_FATAL_ERROR(optarg
, _("argument to --limit"), err
);
812 case 's': /* use stdio */
813 case 'm': /* use mmap */
814 /* These options are implemented simply for
815 * compatibility with FreeBSD
838 for (; stats
|| optind
< argc
; optind
++)
842 next_element (dbpath
, 0); /* Initialize. */
843 needle
= stats
? NULL
: argv
[optind
];
844 while ((e
= next_element ((char *) NULL
, 0)) != NULL
)
846 statistics
.compressed_bytes
=
847 statistics
.total_filename_count
=
848 statistics
.total_filename_length
=
849 statistics
.whitespace_count
=
850 statistics
.newline_count
=
851 statistics
.highbit_filename_count
= 0u;
853 if (0 == strlen(e
) || 0 == strcmp(e
, "."))
855 /* Use the default database name instead (note: we
856 * don't use 'dbpath' since that might itself contain a
857 * colon-separated list.
862 found
+= new_locate (needle
, e
, ignore_case
, print
, basename_only
, use_limit
, limit
, stats
);
870 printf("%ld\n", found
);
873 if (found
|| (use_limit
&& (limit
==0)) || stats
)