Ensure that the new long options have a relevang short option too.
[findutils.git] / locate / locate.c
blobf0b356fdf9aba070f74d5642da7e79e0d8a07025
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 96, 98, 99, 2000, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
17 USA.
20 /* Usage: locate [options] pattern...
22 Scan a pathname list for the full pathname of a file, given only
23 a piece of the name (possibly containing shell globbing metacharacters).
24 The list has been processed with front-compression, which reduces
25 the list size by a factor of 4-5.
26 Recognizes two database formats, old and new. The old format is
27 bigram coded, which reduces space by a further 20-25% and uses the
28 following encoding of the database bytes:
30 0-28 likeliest differential counts + offset (14) to make nonnegative
31 30 escape code for out-of-range count to follow in next halfword
32 128-255 bigram codes (the 128 most common, as determined by `updatedb')
33 32-127 single character (printable) ASCII remainder
35 Uses a novel two-tiered string search technique:
37 First, match a metacharacter-free subpattern and a partial pathname
38 BACKWARDS to avoid full expansion of the pathname list.
39 The time savings is 40-50% over forward matching, which cannot efficiently
40 handle overlapped search patterns and compressed path remainders.
42 Then, match the actual shell glob-style regular expression (if in this form)
43 against the candidate pathnames using the slower shell filename
44 matching routines.
46 Described more fully in Usenix ;login:, Vol 8, No 1,
47 February/March, 1983, p. 8.
49 Written by James A. Woods <jwoods@adobe.com>.
50 Modified by David MacKenzie <djm@gnu.org>. */
52 #include <config.h>
53 #include <stdio.h>
54 #include <ctype.h>
55 #include <sys/types.h>
56 #include <sys/stat.h>
57 #include <time.h>
58 #include <fnmatch.h>
59 #include <getopt.h>
60 #include <xstrtol.h>
62 #define NDEBUG
63 #include <assert.h>
65 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
66 #include <string.h>
67 #else
68 #include <strings.h>
69 #define strchr index
70 #endif
72 #ifdef STDC_HEADERS
73 #include <stdlib.h>
74 #endif
76 #ifdef HAVE_ERRNO_H
77 #include <errno.h>
78 #else
79 extern int errno;
80 #endif
82 #ifdef HAVE_LOCALE_H
83 #include <locale.h>
84 #endif
86 #if ENABLE_NLS
87 # include <libintl.h>
88 # define _(Text) gettext (Text)
89 #else
90 # define _(Text) Text
91 #define textdomain(Domain)
92 #define bindtextdomain(Package, Directory)
93 #endif
94 #ifdef gettext_noop
95 # define N_(String) gettext_noop (String)
96 #else
97 /* We used to use (String) instead of just String, but apparentl;y ISO C
98 * doesn't allow this (at least, that's what HP said when someone reported
99 * this as a compiler bug). This is HP case number 1205608192. See
100 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
101 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
102 * like: static const char buf[] = ("string");
104 # define N_(String) String
105 #endif
107 #include "locatedb.h"
108 #include <getline.h>
109 #include "../gnulib/lib/xalloc.h"
110 #include "../gnulib/lib/error.h"
111 #include "../gnulib/lib/human.h"
112 #include "dirname.h"
113 #include "closeout.h"
115 /* Note that this evaluates C many times. */
116 #ifdef _LIBC
117 # define TOUPPER(Ch) toupper (Ch)
118 # define TOLOWER(Ch) tolower (Ch)
119 #else
120 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
121 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
122 #endif
124 /* typedef enum {false, true} boolean; */
126 /* Warn if a database is older than this. 8 days allows for a weekly
127 update that takes up to a day to perform. */
128 #define WARN_NUMBER_UNITS (8)
129 /* Printable name of units used in WARN_SECONDS */
130 static const char warn_name_units[] = N_("days");
131 #define SECONDS_PER_UNIT (60 * 60 * 24)
133 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
135 /* Check for existence of files before printing them out? */
136 int check_existence = 0;
138 /* What to separate the results with. */
139 static int separator = '\n';
141 char *next_element (char *path, int curdir_ok);
144 /* Read in a 16-bit int, high byte first (network byte order). */
146 static short
147 get_short (fp)
148 FILE *fp;
151 register short x;
153 x = (signed char) fgetc (fp) << 8;
154 x |= (fgetc (fp) & 0xff);
155 return x;
158 const char * const metacharacters = "*?[]\\";
160 /* Return nonzero if S contains any shell glob characters.
162 static int
163 contains_metacharacter(const char *s)
165 if (NULL == strpbrk(s, metacharacters))
166 return 0;
167 else
168 return 1;
171 /* locate_read_str()
173 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
174 * until we reach DELIMITER or end-of-file. We reallocate the buffer
175 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
176 * is made regarding the content of the data (i.e. the implementation is
177 * 8-bit clean, the only delimiter is DELIMITER).
179 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
180 * has been removed from gnulib.
182 * We call the function locate_read_str() to avoid a name clash with the curses
183 * function getstr().
185 static int
186 locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
188 char * p = NULL;
189 size_t sz = 0;
190 int needed, nread;
192 nread = getdelim(&p, &sz, delimiter, fp);
193 if (nread >= 0)
195 assert(p != NULL);
197 needed = offs + nread + 1;
198 if (needed > (*siz))
200 char *pnew = realloc(*buf, needed);
201 if (NULL == pnew)
203 return -1; /* FAIL */
205 else
207 *siz = needed;
208 *buf = pnew;
211 memcpy((*buf)+offs, p, nread);
212 free(p);
214 return nread;
218 static void
219 lc_strcpy(char *dest, const char *src)
221 while (*src)
223 *dest++ = TOLOWER(*src);
224 ++src;
226 *dest = 0;
229 enum visit_result
231 VISIT_CONTINUE = 1, /* please call the next visitor */
232 VISIT_ACCEPTED = 2, /* accepted, call no futher callbacks for this file */
233 VISIT_REJECTED = 4, /* rejected, process next file. */
234 VISIT_ABORT = 8 /* rejected, process no more files. */
238 struct locate_stats
240 uintmax_t compressed_bytes;
241 uintmax_t total_filename_count;
242 uintmax_t total_filename_length;
243 uintmax_t whitespace_count;
244 uintmax_t newline_count;
245 uintmax_t highbit_filename_count;
247 static struct locate_stats statistics;
250 struct casefolder
252 const char *pattern;
253 char *buffer;
254 size_t buffersize;
259 typedef int (*visitfunc)(const char *munged_filename,
260 const char *original_filename,
261 void *context);
263 struct visitor
265 visitfunc inspector;
266 void * context;
267 struct visitor *next;
271 static struct visitor *inspectors = NULL;
272 static struct visitor *lastinspector = NULL;
274 static int
275 process_filename(const char *munged_filename, const char *original_filename)
277 int result = VISIT_CONTINUE;
278 const struct visitor *p = inspectors;
280 while ( (VISIT_CONTINUE == result) && (NULL != p) )
282 result = (p->inspector)(munged_filename, original_filename, p->context);
283 p = p->next;
286 if (VISIT_CONTINUE == result)
287 return VISIT_ACCEPTED;
288 else
289 return result;
292 static void
293 add_visitor(visitfunc fn, void *context)
295 struct visitor *p = xmalloc(sizeof(struct visitor));
296 p->inspector = fn;
297 p->context = context;
298 p->next = NULL;
300 if (NULL == lastinspector)
302 lastinspector = inspectors = p;
304 else
306 lastinspector->next = p;
307 lastinspector = p;
313 static int
314 visit_justprint(const char *munged_filename, const char *original_filename, void *context)
316 (void) context;
317 (void) munged_filename;
318 fputs(original_filename, stdout);
319 putchar(separator);
320 return VISIT_CONTINUE;
323 static int
324 visit_exists(const char *munged_filename, const char *original_filename, void *context)
326 struct stat st;
327 (void) context;
328 (void) munged_filename;
330 /* munged_filename has been converted in some way (to lower case,
331 * or is just the basename of the file), and original_filename has not.
332 * Hence only original_filename is still actually the name of the file
333 * whose existence we would need to check.
335 if (stat(original_filename, &st) != 0)
337 return VISIT_REJECTED;
339 else
341 return VISIT_CONTINUE;
345 static int
346 visit_substring_match_nocasefold(const char *munged_filename, const char *original_filename, void *context)
348 const char *pattern = context;
349 (void) original_filename;
351 if (NULL != strstr(munged_filename, pattern))
352 return VISIT_CONTINUE;
353 else
354 return VISIT_REJECTED;
357 static int
358 visit_substring_match_casefold(const char *munged_filename, const char *original_filename, void *context)
360 struct casefolder * p = context;
361 size_t len = strlen(munged_filename);
363 (void) original_filename;
364 if (len+1 > p->buffersize)
366 p->buffer = xrealloc(p->buffer, len+1); /* XXX: consider using extendbuf(). */
367 p->buffersize = len+1;
369 lc_strcpy(p->buffer, munged_filename);
372 if (NULL != strstr(p->buffer, p->pattern))
373 return VISIT_CONTINUE;
374 else
375 return VISIT_REJECTED;
379 static int
380 visit_globmatch_nofold(const char *munged_filename, const char *original_filename, void *context)
382 const char *glob = context;
383 (void) original_filename;
384 if (fnmatch(glob, munged_filename, 0) != 0)
385 return VISIT_REJECTED;
386 else
387 return VISIT_CONTINUE;
391 static int
392 visit_globmatch_casefold(const char *munged_filename, const char *original_filename, void *context)
394 const char *glob = context;
395 (void) original_filename;
396 if (fnmatch(glob, munged_filename, FNM_CASEFOLD) != 0)
397 return VISIT_REJECTED;
398 else
399 return VISIT_CONTINUE;
402 static int
403 visit_stats(const char *munged_filename, const char *original_filename, void *context)
405 struct locate_stats *p = context;
406 size_t len = strlen(original_filename);
407 const char *s;
408 int highbit, whitespace, newline;
410 ++(p->total_filename_count);
411 p->total_filename_length += len;
413 highbit = whitespace = newline = 0;
414 for (s=original_filename; *s; ++s)
416 if ( (int)(*s) & 128 )
417 highbit = 1;
418 if ('\n' == *s)
420 newline = whitespace = 1;
422 else if (isspace((unsigned char)*s))
424 whitespace = 1;
428 if (highbit)
429 ++(p->highbit_filename_count);
430 if (whitespace)
431 ++(p->whitespace_count);
432 if (newline)
433 ++(p->newline_count);
435 return VISIT_CONTINUE;
440 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
441 Return the number of entries printed. */
443 static unsigned long
444 new_locate (char *pathpart,
445 char *dbfile,
446 int ignore_case,
447 int enable_print,
448 int basename_only,
449 int use_limit,
450 uintmax_t limit,
451 int stats)
453 char hbuf[LONGEST_HUMAN_READABLE + 1];
454 FILE *fp; /* The pathname database. */
455 int c; /* An input byte. */
456 int nread; /* number of bytes read from an entry. */
457 char *path; /* The current input database entry. */
458 const char *testpath;
459 size_t pathsize; /* Amount allocated for it. */
460 int count = 0; /* The length of the prefix shared with the previous database entry. */
462 int old_format = 0; /* true if reading a bigram-encoded database. */
466 /* for the old database format,
467 the first and second characters of the most common bigrams. */
468 char bigram1[128], bigram2[128];
470 /* number of items accepted (i.e. printed) */
471 unsigned long int items_accepted = 0uL;
473 /* To check the age of the database. */
474 struct stat st;
475 time_t now;
477 /* Set up the inspection regime */
478 inspectors = NULL;
479 lastinspector = NULL;
481 if (stats)
483 assert(!use_limit);
484 add_visitor(visit_stats, &statistics);
486 else
488 if (contains_metacharacter(pathpart))
490 if (ignore_case)
491 add_visitor(visit_globmatch_casefold, pathpart);
492 else
493 add_visitor(visit_globmatch_nofold, pathpart);
495 else
497 /* No glob characters used. Hence we match on
498 * _any part_ of the filename, not just the
499 * basename. This seems odd to me, but it is the
500 * traditional behaviour.
501 * James Youngman <jay@gnu.org>
503 if (ignore_case)
505 struct casefolder * cf = xmalloc(sizeof(*cf));
506 cf->pattern = pathpart;
507 cf->buffer = NULL;
508 cf->buffersize = 0;
509 add_visitor(visit_substring_match_casefold, cf);
511 else
513 add_visitor(visit_substring_match_nocasefold, pathpart);
517 /* We add visit_exists() as late as possible to reduce the
518 * number of stat() calls.
520 if (check_existence)
521 add_visitor(visit_exists, NULL);
523 if (enable_print)
524 add_visitor(visit_justprint, NULL);
528 if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
530 error (0, errno, "%s", dbfile);
531 return 0;
533 time(&now);
534 if (now - st.st_mtime > WARN_SECONDS)
536 /* For example:
537 warning: database `fred' is more than 8 days old */
538 error (0, 0, _("warning: database `%s' is more than %d %s old"),
539 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
542 pathsize = 1026; /* Increased as necessary by locate_read_str. */
543 path = xmalloc (pathsize);
545 nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
546 if (nread != sizeof (LOCATEDB_MAGIC)
547 || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
549 int i;
550 /* Read the list of the most common bigrams in the database. */
551 fseek (fp, 0, 0);
552 for (i = 0; i < 128; i++)
554 bigram1[i] = getc (fp);
555 bigram2[i] = getc (fp);
557 old_format = 1;
560 if (stats)
562 printf(_("Database %s is in the %s format.\n"),
563 dbfile,
564 old_format ? _("old") : "LOCATE02");
567 /* If we ignore case, convert it to lower first so we don't have to
568 * do it every time
570 if (!stats && ignore_case)
572 lc_strcpy(pathpart, pathpart);
575 items_accepted = 0;
577 c = getc (fp);
578 while ( (c != EOF) && (!use_limit || (limit > 0)) )
580 register char *s; /* Scan the path we read in. */
582 if (old_format)
584 /* Get the offset in the path where this path info starts. */
585 if (c == LOCATEDB_OLD_ESCAPE)
586 count += getw (fp) - LOCATEDB_OLD_OFFSET;
587 else
588 count += c - LOCATEDB_OLD_OFFSET;
590 /* Overlay the old path with the remainder of the new. */
591 for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
592 if (c < 0200)
593 *s++ = c; /* An ordinary character. */
594 else
596 /* Bigram markers have the high bit set. */
597 c &= 0177;
598 *s++ = bigram1[c];
599 *s++ = bigram2[c];
601 *s-- = '\0';
603 else
605 if (c == LOCATEDB_ESCAPE)
606 count += (short)get_short (fp);
607 else if (c > 127)
608 count += c - 256;
609 else
610 count += c;
612 if (count > strlen(path))
614 /* This should not happen generally , but since we're
615 * reading in data which is outside our control, we
616 * cannot prevent it.
618 error(1, 0, _("locate database `%s' is corrupt or invalid"), dbfile);
621 /* Overlay the old path with the remainder of the new. */
622 nread = locate_read_str (&path, &pathsize, fp, 0, count);
623 if (nread < 0)
624 break;
625 c = getc (fp);
626 s = path + count + nread - 1; /* Move to the last char in path. */
627 assert (s[0] != '\0');
628 assert (s[1] == '\0'); /* Our terminator. */
629 assert (s[2] == '\0'); /* Added by locate_read_str. */
632 testpath = basename_only ? base_name(path) : path;
633 if (VISIT_ACCEPTED == process_filename(testpath, path))
635 if ((++items_accepted >= limit) && use_limit)
637 break;
643 if (stats)
645 printf(_("Locate database size: %s bytes\n"),
646 human_readable ((uintmax_t) st.st_size,
647 hbuf, human_ceiling, 1, 1));
649 printf(_("Filenames: %s "),
650 human_readable (statistics.total_filename_count,
651 hbuf, human_ceiling, 1, 1));
652 printf(_("with a cumulative length of %s bytes"),
653 human_readable (statistics.total_filename_length,
654 hbuf, human_ceiling, 1, 1));
656 printf(_("\n\tof which %s contain whitespace, "),
657 human_readable (statistics.whitespace_count,
658 hbuf, human_ceiling, 1, 1));
659 printf(_("\n\t%s contain newline characters, "),
660 human_readable (statistics.newline_count,
661 hbuf, human_ceiling, 1, 1));
662 printf(_("\n\tand %s contain characters with the high bit set.\n"),
663 human_readable (statistics.highbit_filename_count,
664 hbuf, human_ceiling, 1, 1));
666 printf(_("Compression ratio %4.2f%%\n"),
667 100.0 * ((double)statistics.total_filename_length
668 - (double) st.st_size)
669 / (double) statistics.total_filename_length);
670 printf("\n");
673 if (ferror (fp))
675 error (0, errno, "%s", dbfile);
676 return 0;
678 if (fclose (fp) == EOF)
680 error (0, errno, "%s", dbfile);
681 return 0;
684 return items_accepted;
690 extern char *version_string;
692 /* The name this program was run with. */
693 char *program_name;
695 static void
696 usage (stream)
697 FILE *stream;
699 fprintf (stream, _("\
700 Usage: %s [-d path | --database=path] [-e | --existing]\n\
701 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
702 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
703 [--version] [--help] pattern...\n"),
704 program_name);
705 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
708 static struct option const longopts[] =
710 {"database", required_argument, NULL, 'd'},
711 {"existing", no_argument, NULL, 'e'},
712 {"ignore-case", no_argument, NULL, 'i'},
713 {"help", no_argument, NULL, 'h'},
714 {"version", no_argument, NULL, 'v'},
715 {"null", no_argument, NULL, '0'},
716 {"count", no_argument, NULL, 'c'},
717 {"wholename", no_argument, NULL, 'w'},
718 {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
719 {"basename", no_argument, NULL, 'b'},
720 {"stdio", no_argument, NULL, 's'},
721 {"mmap", no_argument, NULL, 'm'},
722 {"limit", required_argument, NULL, 'l'},
723 {"statistics", no_argument, NULL, 'S'},
724 {NULL, no_argument, NULL, 0}
728 main (argc, argv)
729 int argc;
730 char **argv;
732 char *dbpath;
733 unsigned long int found = 0uL;
734 int optc;
735 int ignore_case = 0;
736 int print = 1;
737 int just_count = 0;
738 int basename_only = 0;
739 uintmax_t limit = 0;
740 int use_limit = 0;
741 int stats = 0;
743 program_name = argv[0];
745 #ifdef HAVE_SETLOCALE
746 setlocale (LC_ALL, "");
747 #endif
748 bindtextdomain (PACKAGE, LOCALEDIR);
749 textdomain (PACKAGE);
750 atexit (close_stdout);
752 dbpath = getenv ("LOCATE_PATH");
753 if (dbpath == NULL)
754 dbpath = LOCATE_DB;
756 check_existence = 0;
758 while ((optc = getopt_long (argc, argv, "bcd:eil:sm0Sw", longopts, (int *) 0)) != -1)
759 switch (optc)
761 case '0':
762 separator = 0;
763 break;
765 case 'b':
766 basename_only = 1;
767 break;
769 case 'c':
770 just_count = 1;
771 print = 0;
772 break;
774 case 'd':
775 dbpath = optarg;
776 break;
778 case 'e':
779 check_existence = 1;
780 break;
782 case 'i':
783 ignore_case = 1;
784 break;
786 case 'h':
787 usage (stdout);
788 return 0;
790 case 'v':
791 printf (_("GNU locate version %s\n"), version_string);
792 return 0;
794 case 'w':
795 basename_only = 0;
796 break;
798 case 'S':
799 stats = 1;
800 break;
802 case 'l':
804 char *end = optarg;
805 strtol_error err = xstrtoumax(optarg, &end, 10, &limit, NULL);
806 if (LONGINT_OK != err)
808 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
810 use_limit = 1;
812 break;
814 case 's': /* use stdio */
815 case 'm': /* use mmap */
816 /* These options are implemented simply for
817 * compatibility with FreeBSD
819 break;
821 default:
822 usage (stderr);
823 return 1;
826 if (stats)
828 use_limit = 0;
829 print = 0;
831 else
833 if (optind == argc)
835 usage (stderr);
836 return 1;
840 for (; stats || optind < argc; optind++)
842 char *e;
843 const char *needle;
844 next_element (dbpath, 0); /* Initialize. */
845 needle = stats ? NULL : argv[optind];
846 while ((e = next_element ((char *) NULL, 0)) != NULL)
848 statistics.compressed_bytes =
849 statistics.total_filename_count =
850 statistics.total_filename_length =
851 statistics.whitespace_count =
852 statistics.newline_count =
853 statistics.highbit_filename_count = 0u;
855 if (0 == strlen(e) || 0 == strcmp(e, "."))
857 /* Use the default database name instead (note: we
858 * don't use 'dbpath' since that might itself contain a
859 * colon-separated list.
861 e = LOCATE_DB;
864 found += new_locate (needle, e, ignore_case, print, basename_only, use_limit, limit, stats);
866 if (stats)
867 break;
870 if (just_count)
872 printf("%ld\n", found);
875 if (found || (use_limit && (limit==0)) || stats )
876 return 0;
877 else
878 return 1;