Updated the discussion of th error messages for findutils-4.2.8.
[findutils.git] / locate / locate.c
blob89dc0b451968918b453739ebf9acddb5a479e340
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 96, 98, 99, 2000, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
17 USA.
20 /* Usage: locate [options] pattern...
22 Scan a pathname list for the full pathname of a file, given only
23 a piece of the name (possibly containing shell globbing metacharacters).
24 The list has been processed with front-compression, which reduces
25 the list size by a factor of 4-5.
26 Recognizes two database formats, old and new. The old format is
27 bigram coded, which reduces space by a further 20-25% and uses the
28 following encoding of the database bytes:
30 0-28 likeliest differential counts + offset (14) to make nonnegative
31 30 escape code for out-of-range count to follow in next halfword
32 128-255 bigram codes (the 128 most common, as determined by `updatedb')
33 32-127 single character (printable) ASCII remainder
35 Uses a novel two-tiered string search technique:
37 First, match a metacharacter-free subpattern and a partial pathname
38 BACKWARDS to avoid full expansion of the pathname list.
39 The time savings is 40-50% over forward matching, which cannot efficiently
40 handle overlapped search patterns and compressed path remainders.
42 Then, match the actual shell glob-style regular expression (if in this form)
43 against the candidate pathnames using the slower shell filename
44 matching routines.
46 Described more fully in Usenix ;login:, Vol 8, No 1,
47 February/March, 1983, p. 8.
49 Written by James A. Woods <jwoods@adobe.com>.
50 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
52 #include <config.h>
53 #include <stdio.h>
54 #include <ctype.h>
55 #include <sys/types.h>
56 #include <sys/stat.h>
57 #include <time.h>
58 #include <fnmatch.h>
59 #include <getopt.h>
60 #include <xstrtol.h>
62 #define NDEBUG
63 #include <assert.h>
65 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
66 #include <string.h>
67 #else
68 #include <strings.h>
69 #define strchr index
70 #endif
72 #ifdef STDC_HEADERS
73 #include <stdlib.h>
74 #endif
76 #ifdef HAVE_ERRNO_H
77 #include <errno.h>
78 #else
79 extern int errno;
80 #endif
82 #ifdef HAVE_LOCALE_H
83 #include <locale.h>
84 #endif
86 #if ENABLE_NLS
87 # include <libintl.h>
88 # define _(Text) gettext (Text)
89 #else
90 # define _(Text) Text
91 #define textdomain(Domain)
92 #define bindtextdomain(Package, Directory)
93 #endif
94 #ifdef gettext_noop
95 # define N_(String) gettext_noop (String)
96 #else
97 /* We used to use (String) instead of just String, but apparentl;y ISO C
98 * doesn't allow this (at least, that's what HP said when someone reported
99 * this as a compiler bug). This is HP case number 1205608192. See
100 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
101 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
102 * like: static const char buf[] = ("string");
104 # define N_(String) String
105 #endif
107 #include "locatedb.h"
108 #include <getline.h>
109 #include "../gnulib/lib/xalloc.h"
110 #include "../gnulib/lib/error.h"
111 #include "dirname.h"
113 /* Note that this evaluates C many times. */
114 #ifdef _LIBC
115 # define TOUPPER(Ch) toupper (Ch)
116 # define TOLOWER(Ch) tolower (Ch)
117 #else
118 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
119 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
120 #endif
122 /* typedef enum {false, true} boolean; */
124 /* Warn if a database is older than this. 8 days allows for a weekly
125 update that takes up to a day to perform. */
126 #define WARN_NUMBER_UNITS (8)
127 /* Printable name of units used in WARN_SECONDS */
128 static const char warn_name_units[] = N_("days");
129 #define SECONDS_PER_UNIT (60 * 60 * 24)
131 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
133 /* Check for existence of files before printing them out? */
134 int check_existence = 0;
136 /* What to separate the results with. */
137 static int separator = '\n';
139 char *next_element ();
142 /* Read in a 16-bit int, high byte first (network byte order). */
144 static short
145 get_short (fp)
146 FILE *fp;
149 register short x;
151 x = (signed char) fgetc (fp) << 8;
152 x |= (fgetc (fp) & 0xff);
153 return x;
156 const char * const metacharacters = "*?[]\\";
158 #if 0
159 /* Return nonzero if CH is a shell glob character.
161 static int
162 is_metacharacter(char ch)
164 if (NULL == strchr (metacharacters, ch))
165 return 0;
166 else
167 return 1;
169 #endif
171 /* Return nonzero if S contains any shell glob characters.
173 static int
174 contains_metacharacter(const char *s)
176 if (NULL == strpbrk(s, metacharacters))
177 return 0;
178 else
179 return 1;
184 #if 0
185 /* Return a pointer to the last character in a static copy of the last
186 glob-free subpattern in NAME,
187 with '\0' prepended for a fast backwards pre-match. */
189 static char *
190 last_literal_end (name)
191 char *name;
193 static char *globfree = NULL; /* A copy of the subpattern in NAME. */
194 static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
195 register char *subp; /* Return value. */
196 register char *p; /* Search location in NAME. */
198 /* Find the end of the subpattern.
199 Skip trailing metacharacters and [] ranges. */
200 for (p = name + strlen (name) - 1; p >= name && is_metacharacter(*p);
201 p--)
203 if (*p == ']')
204 while (p >= name && *p != '[')
205 p--;
207 if (p < name)
208 p = name;
210 if (p - name + 3 > gfalloc)
212 gfalloc = p - name + 3 + 64; /* Room to grow. */
213 globfree = xrealloc (globfree, gfalloc);
215 subp = globfree;
216 *subp++ = '\0';
218 /* If the pattern has only metacharacters, make every path match the
219 subpattern, so it gets checked the slow way. */
220 if (p == name && is_metacharacter(*p))
221 *subp++ = '/';
222 else
224 char *endmark;
225 /* Find the start of the metacharacter-free subpattern. */
226 for (endmark = p; p >= name && !is_metacharacter(*p); p--)
228 /* Copy the subpattern into globfree. */
229 for (++p; p <= endmark; )
230 *subp++ = *p++;
232 *subp-- = '\0'; /* Null terminate, though it's not needed. */
234 return subp;
236 #endif
239 /* locate_read_str()
241 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
242 * until we reach DELIMITER or end-of-file. We reallocate the buffer
243 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
244 * is made regarding the content of the data (i.e. the implementation is
245 * 8-bit clean, the only delimiter is DELIMITER).
247 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
248 * has been removed from gnulib.
250 * We call the function locate_read_str() to avoid a name clash with the curses
251 * function getstr().
253 static int
254 locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
256 char * p = NULL;
257 size_t sz = 0;
258 int needed, nread;
260 nread = getdelim(&p, &sz, delimiter, fp);
261 if (nread >= 0)
263 assert(p != NULL);
265 needed = offs + nread + 1;
266 if (needed > (*siz))
268 char *pnew = realloc(*buf, needed);
269 if (NULL == pnew)
271 return -1; /* FAIL */
273 else
275 *siz = needed;
276 *buf = pnew;
279 memcpy((*buf)+offs, p, nread);
280 free(p);
282 return nread;
286 static void
287 lc_strcpy(char *dest, const char *src)
289 while (*src)
291 *dest++ = TOLOWER(*src);
292 ++src;
296 enum visit_result
298 VISIT_CONTINUE = 1, /* please call the next visitor */
299 VISIT_ACCEPTED = 2, /* accepted, call no futher callbacks for this file */
300 VISIT_REJECTED = 4, /* rejected, process next file. */
301 VISIT_ABORT = 8 /* rejected, process no more files. */
306 struct casefolder
308 const char *pattern;
309 char *buffer;
310 size_t buffersize;
315 typedef int (*visitfunc)(const char *tested_filename,
316 const char *printed_filename,
317 void *context);
319 struct visitor
321 visitfunc inspector;
322 void * context;
323 struct visitor *next;
327 static struct visitor *inspectors = NULL;
328 static struct visitor *lastinspector = NULL;
330 static int
331 process_filename(const char *tested_filename, const char *printed_filename)
333 int result = VISIT_CONTINUE;
334 const struct visitor *p = inspectors;
336 while ( (VISIT_CONTINUE == result) && (NULL != p) )
338 result = (p->inspector)(tested_filename, printed_filename, p->context);
339 p = p->next;
342 if (VISIT_CONTINUE == result)
343 return VISIT_ACCEPTED;
344 else
345 return result;
348 static void
349 add_visitor(visitfunc fn, void *context)
351 struct visitor *p = xmalloc(sizeof(struct visitor));
352 p->inspector = fn;
353 p->context = context;
354 p->next = NULL;
356 if (NULL == lastinspector)
358 lastinspector = inspectors = p;
360 else
362 lastinspector->next = p;
368 static int
369 visit_justprint(const char *testname, const char *printname, void *context)
371 (void) context;
372 (void) testname;
373 fputs(printname, stdout);
374 putchar(separator);
375 return VISIT_CONTINUE;
378 static int
379 visit_exists(const char *testname, const char *printname, void *context)
381 struct stat st;
382 (void) context;
383 (void) printname;
385 if (stat(testname, &st) != 0)
387 return VISIT_REJECTED;
389 else
391 return VISIT_CONTINUE;
395 static int
396 visit_substring_match_nocasefold(const char *testname, const char *printname, void *context)
398 const char *pattern = context;
399 (void) printname;
401 if (NULL != strstr(testname, pattern))
402 return VISIT_CONTINUE;
403 else
404 return VISIT_REJECTED;
407 static int
408 visit_substring_match_casefold(const char *testname, const char *printname, void *context)
410 struct casefolder * p = context;
411 size_t len = strlen(testname);
413 (void) printname;
414 if (len > p->buffersize)
416 p->buffer = xrealloc(p->buffer, len+1);
417 p->buffersize = len+1;
419 lc_strcpy(p->buffer, testname);
422 if (NULL != strstr(p->buffer, p->pattern))
423 return VISIT_CONTINUE;
424 else
425 return VISIT_REJECTED;
429 static int
430 visit_globmatch_nofold(const char *testname, const char *printname, void *context)
432 const char *glob = context;
433 (void) printname;
434 if (fnmatch(glob, testname, 0) != 0)
435 return VISIT_REJECTED;
436 else
437 return VISIT_CONTINUE;
441 static int
442 visit_globmatch_casefold(const char *testname, const char *printname, void *context)
444 const char *glob = context;
445 (void) printname;
446 if (fnmatch(glob, testname, FNM_CASEFOLD) != 0)
447 return VISIT_REJECTED;
448 else
449 return VISIT_CONTINUE;
452 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
453 Return the number of entries printed. */
455 static unsigned long
456 new_locate (char *pathpart, char *dbfile, int ignore_case, int enable_print, int basename_only, int use_limit, uintmax_t limit)
459 FILE *fp; /* The pathname database. */
460 int c; /* An input byte. */
461 int nread; /* number of bytes read from an entry. */
462 char *path; /* The current input database entry. */
463 const char *testpath;
464 size_t pathsize; /* Amount allocated for it. */
465 int count = 0; /* The length of the prefix shared with the previous database entry. */
467 int old_format = 0; /* true if reading a bigram-encoded database. */
471 /* for the old database format,
472 the first and second characters of the most common bigrams. */
473 char bigram1[128], bigram2[128];
475 /* number of items accepted (i.e. printed) */
476 unsigned long int items_accepted = 0uL;
478 /* To check the age of the database. */
479 struct stat st;
480 time_t now;
482 /* Set up the inspection regime */
483 inspectors = NULL;
484 lastinspector = NULL;
486 if (check_existence)
487 add_visitor(visit_exists, NULL);
489 if (contains_metacharacter(pathpart))
491 if (ignore_case)
492 add_visitor(visit_globmatch_casefold, pathpart);
493 else
494 add_visitor(visit_globmatch_nofold, pathpart);
496 else
498 /* No glob characters reuired. Hence we match on
499 * _any part_ of the filename, not just the
500 * basename. This seems odd to me, but it is the
501 * traditional behaviour.
502 * James Youngman <jay@gnu.org>
504 if (ignore_case)
506 struct casefolder * cf = xmalloc(sizeof(*cf));
507 cf->pattern = pathpart;
508 cf->buffer = NULL;
509 cf->buffersize = 0;
510 add_visitor(visit_substring_match_casefold, cf);
512 else
514 add_visitor(visit_substring_match_nocasefold, pathpart);
518 if (enable_print)
519 add_visitor(visit_justprint, NULL);
521 if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
523 error (0, errno, "%s", dbfile);
524 return 0;
526 time(&now);
527 if (now - st.st_mtime > WARN_SECONDS)
529 /* For example:
530 warning: database `fred' is more than 8 days old */
531 error (0, 0, _("warning: database `%s' is more than %d %s old"),
532 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
535 pathsize = 1026; /* Increased as necessary by locate_read_str. */
536 path = xmalloc (pathsize);
538 nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
539 if (nread != sizeof (LOCATEDB_MAGIC)
540 || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
542 int i;
543 /* Read the list of the most common bigrams in the database. */
544 fseek (fp, 0, 0);
545 for (i = 0; i < 128; i++)
547 bigram1[i] = getc (fp);
548 bigram2[i] = getc (fp);
550 old_format = 1;
553 /* If we ignore case, convert it to lower first so we don't have to
554 * do it every time
556 if (ignore_case)
558 lc_strcpy(pathpart, pathpart);
561 items_accepted = 0;
563 c = getc (fp);
564 while ( (c != EOF) && (!use_limit || (limit > 0)) )
566 register char *s; /* Scan the path we read in. */
568 if (old_format)
570 /* Get the offset in the path where this path info starts. */
571 if (c == LOCATEDB_OLD_ESCAPE)
572 count += getw (fp) - LOCATEDB_OLD_OFFSET;
573 else
574 count += c - LOCATEDB_OLD_OFFSET;
576 /* Overlay the old path with the remainder of the new. */
577 for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
578 if (c < 0200)
579 *s++ = c; /* An ordinary character. */
580 else
582 /* Bigram markers have the high bit set. */
583 c &= 0177;
584 *s++ = bigram1[c];
585 *s++ = bigram2[c];
587 *s-- = '\0';
589 else
591 if (c == LOCATEDB_ESCAPE)
592 count += (short)get_short (fp);
593 else if (c > 127)
594 count += c - 256;
595 else
596 count += c;
598 if (count > strlen(path))
600 /* This should not happen generally , but since we're
601 * reading in data which is outside our control, we
602 * cannot prevent it.
604 error(1, 0, _("locate database `%s' is corrupt or invalid"), dbfile);
607 /* Overlay the old path with the remainder of the new. */
608 nread = locate_read_str (&path, &pathsize, fp, 0, count);
609 if (nread < 0)
610 break;
611 c = getc (fp);
612 s = path + count + nread - 1; /* Move to the last char in path. */
613 assert (s[0] != '\0');
614 assert (s[1] == '\0'); /* Our terminator. */
615 assert (s[2] == '\0'); /* Added by locate_read_str. */
618 testpath = basename_only ? base_name(path) : path;
619 if (VISIT_ACCEPTED == process_filename(testpath, path))
621 if ((++items_accepted >= limit) && use_limit)
623 break;
628 if (ferror (fp))
630 error (0, errno, "%s", dbfile);
631 return 0;
633 if (fclose (fp) == EOF)
635 error (0, errno, "%s", dbfile);
636 return 0;
639 return items_accepted;
645 extern char *version_string;
647 /* The name this program was run with. */
648 char *program_name;
650 static void
651 usage (stream)
652 FILE *stream;
654 fprintf (stream, _("\
655 Usage: %s [-d path | --database=path] [-e | --existing]\n\
656 [-i | --ignore-case] [--wholepath] [--basename] [--limit=N | -l N]\n\
657 [--version] [--help] pattern...\n"),
658 program_name);
659 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
662 static void
663 sanity_check_dbpath(const char *path)
665 size_t len;
667 if (':' == path[0])
669 error(0, 0, _("warning: locate database path `%s' contains a leading colon, which is not a valid database name"), path);
672 len = strlen(path);
673 if (len > 0u)
675 if (':' == path[len-1u])
677 error(0, 0, _("warning: locate database path `%s' contains a trailing colon, which is not a valid database name"), path);
683 static struct option const longopts[] =
685 {"database", required_argument, NULL, 'd'},
686 {"existing", no_argument, NULL, 'e'},
687 {"ignore-case", no_argument, NULL, 'i'},
688 {"help", no_argument, NULL, 'h'},
689 {"version", no_argument, NULL, 'v'},
690 {"null", no_argument, NULL, '0'},
691 {"count", no_argument, NULL, 'c'},
692 {"wholename", no_argument, NULL, 'w'},
693 {"basename", no_argument, NULL, 'b'},
694 {"stdio", no_argument, NULL, 's'},
695 {"mmap", no_argument, NULL, 'm'},
696 {"limit", required_argument, NULL, 'l'},
697 {NULL, no_argument, NULL, 0}
701 main (argc, argv)
702 int argc;
703 char **argv;
705 char *dbpath;
706 unsigned long int found = 0uL;
707 int optc;
708 int ignore_case = 0;
709 int print = 1;
710 int just_count = 0;
711 int basename_only = 0;
712 uintmax_t limit = 0;
713 int use_limit = 0;
715 program_name = argv[0];
717 #ifdef HAVE_SETLOCALE
718 setlocale (LC_ALL, "");
719 #endif
720 bindtextdomain (PACKAGE, LOCALEDIR);
721 textdomain (PACKAGE);
723 dbpath = getenv ("LOCATE_PATH");
724 if (dbpath == NULL)
725 dbpath = LOCATE_DB;
727 check_existence = 0;
729 while ((optc = getopt_long (argc, argv, "cd:eil:sm0", longopts, (int *) 0)) != -1)
730 switch (optc)
732 case '0':
733 separator = 0;
734 break;
736 case 'b':
737 basename_only = 1;
738 break;
740 case 'c':
741 just_count = 1;
742 print = 0;
743 break;
745 case 'd':
746 dbpath = optarg;
747 break;
749 case 'e':
750 check_existence = 1;
751 break;
753 case 'i':
754 ignore_case = 1;
755 break;
757 case 'h':
758 usage (stdout);
759 return 0;
761 case 'v':
762 printf (_("GNU locate version %s\n"), version_string);
763 return 0;
765 case 'w':
766 basename_only = 0;
767 break;
769 case 'l':
771 char *end = optarg;
772 strtol_error err = xstrtoumax(optarg, &end, 10, &limit, NULL);
773 if (LONGINT_OK != err)
775 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
777 use_limit = 1;
779 break;
781 case 's': /* use stdio */
782 case 'm': /* use mmap */
783 /* These options are implemented simply for
784 * compatibility with FreeBSD
786 break;
788 default:
789 usage (stderr);
790 return 1;
793 if (optind == argc)
795 usage (stderr);
796 return 1;
799 sanity_check_dbpath(dbpath);
801 for (; optind < argc; optind++)
803 char *e;
804 next_element (dbpath); /* Initialize. */
805 while ((e = next_element ((char *) NULL)) != NULL)
807 found += new_locate (argv[optind], e, ignore_case, print, basename_only, use_limit, limit);
811 if (just_count)
813 printf("%ld\n", found);
816 if (found || (use_limit && (limit==0)) )
817 return 0;
818 else
819 return 1;