* find/find.1: -D option (for Solaris door files) is documented
[findutils.git] / locate / locate.c
blobbf620b430ec16d811c055f417c26fea91d5ab36d
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* Usage: locate [options] pattern...
20 Scan a pathname list for the full pathname of a file, given only
21 a piece of the name (possibly containing shell globbing metacharacters).
22 The list has been processed with front-compression, which reduces
23 the list size by a factor of 4-5.
24 Recognizes two database formats, old and new. The old format is
25 bigram coded, which reduces space by a further 20-25% and uses the
26 following encoding of the database bytes:
28 0-28 likeliest differential counts + offset (14) to make nonnegative
29 30 escape code for out-of-range count to follow in next halfword
30 128-255 bigram codes (the 128 most common, as determined by `updatedb')
31 32-127 single character (printable) ASCII remainder
33 Uses a novel two-tiered string search technique:
35 First, match a metacharacter-free subpattern and a partial pathname
36 BACKWARDS to avoid full expansion of the pathname list.
37 The time savings is 40-50% over forward matching, which cannot efficiently
38 handle overlapped search patterns and compressed path remainders.
40 Then, match the actual shell glob-style regular expression (if in this form)
41 against the candidate pathnames using the slower shell filename
42 matching routines.
44 Described more fully in Usenix ;login:, Vol 8, No 1,
45 February/March, 1983, p. 8.
47 Written by James A. Woods <jwoods@adobe.com>.
48 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
50 #define _GNU_SOURCE
51 #include <config.h>
52 #include <stdio.h>
53 #include <sys/types.h>
54 #include <sys/stat.h>
55 #include <time.h>
56 #include <fnmatch.h>
57 #include <getopt.h>
59 #define NDEBUG
60 #include <assert.h>
62 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
63 #include <string.h>
64 #else
65 #include <strings.h>
66 #define strchr index
67 #endif
69 #ifdef STDC_HEADERS
70 #include <stdlib.h>
71 #endif
73 #ifdef HAVE_ERRNO_H
74 #include <errno.h>
75 #else
76 extern int errno;
77 #endif
79 #ifdef HAVE_LOCALE_H
80 #include <locale.h>
81 #endif
83 #if ENABLE_NLS
84 # include <libintl.h>
85 # define _(Text) gettext (Text)
86 #else
87 # define _(Text) Text
88 #define textdomain(Domain)
89 #define bindtextdomain(Package, Directory)
90 #endif
91 #ifdef gettext_noop
92 # define N_(String) gettext_noop (String)
93 #else
94 # define N_(String) (String)
95 #endif
97 #include "locatedb.h"
98 #include <getline.h>
100 /* Note that this evaluates C many times. */
101 #ifdef _LIBC
102 # define TOUPPER(Ch) toupper (Ch)
103 # define TOLOWER(Ch) tolower (Ch)
104 #else
105 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
106 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
107 #endif
109 typedef enum {false, true} boolean;
111 /* Warn if a database is older than this. 8 days allows for a weekly
112 update that takes up to a day to perform. */
113 #define WARN_NUMBER_UNITS (8)
114 /* Printable name of units used in WARN_SECONDS */
115 static const char warn_name_units[] = N_("days");
116 #define SECONDS_PER_UNIT (60 * 60 * 24)
118 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
120 /* Check for existence of files before printing them out? */
121 int check_existence = 0;
123 char *next_element ();
124 char *xmalloc ();
125 char *xrealloc ();
126 void error ();
128 /* Read in a 16-bit int, high byte first (network byte order). */
130 static int
131 get_short (fp)
132 FILE *fp;
135 register short x;
137 x = fgetc (fp) << 8;
138 x |= (fgetc (fp) & 0xff);
139 return x;
142 /* Return a pointer to the last character in a static copy of the last
143 glob-free subpattern in NAME,
144 with '\0' prepended for a fast backwards pre-match. */
146 static char *
147 last_literal_end (name)
148 char *name;
150 static char *globfree = NULL; /* A copy of the subpattern in NAME. */
151 static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
152 register char *subp; /* Return value. */
153 register char *p; /* Search location in NAME. */
155 /* Find the end of the subpattern.
156 Skip trailing metacharacters and [] ranges. */
157 for (p = name + strlen (name) - 1; p >= name && strchr ("*?]", *p) != NULL;
158 p--)
160 if (*p == ']')
161 while (p >= name && *p != '[')
162 p--;
164 if (p < name)
165 p = name;
167 if (p - name + 3 > gfalloc)
169 gfalloc = p - name + 3 + 64; /* Room to grow. */
170 globfree = xrealloc (globfree, gfalloc);
172 subp = globfree;
173 *subp++ = '\0';
175 /* If the pattern has only metacharacters, make every path match the
176 subpattern, so it gets checked the slow way. */
177 if (p == name && strchr ("?*[]", *p) != NULL)
178 *subp++ = '/';
179 else
181 char *endmark;
182 /* Find the start of the metacharacter-free subpattern. */
183 for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
185 /* Copy the subpattern into globfree. */
186 for (++p; p <= endmark; )
187 *subp++ = *p++;
189 *subp-- = '\0'; /* Null terminate, though it's not needed. */
191 return subp;
194 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
195 Return the number of entries printed. */
197 static int
198 locate (pathpart, dbfile, ignore_case)
199 char *pathpart, *dbfile;
200 int ignore_case;
202 /* The pathname database. */
203 FILE *fp;
204 /* An input byte. */
205 int c;
206 /* Number of bytes read from an entry. */
207 int nread;
209 /* true if PATHPART contains globbing metacharacters. */
210 boolean globflag;
211 /* The end of the last glob-free subpattern in PATHPART. */
212 char *patend;
214 /* The current input database entry. */
215 char *path;
216 /* Amount allocated for it. */
217 size_t pathsize;
219 /* The length of the prefix shared with the previous database entry. */
220 int count = 0;
221 /* Where in `path' to stop the backward search for the last character
222 in the subpattern. Set according to `count'. */
223 char *cutoff;
225 /* true if we found a fast match (of patend) on the previous path. */
226 boolean prev_fast_match = false;
227 /* The return value. */
228 int printed = 0;
230 /* true if reading a bigram-encoded database. */
231 boolean old_format = false;
232 /* For the old database format,
233 the first and second characters of the most common bigrams. */
234 char bigram1[128], bigram2[128];
236 /* To check the age of the database. */
237 struct stat st;
238 time_t now;
240 if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
242 error (0, errno, "%s", dbfile);
243 return 0;
245 time(&now);
246 if (now - st.st_mtime > WARN_SECONDS)
248 /* For example:
249 warning: database `fred' is more than 8 days old */
250 error (0, 0, _("warning: database `%s' is more than %d %s old"),
251 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
254 pathsize = 1026; /* Increased as necessary by getstr. */
255 path = xmalloc (pathsize);
257 nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
258 if (nread != sizeof (LOCATEDB_MAGIC)
259 || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
261 int i;
262 /* Read the list of the most common bigrams in the database. */
263 fseek (fp, 0, 0);
264 for (i = 0; i < 128; i++)
266 bigram1[i] = getc (fp);
267 bigram2[i] = getc (fp);
269 old_format = true;
272 /* If we ignore case,
273 convert it to lower first so we don't have to do it every time */
274 if (ignore_case){
275 for (patend=pathpart;*patend;++patend){
276 *patend=TOLOWER(*patend);
281 globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
282 || strchr (pathpart, '[');
284 patend = last_literal_end (pathpart);
286 c = getc (fp);
287 while (c != EOF)
289 register char *s; /* Scan the path we read in. */
291 if (old_format)
293 /* Get the offset in the path where this path info starts. */
294 if (c == LOCATEDB_OLD_ESCAPE)
295 count += getw (fp) - LOCATEDB_OLD_OFFSET;
296 else
297 count += c - LOCATEDB_OLD_OFFSET;
299 /* Overlay the old path with the remainder of the new. */
300 for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
301 if (c < 0200)
302 *s++ = c; /* An ordinary character. */
303 else
305 /* Bigram markers have the high bit set. */
306 c &= 0177;
307 *s++ = bigram1[c];
308 *s++ = bigram2[c];
310 *s-- = '\0';
312 else
314 if (c == LOCATEDB_ESCAPE)
315 count += get_short (fp);
316 else if (c > 127)
317 count += c - 256;
318 else
319 count += c;
321 /* Overlay the old path with the remainder of the new. */
322 nread = getstr (&path, &pathsize, fp, '\0', count);
323 if (nread < 0)
324 break;
325 c = getc (fp);
326 s = path + count + nread - 2; /* Move to the last char in path. */
327 assert (s[0] != '\0');
328 assert (s[1] == '\0'); /* Our terminator. */
329 assert (s[2] == '\0'); /* Added by getstr. */
332 /* If the previous path matched, scan the whole path for the last
333 char in the subpattern. If not, the shared prefix doesn't match
334 the pattern, so don't scan it for the last char. */
335 cutoff = prev_fast_match ? path : path + count;
337 /* Search backward starting at the end of the path we just read in,
338 for the character at the end of the last glob-free subpattern
339 in PATHPART. */
340 if (ignore_case)
342 for (prev_fast_match = false; s >= cutoff; s--)
343 /* Fast first char check. */
344 if (TOLOWER(*s) == *patend)
346 char *s2; /* Scan the path we read in. */
347 register char *p2; /* Scan `patend'. */
349 for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && TOLOWER(*s2) == *p2;
350 s2--, p2--)
352 if (*p2 == '\0')
354 /* Success on the fast match. Compare the whole pattern
355 if it contains globbing characters. */
356 prev_fast_match = true;
357 if (globflag == false || fnmatch (pathpart, path, FNM_CASEFOLD) == 0)
359 if (!check_existence || stat(path, &st) == 0)
361 puts (path);
362 ++printed;
365 break;
369 else {
371 for (prev_fast_match = false; s >= cutoff; s--)
372 /* Fast first char check. */
373 if (*s == *patend)
375 char *s2; /* Scan the path we read in. */
376 register char *p2; /* Scan `patend'. */
378 for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && *s2 == *p2;
379 s2--, p2--)
381 if (*p2 == '\0')
383 /* Success on the fast match. Compare the whole pattern
384 if it contains globbing characters. */
385 prev_fast_match = true;
386 if (globflag == false || fnmatch (pathpart, path,
387 0) == 0)
389 if (!check_existence || stat(path, &st) == 0)
391 puts (path);
392 ++printed;
395 break;
402 if (ferror (fp))
404 error (0, errno, "%s", dbfile);
405 return 0;
407 if (fclose (fp) == EOF)
409 error (0, errno, "%s", dbfile);
410 return 0;
413 return printed;
416 extern char *version_string;
418 /* The name this program was run with. */
419 char *program_name;
421 static void
422 usage (stream, status)
423 FILE *stream;
424 int status;
426 fprintf (stream, _("\
427 Usage: %s [-d path | --database=path] [-e | --existing]\n\
428 [-i | --ignore-case] [--version] [--help] pattern...\n"),
429 program_name);
430 fputs (_("\nReport bugs to <bug-findutils@gnu.org>."), stream);
431 exit (status);
434 static struct option const longopts[] =
436 {"database", required_argument, NULL, 'd'},
437 {"existing", no_argument, NULL, 'e'},
438 {"ignore-case", no_argument, NULL, 'i'},
439 {"help", no_argument, NULL, 'h'},
440 {"version", no_argument, NULL, 'v'},
441 {NULL, no_argument, NULL, 0}
445 main (argc, argv)
446 int argc;
447 char **argv;
449 char *dbpath;
450 int fnmatch_flags = 0;
451 int found = 0, optc;
452 int ignore_case = 0;
454 program_name = argv[0];
456 #ifdef HAVE_SETLOCALE
457 setlocale (LC_ALL, "");
458 #endif
459 bindtextdomain (PACKAGE, LOCALEDIR);
460 textdomain (PACKAGE);
462 dbpath = getenv ("LOCATE_PATH");
463 if (dbpath == NULL)
464 dbpath = LOCATE_DB;
466 check_existence = 0;
468 while ((optc = getopt_long (argc, argv, "d:ei", longopts, (int *) 0)) != -1)
469 switch (optc)
471 case 'd':
472 dbpath = optarg;
473 break;
475 case 'e':
476 check_existence = 1;
477 break;
479 case 'i':
480 ignore_case = 1;
481 fnmatch_flags |= FNM_CASEFOLD;
482 break;
484 case 'h':
485 usage (stdout, 0);
487 case 'v':
488 printf (_("GNU locate version %s\n"), version_string);
489 exit (0);
491 default:
492 usage (stderr, 1);
495 if (optind == argc)
496 usage (stderr, 1);
498 for (; optind < argc; optind++)
500 char *e;
501 next_element (dbpath); /* Initialize. */
502 while ((e = next_element ((char *) NULL)) != NULL)
503 found |= locate (argv[optind], e, ignore_case);
506 exit (!found);