Merged changes made for version 4.1.20 onto the trunk
[findutils.git] / locate / locate.c
blob07da835a5081f93a0c3464a4bb3facb3559cc76f
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 96, 98, 99, 2000, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 9 Temple Place - Suite 330, Boston, MA 02111-1307,
17 USA.
20 /* Usage: locate [options] pattern...
22 Scan a pathname list for the full pathname of a file, given only
23 a piece of the name (possibly containing shell globbing metacharacters).
24 The list has been processed with front-compression, which reduces
25 the list size by a factor of 4-5.
26 Recognizes two database formats, old and new. The old format is
27 bigram coded, which reduces space by a further 20-25% and uses the
28 following encoding of the database bytes:
30 0-28 likeliest differential counts + offset (14) to make nonnegative
31 30 escape code for out-of-range count to follow in next halfword
32 128-255 bigram codes (the 128 most common, as determined by `updatedb')
33 32-127 single character (printable) ASCII remainder
35 Uses a novel two-tiered string search technique:
37 First, match a metacharacter-free subpattern and a partial pathname
38 BACKWARDS to avoid full expansion of the pathname list.
39 The time savings is 40-50% over forward matching, which cannot efficiently
40 handle overlapped search patterns and compressed path remainders.
42 Then, match the actual shell glob-style regular expression (if in this form)
43 against the candidate pathnames using the slower shell filename
44 matching routines.
46 Described more fully in Usenix ;login:, Vol 8, No 1,
47 February/March, 1983, p. 8.
49 Written by James A. Woods <jwoods@adobe.com>.
50 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
52 #define _GNU_SOURCE
53 #include <gnulib/config.h>
54 #undef VERSION
55 #undef PACKAGE_VERSION
56 #undef PACKAGE_TARNAME
57 #undef PACKAGE_STRING
58 #undef PACKAGE_NAME
59 #undef PACKAGE
60 #include <config.h>
61 #include <stdio.h>
62 #include <sys/types.h>
63 #include <sys/stat.h>
64 #include <time.h>
65 #include <fnmatch.h>
66 #include <getopt.h>
68 #define NDEBUG
69 #include <assert.h>
71 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
72 #include <string.h>
73 #else
74 #include <strings.h>
75 #define strchr index
76 #endif
78 #ifdef STDC_HEADERS
79 #include <stdlib.h>
80 #endif
82 #ifdef HAVE_ERRNO_H
83 #include <errno.h>
84 #else
85 extern int errno;
86 #endif
88 #ifdef HAVE_LOCALE_H
89 #include <locale.h>
90 #endif
92 #if ENABLE_NLS
93 # include <libintl.h>
94 # define _(Text) gettext (Text)
95 #else
96 # define _(Text) Text
97 #define textdomain(Domain)
98 #define bindtextdomain(Package, Directory)
99 #endif
100 #ifdef gettext_noop
101 # define N_(String) gettext_noop (String)
102 #else
103 # define N_(String) (String)
104 #endif
106 #include "locatedb.h"
107 #include <getline.h>
109 /* Note that this evaluates C many times. */
110 #ifdef _LIBC
111 # define TOUPPER(Ch) toupper (Ch)
112 # define TOLOWER(Ch) tolower (Ch)
113 #else
114 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
115 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
116 #endif
118 typedef enum {false, true} boolean;
120 /* Warn if a database is older than this. 8 days allows for a weekly
121 update that takes up to a day to perform. */
122 #define WARN_NUMBER_UNITS (8)
123 /* Printable name of units used in WARN_SECONDS */
124 static const char warn_name_units[] = N_("days");
125 #define SECONDS_PER_UNIT (60 * 60 * 24)
127 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
129 /* Check for existence of files before printing them out? */
130 int check_existence = 0;
132 char *next_element ();
133 char *xmalloc ();
134 char *xrealloc ();
135 void error ();
137 /* Read in a 16-bit int, high byte first (network byte order). */
139 static int
140 get_short (fp)
141 FILE *fp;
144 register short x;
146 x = fgetc (fp) << 8;
147 x |= (fgetc (fp) & 0xff);
148 return x;
151 /* Return a pointer to the last character in a static copy of the last
152 glob-free subpattern in NAME,
153 with '\0' prepended for a fast backwards pre-match. */
155 static char *
156 last_literal_end (name)
157 char *name;
159 static char *globfree = NULL; /* A copy of the subpattern in NAME. */
160 static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
161 register char *subp; /* Return value. */
162 register char *p; /* Search location in NAME. */
164 /* Find the end of the subpattern.
165 Skip trailing metacharacters and [] ranges. */
166 for (p = name + strlen (name) - 1; p >= name && strchr ("*?]", *p) != NULL;
167 p--)
169 if (*p == ']')
170 while (p >= name && *p != '[')
171 p--;
173 if (p < name)
174 p = name;
176 if (p - name + 3 > gfalloc)
178 gfalloc = p - name + 3 + 64; /* Room to grow. */
179 globfree = xrealloc (globfree, gfalloc);
181 subp = globfree;
182 *subp++ = '\0';
184 /* If the pattern has only metacharacters, make every path match the
185 subpattern, so it gets checked the slow way. */
186 if (p == name && strchr ("?*[]", *p) != NULL)
187 *subp++ = '/';
188 else
190 char *endmark;
191 /* Find the start of the metacharacter-free subpattern. */
192 for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
194 /* Copy the subpattern into globfree. */
195 for (++p; p <= endmark; )
196 *subp++ = *p++;
198 *subp-- = '\0'; /* Null terminate, though it's not needed. */
200 return subp;
203 /* getstr()
205 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
206 * until we reach DELIMITER or end-of-file. We reallocate the buffer
207 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
208 * is made regarding the content of the data (i.e. the implementation is
209 * 8-bit clean, the only delimiter is DELIMITER).
211 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
212 * has been removed from gnulib.
214 * We call the function locate_read_str() to avoid a name clash with the curses
215 * function getstr().
217 static int locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
219 char * p = NULL;
220 size_t sz = 0;
221 int needed, nread;
223 nread = getdelim(&p, &sz, delimiter, fp);
224 if (nread >= 0)
226 assert(p != NULL);
228 needed = offs + nread;
229 if (needed > (*siz))
231 char *pnew = realloc(*buf, needed);
232 if (NULL == pnew)
234 return -1; /* FAIL */
236 else
238 *siz = needed;
239 *buf = pnew;
242 memcpy((*buf)+offs, p, nread);
243 free(p);
245 return nread;
249 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
250 Return the number of entries printed. */
252 static int
253 locate (pathpart, dbfile, ignore_case)
254 char *pathpart, *dbfile;
255 int ignore_case;
257 /* The pathname database. */
258 FILE *fp;
259 /* An input byte. */
260 int c;
261 /* Number of bytes read from an entry. */
262 int nread;
264 /* true if PATHPART contains globbing metacharacters. */
265 boolean globflag;
266 /* The end of the last glob-free subpattern in PATHPART. */
267 char *patend;
269 /* The current input database entry. */
270 char *path;
271 /* Amount allocated for it. */
272 size_t pathsize;
274 /* The length of the prefix shared with the previous database entry. */
275 int count = 0;
276 /* Where in `path' to stop the backward search for the last character
277 in the subpattern. Set according to `count'. */
278 char *cutoff;
280 /* true if we found a fast match (of patend) on the previous path. */
281 boolean prev_fast_match = false;
282 /* The return value. */
283 int printed = 0;
285 /* true if reading a bigram-encoded database. */
286 boolean old_format = false;
287 /* For the old database format,
288 the first and second characters of the most common bigrams. */
289 char bigram1[128], bigram2[128];
291 /* To check the age of the database. */
292 struct stat st;
293 time_t now;
295 if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
297 error (0, errno, "%s", dbfile);
298 return 0;
300 time(&now);
301 if (now - st.st_mtime > WARN_SECONDS)
303 /* For example:
304 warning: database `fred' is more than 8 days old */
305 error (0, 0, _("warning: database `%s' is more than %d %s old"),
306 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
309 pathsize = 1026; /* Increased as necessary by locate_read_str. */
310 path = xmalloc (pathsize);
312 nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
313 if (nread != sizeof (LOCATEDB_MAGIC)
314 || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
316 int i;
317 /* Read the list of the most common bigrams in the database. */
318 fseek (fp, 0, 0);
319 for (i = 0; i < 128; i++)
321 bigram1[i] = getc (fp);
322 bigram2[i] = getc (fp);
324 old_format = true;
327 /* If we ignore case,
328 convert it to lower first so we don't have to do it every time */
329 if (ignore_case){
330 for (patend=pathpart;*patend;++patend){
331 *patend=TOLOWER(*patend);
336 globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
337 || strchr (pathpart, '[');
339 patend = last_literal_end (pathpart);
341 c = getc (fp);
342 while (c != EOF)
344 register char *s; /* Scan the path we read in. */
346 if (old_format)
348 /* Get the offset in the path where this path info starts. */
349 if (c == LOCATEDB_OLD_ESCAPE)
350 count += getw (fp) - LOCATEDB_OLD_OFFSET;
351 else
352 count += c - LOCATEDB_OLD_OFFSET;
354 /* Overlay the old path with the remainder of the new. */
355 for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
356 if (c < 0200)
357 *s++ = c; /* An ordinary character. */
358 else
360 /* Bigram markers have the high bit set. */
361 c &= 0177;
362 *s++ = bigram1[c];
363 *s++ = bigram2[c];
365 *s-- = '\0';
367 else
369 if (c == LOCATEDB_ESCAPE)
370 count += get_short (fp);
371 else if (c > 127)
372 count += c - 256;
373 else
374 count += c;
376 /* Overlay the old path with the remainder of the new. */
377 nread = locate_read_str (&path, &pathsize, fp, 0, count);
378 if (nread < 0)
379 break;
380 c = getc (fp);
381 s = path + count + nread - 2; /* Move to the last char in path. */
382 assert (s[0] != '\0');
383 assert (s[1] == '\0'); /* Our terminator. */
384 assert (s[2] == '\0'); /* Added by locate_read_str. */
387 /* If the previous path matched, scan the whole path for the last
388 char in the subpattern. If not, the shared prefix doesn't match
389 the pattern, so don't scan it for the last char. */
390 cutoff = prev_fast_match ? path : path + count;
392 /* Search backward starting at the end of the path we just read in,
393 for the character at the end of the last glob-free subpattern
394 in PATHPART. */
395 if (ignore_case)
397 for (prev_fast_match = false; s >= cutoff; s--)
398 /* Fast first char check. */
399 if (TOLOWER(*s) == *patend)
401 char *s2; /* Scan the path we read in. */
402 register char *p2; /* Scan `patend'. */
404 for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && TOLOWER(*s2) == *p2;
405 s2--, p2--)
407 if (*p2 == '\0')
409 /* Success on the fast match. Compare the whole pattern
410 if it contains globbing characters. */
411 prev_fast_match = true;
412 if (globflag == false || fnmatch (pathpart, path, FNM_CASEFOLD) == 0)
414 if (!check_existence || stat(path, &st) == 0)
416 puts (path);
417 ++printed;
420 break;
424 else {
426 for (prev_fast_match = false; s >= cutoff; s--)
427 /* Fast first char check. */
428 if (*s == *patend)
430 char *s2; /* Scan the path we read in. */
431 register char *p2; /* Scan `patend'. */
433 for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && *s2 == *p2;
434 s2--, p2--)
436 if (*p2 == '\0')
438 /* Success on the fast match. Compare the whole pattern
439 if it contains globbing characters. */
440 prev_fast_match = true;
441 if (globflag == false || fnmatch (pathpart, path,
442 0) == 0)
444 if (!check_existence || stat(path, &st) == 0)
446 puts (path);
447 ++printed;
450 break;
457 if (ferror (fp))
459 error (0, errno, "%s", dbfile);
460 return 0;
462 if (fclose (fp) == EOF)
464 error (0, errno, "%s", dbfile);
465 return 0;
468 return printed;
471 extern char *version_string;
473 /* The name this program was run with. */
474 char *program_name;
476 static void
477 usage (stream, status)
478 FILE *stream;
479 int status;
481 fprintf (stream, _("\
482 Usage: %s [-d path | --database=path] [-e | --existing]\n\
483 [-i | --ignore-case] [--version] [--help] pattern...\n"),
484 program_name);
485 fputs (_("\nReport bugs to <bug-findutils@gnu.org>."), stream);
486 exit (status);
489 static struct option const longopts[] =
491 {"database", required_argument, NULL, 'd'},
492 {"existing", no_argument, NULL, 'e'},
493 {"ignore-case", no_argument, NULL, 'i'},
494 {"help", no_argument, NULL, 'h'},
495 {"version", no_argument, NULL, 'v'},
496 {NULL, no_argument, NULL, 0}
500 main (argc, argv)
501 int argc;
502 char **argv;
504 char *dbpath;
505 int fnmatch_flags = 0;
506 int found = 0, optc;
507 int ignore_case = 0;
509 program_name = argv[0];
511 #ifdef HAVE_SETLOCALE
512 setlocale (LC_ALL, "");
513 #endif
514 bindtextdomain (PACKAGE, LOCALEDIR);
515 textdomain (PACKAGE);
517 dbpath = getenv ("LOCATE_PATH");
518 if (dbpath == NULL)
519 dbpath = LOCATE_DB;
521 check_existence = 0;
523 while ((optc = getopt_long (argc, argv, "d:ei", longopts, (int *) 0)) != -1)
524 switch (optc)
526 case 'd':
527 dbpath = optarg;
528 break;
530 case 'e':
531 check_existence = 1;
532 break;
534 case 'i':
535 ignore_case = 1;
536 fnmatch_flags |= FNM_CASEFOLD;
537 break;
539 case 'h':
540 usage (stdout, 0);
542 case 'v':
543 printf (_("GNU locate version %s\n"), version_string);
544 exit (0);
546 default:
547 usage (stderr, 1);
550 if (optind == argc)
551 usage (stderr, 1);
553 for (; optind < argc; optind++)
555 char *e;
556 next_element (dbpath); /* Initialize. */
557 while ((e = next_element ((char *) NULL)) != NULL)
558 found |= locate (argv[optind], e, ignore_case);
561 exit (!found);