Use gnulib-tool --import to import the gnulib code, rather than the odd way we were...
[findutils.git] / locate / locate.c
blobb9b69b23766b3bbfb9de3c964030cefe488821e5
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 96, 98, 99, 2000, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 9 Temple Place - Suite 330, Boston, MA 02111-1307,
17 USA.
20 /* Usage: locate [options] pattern...
22 Scan a pathname list for the full pathname of a file, given only
23 a piece of the name (possibly containing shell globbing metacharacters).
24 The list has been processed with front-compression, which reduces
25 the list size by a factor of 4-5.
26 Recognizes two database formats, old and new. The old format is
27 bigram coded, which reduces space by a further 20-25% and uses the
28 following encoding of the database bytes:
30 0-28 likeliest differential counts + offset (14) to make nonnegative
31 30 escape code for out-of-range count to follow in next halfword
32 128-255 bigram codes (the 128 most common, as determined by `updatedb')
33 32-127 single character (printable) ASCII remainder
35 Uses a novel two-tiered string search technique:
37 First, match a metacharacter-free subpattern and a partial pathname
38 BACKWARDS to avoid full expansion of the pathname list.
39 The time savings is 40-50% over forward matching, which cannot efficiently
40 handle overlapped search patterns and compressed path remainders.
42 Then, match the actual shell glob-style regular expression (if in this form)
43 against the candidate pathnames using the slower shell filename
44 matching routines.
46 Described more fully in Usenix ;login:, Vol 8, No 1,
47 February/March, 1983, p. 8.
49 Written by James A. Woods <jwoods@adobe.com>.
50 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
52 #include <config.h>
53 #include <stdio.h>
54 #include <ctype.h>
55 #include <sys/types.h>
56 #include <sys/stat.h>
57 #include <time.h>
58 #include <fnmatch.h>
59 #include <getopt.h>
61 #define NDEBUG
62 #include <assert.h>
64 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
65 #include <string.h>
66 #else
67 #include <strings.h>
68 #define strchr index
69 #endif
71 #ifdef STDC_HEADERS
72 #include <stdlib.h>
73 #endif
75 #ifdef HAVE_ERRNO_H
76 #include <errno.h>
77 #else
78 extern int errno;
79 #endif
81 #ifdef HAVE_LOCALE_H
82 #include <locale.h>
83 #endif
85 #if ENABLE_NLS
86 # include <libintl.h>
87 # define _(Text) gettext (Text)
88 #else
89 # define _(Text) Text
90 #define textdomain(Domain)
91 #define bindtextdomain(Package, Directory)
92 #endif
93 #ifdef gettext_noop
94 # define N_(String) gettext_noop (String)
95 #else
96 /* We used to use (String) instead of just String, but HP-UX 11.23 for
97 * ia64 has what seems to be a compiler bug, because it refuses input
98 * like: static const char buf[] = ("string");i
100 # define N_(String) String
101 #endif
103 #include "locatedb.h"
104 #include <getline.h>
106 /* Note that this evaluates C many times. */
107 #ifdef _LIBC
108 # define TOUPPER(Ch) toupper (Ch)
109 # define TOLOWER(Ch) tolower (Ch)
110 #else
111 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
112 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
113 #endif
115 typedef enum {false, true} boolean;
117 /* Warn if a database is older than this. 8 days allows for a weekly
118 update that takes up to a day to perform. */
119 #define WARN_NUMBER_UNITS (8)
120 /* Printable name of units used in WARN_SECONDS */
121 static const char warn_name_units[] = N_("days");
122 #define SECONDS_PER_UNIT (60 * 60 * 24)
124 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
126 /* Check for existence of files before printing them out? */
127 int check_existence = 0;
129 char *next_element ();
130 char *xmalloc ();
131 char *xrealloc ();
132 void error ();
134 /* Read in a 16-bit int, high byte first (network byte order). */
136 static short
137 get_short (fp)
138 FILE *fp;
141 register short x;
143 x = fgetc (fp) << 8;
144 x |= (fgetc (fp) & 0xff);
145 return x;
148 /* Return a pointer to the last character in a static copy of the last
149 glob-free subpattern in NAME,
150 with '\0' prepended for a fast backwards pre-match. */
152 static char *
153 last_literal_end (name)
154 char *name;
156 static char *globfree = NULL; /* A copy of the subpattern in NAME. */
157 static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
158 register char *subp; /* Return value. */
159 register char *p; /* Search location in NAME. */
161 /* Find the end of the subpattern.
162 Skip trailing metacharacters and [] ranges. */
163 for (p = name + strlen (name) - 1; p >= name && strchr ("*?]", *p) != NULL;
164 p--)
166 if (*p == ']')
167 while (p >= name && *p != '[')
168 p--;
170 if (p < name)
171 p = name;
173 if (p - name + 3 > gfalloc)
175 gfalloc = p - name + 3 + 64; /* Room to grow. */
176 globfree = xrealloc (globfree, gfalloc);
178 subp = globfree;
179 *subp++ = '\0';
181 /* If the pattern has only metacharacters, make every path match the
182 subpattern, so it gets checked the slow way. */
183 if (p == name && strchr ("?*[]", *p) != NULL)
184 *subp++ = '/';
185 else
187 char *endmark;
188 /* Find the start of the metacharacter-free subpattern. */
189 for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
191 /* Copy the subpattern into globfree. */
192 for (++p; p <= endmark; )
193 *subp++ = *p++;
195 *subp-- = '\0'; /* Null terminate, though it's not needed. */
197 return subp;
200 /* getstr()
202 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
203 * until we reach DELIMITER or end-of-file. We reallocate the buffer
204 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
205 * is made regarding the content of the data (i.e. the implementation is
206 * 8-bit clean, the only delimiter is DELIMITER).
208 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
209 * has been removed from gnulib.
211 * We call the function locate_read_str() to avoid a name clash with the curses
212 * function getstr().
214 static int locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
216 char * p = NULL;
217 size_t sz = 0;
218 int needed, nread;
220 nread = getdelim(&p, &sz, delimiter, fp);
221 if (nread >= 0)
223 assert(p != NULL);
225 needed = offs + nread;
226 if (needed > (*siz))
228 char *pnew = realloc(*buf, needed);
229 if (NULL == pnew)
231 return -1; /* FAIL */
233 else
235 *siz = needed;
236 *buf = pnew;
239 memcpy((*buf)+offs, p, nread);
240 free(p);
242 return nread;
246 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
247 Return the number of entries printed. */
249 static int
250 locate (pathpart, dbfile, ignore_case)
251 char *pathpart, *dbfile;
252 int ignore_case;
254 /* The pathname database. */
255 FILE *fp;
256 /* An input byte. */
257 int c;
258 /* Number of bytes read from an entry. */
259 int nread;
261 /* true if PATHPART contains globbing metacharacters. */
262 boolean globflag;
263 /* The end of the last glob-free subpattern in PATHPART. */
264 char *patend;
266 /* The current input database entry. */
267 char *path;
268 /* Amount allocated for it. */
269 size_t pathsize;
271 /* The length of the prefix shared with the previous database entry. */
272 int count = 0;
273 /* Where in `path' to stop the backward search for the last character
274 in the subpattern. Set according to `count'. */
275 char *cutoff;
277 /* true if we found a fast match (of patend) on the previous path. */
278 boolean prev_fast_match = false;
279 /* The return value. */
280 int printed = 0;
282 /* true if reading a bigram-encoded database. */
283 boolean old_format = false;
284 /* For the old database format,
285 the first and second characters of the most common bigrams. */
286 char bigram1[128], bigram2[128];
288 /* To check the age of the database. */
289 struct stat st;
290 time_t now;
292 if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
294 error (0, errno, "%s", dbfile);
295 return 0;
297 time(&now);
298 if (now - st.st_mtime > WARN_SECONDS)
300 /* For example:
301 warning: database `fred' is more than 8 days old */
302 error (0, 0, _("warning: database `%s' is more than %d %s old"),
303 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
306 pathsize = 1026; /* Increased as necessary by locate_read_str. */
307 path = xmalloc (pathsize);
309 nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
310 if (nread != sizeof (LOCATEDB_MAGIC)
311 || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
313 int i;
314 /* Read the list of the most common bigrams in the database. */
315 fseek (fp, 0, 0);
316 for (i = 0; i < 128; i++)
318 bigram1[i] = getc (fp);
319 bigram2[i] = getc (fp);
321 old_format = true;
324 /* If we ignore case,
325 convert it to lower first so we don't have to do it every time */
326 if (ignore_case){
327 for (patend=pathpart;*patend;++patend){
328 *patend=TOLOWER(*patend);
333 globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
334 || strchr (pathpart, '[');
336 patend = last_literal_end (pathpart);
338 c = getc (fp);
339 while (c != EOF)
341 register char *s; /* Scan the path we read in. */
343 if (old_format)
345 /* Get the offset in the path where this path info starts. */
346 if (c == LOCATEDB_OLD_ESCAPE)
347 count += getw (fp) - LOCATEDB_OLD_OFFSET;
348 else
349 count += c - LOCATEDB_OLD_OFFSET;
351 /* Overlay the old path with the remainder of the new. */
352 for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
353 if (c < 0200)
354 *s++ = c; /* An ordinary character. */
355 else
357 /* Bigram markers have the high bit set. */
358 c &= 0177;
359 *s++ = bigram1[c];
360 *s++ = bigram2[c];
362 *s-- = '\0';
364 else
366 if (c == LOCATEDB_ESCAPE)
367 count += (short)get_short (fp);
368 else if (c > 127)
369 count += c - 256;
370 else
371 count += c;
373 if (count > strlen(path))
375 /* This should not happen generally , but since we're
376 * reading in data which is outside our control, we
377 * cannot prevent it.
379 error(1, 0, _("locate database `%s' is corrupt or invalid"), dbfile);
382 /* Overlay the old path with the remainder of the new. */
383 nread = locate_read_str (&path, &pathsize, fp, 0, count);
384 if (nread < 0)
385 break;
386 c = getc (fp);
387 s = path + count + nread - 2; /* Move to the last char in path. */
388 assert (s[0] != '\0');
389 assert (s[1] == '\0'); /* Our terminator. */
390 assert (s[2] == '\0'); /* Added by locate_read_str. */
393 /* If the previous path matched, scan the whole path for the last
394 char in the subpattern. If not, the shared prefix doesn't match
395 the pattern, so don't scan it for the last char. */
396 cutoff = prev_fast_match ? path : path + count;
398 /* Search backward starting at the end of the path we just read in,
399 for the character at the end of the last glob-free subpattern
400 in PATHPART. */
401 for(prev_fast_match=false; s>=cutoff; s--)
403 char *s2; /* Scan the path we read in. */
404 register char *p2; /* Scan `patend'. */
406 /* Fast first char check. */
407 if(ignore_case)
409 if(TOLOWER(*s)!=*patend)
410 continue;
412 else if(*s!=*patend)
413 continue;
415 if(ignore_case)
416 for(s2=s-1, p2=patend-1; *p2!='\0' && TOLOWER(*s2)==*p2; s2--, p2--);
417 else
418 for(s2=s-1, p2=patend-1; *p2!='\0' && *s2==*p2; s2--, p2--);
420 if(*p2!='\0')
421 continue;
422 /* Success on the fast match. Compare the whole pattern
423 if it contains globbing characters. */
424 prev_fast_match = true;
425 if(globflag)
427 if(ignore_case)
429 if(fnmatch(pathpart,basename(path),FNM_CASEFOLD)!=0)
430 break;
432 else
433 if(fnmatch(pathpart,basename(path),0)!=0)
434 break;
436 if(check_existence && stat(path,&st)!=0)
437 break;
438 puts(path);
439 ++printed;
440 break;
444 if (ferror (fp))
446 error (0, errno, "%s", dbfile);
447 return 0;
449 if (fclose (fp) == EOF)
451 error (0, errno, "%s", dbfile);
452 return 0;
455 return printed;
458 extern char *version_string;
460 /* The name this program was run with. */
461 char *program_name;
463 static void
464 usage (stream)
465 FILE *stream;
467 fprintf (stream, _("\
468 Usage: %s [-d path | --database=path] [-e | --existing]\n\
469 [-i | --ignore-case] [--version] [--help] pattern...\n"),
470 program_name);
471 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
474 static struct option const longopts[] =
476 {"database", required_argument, NULL, 'd'},
477 {"existing", no_argument, NULL, 'e'},
478 {"ignore-case", no_argument, NULL, 'i'},
479 {"help", no_argument, NULL, 'h'},
480 {"version", no_argument, NULL, 'v'},
481 {NULL, no_argument, NULL, 0}
485 main (argc, argv)
486 int argc;
487 char **argv;
489 char *dbpath;
490 int fnmatch_flags = 0;
491 int found = 0, optc;
492 int ignore_case = 0;
494 program_name = argv[0];
496 #ifdef HAVE_SETLOCALE
497 setlocale (LC_ALL, "");
498 #endif
499 bindtextdomain (PACKAGE, LOCALEDIR);
500 textdomain (PACKAGE);
502 dbpath = getenv ("LOCATE_PATH");
503 if (dbpath == NULL)
504 dbpath = LOCATE_DB;
506 check_existence = 0;
508 while ((optc = getopt_long (argc, argv, "d:ei", longopts, (int *) 0)) != -1)
509 switch (optc)
511 case 'd':
512 dbpath = optarg;
513 break;
515 case 'e':
516 check_existence = 1;
517 break;
519 case 'i':
520 ignore_case = 1;
521 fnmatch_flags |= FNM_CASEFOLD;
522 break;
524 case 'h':
525 usage (stdout);
526 return 0;
528 case 'v':
529 printf (_("GNU locate version %s\n"), version_string);
530 return 0;
532 default:
533 usage (stderr);
534 return 1;
537 if (optind == argc)
539 usage (stderr);
540 return 1;
544 for (; optind < argc; optind++)
546 char *e;
547 next_element (dbpath); /* Initialize. */
548 while ((e = next_element ((char *) NULL)) != NULL)
549 found |= locate (argv[optind], e, ignore_case);
552 if (found)
553 return 0;
554 else
555 return 1;