* xargs/Makefile.am: add ansi2knr
[findutils.git] / locate / locate.c
blob4e5041f5b4b03b512461d80482415f39edd35ae0
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* Usage: locate [options] pattern...
20 Scan a pathname list for the full pathname of a file, given only
21 a piece of the name (possibly containing shell globbing metacharacters).
22 The list has been processed with front-compression, which reduces
23 the list size by a factor of 4-5.
24 Recognizes two database formats, old and new. The old format is
25 bigram coded, which reduces space by a further 20-25% and uses the
26 following encoding of the database bytes:
28 0-28 likeliest differential counts + offset (14) to make nonnegative
29 30 escape code for out-of-range count to follow in next halfword
30 128-255 bigram codes (the 128 most common, as determined by `updatedb')
31 32-127 single character (printable) ASCII remainder
33 Uses a novel two-tiered string search technique:
35 First, match a metacharacter-free subpattern and a partial pathname
36 BACKWARDS to avoid full expansion of the pathname list.
37 The time savings is 40-50% over forward matching, which cannot efficiently
38 handle overlapped search patterns and compressed path remainders.
40 Then, match the actual shell glob-style regular expression (if in this form)
41 against the candidate pathnames using the slower shell filename
42 matching routines.
44 Described more fully in Usenix ;login:, Vol 8, No 1,
45 February/March, 1983, p. 8.
47 Written by James A. Woods <jwoods@adobe.com>.
48 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
50 #include <config.h>
51 #include <stdio.h>
52 #include <sys/types.h>
53 #include <sys/stat.h>
54 #include <time.h>
55 #include <fnmatch.h>
56 #include <getopt.h>
58 #define NDEBUG
59 #include <assert.h>
61 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
62 #include <string.h>
63 #else
64 #include <strings.h>
65 #define strchr index
66 #endif
68 #ifdef STDC_HEADERS
69 #include <stdlib.h>
70 #endif
72 #ifdef HAVE_ERRNO_H
73 #include <errno.h>
74 #else
75 extern int errno;
76 #endif
78 #ifdef HAVE_LOCALE_H
79 #include <locale.h>
80 #endif
82 #if ENABLE_NLS
83 # include <libintl.h>
84 # define _(Text) gettext (Text)
85 #else
86 # define _(Text) Text
87 #define textdomain(Domain)
88 #define bindtextdomain(Package, Directory)
89 #endif
90 #ifdef gettext_noop
91 # define N_(String) gettext_noop (String)
92 #else
93 # define N_(String) (String)
94 #endif
96 #include "locatedb.h"
97 #include <getline.h>
99 typedef enum {false, true} boolean;
101 /* Warn if a database is older than this. 8 days allows for a weekly
102 update that takes up to a day to perform. */
103 #define WARN_NUMBER_UNITS (8)
104 /* Printable name of units used in WARN_SECONDS */
105 static const char warn_name_units[] = N_("days");
106 #define SECONDS_PER_UNIT (60 * 60 * 24)
108 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
110 /* Check for existence of files before printing them out? */
111 int check_existence = 0;
113 char *next_element ();
114 char *xmalloc ();
115 char *xrealloc ();
116 void error ();
118 /* Read in a 16-bit int, high byte first (network byte order). */
120 static int
121 get_short (fp)
122 FILE *fp;
125 register short x;
127 x = fgetc (fp) << 8;
128 x |= (fgetc (fp) & 0xff);
129 return x;
132 /* Return a pointer to the last character in a static copy of the last
133 glob-free subpattern in NAME,
134 with '\0' prepended for a fast backwards pre-match. */
136 static char *
137 last_literal_end (name)
138 char *name;
140 static char *globfree = NULL; /* A copy of the subpattern in NAME. */
141 static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
142 register char *subp; /* Return value. */
143 register char *p; /* Search location in NAME. */
145 /* Find the end of the subpattern.
146 Skip trailing metacharacters and [] ranges. */
147 for (p = name + strlen (name) - 1; p >= name && strchr ("*?]", *p) != NULL;
148 p--)
150 if (*p == ']')
151 while (p >= name && *p != '[')
152 p--;
154 if (p < name)
155 p = name;
157 if (p - name + 3 > gfalloc)
159 gfalloc = p - name + 3 + 64; /* Room to grow. */
160 globfree = xrealloc (globfree, gfalloc);
162 subp = globfree;
163 *subp++ = '\0';
165 /* If the pattern has only metacharacters, make every path match the
166 subpattern, so it gets checked the slow way. */
167 if (p == name && strchr ("?*[]", *p) != NULL)
168 *subp++ = '/';
169 else
171 char *endmark;
172 /* Find the start of the metacharacter-free subpattern. */
173 for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
175 /* Copy the subpattern into globfree. */
176 for (++p; p <= endmark; )
177 *subp++ = *p++;
179 *subp-- = '\0'; /* Null terminate, though it's not needed. */
181 return subp;
184 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
185 Return the number of entries printed. */
187 static int
188 locate (pathpart, dbfile)
189 char *pathpart, *dbfile;
191 /* The pathname database. */
192 FILE *fp;
193 /* An input byte. */
194 int c;
195 /* Number of bytes read from an entry. */
196 int nread;
198 /* true if PATHPART contains globbing metacharacters. */
199 boolean globflag;
200 /* The end of the last glob-free subpattern in PATHPART. */
201 char *patend;
203 /* The current input database entry. */
204 char *path;
205 /* Amount allocated for it. */
206 size_t pathsize;
208 /* The length of the prefix shared with the previous database entry. */
209 int count = 0;
210 /* Where in `path' to stop the backward search for the last character
211 in the subpattern. Set according to `count'. */
212 char *cutoff;
214 /* true if we found a fast match (of patend) on the previous path. */
215 boolean prev_fast_match = false;
216 /* The return value. */
217 int printed = 0;
219 /* true if reading a bigram-encoded database. */
220 boolean old_format = false;
221 /* For the old database format,
222 the first and second characters of the most common bigrams. */
223 char bigram1[128], bigram2[128];
225 /* To check the age of the database. */
226 struct stat st;
227 time_t now;
229 if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
231 error (0, errno, "%s", dbfile);
232 return 0;
234 time(&now);
235 if (now - st.st_mtime > WARN_SECONDS)
237 /* For example:
238 warning: database `fred' is more than 8 days old */
239 error (0, 0, _("warning: database `%s' is more than %d %s old"),
240 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
243 pathsize = 1026; /* Increased as necessary by getstr. */
244 path = xmalloc (pathsize);
246 nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
247 if (nread != sizeof (LOCATEDB_MAGIC)
248 || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
250 int i;
251 /* Read the list of the most common bigrams in the database. */
252 fseek (fp, 0, 0);
253 for (i = 0; i < 128; i++)
255 bigram1[i] = getc (fp);
256 bigram2[i] = getc (fp);
258 old_format = true;
261 globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
262 || strchr (pathpart, '[');
264 patend = last_literal_end (pathpart);
266 c = getc (fp);
267 while (c != EOF)
269 register char *s; /* Scan the path we read in. */
271 if (old_format)
273 /* Get the offset in the path where this path info starts. */
274 if (c == LOCATEDB_OLD_ESCAPE)
275 count += getw (fp) - LOCATEDB_OLD_OFFSET;
276 else
277 count += c - LOCATEDB_OLD_OFFSET;
279 /* Overlay the old path with the remainder of the new. */
280 for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
281 if (c < 0200)
282 *s++ = c; /* An ordinary character. */
283 else
285 /* Bigram markers have the high bit set. */
286 c &= 0177;
287 *s++ = bigram1[c];
288 *s++ = bigram2[c];
290 *s-- = '\0';
292 else
294 if (c == LOCATEDB_ESCAPE)
295 count += get_short (fp);
296 else if (c > 127)
297 count += c - 256;
298 else
299 count += c;
301 /* Overlay the old path with the remainder of the new. */
302 nread = getstr (&path, &pathsize, fp, '\0', count);
303 if (nread < 0)
304 break;
305 c = getc (fp);
306 s = path + count + nread - 2; /* Move to the last char in path. */
307 assert (s[0] != '\0');
308 assert (s[1] == '\0'); /* Our terminator. */
309 assert (s[2] == '\0'); /* Added by getstr. */
312 /* If the previous path matched, scan the whole path for the last
313 char in the subpattern. If not, the shared prefix doesn't match
314 the pattern, so don't scan it for the last char. */
315 cutoff = prev_fast_match ? path : path + count;
317 /* Search backward starting at the end of the path we just read in,
318 for the character at the end of the last glob-free subpattern
319 in PATHPART. */
320 for (prev_fast_match = false; s >= cutoff; s--)
321 /* Fast first char check. */
322 if (*s == *patend)
324 char *s2; /* Scan the path we read in. */
325 register char *p2; /* Scan `patend'. */
327 for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && *s2 == *p2;
328 s2--, p2--)
330 if (*p2 == '\0')
332 /* Success on the fast match. Compare the whole pattern
333 if it contains globbing characters. */
334 prev_fast_match = true;
335 if (globflag == false || fnmatch (pathpart, path, 0) == 0)
337 if (!check_existence || stat(path, &st) == 0)
339 puts (path);
340 ++printed;
343 break;
348 if (ferror (fp))
350 error (0, errno, "%s", dbfile);
351 return 0;
353 if (fclose (fp) == EOF)
355 error (0, errno, "%s", dbfile);
356 return 0;
359 return printed;
362 extern char *version_string;
364 /* The name this program was run with. */
365 char *program_name;
367 static void
368 usage (stream, status)
369 FILE *stream;
370 int status;
372 fprintf (stream, _("\
373 Usage: %s [-d path | --database=path] [--version] [--help]\n\
374 [-e | --existing] pattern...\n"),
375 program_name);
376 exit (status);
379 static struct option const longopts[] =
381 {"database", required_argument, NULL, 'd'},
382 {"existing", no_argument, NULL, 'e'},
383 {"help", no_argument, NULL, 'h'},
384 {"version", no_argument, NULL, 'v'},
385 {NULL, no_argument, NULL, 0}
389 main (argc, argv)
390 int argc;
391 char **argv;
393 char *dbpath;
394 int found = 0, optc;
396 program_name = argv[0];
398 #ifdef HAVE_SETLOCALE
399 setlocale (LC_ALL, "");
400 #endif
401 bindtextdomain (PACKAGE, LOCALEDIR);
402 textdomain (PACKAGE);
404 dbpath = getenv ("LOCATE_PATH");
405 if (dbpath == NULL)
406 dbpath = LOCATE_DB;
408 check_existence = 0;
410 while ((optc = getopt_long (argc, argv, "d:e", longopts, (int *) 0)) != -1)
411 switch (optc)
413 case 'd':
414 dbpath = optarg;
415 break;
417 case 'e':
418 check_existence = 1;
419 break;
421 case 'h':
422 usage (stdout, 0);
424 case 'v':
425 printf (_("GNU locate version %s\n"), version_string);
426 exit (0);
428 default:
429 usage (stderr, 1);
432 if (optind == argc)
433 usage (stderr, 1);
435 for (; optind < argc; optind++)
437 char *e;
438 next_element (dbpath); /* Initialize. */
439 while ((e = next_element ((char *) NULL)) != NULL)
440 found |= locate (argv[optind], e);
443 exit (!found);