removed unused file cron.updatedb
[findutils.git] / locate / locate.c
blobec2de906c7c930830d6c1fc18a9f08d5289047e9
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* Usage: locate [options] pattern...
20 Scan a pathname list for the full pathname of a file, given only
21 a piece of the name (possibly containing shell globbing metacharacters).
22 The list has been processed with front-compression, which reduces
23 the list size by a factor of 4-5.
24 Recognizes two database formats, old and new. The old format is
25 bigram coded, which reduces space by a further 20-25% and uses the
26 following encoding of the database bytes:
28 0-28 likeliest differential counts + offset (14) to make nonnegative
29 30 escape code for out-of-range count to follow in next halfword
30 128-255 bigram codes (the 128 most common, as determined by `updatedb')
31 32-127 single character (printable) ASCII remainder
33 Uses a novel two-tiered string search technique:
35 First, match a metacharacter-free subpattern and a partial pathname
36 BACKWARDS to avoid full expansion of the pathname list.
37 The time savings is 40-50% over forward matching, which cannot efficiently
38 handle overlapped search patterns and compressed path remainders.
40 Then, match the actual shell glob-style regular expression (if in this form)
41 against the candidate pathnames using the slower shell filename
42 matching routines.
44 Described more fully in Usenix ;login:, Vol 8, No 1,
45 February/March, 1983, p. 8.
47 Written by James A. Woods <jwoods@adobe.com>.
48 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
50 #include <config.h>
51 #include <stdio.h>
52 #include <sys/types.h>
53 #include <sys/stat.h>
54 #include <time.h>
55 #include <fnmatch.h>
56 #include <getopt.h>
58 #define NDEBUG
59 #include <assert.h>
61 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
62 #include <string.h>
63 #else
64 #include <strings.h>
65 #define strchr index
66 #endif
68 #ifdef STDC_HEADERS
69 #include <stdlib.h>
70 #endif
72 #ifdef STDC_HEADERS
73 #include <errno.h>
74 #include <stdlib.h>
75 #else
76 extern int errno;
77 #endif
79 #include "locatedb.h"
81 typedef enum {false, true} boolean;
83 /* Warn if a database is older than this. 8 days allows for a weekly
84 update that takes up to a day to perform. */
85 #define WARN_SECONDS (60 * 60 * 24 * 8)
87 /* Printable version of WARN_SECONDS. */
88 #define WARN_MESSAGE "8 days"
90 /* Check for existence of files before printing them out? */
91 int check_existence = 0;
93 char *next_element ();
94 char *xmalloc ();
95 char *xrealloc ();
96 void error ();
98 /* Read in a 16-bit int, high byte first (network byte order). */
100 static int
101 get_short (fp)
102 FILE *fp;
105 register short x;
107 x = fgetc (fp) << 8;
108 x |= (fgetc (fp) & 0xff);
109 return x;
112 /* Return a pointer to the last character in a static copy of the last
113 glob-free subpattern in NAME,
114 with '\0' prepended for a fast backwards pre-match. */
116 static char *
117 last_literal_end (name)
118 char *name;
120 static char *globfree = NULL; /* A copy of the subpattern in NAME. */
121 static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
122 register char *subp; /* Return value. */
123 register char *p; /* Search location in NAME. */
125 /* Find the end of the subpattern.
126 Skip trailing metacharacters and [] ranges. */
127 for (p = name + strlen (name) - 1; p >= name && strchr ("*?]", *p) != NULL;
128 p--)
130 if (*p == ']')
131 while (p >= name && *p != '[')
132 p--;
134 if (p < name)
135 p = name;
137 if (p - name + 3 > gfalloc)
139 gfalloc = p - name + 3 + 64; /* Room to grow. */
140 globfree = xrealloc (globfree, gfalloc);
142 subp = globfree;
143 *subp++ = '\0';
145 /* If the pattern has only metacharacters, make every path match the
146 subpattern, so it gets checked the slow way. */
147 if (p == name && strchr ("?*[]", *p) != NULL)
148 *subp++ = '/';
149 else
151 char *endmark;
152 /* Find the start of the metacharacter-free subpattern. */
153 for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
155 /* Copy the subpattern into globfree. */
156 for (++p; p <= endmark; )
157 *subp++ = *p++;
159 *subp-- = '\0'; /* Null terminate, though it's not needed. */
161 return subp;
164 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
165 Return the number of entries printed. */
167 static int
168 locate (pathpart, dbfile)
169 char *pathpart, *dbfile;
171 /* The pathname database. */
172 FILE *fp;
173 /* An input byte. */
174 int c;
175 /* Number of bytes read from an entry. */
176 int nread;
178 /* true if PATHPART contains globbing metacharacters. */
179 boolean globflag;
180 /* The end of the last glob-free subpattern in PATHPART. */
181 char *patend;
183 /* The current input database entry. */
184 char *path;
185 /* Amount allocated for it. */
186 size_t pathsize;
188 /* The length of the prefix shared with the previous database entry. */
189 int count = 0;
190 /* Where in `path' to stop the backward search for the last character
191 in the subpattern. Set according to `count'. */
192 char *cutoff;
194 /* true if we found a fast match (of patend) on the previous path. */
195 boolean prev_fast_match = false;
196 /* The return value. */
197 int printed = 0;
199 /* true if reading a bigram-encoded database. */
200 boolean old_format = false;
201 /* For the old database format,
202 the first and second characters of the most common bigrams. */
203 char bigram1[128], bigram2[128];
205 /* To check the age of the database. */
206 struct stat st;
207 time_t now;
209 if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
211 error (0, errno, "%s", dbfile);
212 return 0;
214 time(&now);
215 if (now - st.st_mtime > WARN_SECONDS)
217 error (0, 0, "warning: database `%s' is more than %s old",
218 dbfile, WARN_MESSAGE);
221 pathsize = 1026; /* Increased as necessary by getstr. */
222 path = xmalloc (pathsize);
224 nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
225 if (nread != sizeof (LOCATEDB_MAGIC)
226 || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
228 int i;
229 /* Read the list of the most common bigrams in the database. */
230 fseek (fp, 0, 0);
231 for (i = 0; i < 128; i++)
233 bigram1[i] = getc (fp);
234 bigram2[i] = getc (fp);
236 old_format = true;
239 globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
240 || strchr (pathpart, '[');
242 patend = last_literal_end (pathpart);
244 c = getc (fp);
245 while (c != EOF)
247 register char *s; /* Scan the path we read in. */
249 if (old_format)
251 /* Get the offset in the path where this path info starts. */
252 if (c == LOCATEDB_OLD_ESCAPE)
253 count += getw (fp) - LOCATEDB_OLD_OFFSET;
254 else
255 count += c - LOCATEDB_OLD_OFFSET;
257 /* Overlay the old path with the remainder of the new. */
258 for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
259 if (c < 0200)
260 *s++ = c; /* An ordinary character. */
261 else
263 /* Bigram markers have the high bit set. */
264 c &= 0177;
265 *s++ = bigram1[c];
266 *s++ = bigram2[c];
268 *s-- = '\0';
270 else
272 if (c == LOCATEDB_ESCAPE)
273 count += get_short (fp);
274 else if (c > 127)
275 count += c - 256;
276 else
277 count += c;
279 /* Overlay the old path with the remainder of the new. */
280 nread = getstr (&path, &pathsize, fp, '\0', count);
281 if (nread < 0)
282 break;
283 c = getc (fp);
284 s = path + count + nread - 2; /* Move to the last char in path. */
285 assert (s[0] != '\0');
286 assert (s[1] == '\0'); /* Our terminator. */
287 assert (s[2] == '\0'); /* Added by getstr. */
290 /* If the previous path matched, scan the whole path for the last
291 char in the subpattern. If not, the shared prefix doesn't match
292 the pattern, so don't scan it for the last char. */
293 cutoff = prev_fast_match ? path : path + count;
295 /* Search backward starting at the end of the path we just read in,
296 for the character at the end of the last glob-free subpattern
297 in PATHPART. */
298 for (prev_fast_match = false; s >= cutoff; s--)
299 /* Fast first char check. */
300 if (*s == *patend)
302 char *s2; /* Scan the path we read in. */
303 register char *p2; /* Scan `patend'. */
305 for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && *s2 == *p2;
306 s2--, p2--)
308 if (*p2 == '\0')
310 /* Success on the fast match. Compare the whole pattern
311 if it contains globbing characters. */
312 prev_fast_match = true;
313 if (globflag == false || fnmatch (pathpart, path, 0) == 0)
315 if (!check_existence || stat(path, &st) == 0)
317 puts (path);
318 ++printed;
321 break;
326 if (ferror (fp))
328 error (0, errno, "%s", dbfile);
329 return 0;
331 if (fclose (fp) == EOF)
333 error (0, errno, "%s", dbfile);
334 return 0;
337 return printed;
340 extern char *version_string;
342 /* The name this program was run with. */
343 char *program_name;
345 static void
346 usage (stream, status)
347 FILE *stream;
348 int status;
350 fprintf (stream, "\
351 Usage: %s [-d path | --database=path] [--version] [--help]\n"
352 " [-e | --existing] pattern...\n",
353 program_name);
354 exit (status);
357 static struct option const longopts[] =
359 {"database", required_argument, NULL, 'd'},
360 {"existing", no_argument, NULL, 'e'},
361 {"help", no_argument, NULL, 'h'},
362 {"version", no_argument, NULL, 'v'},
363 {NULL, no_argument, NULL, 0}
366 void
367 main (argc, argv)
368 int argc;
369 char **argv;
371 char *dbpath;
372 int found = 0, optc;
374 program_name = argv[0];
376 dbpath = getenv ("LOCATE_PATH");
377 if (dbpath == NULL)
378 dbpath = LOCATE_DB;
380 check_existence = 0;
382 while ((optc = getopt_long (argc, argv, "d:e", longopts, (int *) 0)) != -1)
383 switch (optc)
385 case 'd':
386 dbpath = optarg;
387 break;
389 case 'e':
390 check_existence = 1;
391 break;
393 case 'h':
394 usage (stdout, 0);
396 case 'v':
397 printf ("GNU locate version %s\n", version_string);
398 exit (0);
400 default:
401 usage (stderr, 1);
404 if (optind == argc)
405 usage (stderr, 1);
407 for (; optind < argc; optind++)
409 char *e;
410 next_element (dbpath); /* Initialize. */
411 while ((e = next_element ((char *) NULL)) != NULL)
412 found |= locate (argv[optind], e);
415 exit (!found);