*** empty log message ***
[findutils.git] / locate / locate.c
blobf7dc521459841840be1e257618e9a2d7a3191cce
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* Usage: locate [options] pattern...
20 Scan a pathname list for the full pathname of a file, given only
21 a piece of the name (possibly containing shell globbing metacharacters).
22 The list has been processed with front-compression, which reduces
23 the list size by a factor of 4-5.
24 Recognizes two database formats, old and new. The old format is
25 bigram coded, which reduces space by a further 20-25% and uses the
26 following encoding of the database bytes:
28 0-28 likeliest differential counts + offset (14) to make nonnegative
29 30 escape code for out-of-range count to follow in next halfword
30 128-255 bigram codes (the 128 most common, as determined by `updatedb')
31 32-127 single character (printable) ASCII remainder
33 Uses a novel two-tiered string search technique:
35 First, match a metacharacter-free subpattern and a partial pathname
36 BACKWARDS to avoid full expansion of the pathname list.
37 The time savings is 40-50% over forward matching, which cannot efficiently
38 handle overlapped search patterns and compressed path remainders.
40 Then, match the actual shell glob-style regular expression (if in this form)
41 against the candidate pathnames using the slower shell filename
42 matching routines.
44 Described more fully in Usenix ;login:, Vol 8, No 1,
45 February/March, 1983, p. 8.
47 Written by James A. Woods <jwoods@adobe.com>.
48 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
50 #include <config.h>
51 #include <stdio.h>
52 #include <sys/types.h>
53 #include <sys/stat.h>
54 #include <time.h>
55 #include <fnmatch.h>
56 #include <getopt.h>
58 #define NDEBUG
59 #include <assert.h>
61 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
62 #include <string.h>
63 #else
64 #include <strings.h>
65 #define strchr index
66 #endif
68 #ifdef STDC_HEADERS
69 #include <stdlib.h>
70 #else
71 char *getenv ();
72 #endif
74 #ifdef STDC_HEADERS
75 #include <errno.h>
76 #include <stdlib.h>
77 #else
78 extern int errno;
79 #endif
81 #include "locatedb.h"
83 typedef enum {false, true} boolean;
85 /* Warn if a database is older than this. 8 days allows for a weekly
86 update that takes up to a day to perform. */
87 #define WARN_SECONDS (60 * 60 * 24 * 8)
89 /* Printable version of WARN_SECONDS. */
90 #define WARN_MESSAGE "8 days"
92 /* Check for existence of files before printing them out? */
93 int check_existence = 0;
95 char *next_element ();
96 char *xmalloc ();
97 char *xrealloc ();
98 void error ();
100 /* Read in a 16-bit int, high byte first (network byte order). */
102 static int
103 get_short (fp)
104 FILE *fp;
107 register short x;
109 x = fgetc (fp) << 8;
110 x |= (fgetc (fp) & 0xff);
111 return x;
114 /* Return a pointer to the last character in a static copy of the last
115 glob-free subpattern in NAME,
116 with '\0' prepended for a fast backwards pre-match. */
118 static char *
119 last_literal_end (name)
120 char *name;
122 static char *globfree = NULL; /* A copy of the subpattern in NAME. */
123 static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
124 register char *subp; /* Return value. */
125 register char *p; /* Search location in NAME. */
127 /* Find the end of the subpattern.
128 Skip trailing metacharacters and [] ranges. */
129 for (p = name + strlen (name) - 1; p >= name && strchr ("*?]", *p) != NULL;
130 p--)
132 if (*p == ']')
133 while (p >= name && *p != '[')
134 p--;
136 if (p < name)
137 p = name;
139 if (p - name + 3 > gfalloc)
141 gfalloc = p - name + 3 + 64; /* Room to grow. */
142 globfree = xrealloc (globfree, gfalloc);
144 subp = globfree;
145 *subp++ = '\0';
147 /* If the pattern has only metacharacters, make every path match the
148 subpattern, so it gets checked the slow way. */
149 if (p == name && strchr ("?*[]", *p) != NULL)
150 *subp++ = '/';
151 else
153 char *endmark;
154 /* Find the start of the metacharacter-free subpattern. */
155 for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
157 /* Copy the subpattern into globfree. */
158 for (++p; p <= endmark; )
159 *subp++ = *p++;
161 *subp-- = '\0'; /* Null terminate, though it's not needed. */
163 return subp;
166 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
167 Return the number of entries printed. */
169 static int
170 locate (pathpart, dbfile)
171 char *pathpart, *dbfile;
173 /* The pathname database. */
174 FILE *fp;
175 /* An input byte. */
176 int c;
177 /* Number of bytes read from an entry. */
178 int nread;
180 /* true if PATHPART contains globbing metacharacters. */
181 boolean globflag;
182 /* The end of the last glob-free subpattern in PATHPART. */
183 char *patend;
185 /* The current input database entry. */
186 char *path;
187 /* Amount allocated for it. */
188 size_t pathsize;
190 /* The length of the prefix shared with the previous database entry. */
191 int count = 0;
192 /* Where in `path' to stop the backward search for the last character
193 in the subpattern. Set according to `count'. */
194 char *cutoff;
196 /* true if we found a fast match (of patend) on the previous path. */
197 boolean prev_fast_match = false;
198 /* The return value. */
199 int printed = 0;
201 /* true if reading a bigram-encoded database. */
202 boolean old_format = false;
203 /* For the old database format,
204 the first and second characters of the most common bigrams. */
205 char bigram1[128], bigram2[128];
207 /* To check the age of the database. */
208 struct stat st;
209 time_t now;
211 if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
213 error (0, errno, "%s", dbfile);
214 return 0;
216 time(&now);
217 if (now - st.st_mtime > WARN_SECONDS)
219 error (0, 0, "warning: database `%s' is more than %s old",
220 dbfile, WARN_MESSAGE);
223 pathsize = 1026; /* Increased as necessary by getstr. */
224 path = xmalloc (pathsize);
226 nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
227 if (nread != sizeof (LOCATEDB_MAGIC)
228 || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
230 int i;
231 /* Read the list of the most common bigrams in the database. */
232 fseek (fp, 0, 0);
233 for (i = 0; i < 128; i++)
235 bigram1[i] = getc (fp);
236 bigram2[i] = getc (fp);
238 old_format = true;
241 globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
242 || strchr (pathpart, '[');
244 patend = last_literal_end (pathpart);
246 c = getc (fp);
247 while (c != EOF)
249 register char *s; /* Scan the path we read in. */
251 if (old_format)
253 /* Get the offset in the path where this path info starts. */
254 if (c == LOCATEDB_OLD_ESCAPE)
255 count += getw (fp) - LOCATEDB_OLD_OFFSET;
256 else
257 count += c - LOCATEDB_OLD_OFFSET;
259 /* Overlay the old path with the remainder of the new. */
260 for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
261 if (c < 0200)
262 *s++ = c; /* An ordinary character. */
263 else
265 /* Bigram markers have the high bit set. */
266 c &= 0177;
267 *s++ = bigram1[c];
268 *s++ = bigram2[c];
270 *s-- = '\0';
272 else
274 if (c == LOCATEDB_ESCAPE)
275 count += get_short (fp);
276 else if (c > 127)
277 count += c - 256;
278 else
279 count += c;
281 /* Overlay the old path with the remainder of the new. */
282 nread = getstr (&path, &pathsize, fp, '\0', count);
283 if (nread < 0)
284 break;
285 c = getc (fp);
286 s = path + count + nread - 2; /* Move to the last char in path. */
287 assert (s[0] != '\0');
288 assert (s[1] == '\0'); /* Our terminator. */
289 assert (s[2] == '\0'); /* Added by getstr. */
292 /* If the previous path matched, scan the whole path for the last
293 char in the subpattern. If not, the shared prefix doesn't match
294 the pattern, so don't scan it for the last char. */
295 cutoff = prev_fast_match ? path : path + count;
297 /* Search backward starting at the end of the path we just read in,
298 for the character at the end of the last glob-free subpattern
299 in PATHPART. */
300 for (prev_fast_match = false; s >= cutoff; s--)
301 /* Fast first char check. */
302 if (*s == *patend)
304 char *s2; /* Scan the path we read in. */
305 register char *p2; /* Scan `patend'. */
307 for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && *s2 == *p2;
308 s2--, p2--)
310 if (*p2 == '\0')
312 /* Success on the fast match. Compare the whole pattern
313 if it contains globbing characters. */
314 prev_fast_match = true;
315 if (globflag == false || fnmatch (pathpart, path, 0) == 0)
317 if (!check_existence || stat(path, &st) == 0)
319 puts (path);
320 ++printed;
323 break;
328 if (ferror (fp))
330 error (0, errno, "%s", dbfile);
331 return 0;
333 if (fclose (fp) == EOF)
335 error (0, errno, "%s", dbfile);
336 return 0;
339 return printed;
342 extern char *version_string;
344 /* The name this program was run with. */
345 char *program_name;
347 static void
348 usage (stream, status)
349 FILE *stream;
350 int status;
352 fprintf (stream, "\
353 Usage: %s [-d path | --database=path] [--version] [--help]\n"
354 " [-e | --existing] pattern...\n",
355 program_name);
356 exit (status);
359 static struct option const longopts[] =
361 {"database", required_argument, NULL, 'd'},
362 {"existing", no_argument, NULL, 'e'},
363 {"help", no_argument, NULL, 'h'},
364 {"version", no_argument, NULL, 'v'},
365 {NULL, no_argument, NULL, 0}
368 void
369 main (argc, argv)
370 int argc;
371 char **argv;
373 char *dbpath;
374 int found = 0, optc;
376 program_name = argv[0];
378 dbpath = getenv ("LOCATE_PATH");
379 if (dbpath == NULL)
380 dbpath = LOCATE_DB;
382 check_existence = 0;
384 while ((optc = getopt_long (argc, argv, "d:e", longopts, (int *) 0)) != -1)
385 switch (optc)
387 case 'd':
388 dbpath = optarg;
389 break;
391 case 'e':
392 check_existence = 1;
393 break;
395 case 'h':
396 usage (stdout, 0);
398 case 'v':
399 printf ("GNU locate version %s\n", version_string);
400 exit (0);
402 default:
403 usage (stderr, 1);
406 if (optind == argc)
407 usage (stderr, 1);
409 for (; optind < argc; optind++)
411 char *e;
412 next_element (dbpath); /* Initialize. */
413 while ((e = next_element ((char *) NULL)) != NULL)
414 found |= locate (argv[optind], e);
417 exit (!found);