Work around what appears to be a C compiler bug in HP-UX 11.23 for
[findutils.git] / locate / locate.c
blob63597783957bb0cc6b9e4035ff8910a4ac916879
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 96, 98, 99, 2000, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 9 Temple Place - Suite 330, Boston, MA 02111-1307,
17 USA.
20 /* Usage: locate [options] pattern...
22 Scan a pathname list for the full pathname of a file, given only
23 a piece of the name (possibly containing shell globbing metacharacters).
24 The list has been processed with front-compression, which reduces
25 the list size by a factor of 4-5.
26 Recognizes two database formats, old and new. The old format is
27 bigram coded, which reduces space by a further 20-25% and uses the
28 following encoding of the database bytes:
30 0-28 likeliest differential counts + offset (14) to make nonnegative
31 30 escape code for out-of-range count to follow in next halfword
32 128-255 bigram codes (the 128 most common, as determined by `updatedb')
33 32-127 single character (printable) ASCII remainder
35 Uses a novel two-tiered string search technique:
37 First, match a metacharacter-free subpattern and a partial pathname
38 BACKWARDS to avoid full expansion of the pathname list.
39 The time savings is 40-50% over forward matching, which cannot efficiently
40 handle overlapped search patterns and compressed path remainders.
42 Then, match the actual shell glob-style regular expression (if in this form)
43 against the candidate pathnames using the slower shell filename
44 matching routines.
46 Described more fully in Usenix ;login:, Vol 8, No 1,
47 February/March, 1983, p. 8.
49 Written by James A. Woods <jwoods@adobe.com>.
50 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
52 #define _GNU_SOURCE
53 #include <gnulib/config.h>
54 #undef VERSION
55 #undef PACKAGE_VERSION
56 #undef PACKAGE_TARNAME
57 #undef PACKAGE_STRING
58 #undef PACKAGE_NAME
59 #undef PACKAGE
60 #include <config.h>
61 #include <stdio.h>
62 #include <ctype.h>
63 #include <sys/types.h>
64 #include <sys/stat.h>
65 #include <time.h>
66 #include <fnmatch.h>
67 #include <getopt.h>
69 #define NDEBUG
70 #include <assert.h>
72 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
73 #include <string.h>
74 #else
75 #include <strings.h>
76 #define strchr index
77 #endif
79 #ifdef STDC_HEADERS
80 #include <stdlib.h>
81 #endif
83 #ifdef HAVE_ERRNO_H
84 #include <errno.h>
85 #else
86 extern int errno;
87 #endif
89 #ifdef HAVE_LOCALE_H
90 #include <locale.h>
91 #endif
93 #if ENABLE_NLS
94 # include <libintl.h>
95 # define _(Text) gettext (Text)
96 #else
97 # define _(Text) Text
98 #define textdomain(Domain)
99 #define bindtextdomain(Package, Directory)
100 #endif
101 #ifdef gettext_noop
102 # define N_(String) gettext_noop (String)
103 #else
104 /* We used to use (String) instead of just String, but HP-UX 11.23 for
105 * ia64 has what seems to be a compiler bug, because it refuses input
106 * like: static const char buf[] = ("string");
108 # define N_(String) String
109 #endif
111 #include "locatedb.h"
112 #include <getline.h>
114 /* Note that this evaluates C many times. */
115 #ifdef _LIBC
116 # define TOUPPER(Ch) toupper (Ch)
117 # define TOLOWER(Ch) tolower (Ch)
118 #else
119 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
120 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
121 #endif
123 typedef enum {false, true} boolean;
125 /* Warn if a database is older than this. 8 days allows for a weekly
126 update that takes up to a day to perform. */
127 #define WARN_NUMBER_UNITS (8)
128 /* Printable name of units used in WARN_SECONDS */
129 static const char warn_name_units[] = N_("days");
130 #define SECONDS_PER_UNIT (60 * 60 * 24)
132 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
134 /* Check for existence of files before printing them out? */
135 int check_existence = 0;
137 char *next_element ();
138 char *xmalloc ();
139 char *xrealloc ();
140 void error ();
142 /* Read in a 16-bit int, high byte first (network byte order). */
144 static short
145 get_short (fp)
146 FILE *fp;
149 register short x;
151 x = fgetc (fp) << 8;
152 x |= (fgetc (fp) & 0xff);
153 return x;
156 /* Return a pointer to the last character in a static copy of the last
157 glob-free subpattern in NAME,
158 with '\0' prepended for a fast backwards pre-match. */
160 static char *
161 last_literal_end (name)
162 char *name;
164 static char *globfree = NULL; /* A copy of the subpattern in NAME. */
165 static size_t gfalloc = 0; /* Bytes allocated for `globfree'. */
166 register char *subp; /* Return value. */
167 register char *p; /* Search location in NAME. */
169 /* Find the end of the subpattern.
170 Skip trailing metacharacters and [] ranges. */
171 for (p = name + strlen (name) - 1; p >= name && strchr ("*?]", *p) != NULL;
172 p--)
174 if (*p == ']')
175 while (p >= name && *p != '[')
176 p--;
178 if (p < name)
179 p = name;
181 if (p - name + 3 > gfalloc)
183 gfalloc = p - name + 3 + 64; /* Room to grow. */
184 globfree = xrealloc (globfree, gfalloc);
186 subp = globfree;
187 *subp++ = '\0';
189 /* If the pattern has only metacharacters, make every path match the
190 subpattern, so it gets checked the slow way. */
191 if (p == name && strchr ("?*[]", *p) != NULL)
192 *subp++ = '/';
193 else
195 char *endmark;
196 /* Find the start of the metacharacter-free subpattern. */
197 for (endmark = p; p >= name && strchr ("]*?", *p) == NULL; p--)
199 /* Copy the subpattern into globfree. */
200 for (++p; p <= endmark; )
201 *subp++ = *p++;
203 *subp-- = '\0'; /* Null terminate, though it's not needed. */
205 return subp;
208 /* getstr()
210 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
211 * until we reach DELIMITER or end-of-file. We reallocate the buffer
212 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
213 * is made regarding the content of the data (i.e. the implementation is
214 * 8-bit clean, the only delimiter is DELIMITER).
216 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
217 * has been removed from gnulib.
219 * We call the function locate_read_str() to avoid a name clash with the curses
220 * function getstr().
222 static int locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
224 char * p = NULL;
225 size_t sz = 0;
226 int needed, nread;
228 nread = getdelim(&p, &sz, delimiter, fp);
229 if (nread >= 0)
231 assert(p != NULL);
233 needed = offs + nread;
234 if (needed > (*siz))
236 char *pnew = realloc(*buf, needed);
237 if (NULL == pnew)
239 return -1; /* FAIL */
241 else
243 *siz = needed;
244 *buf = pnew;
247 memcpy((*buf)+offs, p, nread);
248 free(p);
250 return nread;
254 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
255 Return the number of entries printed. */
257 static int
258 locate (pathpart, dbfile, ignore_case)
259 char *pathpart, *dbfile;
260 int ignore_case;
262 /* The pathname database. */
263 FILE *fp;
264 /* An input byte. */
265 int c;
266 /* Number of bytes read from an entry. */
267 int nread;
269 /* true if PATHPART contains globbing metacharacters. */
270 boolean globflag;
271 /* The end of the last glob-free subpattern in PATHPART. */
272 char *patend;
274 /* The current input database entry. */
275 char *path;
276 /* Amount allocated for it. */
277 size_t pathsize;
279 /* The length of the prefix shared with the previous database entry. */
280 int count = 0;
281 /* Where in `path' to stop the backward search for the last character
282 in the subpattern. Set according to `count'. */
283 char *cutoff;
285 /* true if we found a fast match (of patend) on the previous path. */
286 boolean prev_fast_match = false;
287 /* The return value. */
288 int printed = 0;
290 /* true if reading a bigram-encoded database. */
291 boolean old_format = false;
292 /* For the old database format,
293 the first and second characters of the most common bigrams. */
294 char bigram1[128], bigram2[128];
296 /* To check the age of the database. */
297 struct stat st;
298 time_t now;
300 if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
302 error (0, errno, "%s", dbfile);
303 return 0;
305 time(&now);
306 if (now - st.st_mtime > WARN_SECONDS)
308 /* For example:
309 warning: database `fred' is more than 8 days old */
310 error (0, 0, _("warning: database `%s' is more than %d %s old"),
311 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
314 pathsize = 1026; /* Increased as necessary by locate_read_str. */
315 path = xmalloc (pathsize);
317 nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
318 if (nread != sizeof (LOCATEDB_MAGIC)
319 || memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
321 int i;
322 /* Read the list of the most common bigrams in the database. */
323 fseek (fp, 0, 0);
324 for (i = 0; i < 128; i++)
326 bigram1[i] = getc (fp);
327 bigram2[i] = getc (fp);
329 old_format = true;
332 /* If we ignore case,
333 convert it to lower first so we don't have to do it every time */
334 if (ignore_case){
335 for (patend=pathpart;*patend;++patend){
336 *patend=TOLOWER(*patend);
341 globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
342 || strchr (pathpart, '[');
344 patend = last_literal_end (pathpart);
346 c = getc (fp);
347 while (c != EOF)
349 register char *s; /* Scan the path we read in. */
351 if (old_format)
353 /* Get the offset in the path where this path info starts. */
354 if (c == LOCATEDB_OLD_ESCAPE)
355 count += getw (fp) - LOCATEDB_OLD_OFFSET;
356 else
357 count += c - LOCATEDB_OLD_OFFSET;
359 /* Overlay the old path with the remainder of the new. */
360 for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
361 if (c < 0200)
362 *s++ = c; /* An ordinary character. */
363 else
365 /* Bigram markers have the high bit set. */
366 c &= 0177;
367 *s++ = bigram1[c];
368 *s++ = bigram2[c];
370 *s-- = '\0';
372 else
374 if (c == LOCATEDB_ESCAPE)
375 count += (short)get_short (fp);
376 else if (c > 127)
377 count += c - 256;
378 else
379 count += c;
381 if (count > strlen(path))
383 /* This should not happen generally , but since we're
384 * reading in data which is outside our control, we
385 * cannot prevent it.
387 error(1, 0, _("locate database `%s' is corrupt or invalid"), dbfile);
390 /* Overlay the old path with the remainder of the new. */
391 nread = locate_read_str (&path, &pathsize, fp, 0, count);
392 if (nread < 0)
393 break;
394 c = getc (fp);
395 s = path + count + nread - 2; /* Move to the last char in path. */
396 assert (s[0] != '\0');
397 assert (s[1] == '\0'); /* Our terminator. */
398 assert (s[2] == '\0'); /* Added by locate_read_str. */
401 /* If the previous path matched, scan the whole path for the last
402 char in the subpattern. If not, the shared prefix doesn't match
403 the pattern, so don't scan it for the last char. */
404 cutoff = prev_fast_match ? path : path + count;
406 /* Search backward starting at the end of the path we just read in,
407 for the character at the end of the last glob-free subpattern
408 in PATHPART. */
409 for(prev_fast_match=false; s>=cutoff; s--)
411 char *s2; /* Scan the path we read in. */
412 register char *p2; /* Scan `patend'. */
414 /* Fast first char check. */
415 if(ignore_case)
417 if(TOLOWER(*s)!=*patend)
418 continue;
420 else if(*s!=*patend)
421 continue;
423 if(ignore_case)
424 for(s2=s-1, p2=patend-1; *p2!='\0' && TOLOWER(*s2)==*p2; s2--, p2--);
425 else
426 for(s2=s-1, p2=patend-1; *p2!='\0' && *s2==*p2; s2--, p2--);
428 if(*p2!='\0')
429 continue;
430 /* Success on the fast match. Compare the whole pattern
431 if it contains globbing characters. */
432 prev_fast_match = true;
433 if(globflag)
435 if(ignore_case)
437 if(fnmatch(pathpart,basename(path),FNM_CASEFOLD)!=0)
438 break;
440 else
441 if(fnmatch(pathpart,basename(path),0)!=0)
442 break;
444 if(check_existence && stat(path,&st)!=0)
445 break;
446 puts(path);
447 ++printed;
448 break;
452 if (ferror (fp))
454 error (0, errno, "%s", dbfile);
455 return 0;
457 if (fclose (fp) == EOF)
459 error (0, errno, "%s", dbfile);
460 return 0;
463 return printed;
466 extern char *version_string;
468 /* The name this program was run with. */
469 char *program_name;
471 static void
472 usage (stream)
473 FILE *stream;
475 fprintf (stream, _("\
476 Usage: %s [-d path | --database=path] [-e | --existing]\n\
477 [-i | --ignore-case] [--version] [--help] pattern...\n"),
478 program_name);
479 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
482 static struct option const longopts[] =
484 {"database", required_argument, NULL, 'd'},
485 {"existing", no_argument, NULL, 'e'},
486 {"ignore-case", no_argument, NULL, 'i'},
487 {"help", no_argument, NULL, 'h'},
488 {"version", no_argument, NULL, 'v'},
489 {NULL, no_argument, NULL, 0}
493 main (argc, argv)
494 int argc;
495 char **argv;
497 char *dbpath;
498 int fnmatch_flags = 0;
499 int found = 0, optc;
500 int ignore_case = 0;
502 program_name = argv[0];
504 #ifdef HAVE_SETLOCALE
505 setlocale (LC_ALL, "");
506 #endif
507 bindtextdomain (PACKAGE, LOCALEDIR);
508 textdomain (PACKAGE);
510 dbpath = getenv ("LOCATE_PATH");
511 if (dbpath == NULL)
512 dbpath = LOCATE_DB;
514 check_existence = 0;
516 while ((optc = getopt_long (argc, argv, "d:ei", longopts, (int *) 0)) != -1)
517 switch (optc)
519 case 'd':
520 dbpath = optarg;
521 break;
523 case 'e':
524 check_existence = 1;
525 break;
527 case 'i':
528 ignore_case = 1;
529 fnmatch_flags |= FNM_CASEFOLD;
530 break;
532 case 'h':
533 usage (stdout);
534 return 0;
536 case 'v':
537 printf (_("GNU locate version %s\n"), version_string);
538 return 0;
540 default:
541 usage (stderr);
542 return 1;
545 if (optind == argc)
547 usage (stderr);
548 return 1;
552 for (; optind < argc; optind++)
554 char *e;
555 next_element (dbpath); /* Initialize. */
556 while ((e = next_element ((char *) NULL)) != NULL)
557 found |= locate (argv[optind], e, ignore_case);
560 if (found)
561 return 0;
562 else
563 return 1;