1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 96, 98, 99, 2000, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 9 Temple Place - Suite 330, Boston, MA 02111-1307,
20 /* Usage: locate [options] pattern...
22 Scan a pathname list for the full pathname of a file, given only
23 a piece of the name (possibly containing shell globbing metacharacters).
24 The list has been processed with front-compression, which reduces
25 the list size by a factor of 4-5.
26 Recognizes two database formats, old and new. The old format is
27 bigram coded, which reduces space by a further 20-25% and uses the
28 following encoding of the database bytes:
30 0-28 likeliest differential counts + offset (14) to make nonnegative
31 30 escape code for out-of-range count to follow in next halfword
32 128-255 bigram codes (the 128 most common, as determined by `updatedb')
33 32-127 single character (printable) ASCII remainder
35 Uses a novel two-tiered string search technique:
37 First, match a metacharacter-free subpattern and a partial pathname
38 BACKWARDS to avoid full expansion of the pathname list.
39 The time savings is 40-50% over forward matching, which cannot efficiently
40 handle overlapped search patterns and compressed path remainders.
42 Then, match the actual shell glob-style regular expression (if in this form)
43 against the candidate pathnames using the slower shell filename
46 Described more fully in Usenix ;login:, Vol 8, No 1,
47 February/March, 1983, p. 8.
49 Written by James A. Woods <jwoods@adobe.com>.
50 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
53 #include <gnulib/config.h>
55 #undef PACKAGE_VERSION
56 #undef PACKAGE_TARNAME
63 #include <sys/types.h>
72 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
95 # define _(Text) gettext (Text)
98 #define textdomain(Domain)
99 #define bindtextdomain(Package, Directory)
102 # define N_(String) gettext_noop (String)
104 /* We used to use (String) instead of just String, but HP-UX 11.23 for
105 * ia64 has what seems to be a compiler bug, because it refuses input
106 * like: static const char buf[] = ("string");
108 # define N_(String) String
111 #include "locatedb.h"
114 /* Note that this evaluates C many times. */
116 # define TOUPPER(Ch) toupper (Ch)
117 # define TOLOWER(Ch) tolower (Ch)
119 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
120 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
123 typedef enum {false, true} boolean
;
125 /* Warn if a database is older than this. 8 days allows for a weekly
126 update that takes up to a day to perform. */
127 #define WARN_NUMBER_UNITS (8)
128 /* Printable name of units used in WARN_SECONDS */
129 static const char warn_name_units
[] = N_("days");
130 #define SECONDS_PER_UNIT (60 * 60 * 24)
132 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
134 /* Check for existence of files before printing them out? */
135 int check_existence
= 0;
137 char *next_element ();
142 /* Read in a 16-bit int, high byte first (network byte order). */
152 x
|= (fgetc (fp
) & 0xff);
156 /* Return a pointer to the last character in a static copy of the last
157 glob-free subpattern in NAME,
158 with '\0' prepended for a fast backwards pre-match. */
161 last_literal_end (name
)
164 static char *globfree
= NULL
; /* A copy of the subpattern in NAME. */
165 static size_t gfalloc
= 0; /* Bytes allocated for `globfree'. */
166 register char *subp
; /* Return value. */
167 register char *p
; /* Search location in NAME. */
169 /* Find the end of the subpattern.
170 Skip trailing metacharacters and [] ranges. */
171 for (p
= name
+ strlen (name
) - 1; p
>= name
&& strchr ("*?]", *p
) != NULL
;
175 while (p
>= name
&& *p
!= '[')
181 if (p
- name
+ 3 > gfalloc
)
183 gfalloc
= p
- name
+ 3 + 64; /* Room to grow. */
184 globfree
= xrealloc (globfree
, gfalloc
);
189 /* If the pattern has only metacharacters, make every path match the
190 subpattern, so it gets checked the slow way. */
191 if (p
== name
&& strchr ("?*[]", *p
) != NULL
)
196 /* Find the start of the metacharacter-free subpattern. */
197 for (endmark
= p
; p
>= name
&& strchr ("]*?", *p
) == NULL
; p
--)
199 /* Copy the subpattern into globfree. */
200 for (++p
; p
<= endmark
; )
203 *subp
-- = '\0'; /* Null terminate, though it's not needed. */
210 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
211 * until we reach DELIMITER or end-of-file. We reallocate the buffer
212 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
213 * is made regarding the content of the data (i.e. the implementation is
214 * 8-bit clean, the only delimiter is DELIMITER).
216 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
217 * has been removed from gnulib.
219 * We call the function locate_read_str() to avoid a name clash with the curses
222 static int locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
228 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
233 needed
= offs
+ nread
;
236 char *pnew
= realloc(*buf
, needed
);
239 return -1; /* FAIL */
247 memcpy((*buf
)+offs
, p
, nread
);
254 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
255 Return the number of entries printed. */
258 locate (pathpart
, dbfile
, ignore_case
)
259 char *pathpart
, *dbfile
;
262 /* The pathname database. */
266 /* Number of bytes read from an entry. */
269 /* true if PATHPART contains globbing metacharacters. */
271 /* The end of the last glob-free subpattern in PATHPART. */
274 /* The current input database entry. */
276 /* Amount allocated for it. */
279 /* The length of the prefix shared with the previous database entry. */
281 /* Where in `path' to stop the backward search for the last character
282 in the subpattern. Set according to `count'. */
285 /* true if we found a fast match (of patend) on the previous path. */
286 boolean prev_fast_match
= false;
287 /* The return value. */
290 /* true if reading a bigram-encoded database. */
291 boolean old_format
= false;
292 /* For the old database format,
293 the first and second characters of the most common bigrams. */
294 char bigram1
[128], bigram2
[128];
296 /* To check the age of the database. */
300 if (stat (dbfile
, &st
) || (fp
= fopen (dbfile
, "r")) == NULL
)
302 error (0, errno
, "%s", dbfile
);
306 if (now
- st
.st_mtime
> WARN_SECONDS
)
309 warning: database `fred' is more than 8 days old */
310 error (0, 0, _("warning: database `%s' is more than %d %s old"),
311 dbfile
, WARN_NUMBER_UNITS
, _(warn_name_units
));
314 pathsize
= 1026; /* Increased as necessary by locate_read_str. */
315 path
= xmalloc (pathsize
);
317 nread
= fread (path
, 1, sizeof (LOCATEDB_MAGIC
), fp
);
318 if (nread
!= sizeof (LOCATEDB_MAGIC
)
319 || memcmp (path
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
322 /* Read the list of the most common bigrams in the database. */
324 for (i
= 0; i
< 128; i
++)
326 bigram1
[i
] = getc (fp
);
327 bigram2
[i
] = getc (fp
);
332 /* If we ignore case,
333 convert it to lower first so we don't have to do it every time */
335 for (patend
=pathpart
;*patend
;++patend
){
336 *patend
=TOLOWER(*patend
);
341 globflag
= strchr (pathpart
, '*') || strchr (pathpart
, '?')
342 || strchr (pathpart
, '[');
344 patend
= last_literal_end (pathpart
);
349 register char *s
; /* Scan the path we read in. */
353 /* Get the offset in the path where this path info starts. */
354 if (c
== LOCATEDB_OLD_ESCAPE
)
355 count
+= getw (fp
) - LOCATEDB_OLD_OFFSET
;
357 count
+= c
- LOCATEDB_OLD_OFFSET
;
359 /* Overlay the old path with the remainder of the new. */
360 for (s
= path
+ count
; (c
= getc (fp
)) > LOCATEDB_OLD_ESCAPE
;)
362 *s
++ = c
; /* An ordinary character. */
365 /* Bigram markers have the high bit set. */
374 if (c
== LOCATEDB_ESCAPE
)
375 count
+= (short)get_short (fp
);
381 if (count
> strlen(path
))
383 /* This should not happen generally , but since we're
384 * reading in data which is outside our control, we
387 error(1, 0, _("locate database `%s' is corrupt or invalid"), dbfile
);
390 /* Overlay the old path with the remainder of the new. */
391 nread
= locate_read_str (&path
, &pathsize
, fp
, 0, count
);
395 s
= path
+ count
+ nread
- 2; /* Move to the last char in path. */
396 assert (s
[0] != '\0');
397 assert (s
[1] == '\0'); /* Our terminator. */
398 assert (s
[2] == '\0'); /* Added by locate_read_str. */
401 /* If the previous path matched, scan the whole path for the last
402 char in the subpattern. If not, the shared prefix doesn't match
403 the pattern, so don't scan it for the last char. */
404 cutoff
= prev_fast_match
? path
: path
+ count
;
406 /* Search backward starting at the end of the path we just read in,
407 for the character at the end of the last glob-free subpattern
409 for(prev_fast_match
=false; s
>=cutoff
; s
--)
411 char *s2
; /* Scan the path we read in. */
412 register char *p2
; /* Scan `patend'. */
414 /* Fast first char check. */
417 if(TOLOWER(*s
)!=*patend
)
424 for(s2
=s
-1, p2
=patend
-1; *p2
!='\0' && TOLOWER(*s2
)==*p2
; s2
--, p2
--);
426 for(s2
=s
-1, p2
=patend
-1; *p2
!='\0' && *s2
==*p2
; s2
--, p2
--);
430 /* Success on the fast match. Compare the whole pattern
431 if it contains globbing characters. */
432 prev_fast_match
= true;
437 if(fnmatch(pathpart
,basename(path
),FNM_CASEFOLD
)!=0)
441 if(fnmatch(pathpart
,basename(path
),0)!=0)
444 if(check_existence
&& stat(path
,&st
)!=0)
454 error (0, errno
, "%s", dbfile
);
457 if (fclose (fp
) == EOF
)
459 error (0, errno
, "%s", dbfile
);
466 extern char *version_string
;
468 /* The name this program was run with. */
475 fprintf (stream
, _("\
476 Usage: %s [-d path | --database=path] [-e | --existing]\n\
477 [-i | --ignore-case] [--version] [--help] pattern...\n"),
479 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
482 static struct option
const longopts
[] =
484 {"database", required_argument
, NULL
, 'd'},
485 {"existing", no_argument
, NULL
, 'e'},
486 {"ignore-case", no_argument
, NULL
, 'i'},
487 {"help", no_argument
, NULL
, 'h'},
488 {"version", no_argument
, NULL
, 'v'},
489 {NULL
, no_argument
, NULL
, 0}
498 int fnmatch_flags
= 0;
502 program_name
= argv
[0];
504 #ifdef HAVE_SETLOCALE
505 setlocale (LC_ALL
, "");
507 bindtextdomain (PACKAGE
, LOCALEDIR
);
508 textdomain (PACKAGE
);
510 dbpath
= getenv ("LOCATE_PATH");
516 while ((optc
= getopt_long (argc
, argv
, "d:ei", longopts
, (int *) 0)) != -1)
529 fnmatch_flags
|= FNM_CASEFOLD
;
537 printf (_("GNU locate version %s\n"), version_string
);
552 for (; optind
< argc
; optind
++)
555 next_element (dbpath
); /* Initialize. */
556 while ((e
= next_element ((char *) NULL
)) != NULL
)
557 found
|= locate (argv
[optind
], e
, ignore_case
);