1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 96, 98, 99, 2000, 2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 /* Usage: locate [options] pattern...
22 Scan a pathname list for the full pathname of a file, given only
23 a piece of the name (possibly containing shell globbing metacharacters).
24 The list has been processed with front-compression, which reduces
25 the list size by a factor of 4-5.
26 Recognizes two database formats, old and new. The old format is
27 bigram coded, which reduces space by a further 20-25% and uses the
28 following encoding of the database bytes:
30 0-28 likeliest differential counts + offset (14) to make nonnegative
31 30 escape code for out-of-range count to follow in next halfword
32 128-255 bigram codes (the 128 most common, as determined by `updatedb')
33 32-127 single character (printable) ASCII remainder
35 Uses a novel two-tiered string search technique:
37 First, match a metacharacter-free subpattern and a partial pathname
38 BACKWARDS to avoid full expansion of the pathname list.
39 The time savings is 40-50% over forward matching, which cannot efficiently
40 handle overlapped search patterns and compressed path remainders.
42 Then, match the actual shell glob-style regular expression (if in this form)
43 against the candidate pathnames using the slower shell filename
46 Described more fully in Usenix ;login:, Vol 8, No 1,
47 February/March, 1983, p. 8.
49 Written by James A. Woods <jwoods@adobe.com>.
50 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
55 #include <sys/types.h>
65 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
88 # define _(Text) gettext (Text)
91 #define textdomain(Domain)
92 #define bindtextdomain(Package, Directory)
95 # define N_(String) gettext_noop (String)
97 /* We used to use (String) instead of just String, but apparentl;y ISO C
98 * doesn't allow this (at least, that's what HP said when someone reported
99 * this as a compiler bug). This is HP case number 1205608192. See
100 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
101 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
102 * like: static const char buf[] = ("string");
104 # define N_(String) String
107 #include "locatedb.h"
109 #include "../gnulib/lib/xalloc.h"
110 #include "../gnulib/lib/error.h"
113 /* Note that this evaluates C many times. */
115 # define TOUPPER(Ch) toupper (Ch)
116 # define TOLOWER(Ch) tolower (Ch)
118 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
119 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
122 /* typedef enum {false, true} boolean; */
124 /* Warn if a database is older than this. 8 days allows for a weekly
125 update that takes up to a day to perform. */
126 #define WARN_NUMBER_UNITS (8)
127 /* Printable name of units used in WARN_SECONDS */
128 static const char warn_name_units
[] = N_("days");
129 #define SECONDS_PER_UNIT (60 * 60 * 24)
131 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
133 /* Check for existence of files before printing them out? */
134 int check_existence
= 0;
136 /* What to separate the results with. */
137 static int separator
= '\n';
139 char *next_element ();
142 /* Read in a 16-bit int, high byte first (network byte order). */
151 x
= (signed char) fgetc (fp
) << 8;
152 x
|= (fgetc (fp
) & 0xff);
156 const char * const metacharacters
= "*?[]\\";
159 /* Return nonzero if CH is a shell glob character.
162 is_metacharacter(char ch
)
164 if (NULL
== strchr (metacharacters
, ch
))
171 /* Return nonzero if S contains any shell glob characters.
174 contains_metacharacter(const char *s
)
176 if (NULL
== strpbrk(s
, metacharacters
))
185 /* Return a pointer to the last character in a static copy of the last
186 glob-free subpattern in NAME,
187 with '\0' prepended for a fast backwards pre-match. */
190 last_literal_end (name
)
193 static char *globfree
= NULL
; /* A copy of the subpattern in NAME. */
194 static size_t gfalloc
= 0; /* Bytes allocated for `globfree'. */
195 register char *subp
; /* Return value. */
196 register char *p
; /* Search location in NAME. */
198 /* Find the end of the subpattern.
199 Skip trailing metacharacters and [] ranges. */
200 for (p
= name
+ strlen (name
) - 1; p
>= name
&& is_metacharacter(*p
);
204 while (p
>= name
&& *p
!= '[')
210 if (p
- name
+ 3 > gfalloc
)
212 gfalloc
= p
- name
+ 3 + 64; /* Room to grow. */
213 globfree
= xrealloc (globfree
, gfalloc
);
218 /* If the pattern has only metacharacters, make every path match the
219 subpattern, so it gets checked the slow way. */
220 if (p
== name
&& is_metacharacter(*p
))
225 /* Find the start of the metacharacter-free subpattern. */
226 for (endmark
= p
; p
>= name
&& !is_metacharacter(*p
); p
--)
228 /* Copy the subpattern into globfree. */
229 for (++p
; p
<= endmark
; )
232 *subp
-- = '\0'; /* Null terminate, though it's not needed. */
241 * Read bytes from FP into the buffer at offset OFFSET in (*BUF),
242 * until we reach DELIMITER or end-of-file. We reallocate the buffer
243 * as necessary, altering (*BUF) and (*SIZ) as appropriate. No assumption
244 * is made regarding the content of the data (i.e. the implementation is
245 * 8-bit clean, the only delimiter is DELIMITER).
247 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
248 * has been removed from gnulib.
250 * We call the function locate_read_str() to avoid a name clash with the curses
254 locate_read_str(char **buf
, size_t *siz
, FILE *fp
, int delimiter
, int offs
)
260 nread
= getdelim(&p
, &sz
, delimiter
, fp
);
265 needed
= offs
+ nread
+ 1;
268 char *pnew
= realloc(*buf
, needed
);
271 return -1; /* FAIL */
279 memcpy((*buf
)+offs
, p
, nread
);
287 lc_strcpy(char *dest
, const char *src
)
291 *dest
++ = TOLOWER(*src
);
298 VISIT_CONTINUE
= 1, /* please call the next visitor */
299 VISIT_ACCEPTED
= 2, /* accepted, call no futher callbacks for this file */
300 VISIT_REJECTED
= 4, /* rejected, process next file. */
301 VISIT_ABORT
= 8 /* rejected, process no more files. */
315 typedef int (*visitfunc
)(const char *tested_filename
,
316 const char *printed_filename
,
323 struct visitor
*next
;
327 static struct visitor
*inspectors
= NULL
;
328 static struct visitor
*lastinspector
= NULL
;
331 process_filename(const char *tested_filename
, const char *printed_filename
)
333 int result
= VISIT_CONTINUE
;
334 const struct visitor
*p
= inspectors
;
336 while ( (VISIT_CONTINUE
== result
) && (NULL
!= p
) )
338 result
= (p
->inspector
)(tested_filename
, printed_filename
, p
->context
);
342 if (VISIT_CONTINUE
== result
)
343 return VISIT_ACCEPTED
;
349 add_visitor(visitfunc fn
, void *context
)
351 struct visitor
*p
= xmalloc(sizeof(struct visitor
));
353 p
->context
= context
;
356 if (NULL
== lastinspector
)
358 lastinspector
= inspectors
= p
;
362 lastinspector
->next
= p
;
369 visit_justprint(const char *testname
, const char *printname
, void *context
)
373 fputs(printname
, stdout
);
375 return VISIT_CONTINUE
;
379 visit_exists(const char *testname
, const char *printname
, void *context
)
385 if (stat(testname
, &st
) != 0)
387 return VISIT_REJECTED
;
391 return VISIT_CONTINUE
;
396 visit_substring_match_nocasefold(const char *testname
, const char *printname
, void *context
)
398 const char *pattern
= context
;
401 if (NULL
!= strstr(testname
, pattern
))
402 return VISIT_CONTINUE
;
404 return VISIT_REJECTED
;
408 visit_substring_match_casefold(const char *testname
, const char *printname
, void *context
)
410 struct casefolder
* p
= context
;
411 size_t len
= strlen(testname
);
414 if (len
> p
->buffersize
)
416 p
->buffer
= xrealloc(p
->buffer
, len
+1);
417 p
->buffersize
= len
+1;
419 lc_strcpy(p
->buffer
, testname
);
422 if (NULL
!= strstr(p
->buffer
, p
->pattern
))
423 return VISIT_CONTINUE
;
425 return VISIT_REJECTED
;
430 visit_globmatch_nofold(const char *testname
, const char *printname
, void *context
)
432 const char *glob
= context
;
434 if (fnmatch(glob
, testname
, 0) != 0)
435 return VISIT_REJECTED
;
437 return VISIT_CONTINUE
;
442 visit_globmatch_casefold(const char *testname
, const char *printname
, void *context
)
444 const char *glob
= context
;
446 if (fnmatch(glob
, testname
, FNM_CASEFOLD
) != 0)
447 return VISIT_REJECTED
;
449 return VISIT_CONTINUE
;
452 /* Print the entries in DBFILE that match shell globbing pattern PATHPART.
453 Return the number of entries printed. */
456 new_locate (char *pathpart
, char *dbfile
, int ignore_case
, int enable_print
, int basename_only
, int use_limit
, uintmax_t limit
)
459 FILE *fp
; /* The pathname database. */
460 int c
; /* An input byte. */
461 int nread
; /* number of bytes read from an entry. */
462 char *path
; /* The current input database entry. */
463 const char *testpath
;
464 size_t pathsize
; /* Amount allocated for it. */
465 int count
= 0; /* The length of the prefix shared with the previous database entry. */
467 int old_format
= 0; /* true if reading a bigram-encoded database. */
471 /* for the old database format,
472 the first and second characters of the most common bigrams. */
473 char bigram1
[128], bigram2
[128];
475 /* number of items accepted (i.e. printed) */
476 unsigned long int items_accepted
= 0uL;
478 /* To check the age of the database. */
482 /* Set up the inspection regime */
484 lastinspector
= NULL
;
487 add_visitor(visit_exists
, NULL
);
489 if (contains_metacharacter(pathpart
))
492 add_visitor(visit_globmatch_casefold
, pathpart
);
494 add_visitor(visit_globmatch_nofold
, pathpart
);
498 /* No glob characters reuired. Hence we match on
499 * _any part_ of the filename, not just the
500 * basename. This seems odd to me, but it is the
501 * traditional behaviour.
502 * James Youngman <jay@gnu.org>
506 struct casefolder
* cf
= xmalloc(sizeof(*cf
));
507 cf
->pattern
= pathpart
;
510 add_visitor(visit_substring_match_casefold
, cf
);
514 add_visitor(visit_substring_match_nocasefold
, pathpart
);
519 add_visitor(visit_justprint
, NULL
);
521 if (stat (dbfile
, &st
) || (fp
= fopen (dbfile
, "r")) == NULL
)
523 error (0, errno
, "%s", dbfile
);
527 if (now
- st
.st_mtime
> WARN_SECONDS
)
530 warning: database `fred' is more than 8 days old */
531 error (0, 0, _("warning: database `%s' is more than %d %s old"),
532 dbfile
, WARN_NUMBER_UNITS
, _(warn_name_units
));
535 pathsize
= 1026; /* Increased as necessary by locate_read_str. */
536 path
= xmalloc (pathsize
);
538 nread
= fread (path
, 1, sizeof (LOCATEDB_MAGIC
), fp
);
539 if (nread
!= sizeof (LOCATEDB_MAGIC
)
540 || memcmp (path
, LOCATEDB_MAGIC
, sizeof (LOCATEDB_MAGIC
)))
543 /* Read the list of the most common bigrams in the database. */
545 for (i
= 0; i
< 128; i
++)
547 bigram1
[i
] = getc (fp
);
548 bigram2
[i
] = getc (fp
);
553 /* If we ignore case, convert it to lower first so we don't have to
558 lc_strcpy(pathpart
, pathpart
);
564 while ( (c
!= EOF
) && (!use_limit
|| (limit
> 0)) )
566 register char *s
; /* Scan the path we read in. */
570 /* Get the offset in the path where this path info starts. */
571 if (c
== LOCATEDB_OLD_ESCAPE
)
572 count
+= getw (fp
) - LOCATEDB_OLD_OFFSET
;
574 count
+= c
- LOCATEDB_OLD_OFFSET
;
576 /* Overlay the old path with the remainder of the new. */
577 for (s
= path
+ count
; (c
= getc (fp
)) > LOCATEDB_OLD_ESCAPE
;)
579 *s
++ = c
; /* An ordinary character. */
582 /* Bigram markers have the high bit set. */
591 if (c
== LOCATEDB_ESCAPE
)
592 count
+= (short)get_short (fp
);
598 if (count
> strlen(path
))
600 /* This should not happen generally , but since we're
601 * reading in data which is outside our control, we
604 error(1, 0, _("locate database `%s' is corrupt or invalid"), dbfile
);
607 /* Overlay the old path with the remainder of the new. */
608 nread
= locate_read_str (&path
, &pathsize
, fp
, 0, count
);
612 s
= path
+ count
+ nread
- 1; /* Move to the last char in path. */
613 assert (s
[0] != '\0');
614 assert (s
[1] == '\0'); /* Our terminator. */
615 assert (s
[2] == '\0'); /* Added by locate_read_str. */
618 testpath
= basename_only
? base_name(path
) : path
;
619 if (VISIT_ACCEPTED
== process_filename(testpath
, path
))
621 if ((++items_accepted
>= limit
) && use_limit
)
630 error (0, errno
, "%s", dbfile
);
633 if (fclose (fp
) == EOF
)
635 error (0, errno
, "%s", dbfile
);
639 return items_accepted
;
645 extern char *version_string
;
647 /* The name this program was run with. */
654 fprintf (stream
, _("\
655 Usage: %s [-d path | --database=path] [-e | --existing]\n\
656 [-i | --ignore-case] [--wholepath] [--basename] [--limit=N | -l N]\n\
657 [--version] [--help] pattern...\n"),
659 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream
);
663 sanity_check_dbpath(const char *path
)
669 error(0, 0, _("warning: locate database path `%s' contains a leading colon, which is not a valid database name"), path
);
675 if (':' == path
[len
-1u])
677 error(0, 0, _("warning: locate database path `%s' contains a trailing colon, which is not a valid database name"), path
);
683 static struct option
const longopts
[] =
685 {"database", required_argument
, NULL
, 'd'},
686 {"existing", no_argument
, NULL
, 'e'},
687 {"ignore-case", no_argument
, NULL
, 'i'},
688 {"help", no_argument
, NULL
, 'h'},
689 {"version", no_argument
, NULL
, 'v'},
690 {"null", no_argument
, NULL
, '0'},
691 {"count", no_argument
, NULL
, 'c'},
692 {"wholename", no_argument
, NULL
, 'w'},
693 {"basename", no_argument
, NULL
, 'b'},
694 {"stdio", no_argument
, NULL
, 's'},
695 {"mmap", no_argument
, NULL
, 'm'},
696 {"limit", required_argument
, NULL
, 'l'},
697 {NULL
, no_argument
, NULL
, 0}
706 unsigned long int found
= 0uL;
711 int basename_only
= 0;
715 program_name
= argv
[0];
717 #ifdef HAVE_SETLOCALE
718 setlocale (LC_ALL
, "");
720 bindtextdomain (PACKAGE
, LOCALEDIR
);
721 textdomain (PACKAGE
);
723 dbpath
= getenv ("LOCATE_PATH");
729 while ((optc
= getopt_long (argc
, argv
, "cd:eil:sm0", longopts
, (int *) 0)) != -1)
762 printf (_("GNU locate version %s\n"), version_string
);
772 strtol_error err
= xstrtoumax(optarg
, &end
, 10, &limit
, NULL
);
773 if (LONGINT_OK
!= err
)
775 STRTOL_FATAL_ERROR(optarg
, _("argument to --limit"), err
);
781 case 's': /* use stdio */
782 case 'm': /* use mmap */
783 /* These options are implemented simply for
784 * compatibility with FreeBSD
799 sanity_check_dbpath(dbpath
);
801 for (; optind
< argc
; optind
++)
804 next_element (dbpath
); /* Initialize. */
805 while ((e
= next_element ((char *) NULL
)) != NULL
)
807 found
+= new_locate (argv
[optind
], e
, ignore_case
, print
, basename_only
, use_limit
, limit
);
813 printf("%ld\n", found
);
816 if (found
|| (use_limit
&& (limit
==0)) )