Switch to a new scheme for saving directory entry data which is extensible to allow...
[findutils.git] / locate / locate.c
blob6cd29aebf6852ba8b8f8cc0deed8eaadb9ef09f0
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <ctype.h>
66 #include <sys/types.h>
67 #include <sys/stat.h>
68 #include <time.h>
69 #include <fnmatch.h>
70 #include <getopt.h>
71 #include <xstrtol.h>
73 #ifdef HAVE_UNISTD_H
74 /* We need <unistd.h> for isatty(). */
75 #include <unistd.h>
76 #endif
79 #define NDEBUG
80 #include <assert.h>
82 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
83 #include <string.h>
84 #else
85 #include <strings.h>
86 #define strchr index
87 #endif
89 #ifdef STDC_HEADERS
90 #include <stdlib.h>
91 #endif
93 #ifdef HAVE_ERRNO_H
94 #include <errno.h>
95 #else
96 extern int errno;
97 #endif
99 #ifdef HAVE_LOCALE_H
100 #include <locale.h>
101 #endif
103 #if ENABLE_NLS
104 # include <libintl.h>
105 # define _(Text) gettext (Text)
106 #else
107 # define _(Text) Text
108 #define textdomain(Domain)
109 #define bindtextdomain(Package, Directory)
110 #endif
111 #ifdef gettext_noop
112 # define N_(String) gettext_noop (String)
113 #else
114 /* We used to use (String) instead of just String, but apparently ISO C
115 * doesn't allow this (at least, that's what HP said when someone reported
116 * this as a compiler bug). This is HP case number 1205608192. See
117 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
118 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
119 * like: static const char buf[] = ("string");
121 # define N_(String) String
122 #endif
124 #include "locatedb.h"
125 #include <getline.h>
126 #include "../gnulib/lib/xalloc.h"
127 #include "../gnulib/lib/error.h"
128 #include "../gnulib/lib/human.h"
129 #include "dirname.h"
130 #include "closeout.h"
131 #include "nextelem.h"
132 #include "regex.h"
133 #include "quote.h"
134 #include "quotearg.h"
135 #include "printquoted.h"
/* Case-conversion helpers.  Note that these may evaluate Ch several
   times, so the argument must be free of side effects.  The argument is
   converted to unsigned char before it reaches the <ctype.h> functions:
   handing them a negative plain char is undefined behaviour.  */
#ifdef _LIBC
# define TOUPPER(Ch) toupper ((unsigned char) (Ch))
# define TOLOWER(Ch) tolower ((unsigned char) (Ch))
#else
# define TOUPPER(Ch) (islower ((unsigned char) (Ch)) ? toupper ((unsigned char) (Ch)) : (Ch))
# define TOLOWER(Ch) (isupper ((unsigned char) (Ch)) ? tolower ((unsigned char) (Ch)) : (Ch))
#endif
147 /* typedef enum {false, true} boolean; */
149 /* Warn if a database is older than this. 8 days allows for a weekly
150 update that takes up to a day to perform. */
151 #define WARN_NUMBER_UNITS (8)
152 /* Printable name of units used in WARN_SECONDS */
153 static const char warn_name_units[] = N_("days");
154 #define SECONDS_PER_UNIT (60 * 60 * 24)
156 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
/* Verdict returned by a visitor.  Each value is a distinct bit so that
   callers can test for several verdicts with a single mask.  */
enum visit_result
  {
    VISIT_CONTINUE = 1 << 0,	/* please call the next visitor */
    VISIT_ACCEPTED = 1 << 1,	/* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 1 << 2,	/* rejected, process next file */
    VISIT_ABORT    = 1 << 3	/* rejected, process no more files */
  };
/* Which database entries to keep, judged by whether the named file
   currently exists.  */
enum ExistenceCheckType
  {
    ACCEPT_EITHER = 0,		/* neither -e nor -E was given */
    ACCEPT_EXISTING = 1,	/* option -e */
    ACCEPT_NON_EXISTING = 2	/* option -E */
  };
173 /* Check for existence of files before printing them out? */
174 enum ExistenceCheckType check_existence = ACCEPT_EITHER;
176 static int follow_symlinks = 1;
178 /* What to separate the results with. */
179 static int separator = '\n';
181 static struct quoting_options * quote_opts = NULL;
182 static bool stdout_is_a_tty;
183 static bool print_quoted_filename;
/* Read a 16-bit two's-complement integer from FP, high byte first
   (network byte order), and return it.

   The value is assembled with unsigned arithmetic and then mapped into
   the signed range explicitly; shifting a negative value left, as a
   sign-extended high byte would require, is undefined behaviour.

   End-of-file is not reported separately: a byte that could not be read
   is treated as 0xff, so two failed reads yield -1.  */
static short
get_short (FILE *fp)
{
  int hi = fgetc (fp);
  int lo = fgetc (fp);
  unsigned int raw = (((unsigned int) hi & 0xffu) << 8)
		     | ((unsigned int) lo & 0xffu);

  if (raw >= 0x8000u)
    return (short) ((int) raw - 0x10000);
  return (short) raw;
}
/* The characters that make a pattern a shell glob. */
const char * const metacharacters = "*?[]\\";

/* Return 1 if S contains at least one of the characters listed in
   `metacharacters', and 0 otherwise.  */
static int
contains_metacharacter(const char *s)
{
  return strpbrk(s, metacharacters) != NULL;
}
/* locate_read_str()
 *
 * Read bytes from FP until DELIMITER or end-of-file and store them in
 * (*BUF) starting at offset OFFS.  The buffer is reallocated as needed,
 * updating (*BUF) and (*SIZ).  No assumption is made about the content
 * of the data (the implementation is 8-bit clean; the only delimiter is
 * DELIMITER).
 *
 * The stored bytes include the delimiter when one was read, and are
 * always followed by a NUL byte, so the result is a valid string even
 * when the last entry of the input lacks its delimiter.
 *
 * Returns the number of bytes read (delimiter included), or -1 if
 * nothing could be read or the buffer could not be enlarged; in the
 * latter cases (*BUF) and (*SIZ) are left untouched.
 *
 * Written by James Youngman because getstr() has been removed from
 * gnulib.  The name avoids a clash with the curses function getstr().
 */
static int
locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char *line = NULL;
  size_t linecap = 0;
  size_t needed;
  int nread;

  nread = getdelim(&line, &linecap, delimiter, fp);
  if (nread < 0)
    {
      /* getdelim() may have allocated a buffer even though it failed. */
      free(line);
      return -1;
    }
  assert(line != NULL);

  /* Room for the prefix kept by the caller, the new bytes and a NUL. */
  needed = (size_t) offs + (size_t) nread + 1u;
  if (needed > *siz)
    {
      char *pnew = realloc(*buf, needed);
      if (NULL == pnew)
	{
	  free(line);
	  return -1;		/* FAIL */
	}
      *siz = needed;
      *buf = pnew;
    }

  /* getdelim() NUL-terminates LINE, so copy that terminator as well. */
  memcpy((*buf) + offs, line, (size_t) nread + 1u);
  free(line);
  return nread;
}
/* Copy the string SRC to DEST, converting upper-case characters to
   lower case as it goes.  DEST must have room for strlen (SRC) + 1
   bytes.  DEST may be the same pointer as SRC, in which case the
   string is folded in place.

   Each byte is converted to unsigned char before being handed to
   tolower(), since passing a negative plain char is undefined.  */
static void
lc_strcpy(char *dest, const char *src)
{
  for (; *src != '\0'; ++src)
    *dest++ = (char) tolower((unsigned char) *src);
  *dest = '\0';
}
/* Bookkeeping for the --limit option. */
struct locate_limits
{
  uintmax_t limit;		/* value given to --limit */
  uintmax_t items_accepted;	/* entries accepted so far */
};

static struct locate_limits limits;
/* Counters gathered for the --statistics report. */
struct locate_stats
{
  uintmax_t compressed_bytes;
  uintmax_t total_filename_count;	/* entries seen */
  uintmax_t total_filename_length;	/* sum of their lengths */
  uintmax_t whitespace_count;		/* entries containing whitespace */
  uintmax_t newline_count;		/* entries containing a newline */
  uintmax_t highbit_filename_count;	/* entries with a high-bit byte */
};

static struct locate_stats statistics;
/* A growable scratch buffer; `casebuf' holds the lower-cased copy of
   the name currently being examined.  */
struct stringbuf
{
  char *buffer;			/* the storage itself */
  size_t buffersize;		/* bytes allocated for BUFFER */
  size_t *soffs;
  size_t *preqlen;		/* points at the size BUFFER must reach */
};

static struct stringbuf casebuf;
/* Context for a case-insensitive substring match. */
struct casefolder
{
  const char *pattern;		/* the substring to look for */
  struct stringbuf *pbuf;	/* buffer holding the folded name */
};
/* Context for a regular-expression match. */
struct regular_expression
{
  regex_t re;			/* the compiled pattern */
};
/* State shared between the database reader and every visitor. */
struct process_data
{
  int c;			/* An input byte. */
  int count;			/* Length of the prefix shared with the
				   previous database entry. */
  int len;
  char *original_filename;	/* The current input database entry. */
  size_t pathsize;		/* Amount allocated for it. */
  char *munged_filename;	/* path or base_name(path) */
  FILE *fp;			/* The pathname database. */
  char *dbfile;			/* Its name, or "<stdin>" */

  /* For the old database format: the first and second characters of
     the most common bigrams. */
  char bigram1[128];
  char bigram2[128];
};
/* A visitor examines the current entry and returns an
   enum visit_result value.  */
typedef int (*visitfunc)(struct process_data *procdata,
			 void *context);

/* One link in the chain of visitors applied to each entry. */
struct visitor
{
  visitfunc inspector;		/* function to call */
  void *context;		/* second argument handed to it */
  struct visitor *next;		/* following link, or NULL */
};

/* Head and tail of the chain, plus the first visitor that follows the
   pattern matchers.  */
static struct visitor *inspectors = NULL;
static struct visitor *lastinspector = NULL;
static struct visitor *past_pat_inspector = NULL;
343 /* 0 or 1 pattern(s) */
344 static int
345 process_simple(struct process_data *procdata)
347 int result = VISIT_CONTINUE;
348 const struct visitor *p = inspectors;
350 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
352 result = (p->inspector)(procdata, p->context);
353 p = p->next;
356 return result;
359 /* Accept if any pattern matches. */
360 static int
361 process_or (struct process_data *procdata)
363 int result = VISIT_CONTINUE;
364 const struct visitor *p = inspectors;
366 while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
368 result = (p->inspector)(procdata, p->context);
369 p = p->next;
372 if (result == VISIT_CONTINUE)
373 result = VISIT_REJECTED;
374 if (result & (VISIT_ABORT | VISIT_REJECTED))
375 return result;
377 p = past_pat_inspector;
378 result = VISIT_CONTINUE;
380 while ( (VISIT_CONTINUE == result) && (NULL != p) )
382 result = (p->inspector)(procdata, p->context);
383 p = p->next;
386 if (VISIT_CONTINUE == result)
387 return VISIT_ACCEPTED;
388 else
389 return result;
/* Signature shared by process_simple() and process_or(). */
typedef int (*processfunc)(struct process_data *procdata);

/* The processor chosen for the current search. */
static processfunc mainprocessor = NULL;
396 static void
397 add_visitor(visitfunc fn, void *context)
399 struct visitor *p = xmalloc(sizeof(struct visitor));
400 p->inspector = fn;
401 p->context = context;
402 p->next = NULL;
404 if (NULL == lastinspector)
406 lastinspector = inspectors = p;
408 else
410 lastinspector->next = p;
411 lastinspector = p;
417 static int
418 visit_justprint_quoted(struct process_data *procdata, void *context)
420 (void) context;
421 print_quoted (stdout, quote_opts, stdout_is_a_tty,
422 "%s",
423 procdata->original_filename);
424 putchar(separator);
425 return VISIT_CONTINUE;
428 static int
429 visit_justprint_unquoted(struct process_data *procdata, void *context)
431 (void) context;
432 fputs(procdata->original_filename, stdout);
433 putchar(separator);
434 return VISIT_CONTINUE;
437 static int
438 visit_old_format(struct process_data *procdata, void *context)
440 register char *s;
441 (void) context;
443 /* Get the offset in the path where this path info starts. */
444 if (procdata->c == LOCATEDB_OLD_ESCAPE)
445 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
446 else
447 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
449 /* Overlay the old path with the remainder of the new. */
450 for (s = procdata->original_filename + procdata->count;
451 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
452 if (procdata->c < 0200)
453 *s++ = procdata->c; /* An ordinary character. */
454 else
456 /* Bigram markers have the high bit set. */
457 procdata->c &= 0177;
458 *s++ = procdata->bigram1[procdata->c];
459 *s++ = procdata->bigram2[procdata->c];
461 *s-- = '\0';
463 procdata->munged_filename = procdata->original_filename;
465 return VISIT_CONTINUE;
469 static int
470 visit_locate02_format(struct process_data *procdata, void *context)
472 register char *s;
473 int nread;
474 (void) context;
476 if (procdata->c == LOCATEDB_ESCAPE)
477 procdata->count += (short)get_short (procdata->fp);
478 else if (procdata->c > 127)
479 procdata->count += procdata->c - 256;
480 else
481 procdata->count += procdata->c;
483 if (procdata->count > procdata->len || procdata->count < 0)
485 /* This should not happen generally , but since we're
486 * reading in data which is outside our control, we
487 * cannot prevent it.
489 error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
492 /* Overlay the old path with the remainder of the new. */
493 nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
494 procdata->fp, 0, procdata->count);
495 if (nread < 0)
496 return VISIT_ABORT;
497 procdata->c = getc (procdata->fp);
498 procdata->len = procdata->count + nread;
499 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
500 assert (s[0] != '\0');
501 assert (s[1] == '\0'); /* Our terminator. */
502 assert (s[2] == '\0'); /* Added by locate_read_str. */
504 procdata->munged_filename = procdata->original_filename;
506 return VISIT_CONTINUE;
509 static int
510 visit_basename(struct process_data *procdata, void *context)
512 (void) context;
513 procdata->munged_filename = base_name(procdata->original_filename);
515 return VISIT_CONTINUE;
519 static int
520 visit_casefold(struct process_data *procdata, void *context)
522 struct stringbuf *b = context;
524 if (*b->preqlen+1 > b->buffersize)
526 b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
527 b->buffersize = *b->preqlen+1;
529 lc_strcpy(b->buffer, procdata->munged_filename);
531 return VISIT_CONTINUE;
534 /* visit_existing_follow implements -L -e */
535 static int
536 visit_existing_follow(struct process_data *procdata, void *context)
538 struct stat st;
539 (void) context;
541 /* munged_filename has been converted in some way (to lower case,
542 * or is just the base name of the file), and original_filename has not.
543 * Hence only original_filename is still actually the name of the file
544 * whose existence we would need to check.
546 if (stat(procdata->original_filename, &st) != 0)
548 return VISIT_REJECTED;
550 else
552 return VISIT_CONTINUE;
556 /* visit_non_existing_follow implements -L -E */
557 static int
558 visit_non_existing_follow(struct process_data *procdata, void *context)
560 struct stat st;
561 (void) context;
563 /* munged_filename has been converted in some way (to lower case,
564 * or is just the base name of the file), and original_filename has not.
565 * Hence only original_filename is still actually the name of the file
566 * whose existence we would need to check.
568 if (stat(procdata->original_filename, &st) == 0)
570 return VISIT_REJECTED;
572 else
574 return VISIT_CONTINUE;
578 /* visit_existing_nofollow implements -P -e */
579 static int
580 visit_existing_nofollow(struct process_data *procdata, void *context)
582 struct stat st;
583 (void) context;
585 /* munged_filename has been converted in some way (to lower case,
586 * or is just the base name of the file), and original_filename has not.
587 * Hence only original_filename is still actually the name of the file
588 * whose existence we would need to check.
590 if (lstat(procdata->original_filename, &st) != 0)
592 return VISIT_REJECTED;
594 else
596 return VISIT_CONTINUE;
600 /* visit_non_existing_nofollow implements -P -E */
601 static int
602 visit_non_existing_nofollow(struct process_data *procdata, void *context)
604 struct stat st;
605 (void) context;
607 /* munged_filename has been converted in some way (to lower case,
608 * or is just the base name of the file), and original_filename has not.
609 * Hence only original_filename is still actually the name of the file
610 * whose existence we would need to check.
612 if (lstat(procdata->original_filename, &st) == 0)
614 return VISIT_REJECTED;
616 else
618 return VISIT_CONTINUE;
622 static int
623 visit_substring_match_nocasefold(struct process_data *procdata, void *context)
625 const char *pattern = context;
627 if (NULL != strstr(procdata->munged_filename, pattern))
628 return VISIT_ACCEPTED;
629 else
630 return VISIT_REJECTED;
633 static int
634 visit_substring_match_casefold(struct process_data *procdata, void *context)
636 const struct casefolder * p = context;
637 const struct stringbuf * b = p->pbuf;
638 (void) procdata;
640 if (NULL != strstr(b->buffer, p->pattern))
641 return VISIT_ACCEPTED;
642 else
643 return VISIT_REJECTED;
647 static int
648 visit_globmatch_nofold(struct process_data *procdata, void *context)
650 const char *glob = context;
651 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
652 return VISIT_REJECTED;
653 else
654 return VISIT_ACCEPTED;
658 static int
659 visit_globmatch_casefold(struct process_data *procdata, void *context)
661 const char *glob = context;
662 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
663 return VISIT_REJECTED;
664 else
665 return VISIT_ACCEPTED;
669 static int
670 visit_regex(struct process_data *procdata, void *context)
672 struct regular_expression *p = context;
674 if (0 == regexec(&p->re, procdata->munged_filename, 0u, NULL, 0))
675 return VISIT_ACCEPTED; /* match */
676 else
677 return VISIT_REJECTED; /* no match */
681 static int
682 visit_stats(struct process_data *procdata, void *context)
684 struct locate_stats *p = context;
685 size_t len = strlen(procdata->original_filename);
686 const char *s;
687 int highbit, whitespace, newline;
689 ++(p->total_filename_count);
690 p->total_filename_length += len;
692 highbit = whitespace = newline = 0;
693 for (s=procdata->original_filename; *s; ++s)
695 if ( (int)(*s) & 128 )
696 highbit = 1;
697 if ('\n' == *s)
699 newline = whitespace = 1;
701 else if (isspace((unsigned char)*s))
703 whitespace = 1;
707 if (highbit)
708 ++(p->highbit_filename_count);
709 if (whitespace)
710 ++(p->whitespace_count);
711 if (newline)
712 ++(p->newline_count);
714 return VISIT_CONTINUE;
718 /* Emit the statistics.
720 static void
721 print_stats(int argc, size_t database_file_size)
723 char hbuf[LONGEST_HUMAN_READABLE + 1];
725 printf(_("Locate database size: %s bytes\n"),
726 human_readable ((uintmax_t) database_file_size,
727 hbuf, human_ceiling, 1, 1));
729 printf(_("Filenames: %s "),
730 human_readable (statistics.total_filename_count,
731 hbuf, human_ceiling, 1, 1));
732 printf(_("with a cumulative length of %s bytes"),
733 human_readable (statistics.total_filename_length,
734 hbuf, human_ceiling, 1, 1));
736 printf(_("\n\tof which %s contain whitespace, "),
737 human_readable (statistics.whitespace_count,
738 hbuf, human_ceiling, 1, 1));
739 printf(_("\n\t%s contain newline characters, "),
740 human_readable (statistics.newline_count,
741 hbuf, human_ceiling, 1, 1));
742 printf(_("\n\tand %s contain characters with the high bit set.\n"),
743 human_readable (statistics.highbit_filename_count,
744 hbuf, human_ceiling, 1, 1));
746 if (!argc)
747 printf(_("Compression ratio %4.2f%%\n"),
748 100.0 * ((double)statistics.total_filename_length
749 - (double) database_file_size)
750 / (double) statistics.total_filename_length);
751 printf("\n");
755 /* Print the entries in DBFILE that match shell globbing patterns in ARGV.
756 Return the number of entries printed. */
758 static unsigned long
759 locate (int argc,
760 char **argv,
761 char *dbfile,
762 int ignore_case,
763 int enable_print,
764 int basename_only,
765 int use_limit,
766 struct locate_limits *plimit,
767 int stats,
768 int regex)
770 char *pathpart; /* A pattern to consider. */
771 int argn; /* Index to current pattern in argv. */
772 int need_fold; /* Set when folding and any pattern is non-glob. */
773 int nread; /* number of bytes read from an entry. */
774 struct process_data procdata; /* Storage for data shared with visitors. */
776 int old_format = 0; /* true if reading a bigram-encoded database. */
777 static bool did_stdin = false; /* Set to prevent rereading stdin. */
778 struct visitor* pvis; /* temp for determining past_pat_inspector. */
780 /* To check the age of the database. */
781 struct stat st;
782 time_t now;
785 procdata.len = procdata.count = 0;
786 if (!strcmp (dbfile, "-"))
788 if (did_stdin)
790 error (0, 0, _("warning: the locate database can only be read from stdin once."));
791 return 0;
795 procdata.dbfile = "<stdin>";
796 procdata.fp = stdin;
797 did_stdin = true;
799 else
801 if (stat (dbfile, &st) || (procdata.fp = fopen (dbfile, "r")) == NULL)
803 error (0, errno, "%s", dbfile);
804 return 0;
806 time(&now);
807 if (now - st.st_mtime > WARN_SECONDS)
809 /* For example:
810 warning: database `fred' is more than 8 days old */
811 error (0, 0, _("warning: database `%s' is more than %d %s old"),
812 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
814 procdata.dbfile = dbfile;
817 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
818 procdata.original_filename = xmalloc (procdata.pathsize);
820 nread = fread (procdata.original_filename, 1, sizeof (LOCATEDB_MAGIC),
821 procdata.fp);
822 if (nread != sizeof (LOCATEDB_MAGIC)
823 || memcmp (procdata.original_filename, LOCATEDB_MAGIC,
824 sizeof (LOCATEDB_MAGIC)))
826 int i;
827 /* Read the list of the most common bigrams in the database. */
828 nread = fread (procdata.original_filename + sizeof (LOCATEDB_MAGIC), 1,
829 256 - sizeof (LOCATEDB_MAGIC), procdata.fp);
830 for (i = 0; i < 128; i++)
832 procdata.bigram1[i] = procdata.original_filename[i << 1];
833 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
835 old_format = 1;
838 /* Set up the inspection regime */
839 inspectors = NULL;
840 lastinspector = NULL;
841 past_pat_inspector = NULL;
843 if (old_format)
844 add_visitor(visit_old_format, NULL);
845 else
846 add_visitor(visit_locate02_format, NULL);
848 if (basename_only)
849 add_visitor(visit_basename, NULL);
851 /* See if we need fold. */
852 if (ignore_case && !regex)
853 for ( argn = 0; argn < argc; argn++ )
855 pathpart = argv[argn];
856 if (!contains_metacharacter(pathpart))
858 need_fold = 1;
859 break;
863 if (need_fold)
865 add_visitor(visit_casefold, &casebuf);
866 casebuf.preqlen = &procdata.pathsize;
867 casebuf.soffs = &procdata.count;
870 /* Add an inspector for each pattern we're looking for. */
871 for ( argn = 0; argn < argc; argn++ )
873 pathpart = argv[argn];
874 if (regex)
876 struct regular_expression *p = xmalloc(sizeof(*p));
877 int cflags = REG_EXTENDED | REG_NOSUB
878 | (ignore_case ? REG_ICASE : 0);
879 errno = 0;
880 if (0 == regcomp(&p->re, pathpart, cflags))
882 add_visitor(visit_regex, p);
884 else
886 error (1, errno, "Invalid regular expression; %s", pathpart);
889 else if (contains_metacharacter(pathpart))
891 if (ignore_case)
892 add_visitor(visit_globmatch_casefold, pathpart);
893 else
894 add_visitor(visit_globmatch_nofold, pathpart);
896 else
898 /* No glob characters used. Hence we match on
899 * _any part_ of the filename, not just the
900 * basename. This seems odd to me, but it is the
901 * traditional behaviour.
902 * James Youngman <jay@gnu.org>
904 if (ignore_case)
906 struct casefolder * cf = xmalloc(sizeof(*cf));
907 cf->pattern = pathpart;
908 cf->pbuf = &casebuf;
909 add_visitor(visit_substring_match_casefold, cf);
910 /* If we ignore case, convert it to lower now so we don't have to
911 * do it every time
913 lc_strcpy(pathpart, pathpart);
915 else
917 add_visitor(visit_substring_match_nocasefold, pathpart);
922 pvis = lastinspector;
924 /* We add visit_existing_*() as late as possible to reduce the
925 * number of stat() calls.
927 switch (check_existence)
929 case ACCEPT_EXISTING:
930 if (follow_symlinks) /* -L, default */
931 add_visitor(visit_existing_follow, NULL);
932 else /* -P */
933 add_visitor(visit_existing_nofollow, NULL);
934 break;
936 case ACCEPT_NON_EXISTING:
937 if (follow_symlinks) /* -L, default */
938 add_visitor(visit_non_existing_follow, NULL);
939 else /* -P */
940 add_visitor(visit_non_existing_nofollow, NULL);
941 break;
943 case ACCEPT_EITHER: /* Default, neither -E nor -e */
944 /* do nothing; no extra processing. */
945 break;
948 if (stats)
949 add_visitor(visit_stats, &statistics);
951 if (enable_print)
953 if (print_quoted_filename)
954 add_visitor(visit_justprint_quoted, NULL);
955 else
956 add_visitor(visit_justprint_unquoted, NULL);
960 if (argc > 1)
962 past_pat_inspector = pvis->next;
963 mainprocessor = process_or;
965 else
966 mainprocessor = process_simple;
968 if (stats)
970 printf(_("Database %s is in the %s format.\n"),
971 procdata.dbfile,
972 old_format ? _("old") : "LOCATE02");
975 procdata.c = getc (procdata.fp);
976 while ( (procdata.c != EOF) && (!use_limit || (plimit->limit > 0)) )
979 /* If we are searching for filename patterns, the inspector list
980 * will contain an entry for each pattern for which we are searching.
982 if ((VISIT_ACCEPTED | VISIT_CONTINUE) & (mainprocessor)(&procdata))
984 if ((++plimit->items_accepted >= plimit->limit) && use_limit)
986 break;
992 if (stats)
994 print_stats(argc, st.st_size);
997 if (ferror (procdata.fp))
999 error (0, errno, "%s", procdata.dbfile);
1000 return 0;
1002 if (procdata.fp != stdin && fclose (procdata.fp) == EOF)
1004 error (0, errno, "%s", dbfile);
1005 return 0;
1008 return plimit->items_accepted;
/* Version text printed for --version; defined in another file. */
extern char *version_string;

/* The name this program was run with. */
char *program_name;
1019 static void
1020 usage (FILE *stream)
1022 fprintf (stream, _("\
1023 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1024 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1025 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1026 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1027 [-p | --print] [-r | --regex ] [--version] [--help] pattern...\n"),
1028 program_name);
1029 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Long options.  Each one maps onto the character that main()'s option
   switch handles, so every long option behaves like that short option.
   The all-zero entry ends the table.  */
static struct option const longopts[] =
{
  {"database",     required_argument, NULL, 'd'},
  {"existing",     no_argument,       NULL, 'e'},
  {"non-existing", no_argument,       NULL, 'E'},
  {"ignore-case",  no_argument,       NULL, 'i'},
  {"help",         no_argument,       NULL, 'h'},
  {"version",      no_argument,       NULL, 'v'},
  {"null",         no_argument,       NULL, '0'},
  {"count",        no_argument,       NULL, 'c'},
  {"wholename",    no_argument,       NULL, 'w'},
  {"wholepath",    no_argument,       NULL, 'w'},	/* Synonym. */
  {"basename",     no_argument,       NULL, 'b'},
  {"print",        no_argument,       NULL, 'p'},
  {"stdio",        no_argument,       NULL, 's'},
  {"mmap",         no_argument,       NULL, 'm'},
  {"limit",        required_argument, NULL, 'l'},
  {"regex",        no_argument,       NULL, 'r'},
  {"statistics",   no_argument,       NULL, 'S'},
  {"follow",       no_argument,       NULL, 'L'},
  {"nofollow",     no_argument,       NULL, 'P'},
  {NULL,           no_argument,       NULL, 0}
};
1057 main (int argc, char **argv)
1059 char *dbpath;
1060 unsigned long int found = 0uL;
1061 int optc;
1062 int ignore_case = 0;
1063 int print = 0;
1064 int just_count = 0;
1065 int basename_only = 0;
1066 int use_limit = 0;
1067 int regex = 0;
1068 int stats = 0;
1069 char *e;
1071 program_name = argv[0];
1073 #ifdef HAVE_SETLOCALE
1074 setlocale (LC_ALL, "");
1075 #endif
1076 bindtextdomain (PACKAGE, LOCALEDIR);
1077 textdomain (PACKAGE);
1078 atexit (close_stdout);
1080 limits.limit = 0;
1081 limits.items_accepted = 0;
1083 quote_opts = clone_quoting_options (NULL);
1084 print_quoted_filename = true;
1086 dbpath = getenv ("LOCATE_PATH");
1087 if (dbpath == NULL)
1088 dbpath = LOCATE_DB;
1090 check_existence = ACCEPT_EITHER;
1092 while ((optc = getopt_long (argc, argv, "bcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1093 switch (optc)
1095 case '0':
1096 separator = 0;
1097 print_quoted_filename = false; /* print filename 'raw'. */
1098 break;
1100 case 'b':
1101 basename_only = 1;
1102 break;
1104 case 'c':
1105 just_count = 1;
1106 break;
1108 case 'd':
1109 dbpath = optarg;
1110 break;
1112 case 'e':
1113 check_existence = ACCEPT_EXISTING;
1114 break;
1116 case 'E':
1117 check_existence = ACCEPT_NON_EXISTING;
1118 break;
1120 case 'i':
1121 ignore_case = 1;
1122 break;
1124 case 'h':
1125 usage (stdout);
1126 return 0;
1128 case 'p':
1129 print = 1;
1130 break;
1132 case 'v':
1133 printf (_("GNU locate version %s\n"), version_string);
1134 return 0;
1136 case 'w':
1137 basename_only = 0;
1138 break;
1140 case 'r':
1141 regex = 1;
1142 break;
1144 case 'S':
1145 stats = 1;
1146 break;
1148 case 'L':
1149 follow_symlinks = 1;
1150 break;
1152 /* In find, -P and -H differ in the way they handle paths
1153 * given on the command line. This is not relevant for
1154 * locate, but the -H option is supported because it is
1155 * probably more intuitive to do so.
1157 case 'P':
1158 case 'H':
1159 follow_symlinks = 0;
1160 break;
1162 case 'l':
1164 char *end = optarg;
1165 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1166 if (LONGINT_OK != err)
1168 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1170 use_limit = 1;
1172 break;
1174 case 's': /* use stdio */
1175 case 'm': /* use mmap */
1176 /* These options are implemented simply for
1177 * compatibility with FreeBSD
1179 break;
1181 default:
1182 usage (stderr);
1183 return 1;
1186 if (!just_count && !stats)
1187 print = 1;
1189 if (stats)
1191 if (optind == argc)
1192 use_limit = 0;
1194 else
1196 if (!just_count && optind == argc)
1198 usage (stderr);
1199 return 1;
1204 if (1 == isatty(STDOUT_FILENO))
1205 stdout_is_a_tty = true;
1206 else
1207 stdout_is_a_tty = false;
1209 next_element (dbpath, 0); /* Initialize. */
1210 while ((e = next_element ((char *) NULL, 0)) != NULL)
1212 statistics.compressed_bytes =
1213 statistics.total_filename_count =
1214 statistics.total_filename_length =
1215 statistics.whitespace_count =
1216 statistics.newline_count =
1217 statistics.highbit_filename_count = 0u;
1219 if (0 == strlen(e) || 0 == strcmp(e, "."))
1221 /* Use the default database name instead (note: we
1222 * don't use 'dbpath' since that might itself contain a
1223 * colon-separated list.
1225 e = LOCATE_DB;
1228 found = locate (argc - optind, &argv[optind], e, ignore_case, print, basename_only, use_limit, &limits, stats, regex);
1231 if (just_count)
1233 printf("%ld\n", found);
1236 if (found || (use_limit && (limits.limit==0)) || stats )
1237 return 0;
1238 else
1239 return 1;