Fixed a typo in a comment.
[findutils.git] / locate / locate.c
blob6f085b0187224b01393a7292f2e8cc1fcab524c0
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <ctype.h>
66 #include <sys/types.h>
67 #include <sys/stat.h>
68 #include <time.h>
69 #include <fnmatch.h>
70 #include <getopt.h>
71 #include <xstrtol.h>
73 /* The presence of unistd.h is assumed by gnulib these days, so we
74 * might as well assume it too.
76 /* We need <unistd.h> for isatty(). */
77 #include <unistd.h>
80 #define NDEBUG
81 #include <assert.h>
83 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
84 #include <string.h>
85 #else
86 #include <strings.h>
87 #define strchr index
88 #endif
90 #ifdef STDC_HEADERS
91 #include <stdlib.h>
92 #endif
94 #ifdef HAVE_ERRNO_H
95 #include <errno.h>
96 #else
97 extern int errno;
98 #endif
100 #ifdef HAVE_LOCALE_H
101 #include <locale.h>
102 #endif
104 #if ENABLE_NLS
105 # include <libintl.h>
106 # define _(Text) gettext (Text)
107 #else
108 # define _(Text) Text
109 #define textdomain(Domain)
110 #define bindtextdomain(Package, Directory)
111 #endif
112 #ifdef gettext_noop
113 # define N_(String) gettext_noop (String)
114 #else
115 /* We used to use (String) instead of just String, but apparently ISO C
116 * doesn't allow this (at least, that's what HP said when someone reported
117 * this as a compiler bug). This is HP case number 1205608192. See
118 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
119 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
120 * like: static const char buf[] = ("string");
122 # define N_(String) String
123 #endif
125 #include "locatedb.h"
126 #include <getline.h>
127 #include "../gnulib/lib/xalloc.h"
128 #include "../gnulib/lib/error.h"
129 #include "../gnulib/lib/human.h"
130 #include "dirname.h"
131 #include "closeout.h"
132 #include "nextelem.h"
133 #include "regex.h"
134 #include "quote.h"
135 #include "quotearg.h"
136 #include "printquoted.h"
137 #include "regextype.h"
140 /* Note that these macros may evaluate Ch more than once. */
141 #ifdef _LIBC
142 # define TOUPPER(Ch) toupper (Ch)
143 # define TOLOWER(Ch) tolower (Ch)
144 #else
145 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
146 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
147 #endif
149 /* typedef enum {false, true} boolean; */
151 /* Warn if a database is older than this. 8 days allows for a weekly
152 update that takes up to a day to perform. */
153 #define WARN_NUMBER_UNITS (8)
154 /* Printable name of units used in WARN_SECONDS */
155 static const char warn_name_units[] = N_("days");
156 #define SECONDS_PER_UNIT (60 * 60 * 24)
158 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
/* Verdict returned by every visitor.  Each value is a distinct bit so
   that the process_*() drivers can test for several verdicts at once
   with a single mask.  */
enum visit_result
  {
    VISIT_CONTINUE = 1 << 0,  /* please call the next visitor */
    VISIT_ACCEPTED = 1 << 1,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 1 << 2,  /* rejected, process next file. */
    VISIT_ABORT    = 1 << 3   /* rejected, process no more files. */
  };
/* How the on-disk existence of a matched name affects reporting.  */
enum ExistenceCheckType
  {
    ACCEPT_EITHER,        /* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING,      /* Corresponds to option -e */
    ACCEPT_NON_EXISTING   /* Corresponds to option -E */
  };

/* Check for existence of files before printing them out? */
enum ExistenceCheckType check_existence = ACCEPT_EITHER;

/* Nonzero: existence checks use stat() (-L, the default);
   zero: they use lstat() (-P / -H).  */
static int follow_symlinks = 1;

/* What to separate the results with. */
static int separator = '\n';

/* State used when printing matched names.  */
static struct quoting_options *quote_opts = NULL;
static bool stdout_is_a_tty;
static bool print_quoted_filename;
/* Read in a 16-bit int, high byte first (network byte order), and
   return it as a signed value.

   The two bytes are combined in unsigned arithmetic and only then
   converted to the signed range; left-shifting a negative value (as a
   sign-extended high byte would require) is undefined behaviour in C.
   If fgetc() reports EOF the missing byte is treated as 0xff, so a
   read entirely past end-of-file yields -1.  */
static short
get_short (FILE *fp)
{
  int hi = fgetc (fp);
  int lo = fgetc (fp);
  unsigned int word = (((unsigned int) hi & 0xffu) << 8)
                      | ((unsigned int) lo & 0xffu);

  /* Map 0x8000..0xffff onto -32768..-1 without relying on
     implementation-defined narrowing.  */
  if (word & 0x8000u)
    return (short) ((int) word - 0x10000);
  return (short) word;
}
/* The characters whose presence makes a pattern a shell glob.  */
const char * const metacharacters = "*?[]\\";

/* Return nonzero if S contains any shell glob characters.
   The result is exactly 1 when one is present and 0 otherwise.  */
static int
contains_metacharacter (const char *s)
{
  return strpbrk (s, metacharacters) != NULL;
}
/* locate_read_str()
 *
 * Read bytes from FP into the buffer at offset OFFS in (*BUF),
 * until we reach DELIMITER or end-of-file.  We reallocate the buffer
 * as necessary, altering (*BUF) and (*SIZ) as appropriate.  No assumption
 * is made regarding the content of the data (i.e. the implementation is
 * 8-bit clean, the only delimiter is DELIMITER).
 *
 * Returns the number of bytes read (including the delimiter, when one
 * was found), or -1 on end-of-file, read error, or allocation failure.
 * On success the bytes stored at (*BUF)+OFFS are always followed by a
 * NUL byte: getdelim() terminates its own buffer and that terminator
 * is copied too, so a final record with no delimiter cannot be
 * followed by stale data from a previous, longer entry.
 *
 * The temporary buffer obtained from getdelim() is released on every
 * path, including the failure paths.
 *
 * We call the function locate_read_str() to avoid a name clash with the
 * curses function getstr().
 */
static int
locate_read_str (char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char *line = NULL;
  size_t line_cap = 0;
  int result = -1;
  ssize_t got = getdelim (&line, &line_cap, delimiter, fp);

  if (got >= 0)
    {
      /* Room for the inherited prefix, the new bytes and a NUL.  */
      size_t needed = (size_t) offs + (size_t) got + 1u;

      if (needed > *siz)
        {
          char *grown = realloc (*buf, needed);
          if (NULL == grown)
            {
              free (line);
              return -1;        /* FAIL; *BUF is still valid.  */
            }
          *buf = grown;
          *siz = needed;
        }

      /* +1 copies the terminator that getdelim() appended.  */
      memcpy (*buf + offs, line, (size_t) got + 1u);
      result = (int) got;
    }

  /* getdelim() may allocate even when it fails.  */
  free (line);
  return result;
}
/* Copy the NUL-terminated string SRC to DEST, converting upper-case
   characters to lower case.  DEST must have room for strlen (SRC) + 1
   bytes.  DEST may be the same pointer as SRC (in-place conversion),
   because each byte is read before the corresponding byte is written.

   Each byte is converted to unsigned char before being handed to
   tolower(): passing a negative plain char to a <ctype.h> function is
   undefined behaviour.  */
static void
lc_strcpy (char *dest, const char *src)
{
  for (; *src != '\0'; ++src)
    *dest++ = (char) tolower ((unsigned char) *src);
  *dest = '\0';
}
/* Result limit (--limit) and the running count of accepted entries.  */
struct locate_limits
  {
    uintmax_t limit;
    uintmax_t items_accepted;
  };
static struct locate_limits limits;


/* Counters reported by --statistics.  */
struct locate_stats
  {
    uintmax_t compressed_bytes;
    uintmax_t total_filename_count;
    uintmax_t total_filename_length;
    uintmax_t whitespace_count;
    uintmax_t newline_count;
    uintmax_t highbit_filename_count;
  };
static struct locate_stats statistics;
/* A growable scratch buffer.  PREQLEN points at the size the buffer
   must be able to hold; visit_casefold() grows BUFFER to *PREQLEN + 1
   bytes when BUFFERSIZE is smaller than that.  */
struct stringbuf
  {
    char *buffer;
    size_t buffersize;
    size_t *preqlen;
  };
static struct stringbuf casebuf;


/* Context for a case-insensitive substring match: the pattern and the
   buffer that is searched for it.  */
struct casefolder
  {
    const char *pattern;
    struct stringbuf *pbuf;
  };
307 struct regular_expression
309 struct re_pattern_buffer regex; /* for --regex */
/* State shared between the database reader and all the visitors.  */
struct process_data
  {
    int c;                      /* An input byte.  */
    int count;                  /* The length of the prefix shared with the previous database entry. */
    int len;
    char *original_filename;    /* The current input database entry. */
    size_t pathsize;            /* Amount allocated for it.  */
    char *munged_filename;      /* path or base_name(path) */
    FILE *fp;                   /* The pathname database.  */
    char *dbfile;               /* Its name, or "<stdin>" */
    /* for the old database format,
       the first and second characters of the most common bigrams.  */
    char bigram1[128];
    char bigram2[128];
  };
/* A visitor inspects (and may modify) the current entry and returns
   one of the VISIT_* verdicts.  */
typedef int (*visitfunc) (struct process_data *procdata,
                          void *context);

/* One node of the singly-linked list of visitors.  */
struct visitor
  {
    visitfunc inspector;
    void *context;
    struct visitor *next;
  };


/* Head and tail of the visitor list, and the first visitor that
   follows the per-pattern visitors (set only when there are several
   patterns).  */
static struct visitor *inspectors = NULL;
static struct visitor *lastinspector = NULL;
static struct visitor *past_pat_inspector = NULL;
345 /* 0 or 1 pattern(s) */
346 static int
347 process_simple(struct process_data *procdata)
349 int result = VISIT_CONTINUE;
350 const struct visitor *p = inspectors;
352 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
354 result = (p->inspector)(procdata, p->context);
355 p = p->next;
358 return result;
361 /* Accept if any pattern matches. */
362 static int
363 process_or (struct process_data *procdata)
365 int result = VISIT_CONTINUE;
366 const struct visitor *p = inspectors;
368 while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
370 result = (p->inspector)(procdata, p->context);
371 p = p->next;
374 if (result == VISIT_CONTINUE)
375 result = VISIT_REJECTED;
376 if (result & (VISIT_ABORT | VISIT_REJECTED))
377 return result;
379 p = past_pat_inspector;
380 result = VISIT_CONTINUE;
382 while ( (VISIT_CONTINUE == result) && (NULL != p) )
384 result = (p->inspector)(procdata, p->context);
385 p = p->next;
388 if (VISIT_CONTINUE == result)
389 return VISIT_ACCEPTED;
390 else
391 return result;
394 /* Accept if all pattern match. */
395 static int
396 process_and (struct process_data *procdata)
398 int result = VISIT_CONTINUE;
399 const struct visitor *p = inspectors;
401 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
403 result = (p->inspector)(procdata, p->context);
404 p = p->next;
407 if (result == VISIT_CONTINUE)
408 result = VISIT_REJECTED;
409 if (result & (VISIT_ABORT | VISIT_REJECTED))
410 return result;
412 p = past_pat_inspector;
413 result = VISIT_CONTINUE;
415 while ( (VISIT_CONTINUE == result) && (NULL != p) )
417 result = (p->inspector)(procdata, p->context);
418 p = p->next;
421 if (VISIT_CONTINUE == result)
422 return VISIT_ACCEPTED;
423 else
424 return result;
/* Signature shared by process_simple(), process_or() and process_and().  */
typedef int (*processfunc) (struct process_data *procdata);

/* The driver chosen by locate() for the current database.  */
static processfunc mainprocessor = NULL;
431 static void
432 add_visitor(visitfunc fn, void *context)
434 struct visitor *p = xmalloc(sizeof(struct visitor));
435 p->inspector = fn;
436 p->context = context;
437 p->next = NULL;
439 if (NULL == lastinspector)
441 lastinspector = inspectors = p;
443 else
445 lastinspector->next = p;
446 lastinspector = p;
452 static int
453 visit_justprint_quoted(struct process_data *procdata, void *context)
455 (void) context;
456 print_quoted (stdout, quote_opts, stdout_is_a_tty,
457 "%s",
458 procdata->original_filename);
459 putchar(separator);
460 return VISIT_CONTINUE;
463 static int
464 visit_justprint_unquoted(struct process_data *procdata, void *context)
466 (void) context;
467 fputs(procdata->original_filename, stdout);
468 putchar(separator);
469 return VISIT_CONTINUE;
472 static int
473 visit_old_format(struct process_data *procdata, void *context)
475 register char *s;
476 (void) context;
478 /* Get the offset in the path where this path info starts. */
479 if (procdata->c == LOCATEDB_OLD_ESCAPE)
480 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
481 else
482 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
484 /* Overlay the old path with the remainder of the new. */
485 for (s = procdata->original_filename + procdata->count;
486 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
487 if (procdata->c < 0200)
488 *s++ = procdata->c; /* An ordinary character. */
489 else
491 /* Bigram markers have the high bit set. */
492 procdata->c &= 0177;
493 *s++ = procdata->bigram1[procdata->c];
494 *s++ = procdata->bigram2[procdata->c];
496 *s-- = '\0';
498 procdata->munged_filename = procdata->original_filename;
500 return VISIT_CONTINUE;
504 static int
505 visit_locate02_format(struct process_data *procdata, void *context)
507 register char *s;
508 int nread;
509 (void) context;
511 if (procdata->c == LOCATEDB_ESCAPE)
512 procdata->count += (short)get_short (procdata->fp);
513 else if (procdata->c > 127)
514 procdata->count += procdata->c - 256;
515 else
516 procdata->count += procdata->c;
518 if (procdata->count > procdata->len || procdata->count < 0)
520 /* This should not happen generally , but since we're
521 * reading in data which is outside our control, we
522 * cannot prevent it.
524 error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
527 /* Overlay the old path with the remainder of the new. */
528 nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
529 procdata->fp, 0, procdata->count);
530 if (nread < 0)
531 return VISIT_ABORT;
532 procdata->c = getc (procdata->fp);
533 procdata->len = procdata->count + nread;
534 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
535 assert (s[0] != '\0');
536 assert (s[1] == '\0'); /* Our terminator. */
537 assert (s[2] == '\0'); /* Added by locate_read_str. */
539 procdata->munged_filename = procdata->original_filename;
541 return VISIT_CONTINUE;
544 static int
545 visit_basename(struct process_data *procdata, void *context)
547 (void) context;
548 procdata->munged_filename = base_name(procdata->original_filename);
550 return VISIT_CONTINUE;
554 static int
555 visit_casefold(struct process_data *procdata, void *context)
557 struct stringbuf *b = context;
559 if (*b->preqlen+1 > b->buffersize)
561 b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
562 b->buffersize = *b->preqlen+1;
564 lc_strcpy(b->buffer, procdata->munged_filename);
566 return VISIT_CONTINUE;
569 /* visit_existing_follow implements -L -e */
570 static int
571 visit_existing_follow(struct process_data *procdata, void *context)
573 struct stat st;
574 (void) context;
576 /* munged_filename has been converted in some way (to lower case,
577 * or is just the base name of the file), and original_filename has not.
578 * Hence only original_filename is still actually the name of the file
579 * whose existence we would need to check.
581 if (stat(procdata->original_filename, &st) != 0)
583 return VISIT_REJECTED;
585 else
587 return VISIT_CONTINUE;
591 /* visit_non_existing_follow implements -L -E */
592 static int
593 visit_non_existing_follow(struct process_data *procdata, void *context)
595 struct stat st;
596 (void) context;
598 /* munged_filename has been converted in some way (to lower case,
599 * or is just the base name of the file), and original_filename has not.
600 * Hence only original_filename is still actually the name of the file
601 * whose existence we would need to check.
603 if (stat(procdata->original_filename, &st) == 0)
605 return VISIT_REJECTED;
607 else
609 return VISIT_CONTINUE;
613 /* visit_existing_nofollow implements -P -e */
614 static int
615 visit_existing_nofollow(struct process_data *procdata, void *context)
617 struct stat st;
618 (void) context;
620 /* munged_filename has been converted in some way (to lower case,
621 * or is just the base name of the file), and original_filename has not.
622 * Hence only original_filename is still actually the name of the file
623 * whose existence we would need to check.
625 if (lstat(procdata->original_filename, &st) != 0)
627 return VISIT_REJECTED;
629 else
631 return VISIT_CONTINUE;
635 /* visit_non_existing_nofollow implements -P -E */
636 static int
637 visit_non_existing_nofollow(struct process_data *procdata, void *context)
639 struct stat st;
640 (void) context;
642 /* munged_filename has been converted in some way (to lower case,
643 * or is just the base name of the file), and original_filename has not.
644 * Hence only original_filename is still actually the name of the file
645 * whose existence we would need to check.
647 if (lstat(procdata->original_filename, &st) == 0)
649 return VISIT_REJECTED;
651 else
653 return VISIT_CONTINUE;
657 static int
658 visit_substring_match_nocasefold(struct process_data *procdata, void *context)
660 const char *pattern = context;
662 if (NULL != strstr(procdata->munged_filename, pattern))
663 return VISIT_ACCEPTED;
664 else
665 return VISIT_REJECTED;
668 static int
669 visit_substring_match_casefold(struct process_data *procdata, void *context)
671 const struct casefolder * p = context;
672 const struct stringbuf * b = p->pbuf;
673 (void) procdata;
675 if (NULL != strstr(b->buffer, p->pattern))
676 return VISIT_ACCEPTED;
677 else
678 return VISIT_REJECTED;
682 static int
683 visit_globmatch_nofold(struct process_data *procdata, void *context)
685 const char *glob = context;
686 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
687 return VISIT_REJECTED;
688 else
689 return VISIT_ACCEPTED;
693 static int
694 visit_globmatch_casefold(struct process_data *procdata, void *context)
696 const char *glob = context;
697 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
698 return VISIT_REJECTED;
699 else
700 return VISIT_ACCEPTED;
704 static int
705 visit_regex(struct process_data *procdata, void *context)
707 struct regular_expression *p = context;
708 const size_t len = strlen(procdata->munged_filename);
710 int rv = re_search (&p->regex, procdata->munged_filename,
711 len, 0, len,
712 (struct re_registers *) NULL);
713 if (rv < 0)
715 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
717 else
719 return VISIT_ACCEPTED; /* match */
724 static int
725 visit_stats(struct process_data *procdata, void *context)
727 struct locate_stats *p = context;
728 size_t len = strlen(procdata->original_filename);
729 const char *s;
730 int highbit, whitespace, newline;
732 ++(p->total_filename_count);
733 p->total_filename_length += len;
735 highbit = whitespace = newline = 0;
736 for (s=procdata->original_filename; *s; ++s)
738 if ( (int)(*s) & 128 )
739 highbit = 1;
740 if ('\n' == *s)
742 newline = whitespace = 1;
744 else if (isspace((unsigned char)*s))
746 whitespace = 1;
750 if (highbit)
751 ++(p->highbit_filename_count);
752 if (whitespace)
753 ++(p->whitespace_count);
754 if (newline)
755 ++(p->newline_count);
757 return VISIT_CONTINUE;
761 static int
762 visit_limit(struct process_data *procdata, void *context)
764 struct locate_limits *p = context;
766 (void) procdata;
768 if (++p->items_accepted >= p->limit)
769 return VISIT_ABORT;
770 else
771 return VISIT_CONTINUE;
774 static int
775 visit_count(struct process_data *procdata, void *context)
777 struct locate_limits *p = context;
779 (void) procdata;
781 ++p->items_accepted;
782 return VISIT_CONTINUE;
785 /* Emit the statistics.
787 static void
788 print_stats(int argc, size_t database_file_size)
790 char hbuf[LONGEST_HUMAN_READABLE + 1];
792 printf(_("Locate database size: %s bytes\n"),
793 human_readable ((uintmax_t) database_file_size,
794 hbuf, human_ceiling, 1, 1));
796 printf(_("Filenames: %s "),
797 human_readable (statistics.total_filename_count,
798 hbuf, human_ceiling, 1, 1));
799 printf(_("with a cumulative length of %s bytes"),
800 human_readable (statistics.total_filename_length,
801 hbuf, human_ceiling, 1, 1));
803 printf(_("\n\tof which %s contain whitespace, "),
804 human_readable (statistics.whitespace_count,
805 hbuf, human_ceiling, 1, 1));
806 printf(_("\n\t%s contain newline characters, "),
807 human_readable (statistics.newline_count,
808 hbuf, human_ceiling, 1, 1));
809 printf(_("\n\tand %s contain characters with the high bit set.\n"),
810 human_readable (statistics.highbit_filename_count,
811 hbuf, human_ceiling, 1, 1));
813 if (!argc)
814 printf(_("Compression ratio %4.2f%%\n"),
815 100.0 * ((double)statistics.total_filename_length
816 - (double) database_file_size)
817 / (double) statistics.total_filename_length);
818 printf("\n");
822 /* Print or count the entries in DBFILE that match shell globbing patterns in
823 ARGV. Return the number of entries matched. */
825 static unsigned long
826 locate (int argc,
827 char **argv,
828 char *dbfile,
829 int ignore_case,
830 int enable_print,
831 int basename_only,
832 int use_limit,
833 struct locate_limits *plimit,
834 int stats,
835 int op_and,
836 int regex,
837 int regex_options)
839 char *pathpart; /* A pattern to consider. */
840 int argn; /* Index to current pattern in argv. */
841 int need_fold; /* Set when folding and any pattern is non-glob. */
842 int nread; /* number of bytes read from an entry. */
843 struct process_data procdata; /* Storage for data shared with visitors. */
845 int old_format = 0; /* true if reading a bigram-encoded database. */
846 static bool did_stdin = false; /* Set to prevent rereading stdin. */
847 struct visitor* pvis; /* temp for determining past_pat_inspector. */
849 /* To check the age of the database. */
850 struct stat st;
851 time_t now;
854 if (ignore_case)
855 regex_options |= RE_ICASE;
857 procdata.len = procdata.count = 0;
858 if (!strcmp (dbfile, "-"))
860 if (did_stdin)
862 error (0, 0, _("warning: the locate database can only be read from stdin once."));
863 return 0;
867 procdata.dbfile = "<stdin>";
868 procdata.fp = stdin;
869 did_stdin = true;
871 else
873 if (stat (dbfile, &st) || (procdata.fp = fopen (dbfile, "r")) == NULL)
875 error (0, errno, "%s", dbfile);
876 return 0;
878 time(&now);
879 if (now - st.st_mtime > WARN_SECONDS)
881 /* For example:
882 warning: database `fred' is more than 8 days old */
883 error (0, 0, _("warning: database `%s' is more than %d %s old"),
884 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
886 procdata.dbfile = dbfile;
889 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
890 procdata.original_filename = xmalloc (procdata.pathsize);
892 nread = fread (procdata.original_filename, 1, sizeof (LOCATEDB_MAGIC),
893 procdata.fp);
894 if (nread != sizeof (LOCATEDB_MAGIC)
895 || memcmp (procdata.original_filename, LOCATEDB_MAGIC,
896 sizeof (LOCATEDB_MAGIC)))
898 int i;
899 /* Read the list of the most common bigrams in the database. */
900 nread = fread (procdata.original_filename + sizeof (LOCATEDB_MAGIC), 1,
901 256 - sizeof (LOCATEDB_MAGIC), procdata.fp);
902 for (i = 0; i < 128; i++)
904 procdata.bigram1[i] = procdata.original_filename[i << 1];
905 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
907 old_format = 1;
910 /* Set up the inspection regime */
911 inspectors = NULL;
912 lastinspector = NULL;
913 past_pat_inspector = NULL;
915 if (old_format)
916 add_visitor(visit_old_format, NULL);
917 else
918 add_visitor(visit_locate02_format, NULL);
920 if (basename_only)
921 add_visitor(visit_basename, NULL);
923 /* See if we need fold. */
924 if (ignore_case && !regex)
925 for ( argn = 0; argn < argc; argn++ )
927 pathpart = argv[argn];
928 if (!contains_metacharacter(pathpart))
930 need_fold = 1;
931 break;
935 if (need_fold)
937 add_visitor(visit_casefold, &casebuf);
938 casebuf.preqlen = &procdata.pathsize;
941 /* Add an inspector for each pattern we're looking for. */
942 for ( argn = 0; argn < argc; argn++ )
944 pathpart = argv[argn];
945 if (regex)
947 struct regular_expression *p = xmalloc(sizeof(*p));
948 const char *error_message = NULL;
950 memset (&p->regex, 0, sizeof (p->regex));
952 re_set_syntax(regex_options);
953 p->regex.allocated = 100;
954 p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
955 p->regex.fastmap = NULL;
956 p->regex.syntax = regex_options;
957 p->regex.translate = NULL;
959 error_message = re_compile_pattern (pathpart, strlen (pathpart),
960 &p->regex);
961 if (error_message)
963 error (1, 0, "%s", error_message);
965 else
967 add_visitor(visit_regex, p);
970 else if (contains_metacharacter(pathpart))
972 if (ignore_case)
973 add_visitor(visit_globmatch_casefold, pathpart);
974 else
975 add_visitor(visit_globmatch_nofold, pathpart);
977 else
979 /* No glob characters used. Hence we match on
980 * _any part_ of the filename, not just the
981 * basename. This seems odd to me, but it is the
982 * traditional behaviour.
983 * James Youngman <jay@gnu.org>
985 if (ignore_case)
987 struct casefolder * cf = xmalloc(sizeof(*cf));
988 cf->pattern = pathpart;
989 cf->pbuf = &casebuf;
990 add_visitor(visit_substring_match_casefold, cf);
991 /* If we ignore case, convert it to lower now so we don't have to
992 * do it every time
994 lc_strcpy(pathpart, pathpart);
996 else
998 add_visitor(visit_substring_match_nocasefold, pathpart);
1003 pvis = lastinspector;
1005 /* We add visit_existing_*() as late as possible to reduce the
1006 * number of stat() calls.
1008 switch (check_existence)
1010 case ACCEPT_EXISTING:
1011 if (follow_symlinks) /* -L, default */
1012 add_visitor(visit_existing_follow, NULL);
1013 else /* -P */
1014 add_visitor(visit_existing_nofollow, NULL);
1015 break;
1017 case ACCEPT_NON_EXISTING:
1018 if (follow_symlinks) /* -L, default */
1019 add_visitor(visit_non_existing_follow, NULL);
1020 else /* -P */
1021 add_visitor(visit_non_existing_nofollow, NULL);
1022 break;
1024 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1025 /* do nothing; no extra processing. */
1026 break;
1029 if (stats)
1030 add_visitor(visit_stats, &statistics);
1032 if (enable_print)
1034 if (print_quoted_filename)
1035 add_visitor(visit_justprint_quoted, NULL);
1036 else
1037 add_visitor(visit_justprint_unquoted, NULL);
1041 if (use_limit)
1042 add_visitor(visit_limit, plimit);
1043 else
1044 add_visitor(visit_count, plimit);
1047 if (argc > 1)
1049 past_pat_inspector = pvis->next;
1050 if (op_and)
1051 mainprocessor = process_and;
1052 else
1053 mainprocessor = process_or;
1055 else
1056 mainprocessor = process_simple;
1058 if (stats)
1060 printf(_("Database %s is in the %s format.\n"),
1061 procdata.dbfile,
1062 old_format ? _("old") : "LOCATE02");
1066 procdata.c = getc (procdata.fp);
1067 /* If we are searching for filename patterns, the inspector list
1068 * will contain an entry for each pattern for which we are searching.
1070 while ( (procdata.c != EOF) &&
1071 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1073 /* Do nothing; all the work is done in the visitor functions. */
1076 if (stats)
1078 print_stats(argc, st.st_size);
1081 if (ferror (procdata.fp))
1083 error (0, errno, "%s", procdata.dbfile);
1084 return 0;
1086 if (procdata.fp != stdin && fclose (procdata.fp) == EOF)
1088 error (0, errno, "%s", dbfile);
1089 return 0;
1092 return plimit->items_accepted;
/* Version text printed for --version; the definition is not in this file.  */
extern char *version_string;

/* The name this program was run with. */
char *program_name;
1103 static void
1104 usage (FILE *stream)
1106 fprintf (stream, _("\
1107 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1108 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1109 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1110 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1111 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1112 [-version] [--help]\n\
1113 pattern...\n"),
1114 program_name);
1115 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Codes for long options that have no single-letter equivalent; they
   start above CHAR_MAX so they cannot collide with any option letter.  */
enum
  {
    REGEXTYPE_OPTION = CHAR_MAX + 1
  };


/* Long option table for getopt_long().  */
static struct option const longopts[] =
{
  {"database",     required_argument, NULL, 'd'},
  {"existing",     no_argument,       NULL, 'e'},
  {"non-existing", no_argument,       NULL, 'E'},
  {"ignore-case",  no_argument,       NULL, 'i'},
  {"all",          no_argument,       NULL, 'A'},
  {"help",         no_argument,       NULL, 'h'},
  {"version",      no_argument,       NULL, 'v'},
  {"null",         no_argument,       NULL, '0'},
  {"count",        no_argument,       NULL, 'c'},
  {"wholename",    no_argument,       NULL, 'w'},
  {"wholepath",    no_argument,       NULL, 'w'}, /* Synonym. */
  {"basename",     no_argument,       NULL, 'b'},
  {"print",        no_argument,       NULL, 'p'},
  {"stdio",        no_argument,       NULL, 's'},
  {"mmap",         no_argument,       NULL, 'm'},
  {"limit",        required_argument, NULL, 'l'},
  {"regex",        no_argument,       NULL, 'r'},
  {"regextype",    required_argument, NULL, REGEXTYPE_OPTION},
  {"statistics",   no_argument,       NULL, 'S'},
  {"follow",       no_argument,       NULL, 'L'},
  {"nofollow",     no_argument,       NULL, 'P'},
  {NULL,           no_argument,       NULL, 0}
};
1150 main (int argc, char **argv)
1152 char *dbpath;
1153 unsigned long int found = 0uL;
1154 int optc;
1155 int ignore_case = 0;
1156 int print = 0;
1157 int just_count = 0;
1158 int basename_only = 0;
1159 int use_limit = 0;
1160 int regex = 0;
1161 int regex_options = RE_SYNTAX_EMACS;
1162 int stats = 0;
1163 int op_and = 0;
1164 char *e;
1166 program_name = argv[0];
1168 #ifdef HAVE_SETLOCALE
1169 setlocale (LC_ALL, "");
1170 #endif
1171 bindtextdomain (PACKAGE, LOCALEDIR);
1172 textdomain (PACKAGE);
1173 atexit (close_stdout);
1175 limits.limit = 0;
1176 limits.items_accepted = 0;
1178 quote_opts = clone_quoting_options (NULL);
1179 print_quoted_filename = true;
1181 dbpath = getenv ("LOCATE_PATH");
1182 if (dbpath == NULL)
1183 dbpath = LOCATE_DB;
1185 check_existence = ACCEPT_EITHER;
1187 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1188 switch (optc)
1190 case '0':
1191 separator = 0;
1192 print_quoted_filename = false; /* print filename 'raw'. */
1193 break;
1195 case 'A':
1196 op_and = 1;
1197 break;
1199 case 'b':
1200 basename_only = 1;
1201 break;
1203 case 'c':
1204 just_count = 1;
1205 break;
1207 case 'd':
1208 dbpath = optarg;
1209 break;
1211 case 'e':
1212 check_existence = ACCEPT_EXISTING;
1213 break;
1215 case 'E':
1216 check_existence = ACCEPT_NON_EXISTING;
1217 break;
1219 case 'i':
1220 ignore_case = 1;
1221 break;
1223 case 'h':
1224 usage (stdout);
1225 return 0;
1227 case 'p':
1228 print = 1;
1229 break;
1231 case 'v':
1232 printf (_("GNU locate version %s\n"), version_string);
1233 return 0;
1235 case 'w':
1236 basename_only = 0;
1237 break;
1239 case 'r':
1240 regex = 1;
1241 break;
1243 case REGEXTYPE_OPTION:
1244 regex_options = get_regex_type(optarg);
1245 break;
1247 case 'S':
1248 stats = 1;
1249 break;
1251 case 'L':
1252 follow_symlinks = 1;
1253 break;
1255 /* In find, -P and -H differ in the way they handle paths
1256 * given on the command line. This is not relevant for
1257 * locate, but the -H option is supported because it is
1258 * probably more intuitive to do so.
1260 case 'P':
1261 case 'H':
1262 follow_symlinks = 0;
1263 break;
1265 case 'l':
1267 char *end = optarg;
1268 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1269 if (LONGINT_OK != err)
1271 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1273 use_limit = 1;
1275 break;
1277 case 's': /* use stdio */
1278 case 'm': /* use mmap */
1279 /* These options are implemented simply for
1280 * compatibility with FreeBSD
1282 break;
1284 default:
1285 usage (stderr);
1286 return 1;
1289 if (!just_count && !stats)
1290 print = 1;
1292 if (stats)
1294 if (optind == argc)
1295 use_limit = 0;
1297 else
1299 if (!just_count && optind == argc)
1301 usage (stderr);
1302 return 1;
1307 if (1 == isatty(STDOUT_FILENO))
1308 stdout_is_a_tty = true;
1309 else
1310 stdout_is_a_tty = false;
1312 next_element (dbpath, 0); /* Initialize. */
1314 /* Bail out early if limit already reached. */
1315 while ((e = next_element ((char *) NULL, 0)) != NULL &&
1316 (!use_limit || limits.limit > limits.items_accepted))
1318 statistics.compressed_bytes =
1319 statistics.total_filename_count =
1320 statistics.total_filename_length =
1321 statistics.whitespace_count =
1322 statistics.newline_count =
1323 statistics.highbit_filename_count = 0u;
1325 if (0 == strlen(e) || 0 == strcmp(e, "."))
1327 /* Use the default database name instead (note: we
1328 * don't use 'dbpath' since that might itself contain a
1329 * colon-separated list.
1331 e = LOCATE_DB;
1334 found = locate (argc - optind, &argv[optind], e, ignore_case, print, basename_only, use_limit, &limits, stats, op_and, regex, regex_options);
1337 if (just_count)
1339 printf("%ld\n", found);
1342 if (found || (use_limit && (limits.limit==0)) || stats )
1343 return 0;
1344 else
1345 return 1;