Fixed Savannah bug #13650; programs run by -exec cannot read stdin
[findutils.git] / locate / locate.c
blobe2d9498e4a8b6b60766c01e5030a48d9e94889fb
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <ctype.h>
66 #include <sys/types.h>
67 #include <sys/stat.h>
68 #include <time.h>
69 #include <fnmatch.h>
70 #include <getopt.h>
71 #include <xstrtol.h>
73 /* The presence of unistd.h is assumed by gnulib these days, so we
74 * might as well assume it too.
76 /* We need <unistd.h> for isatty(). */
77 #include <unistd.h>
80 #define NDEBUG
81 #include <assert.h>
83 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
84 #include <string.h>
85 #else
86 #include <strings.h>
87 #define strchr index
88 #endif
90 #ifdef STDC_HEADERS
91 #include <stdlib.h>
92 #endif
94 #ifdef HAVE_ERRNO_H
95 #include <errno.h>
96 #else
97 extern int errno;
98 #endif
100 #ifdef HAVE_LOCALE_H
101 #include <locale.h>
102 #endif
104 #if ENABLE_NLS
105 # include <libintl.h>
106 # define _(Text) gettext (Text)
107 #else
108 # define _(Text) Text
109 #define textdomain(Domain)
110 #define bindtextdomain(Package, Directory)
111 #endif
112 #ifdef gettext_noop
113 # define N_(String) gettext_noop (String)
114 #else
115 /* We used to use (String) instead of just String, but apparently ISO C
116 * doesn't allow this (at least, that's what HP said when someone reported
117 * this as a compiler bug). This is HP case number 1205608192. See
118 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
119 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
120 * like: static const char buf[] = ("string");
122 # define N_(String) String
123 #endif
125 #include "locatedb.h"
126 #include <getline.h>
127 #include "../gnulib/lib/xalloc.h"
128 #include "../gnulib/lib/error.h"
129 #include "../gnulib/lib/human.h"
130 #include "dirname.h"
131 #include "closeout.h"
132 #include "nextelem.h"
133 #include "regex.h"
134 #include "quote.h"
135 #include "quotearg.h"
136 #include "printquoted.h"
137 #include "regextype.h"
140 /* Note that this evaluates Ch many times. */
141 #ifdef _LIBC
142 # define TOUPPER(Ch) toupper (Ch)
143 # define TOLOWER(Ch) tolower (Ch)
144 #else
145 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
146 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
147 #endif
149 /* typedef enum {false, true} boolean; */
151 /* Warn if a database is older than this. 8 days allows for a weekly
152 update that takes up to a day to perform. */
153 #define WARN_NUMBER_UNITS (8)
154 /* Printable name of units used in WARN_SECONDS */
155 static const char warn_name_units[] = N_("days");
156 #define SECONDS_PER_UNIT (60 * 60 * 24)
158 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
/* Verdicts a visitor function can return.  Each value is a distinct
   bit so that the process_*() drivers can test for several verdicts
   at once with a single mask. */
enum visit_result
  {
    VISIT_CONTINUE = 1,  /* please call the next visitor */
    VISIT_ACCEPTED = 2,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 4,  /* rejected, process next file. */
    VISIT_ABORT    = 8   /* rejected, process no more files. */
  };
/* Which database entries to report, depending on whether the named
   file is currently present on disk. */
enum ExistenceCheckType
  {
    ACCEPT_EITHER,        /* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING,      /* Corresponds to option -e */
    ACCEPT_NON_EXISTING   /* Corresponds to option -E */
  };
175 /* Check for existence of files before printing them out? */
176 enum ExistenceCheckType check_existence = ACCEPT_EITHER;
178 static int follow_symlinks = 1;
180 /* What to separate the results with. */
181 static int separator = '\n';
183 static struct quoting_options * quote_opts = NULL;
184 static bool stdout_is_a_tty;
185 static bool print_quoted_filename;
/* Read a 16-bit two's-complement integer from FP, high byte first
   (network byte order), and return it.

   The two bytes are combined as unsigned quantities and only then
   mapped into the signed range; shifting a negative value left, as
   an earlier version of this function did, is undefined behaviour
   in C.

   If end-of-file or a read error occurs, the missing byte is taken
   to be the low 8 bits of EOF, so with the usual EOF of -1 a read
   at end-of-file returns -1. */
static short
get_short (FILE *fp)
{
  /* Two separate statements: the high byte must be read first. */
  int hi = fgetc (fp);
  int lo = fgetc (fp);
  int value = ((hi & 0xff) << 8) | (lo & 0xff);	/* 0 .. 65535 */

  if (value >= 0x8000)
    value -= 0x10000;		/* Fold into -32768 .. -1. */

  return (short) value;
}
/* The characters whose presence makes a pattern a shell glob. */
const char * const metacharacters = "*?[]\\";

/* Return 1 if S contains at least one of the characters listed in
   `metacharacters', and 0 otherwise. */
static int
contains_metacharacter(const char *s)
{
  return strpbrk(s, metacharacters) != NULL;
}
/* locate_read_str()
 *
 * Read bytes from FP into the buffer at offset OFFS in (*BUF), until
 * we reach DELIMITER or end-of-file.  We reallocate the buffer as
 * necessary, altering (*BUF) and (*SIZ) as appropriate.  No
 * assumption is made regarding the content of the data (i.e. the
 * implementation is 8-bit clean, the only delimiter is DELIMITER).
 *
 * The bytes stored at (*BUF)+OFFS are the ones read (including the
 * delimiter when one was seen) followed by a terminating NUL, so the
 * buffer holds a valid string even when the last record in the file
 * has no delimiter.
 *
 * Returns the number of bytes read, delimiter included, or a
 * negative value on end-of-file, read error or allocation failure.
 * On failure (*BUF) and (*SIZ) are left untouched and still valid.
 *
 * Written Fri May 23 18:41:16 2003 by James Youngman, because getstr()
 * has been removed from gnulib.
 *
 * We call the function locate_read_str() to avoid a name clash with the curses
 * function getstr().
 */
static int
locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char *line = NULL;		/* Scratch buffer owned by getdelim(). */
  size_t line_alloc = 0;
  size_t needed;
  int nread;

  nread = getdelim(&line, &line_alloc, delimiter, fp);
  if (nread < 0)
    {
      /* getdelim() may have allocated LINE even though it failed. */
      free(line);
      return nread;
    }

  /* Room for the prefix we keep, the new bytes and the NUL. */
  needed = (size_t) offs + (size_t) nread + 1u;
  if (needed > *siz)
    {
      char *pnew = realloc(*buf, needed);
      if (NULL == pnew)
	{
	  free(line);
	  return -1;		/* FAIL */
	}
      *siz = needed;
      *buf = pnew;
    }

  /* getdelim() always NUL-terminates LINE; copy that NUL as well. */
  memcpy((*buf) + offs, line, (size_t) nread + 1u);
  free(line);
  return nread;
}
/* Copy the NUL-terminated string SRC to DEST, converting each
   character to lower case.  DEST must have room for strlen(SRC) + 1
   bytes.  DEST may be the same pointer as SRC (in-place conversion),
   because every byte is read before the matching byte is written.

   Each character is converted to unsigned char before it is handed
   to tolower(); passing a negative plain char to a <ctype.h>
   function is undefined behaviour. */
static void
lc_strcpy(char *dest, const char *src)
{
  for (; *src != '\0'; ++src, ++dest)
    *dest = (char) tolower ((unsigned char) *src);
  *dest = '\0';
}
/* Counters shared by visit_limit() and visit_count(). */
struct locate_limits
{
  uintmax_t limit;		/* Value of --limit / -l. */
  uintmax_t items_accepted;	/* Entries that reached the final visitor. */
};

/* The single instance; main() zeroes it before option parsing. */
static struct locate_limits limits;
/* Totals accumulated by visit_stats() and reported by print_stats(). */
struct locate_stats
{
  uintmax_t compressed_bytes;		/* Only ever reset in this file. */
  uintmax_t total_filename_count;	/* Number of entries seen. */
  uintmax_t total_filename_length;	/* Sum of their strlen(). */
  uintmax_t whitespace_count;		/* Names containing whitespace. */
  uintmax_t newline_count;		/* Names containing '\n'. */
  uintmax_t highbit_filename_count;	/* Names with a byte >= 128. */
};

/* The single instance; main() resets it for every database. */
static struct locate_stats statistics;
/* A growable scratch buffer; visit_casefold() stores the lower-cased
   copy of the current filename in it. */
struct stringbuf
{
  char *buffer;			/* The storage itself. */
  size_t buffersize;		/* Bytes currently allocated for BUFFER. */
  size_t *soffs;		/* Set by locate(); not read in this file. */
  size_t *preqlen;		/* Points at the size BUFFER must be able to hold. */
};

/* The buffer shared by all case-folding visitors. */
static struct stringbuf casebuf;
/* Context for visit_substring_match_casefold(): the (already
   lower-cased) pattern and the buffer holding the lower-cased name. */
struct casefolder
{
  const char *pattern;
  struct stringbuf *pbuf;
};
307 struct regular_expression
309 struct re_pattern_buffer regex; /* for --regex */
/* State shared between locate() and every visitor while one database
   is being read. */
struct process_data
{
  int c;			/* An input byte. */
  int count;			/* The length of the prefix shared with the
				   previous database entry. */
  int len;			/* Length of the current entry, as kept by
				   visit_locate02_format(). */
  char *original_filename;	/* The current input database entry. */
  size_t pathsize;		/* Amount allocated for it. */
  char *munged_filename;	/* path or base_name(path) */
  FILE *fp;			/* The pathname database. */
  char *dbfile;			/* Its name, or "<stdin>" */

  /* For the old database format: the first and second characters of
     the most common bigrams. */
  char bigram1[128];
  char bigram2[128];
};
/* Signature of a visitor: it inspects (and may update) PROCDATA,
   using its private CONTEXT, and returns an enum visit_result. */
typedef int (*visitfunc) (struct process_data *procdata, void *context);
333 struct visitor
335 visitfunc inspector;
336 void * context;
337 struct visitor *next;
/* Head of the visitor list. */
static struct visitor *inspectors = NULL;
/* Tail of the visitor list; add_visitor() appends here. */
static struct visitor *lastinspector = NULL;
/* First visitor after the per-pattern ones; only set by locate()
   when there is more than one pattern. */
static struct visitor *past_pat_inspector = NULL;
345 /* 0 or 1 pattern(s) */
346 static int
347 process_simple(struct process_data *procdata)
349 int result = VISIT_CONTINUE;
350 const struct visitor *p = inspectors;
352 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
354 result = (p->inspector)(procdata, p->context);
355 p = p->next;
358 return result;
361 /* Accept if any pattern matches. */
362 static int
363 process_or (struct process_data *procdata)
365 int result = VISIT_CONTINUE;
366 const struct visitor *p = inspectors;
368 while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
370 result = (p->inspector)(procdata, p->context);
371 p = p->next;
374 if (result == VISIT_CONTINUE)
375 result = VISIT_REJECTED;
376 if (result & (VISIT_ABORT | VISIT_REJECTED))
377 return result;
379 p = past_pat_inspector;
380 result = VISIT_CONTINUE;
382 while ( (VISIT_CONTINUE == result) && (NULL != p) )
384 result = (p->inspector)(procdata, p->context);
385 p = p->next;
388 if (VISIT_CONTINUE == result)
389 return VISIT_ACCEPTED;
390 else
391 return result;
394 /* Accept if all pattern match. */
395 static int
396 process_and (struct process_data *procdata)
398 int result = VISIT_CONTINUE;
399 const struct visitor *p = inspectors;
401 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
403 result = (p->inspector)(procdata, p->context);
404 p = p->next;
407 if (result == VISIT_CONTINUE)
408 result = VISIT_REJECTED;
409 if (result & (VISIT_ABORT | VISIT_REJECTED))
410 return result;
412 p = past_pat_inspector;
413 result = VISIT_CONTINUE;
415 while ( (VISIT_CONTINUE == result) && (NULL != p) )
417 result = (p->inspector)(procdata, p->context);
418 p = p->next;
421 if (VISIT_CONTINUE == result)
422 return VISIT_ACCEPTED;
423 else
424 return result;
/* Signature shared by process_simple(), process_or() and
   process_and(). */
typedef int (*processfunc) (struct process_data *procdata);

/* The driver locate() selected for the current database. */
static processfunc mainprocessor = NULL;
431 static void
432 add_visitor(visitfunc fn, void *context)
434 struct visitor *p = xmalloc(sizeof(struct visitor));
435 p->inspector = fn;
436 p->context = context;
437 p->next = NULL;
439 if (NULL == lastinspector)
441 lastinspector = inspectors = p;
443 else
445 lastinspector->next = p;
446 lastinspector = p;
452 static int
453 visit_justprint_quoted(struct process_data *procdata, void *context)
455 (void) context;
456 print_quoted (stdout, quote_opts, stdout_is_a_tty,
457 "%s",
458 procdata->original_filename);
459 putchar(separator);
460 return VISIT_CONTINUE;
463 static int
464 visit_justprint_unquoted(struct process_data *procdata, void *context)
466 (void) context;
467 fputs(procdata->original_filename, stdout);
468 putchar(separator);
469 return VISIT_CONTINUE;
472 static int
473 visit_old_format(struct process_data *procdata, void *context)
475 register char *s;
476 (void) context;
478 /* Get the offset in the path where this path info starts. */
479 if (procdata->c == LOCATEDB_OLD_ESCAPE)
480 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
481 else
482 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
484 /* Overlay the old path with the remainder of the new. */
485 for (s = procdata->original_filename + procdata->count;
486 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
487 if (procdata->c < 0200)
488 *s++ = procdata->c; /* An ordinary character. */
489 else
491 /* Bigram markers have the high bit set. */
492 procdata->c &= 0177;
493 *s++ = procdata->bigram1[procdata->c];
494 *s++ = procdata->bigram2[procdata->c];
496 *s-- = '\0';
498 procdata->munged_filename = procdata->original_filename;
500 return VISIT_CONTINUE;
504 static int
505 visit_locate02_format(struct process_data *procdata, void *context)
507 register char *s;
508 int nread;
509 (void) context;
511 if (procdata->c == LOCATEDB_ESCAPE)
512 procdata->count += (short)get_short (procdata->fp);
513 else if (procdata->c > 127)
514 procdata->count += procdata->c - 256;
515 else
516 procdata->count += procdata->c;
518 if (procdata->count > procdata->len || procdata->count < 0)
520 /* This should not happen generally , but since we're
521 * reading in data which is outside our control, we
522 * cannot prevent it.
524 error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
527 /* Overlay the old path with the remainder of the new. */
528 nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
529 procdata->fp, 0, procdata->count);
530 if (nread < 0)
531 return VISIT_ABORT;
532 procdata->c = getc (procdata->fp);
533 procdata->len = procdata->count + nread;
534 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
535 assert (s[0] != '\0');
536 assert (s[1] == '\0'); /* Our terminator. */
537 assert (s[2] == '\0'); /* Added by locate_read_str. */
539 procdata->munged_filename = procdata->original_filename;
541 return VISIT_CONTINUE;
544 static int
545 visit_basename(struct process_data *procdata, void *context)
547 (void) context;
548 procdata->munged_filename = base_name(procdata->original_filename);
550 return VISIT_CONTINUE;
554 static int
555 visit_casefold(struct process_data *procdata, void *context)
557 struct stringbuf *b = context;
559 if (*b->preqlen+1 > b->buffersize)
561 b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
562 b->buffersize = *b->preqlen+1;
564 lc_strcpy(b->buffer, procdata->munged_filename);
566 return VISIT_CONTINUE;
569 /* visit_existing_follow implements -L -e */
570 static int
571 visit_existing_follow(struct process_data *procdata, void *context)
573 struct stat st;
574 (void) context;
576 /* munged_filename has been converted in some way (to lower case,
577 * or is just the base name of the file), and original_filename has not.
578 * Hence only original_filename is still actually the name of the file
579 * whose existence we would need to check.
581 if (stat(procdata->original_filename, &st) != 0)
583 return VISIT_REJECTED;
585 else
587 return VISIT_CONTINUE;
591 /* visit_non_existing_follow implements -L -E */
592 static int
593 visit_non_existing_follow(struct process_data *procdata, void *context)
595 struct stat st;
596 (void) context;
598 /* munged_filename has been converted in some way (to lower case,
599 * or is just the base name of the file), and original_filename has not.
600 * Hence only original_filename is still actually the name of the file
601 * whose existence we would need to check.
603 if (stat(procdata->original_filename, &st) == 0)
605 return VISIT_REJECTED;
607 else
609 return VISIT_CONTINUE;
613 /* visit_existing_nofollow implements -P -e */
614 static int
615 visit_existing_nofollow(struct process_data *procdata, void *context)
617 struct stat st;
618 (void) context;
620 /* munged_filename has been converted in some way (to lower case,
621 * or is just the base name of the file), and original_filename has not.
622 * Hence only original_filename is still actually the name of the file
623 * whose existence we would need to check.
625 if (lstat(procdata->original_filename, &st) != 0)
627 return VISIT_REJECTED;
629 else
631 return VISIT_CONTINUE;
635 /* visit_non_existing_nofollow implements -P -E */
636 static int
637 visit_non_existing_nofollow(struct process_data *procdata, void *context)
639 struct stat st;
640 (void) context;
642 /* munged_filename has been converted in some way (to lower case,
643 * or is just the base name of the file), and original_filename has not.
644 * Hence only original_filename is still actually the name of the file
645 * whose existence we would need to check.
647 if (lstat(procdata->original_filename, &st) == 0)
649 return VISIT_REJECTED;
651 else
653 return VISIT_CONTINUE;
657 static int
658 visit_substring_match_nocasefold(struct process_data *procdata, void *context)
660 const char *pattern = context;
662 if (NULL != strstr(procdata->munged_filename, pattern))
663 return VISIT_ACCEPTED;
664 else
665 return VISIT_REJECTED;
668 static int
669 visit_substring_match_casefold(struct process_data *procdata, void *context)
671 const struct casefolder * p = context;
672 const struct stringbuf * b = p->pbuf;
673 (void) procdata;
675 if (NULL != strstr(b->buffer, p->pattern))
676 return VISIT_ACCEPTED;
677 else
678 return VISIT_REJECTED;
682 static int
683 visit_globmatch_nofold(struct process_data *procdata, void *context)
685 const char *glob = context;
686 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
687 return VISIT_REJECTED;
688 else
689 return VISIT_ACCEPTED;
693 static int
694 visit_globmatch_casefold(struct process_data *procdata, void *context)
696 const char *glob = context;
697 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
698 return VISIT_REJECTED;
699 else
700 return VISIT_ACCEPTED;
704 static int
705 visit_regex(struct process_data *procdata, void *context)
707 struct regular_expression *p = context;
708 const size_t len = strlen(procdata->munged_filename);
710 int rv = re_search (&p->regex, procdata->munged_filename,
711 len, 0, len,
712 (struct re_registers *) NULL);
713 if (rv < 0)
715 return VISIT_REJECTED; /* no match (-1), or internal error (-2) */
717 else
719 return VISIT_ACCEPTED; /* match */
724 static int
725 visit_stats(struct process_data *procdata, void *context)
727 struct locate_stats *p = context;
728 size_t len = strlen(procdata->original_filename);
729 const char *s;
730 int highbit, whitespace, newline;
732 ++(p->total_filename_count);
733 p->total_filename_length += len;
735 highbit = whitespace = newline = 0;
736 for (s=procdata->original_filename; *s; ++s)
738 if ( (int)(*s) & 128 )
739 highbit = 1;
740 if ('\n' == *s)
742 newline = whitespace = 1;
744 else if (isspace((unsigned char)*s))
746 whitespace = 1;
750 if (highbit)
751 ++(p->highbit_filename_count);
752 if (whitespace)
753 ++(p->whitespace_count);
754 if (newline)
755 ++(p->newline_count);
757 return VISIT_CONTINUE;
761 static int
762 visit_limit(struct process_data *procdata, void *context)
764 struct locate_limits *p = context;
766 (void) procdata;
768 if (++p->items_accepted >= p->limit)
769 return VISIT_ABORT;
770 else
771 return VISIT_CONTINUE;
774 static int
775 visit_count(struct process_data *procdata, void *context)
777 struct locate_limits *p = context;
779 (void) procdata;
781 ++p->items_accepted;
782 return VISIT_CONTINUE;
785 /* Emit the statistics.
787 static void
788 print_stats(int argc, size_t database_file_size)
790 char hbuf[LONGEST_HUMAN_READABLE + 1];
792 printf(_("Locate database size: %s bytes\n"),
793 human_readable ((uintmax_t) database_file_size,
794 hbuf, human_ceiling, 1, 1));
796 printf(_("Filenames: %s "),
797 human_readable (statistics.total_filename_count,
798 hbuf, human_ceiling, 1, 1));
799 printf(_("with a cumulative length of %s bytes"),
800 human_readable (statistics.total_filename_length,
801 hbuf, human_ceiling, 1, 1));
803 printf(_("\n\tof which %s contain whitespace, "),
804 human_readable (statistics.whitespace_count,
805 hbuf, human_ceiling, 1, 1));
806 printf(_("\n\t%s contain newline characters, "),
807 human_readable (statistics.newline_count,
808 hbuf, human_ceiling, 1, 1));
809 printf(_("\n\tand %s contain characters with the high bit set.\n"),
810 human_readable (statistics.highbit_filename_count,
811 hbuf, human_ceiling, 1, 1));
813 if (!argc)
814 printf(_("Compression ratio %4.2f%%\n"),
815 100.0 * ((double)statistics.total_filename_length
816 - (double) database_file_size)
817 / (double) statistics.total_filename_length);
818 printf("\n");
822 /* Print or count the entries in DBFILE that match shell globbing patterns in
823 ARGV. Return the number of entries matched. */
825 static unsigned long
826 locate (int argc,
827 char **argv,
828 char *dbfile,
829 int ignore_case,
830 int enable_print,
831 int basename_only,
832 int use_limit,
833 struct locate_limits *plimit,
834 int stats,
835 int op_and,
836 int regex,
837 int regex_options)
839 char *pathpart; /* A pattern to consider. */
840 int argn; /* Index to current pattern in argv. */
841 int need_fold; /* Set when folding and any pattern is non-glob. */
842 int nread; /* number of bytes read from an entry. */
843 struct process_data procdata; /* Storage for data shared with visitors. */
845 int old_format = 0; /* true if reading a bigram-encoded database. */
846 static bool did_stdin = false; /* Set to prevent rereading stdin. */
847 struct visitor* pvis; /* temp for determining past_pat_inspector. */
849 /* To check the age of the database. */
850 struct stat st;
851 time_t now;
854 if (ignore_case)
855 regex_options |= RE_ICASE;
857 procdata.len = procdata.count = 0;
858 if (!strcmp (dbfile, "-"))
860 if (did_stdin)
862 error (0, 0, _("warning: the locate database can only be read from stdin once."));
863 return 0;
867 procdata.dbfile = "<stdin>";
868 procdata.fp = stdin;
869 did_stdin = true;
871 else
873 if (stat (dbfile, &st) || (procdata.fp = fopen (dbfile, "r")) == NULL)
875 error (0, errno, "%s", dbfile);
876 return 0;
878 time(&now);
879 if (now - st.st_mtime > WARN_SECONDS)
881 /* For example:
882 warning: database `fred' is more than 8 days old */
883 error (0, 0, _("warning: database `%s' is more than %d %s old"),
884 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
886 procdata.dbfile = dbfile;
889 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
890 procdata.original_filename = xmalloc (procdata.pathsize);
892 nread = fread (procdata.original_filename, 1, sizeof (LOCATEDB_MAGIC),
893 procdata.fp);
894 if (nread != sizeof (LOCATEDB_MAGIC)
895 || memcmp (procdata.original_filename, LOCATEDB_MAGIC,
896 sizeof (LOCATEDB_MAGIC)))
898 int i;
899 /* Read the list of the most common bigrams in the database. */
900 nread = fread (procdata.original_filename + sizeof (LOCATEDB_MAGIC), 1,
901 256 - sizeof (LOCATEDB_MAGIC), procdata.fp);
902 for (i = 0; i < 128; i++)
904 procdata.bigram1[i] = procdata.original_filename[i << 1];
905 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
907 old_format = 1;
910 /* Set up the inspection regime */
911 inspectors = NULL;
912 lastinspector = NULL;
913 past_pat_inspector = NULL;
915 if (old_format)
916 add_visitor(visit_old_format, NULL);
917 else
918 add_visitor(visit_locate02_format, NULL);
920 if (basename_only)
921 add_visitor(visit_basename, NULL);
923 /* See if we need fold. */
924 if (ignore_case && !regex)
925 for ( argn = 0; argn < argc; argn++ )
927 pathpart = argv[argn];
928 if (!contains_metacharacter(pathpart))
930 need_fold = 1;
931 break;
935 if (need_fold)
937 add_visitor(visit_casefold, &casebuf);
938 casebuf.preqlen = &procdata.pathsize;
939 casebuf.soffs = &procdata.count;
942 /* Add an inspector for each pattern we're looking for. */
943 for ( argn = 0; argn < argc; argn++ )
945 pathpart = argv[argn];
946 if (regex)
948 struct regular_expression *p = xmalloc(sizeof(*p));
949 const char *error_message = NULL;
951 memset (&p->regex, 0, sizeof (p->regex));
953 re_set_syntax(regex_options);
954 p->regex.allocated = 100;
955 p->regex.buffer = (unsigned char *) xmalloc (p->regex.allocated);
956 p->regex.fastmap = NULL;
957 p->regex.syntax = regex_options;
958 p->regex.translate = NULL;
960 error_message = re_compile_pattern (pathpart, strlen (pathpart),
961 &p->regex);
962 if (error_message)
964 error (1, 0, "%s", error_message);
966 else
968 add_visitor(visit_regex, p);
971 else if (contains_metacharacter(pathpart))
973 if (ignore_case)
974 add_visitor(visit_globmatch_casefold, pathpart);
975 else
976 add_visitor(visit_globmatch_nofold, pathpart);
978 else
980 /* No glob characters used. Hence we match on
981 * _any part_ of the filename, not just the
982 * basename. This seems odd to me, but it is the
983 * traditional behaviour.
984 * James Youngman <jay@gnu.org>
986 if (ignore_case)
988 struct casefolder * cf = xmalloc(sizeof(*cf));
989 cf->pattern = pathpart;
990 cf->pbuf = &casebuf;
991 add_visitor(visit_substring_match_casefold, cf);
992 /* If we ignore case, convert it to lower now so we don't have to
993 * do it every time
995 lc_strcpy(pathpart, pathpart);
997 else
999 add_visitor(visit_substring_match_nocasefold, pathpart);
1004 pvis = lastinspector;
1006 /* We add visit_existing_*() as late as possible to reduce the
1007 * number of stat() calls.
1009 switch (check_existence)
1011 case ACCEPT_EXISTING:
1012 if (follow_symlinks) /* -L, default */
1013 add_visitor(visit_existing_follow, NULL);
1014 else /* -P */
1015 add_visitor(visit_existing_nofollow, NULL);
1016 break;
1018 case ACCEPT_NON_EXISTING:
1019 if (follow_symlinks) /* -L, default */
1020 add_visitor(visit_non_existing_follow, NULL);
1021 else /* -P */
1022 add_visitor(visit_non_existing_nofollow, NULL);
1023 break;
1025 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1026 /* do nothing; no extra processing. */
1027 break;
1030 if (stats)
1031 add_visitor(visit_stats, &statistics);
1033 if (enable_print)
1035 if (print_quoted_filename)
1036 add_visitor(visit_justprint_quoted, NULL);
1037 else
1038 add_visitor(visit_justprint_unquoted, NULL);
1042 if (use_limit)
1043 add_visitor(visit_limit, plimit);
1044 else
1045 add_visitor(visit_count, plimit);
1048 if (argc > 1)
1050 past_pat_inspector = pvis->next;
1051 if (op_and)
1052 mainprocessor = process_and;
1053 else
1054 mainprocessor = process_or;
1056 else
1057 mainprocessor = process_simple;
1059 if (stats)
1061 printf(_("Database %s is in the %s format.\n"),
1062 procdata.dbfile,
1063 old_format ? _("old") : "LOCATE02");
1067 procdata.c = getc (procdata.fp);
1068 /* If we are searching for filename patterns, the inspector list
1069 * will contain an entry for each pattern for which we are searching.
1071 while ( (procdata.c != EOF) &&
1072 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1074 /* Do nothing; all the work is done in the visitor functions. */
1077 if (stats)
1079 print_stats(argc, st.st_size);
1082 if (ferror (procdata.fp))
1084 error (0, errno, "%s", procdata.dbfile);
1085 return 0;
1087 if (procdata.fp != stdin && fclose (procdata.fp) == EOF)
1089 error (0, errno, "%s", dbfile);
1090 return 0;
1093 return plimit->items_accepted;
/* Defined in another file; printed by --version. */
extern char *version_string;

/* The name this program was run with; main() sets it to argv[0]. */
char *program_name;
1104 static void
1105 usage (FILE *stream)
1107 fprintf (stream, _("\
1108 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1109 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1110 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1111 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1112 [-A | --all] [-p | --print] [-r | --regex ] [--regextype=TYPE]\n\
1113 [-version] [--help]\n\
1114 pattern...\n"),
1115 program_name);
1116 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Codes for long options that have no single-letter equivalent;
   they start above CHAR_MAX so that they cannot collide with any
   option character. */
enum
  {
    REGEXTYPE_OPTION = CHAR_MAX + 1
  };
1124 static struct option const longopts[] =
1126 {"database", required_argument, NULL, 'd'},
1127 {"existing", no_argument, NULL, 'e'},
1128 {"non-existing", no_argument, NULL, 'E'},
1129 {"ignore-case", no_argument, NULL, 'i'},
1130 {"all", no_argument, NULL, 'A'},
1131 {"help", no_argument, NULL, 'h'},
1132 {"version", no_argument, NULL, 'v'},
1133 {"null", no_argument, NULL, '0'},
1134 {"count", no_argument, NULL, 'c'},
1135 {"wholename", no_argument, NULL, 'w'},
1136 {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
1137 {"basename", no_argument, NULL, 'b'},
1138 {"print", no_argument, NULL, 'p'},
1139 {"stdio", no_argument, NULL, 's'},
1140 {"mmap", no_argument, NULL, 'm'},
1141 {"limit", required_argument, NULL, 'l'},
1142 {"regex", no_argument, NULL, 'r'},
1143 {"regextype", required_argument, NULL, REGEXTYPE_OPTION},
1144 {"statistics", no_argument, NULL, 'S'},
1145 {"follow", no_argument, NULL, 'L'},
1146 {"nofollow", no_argument, NULL, 'P'},
1147 {NULL, no_argument, NULL, 0}
1151 main (int argc, char **argv)
1153 char *dbpath;
1154 unsigned long int found = 0uL;
1155 int optc;
1156 int ignore_case = 0;
1157 int print = 0;
1158 int just_count = 0;
1159 int basename_only = 0;
1160 int use_limit = 0;
1161 int regex = 0;
1162 int regex_options = RE_SYNTAX_EMACS;
1163 int stats = 0;
1164 int op_and = 0;
1165 char *e;
1167 program_name = argv[0];
1169 #ifdef HAVE_SETLOCALE
1170 setlocale (LC_ALL, "");
1171 #endif
1172 bindtextdomain (PACKAGE, LOCALEDIR);
1173 textdomain (PACKAGE);
1174 atexit (close_stdout);
1176 limits.limit = 0;
1177 limits.items_accepted = 0;
1179 quote_opts = clone_quoting_options (NULL);
1180 print_quoted_filename = true;
1182 dbpath = getenv ("LOCATE_PATH");
1183 if (dbpath == NULL)
1184 dbpath = LOCATE_DB;
1186 check_existence = ACCEPT_EITHER;
1188 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1189 switch (optc)
1191 case '0':
1192 separator = 0;
1193 print_quoted_filename = false; /* print filename 'raw'. */
1194 break;
1196 case 'A':
1197 op_and = 1;
1198 break;
1200 case 'b':
1201 basename_only = 1;
1202 break;
1204 case 'c':
1205 just_count = 1;
1206 break;
1208 case 'd':
1209 dbpath = optarg;
1210 break;
1212 case 'e':
1213 check_existence = ACCEPT_EXISTING;
1214 break;
1216 case 'E':
1217 check_existence = ACCEPT_NON_EXISTING;
1218 break;
1220 case 'i':
1221 ignore_case = 1;
1222 break;
1224 case 'h':
1225 usage (stdout);
1226 return 0;
1228 case 'p':
1229 print = 1;
1230 break;
1232 case 'v':
1233 printf (_("GNU locate version %s\n"), version_string);
1234 return 0;
1236 case 'w':
1237 basename_only = 0;
1238 break;
1240 case 'r':
1241 regex = 1;
1242 break;
1244 case REGEXTYPE_OPTION:
1245 regex_options = get_regex_type(optarg);
1246 break;
1248 case 'S':
1249 stats = 1;
1250 break;
1252 case 'L':
1253 follow_symlinks = 1;
1254 break;
1256 /* In find, -P and -H differ in the way they handle paths
1257 * given on the command line. This is not relevant for
1258 * locate, but the -H option is supported because it is
1259 * probably more intuitive to do so.
1261 case 'P':
1262 case 'H':
1263 follow_symlinks = 0;
1264 break;
1266 case 'l':
1268 char *end = optarg;
1269 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1270 if (LONGINT_OK != err)
1272 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1274 use_limit = 1;
1276 break;
1278 case 's': /* use stdio */
1279 case 'm': /* use mmap */
1280 /* These options are implemented simply for
1281 * compatibility with FreeBSD
1283 break;
1285 default:
1286 usage (stderr);
1287 return 1;
1290 if (!just_count && !stats)
1291 print = 1;
1293 if (stats)
1295 if (optind == argc)
1296 use_limit = 0;
1298 else
1300 if (!just_count && optind == argc)
1302 usage (stderr);
1303 return 1;
1308 if (1 == isatty(STDOUT_FILENO))
1309 stdout_is_a_tty = true;
1310 else
1311 stdout_is_a_tty = false;
1313 next_element (dbpath, 0); /* Initialize. */
1315 /* Bail out early if limit already reached. */
1316 while ((e = next_element ((char *) NULL, 0)) != NULL &&
1317 (!use_limit || limits.limit > limits.items_accepted))
1319 statistics.compressed_bytes =
1320 statistics.total_filename_count =
1321 statistics.total_filename_length =
1322 statistics.whitespace_count =
1323 statistics.newline_count =
1324 statistics.highbit_filename_count = 0u;
1326 if (0 == strlen(e) || 0 == strcmp(e, "."))
1328 /* Use the default database name instead (note: we
1329 * don't use 'dbpath' since that might itself contain a
1330 * colon-separated list.
1332 e = LOCATE_DB;
1335 found = locate (argc - optind, &argv[optind], e, ignore_case, print, basename_only, use_limit, &limits, stats, op_and, regex, regex_options);
1338 if (just_count)
1340 printf("%ld\n", found);
1343 if (found || (use_limit && (limits.limit==0)) || stats )
1344 return 0;
1345 else
1346 return 1;