Assume unistd.h is present - avoid using HAVE_UNISTD_H
[findutils.git] / locate / locate.c
blob0b57452eeb6f46fc13302f529947ada63aab1fc2
1 /* locate -- search databases for filenames that match patterns
2 Copyright (C) 1994, 1996, 1998, 1999, 2000, 2003,
3 2004, 2005 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
21 /* Usage: locate [options] pattern...
23 Scan a pathname list for the full pathname of a file, given only
24 a piece of the name (possibly containing shell globbing metacharacters).
25 The list has been processed with front-compression, which reduces
26 the list size by a factor of 4-5.
27 Recognizes two database formats, old and new. The old format is
28 bigram coded, which reduces space by a further 20-25% and uses the
29 following encoding of the database bytes:
31 0-28 likeliest differential counts + offset (14) to make nonnegative
32 30 escape code for out-of-range count to follow in next halfword
33 128-255 bigram codes (the 128 most common, as determined by `updatedb')
34 32-127 single character (printable) ASCII remainder
36 Earlier versions of GNU locate used to use a novel two-tiered
37 string search technique, which was described in Usenix ;login:, Vol
38 8, No 1, February/March, 1983, p. 8.
40 However, latterly code changes to provide additional functionality
41 became difficult to make with the existing reading scheme, and so
42 we no longer perform the matching as efficiently as we used to (that is,
43 we no longer use the same algorithm).
45 The old algorithm was:
47 First, match a metacharacter-free subpattern and a partial
48 pathname BACKWARDS to avoid full expansion of the pathname list.
49 The time savings is 40-50% over forward matching, which cannot
50 efficiently handle overlapped search patterns and compressed
51 path remainders.
53 Then, match the actual shell glob pattern (if in this form)
54 against the candidate pathnames using the slower shell filename
55 matching routines.
58 Written by James A. Woods <jwoods@adobe.com>.
59 Modified by David MacKenzie <djm@gnu.org>.
60 Additional work by James Youngman and Bas van Gompel.
63 #include <config.h>
64 #include <stdio.h>
65 #include <ctype.h>
66 #include <sys/types.h>
67 #include <sys/stat.h>
68 #include <time.h>
69 #include <fnmatch.h>
70 #include <getopt.h>
71 #include <xstrtol.h>
73 /* The presence of unistd.h is assumed by gnulib these days, so we
74 * might as well assume it too.
76 /* We need <unistd.h> for isatty(). */
77 #include <unistd.h>
80 #define NDEBUG
81 #include <assert.h>
83 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
84 #include <string.h>
85 #else
86 #include <strings.h>
87 #define strchr index
88 #endif
90 #ifdef STDC_HEADERS
91 #include <stdlib.h>
92 #endif
94 #ifdef HAVE_ERRNO_H
95 #include <errno.h>
96 #else
97 extern int errno;
98 #endif
100 #ifdef HAVE_LOCALE_H
101 #include <locale.h>
102 #endif
104 #if ENABLE_NLS
105 # include <libintl.h>
106 # define _(Text) gettext (Text)
107 #else
108 # define _(Text) Text
109 #define textdomain(Domain)
110 #define bindtextdomain(Package, Directory)
111 #endif
112 #ifdef gettext_noop
113 # define N_(String) gettext_noop (String)
114 #else
115 /* We used to use (String) instead of just String, but apparently ISO C
116 * doesn't allow this (at least, that's what HP said when someone reported
117 * this as a compiler bug). This is HP case number 1205608192. See
118 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
119 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
120 * like: static const char buf[] = ("string");
122 # define N_(String) String
123 #endif
125 #include "locatedb.h"
126 #include <getline.h>
127 #include "../gnulib/lib/xalloc.h"
128 #include "../gnulib/lib/error.h"
129 #include "../gnulib/lib/human.h"
130 #include "dirname.h"
131 #include "closeout.h"
132 #include "nextelem.h"
133 #include "regex.h"
134 #include "quote.h"
135 #include "quotearg.h"
136 #include "printquoted.h"
139 /* Note that these macros may evaluate Ch more than once. */
140 #ifdef _LIBC
141 # define TOUPPER(Ch) toupper (Ch)
142 # define TOLOWER(Ch) tolower (Ch)
143 #else
144 # define TOUPPER(Ch) (islower (Ch) ? toupper (Ch) : (Ch))
145 # define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
146 #endif
148 /* typedef enum {false, true} boolean; */
150 /* Warn if a database is older than this. 8 days allows for a weekly
151 update that takes up to a day to perform. */
152 #define WARN_NUMBER_UNITS (8)
153 /* Printable name of units used in WARN_SECONDS */
154 static const char warn_name_units[] = N_("days");
155 #define SECONDS_PER_UNIT (60 * 60 * 24)
157 #define WARN_SECONDS ((SECONDS_PER_UNIT) * (WARN_NUMBER_UNITS))
/* Verdict returned by every visitor function.  Each value is a distinct
   bit so that a result can be tested against a mask of several
   verdicts at once.  */
enum visit_result
  {
    VISIT_CONTINUE = 1 << 0,  /* please call the next visitor */
    VISIT_ACCEPTED = 1 << 1,  /* accepted, call no further callbacks for this file */
    VISIT_REJECTED = 1 << 2,  /* rejected, process next file. */
    VISIT_ABORT    = 1 << 3   /* rejected, process no more files. */
  };
/* How the -e / -E options filter results by whether the file currently
   exists.  */
enum ExistenceCheckType
  {
    ACCEPT_EITHER = 0,        /* Corresponds to lack of -E/-e option */
    ACCEPT_EXISTING = 1,      /* Corresponds to option -e */
    ACCEPT_NON_EXISTING = 2   /* Corresponds to option -E */
  };

/* Check for existence of files before printing them out? */
enum ExistenceCheckType check_existence = ACCEPT_EITHER;
/* Nonzero when existence checks should use stat() rather than lstat().  */
static int follow_symlinks = 1;

/* What to separate the results with. */
static int separator = '\n';

/* Quoting configuration handed to print_quoted().  */
static struct quoting_options *quote_opts;

/* Whether standard output is a terminal; passed to print_quoted().  */
static bool stdout_is_a_tty;

/* Selects the quoting printer instead of the raw one.  */
static bool print_quoted_filename;
/* Read in a 16-bit int, high byte first (network byte order).

   The two bytes are combined using unsigned arithmetic and the result
   is then sign-extended by hand; shifting a negative value left, as a
   cast to signed char followed by << 8 would do, is undefined
   behaviour.  If the stream is at end-of-file both reads yield EOF and
   the result is -1.  */
static short
get_short (FILE *fp)
{
  const int hi = fgetc (fp);
  const int lo = fgetc (fp);
  const unsigned int word =
    (((unsigned int) hi & 0xffu) << 8) | ((unsigned int) lo & 0xffu);

  /* Interpret the 16-bit pattern as two's complement.  */
  if (word & 0x8000u)
    return (short) ((int) word - 0x10000);
  return (short) word;
}
/* The characters that make a pattern a shell glob rather than a plain
   substring.  */
const char * const metacharacters = "*?[]\\";

/* Return nonzero (1) if S contains any shell glob characters,
   otherwise 0.  */
static int
contains_metacharacter(const char *s)
{
  /* strcspn() stops at the first metacharacter, or at the terminating
     NUL when there is none.  */
  const size_t plain_prefix = strcspn(s, metacharacters);

  return s[plain_prefix] != '\0';
}
/* locate_read_str()
 *
 * Read bytes from FP into the buffer at offset OFFS in (*BUF),
 * until we reach DELIMITER or end-of-file.  We reallocate the buffer
 * as necessary, altering (*BUF) and (*SIZ) as appropriate.  No assumption
 * is made regarding the content of the data (i.e. the implementation is
 * 8-bit clean, the only delimiter is DELIMITER).
 *
 * Returns the number of bytes read (including the delimiter, if one was
 * found), or a negative value if nothing could be read or the buffer
 * could not be enlarged.  On success the copied data is always followed
 * by a NUL byte, even when the stream ended before DELIMITER was seen.
 *
 * The temporary buffer allocated by getdelim() is released on every
 * path, including the failure paths.
 *
 * We call the function locate_read_str() to avoid a name clash with the
 * curses function getstr().
 */
static int
locate_read_str(char **buf, size_t *siz, FILE *fp, int delimiter, int offs)
{
  char *p = NULL;
  size_t sz = 0;
  int nread;

  nread = getdelim(&p, &sz, delimiter, fp);
  if (nread >= 0)
    {
      /* Room for the prefix, the new data and a terminating NUL.  */
      const size_t needed = (size_t) offs + (size_t) nread + 1;

      assert(p != NULL);

      if (needed > *siz)
        {
          char *pnew = realloc(*buf, needed);
          if (NULL == pnew)
            {
              free(p);
              return -1;        /* FAIL */
            }
          *siz = needed;
          *buf = pnew;
        }
      /* getdelim() NUL-terminates its buffer, so copying one extra byte
         guarantees that the result is terminated too.  */
      memcpy((*buf) + offs, p, (size_t) nread + 1);
    }
  /* getdelim() may have allocated P even when it reports failure.  */
  free(p);
  return nread;
}
/* Copy the NUL-terminated string SRC to DEST, converting upper-case
   characters to lower case on the way.  DEST must have room for
   strlen(SRC) + 1 bytes.  DEST may be the same pointer as SRC, in
   which case the string is folded in place.

   Each byte is converted to unsigned char before being handed to
   tolower(): passing a negative plain char to the <ctype.h> functions
   is undefined behaviour.  */
static void
lc_strcpy(char *dest, const char *src)
{
  for (; *src != '\0'; ++src, ++dest)
    *dest = (char) tolower((unsigned char) *src);
  *dest = '\0';
}
/* Bookkeeping for result counting and the --limit option.  */
struct locate_limits
{
  uintmax_t limit;            /* Upper bound compared against items_accepted. */
  uintmax_t items_accepted;   /* Number of entries accepted so far. */
};

/* The single instance used by this program.  */
static struct locate_limits limits;
/* Counters gathered for the --statistics report.  */
struct locate_stats
{
  uintmax_t compressed_bytes;
  uintmax_t total_filename_count;     /* Number of entries seen. */
  uintmax_t total_filename_length;    /* Sum of the entries' lengths. */
  uintmax_t whitespace_count;         /* Entries containing whitespace. */
  uintmax_t newline_count;            /* Entries containing a newline. */
  uintmax_t highbit_filename_count;   /* Entries with a byte >= 128. */
};

/* The single instance used by this program.  */
static struct locate_stats statistics;
/* A growable scratch buffer holding the case-folded copy of the current
   filename.  */
struct stringbuf
{
  char *buffer;         /* The storage itself; NULL until first use. */
  size_t buffersize;    /* Number of bytes allocated at BUFFER. */
  int *soffs;           /* Points at the shared-prefix count of the
                           current entry, which is an int; declared
                           int * so that assigning its address is
                           type-correct. */
  size_t *preqlen;      /* Points at the size BUFFER must be able to
                           hold (excluding one extra byte). */
};

/* The single instance used by this program.  */
static struct stringbuf casebuf;
/* Context for a case-insensitive substring match: the pattern to look
   for and the buffer in which the folded filename is found.  */
struct casefolder
{
  const char *pattern;
  struct stringbuf *pbuf;
};

/* Context for a regular-expression match: the compiled expression.  */
struct regular_expression
{
  regex_t re;
};
/* State shared between the database reader and all visitor functions
   while one database is being scanned.  */
struct process_data
{
  int c;                    /* An input byte. */
  int count;                /* The length of the prefix shared with the
                               previous database entry. */
  int len;
  char *original_filename;  /* The current input database entry. */
  size_t pathsize;          /* Amount allocated for it. */
  char *munged_filename;    /* path or base_name(path) */
  FILE *fp;                 /* The pathname database. */
  char *dbfile;             /* Its name, or "<stdin>" */

  /* for the old database format,
     the first and second characters of the most common bigrams. */
  char bigram1[128];
  char bigram2[128];
};
/* Signature of a visitor: it inspects (and may update) PROCDATA using
   its private CONTEXT and returns one of the enum visit_result values.  */
typedef int (*visitfunc)(struct process_data *procdata,
                         void *context);

/* One node of the singly linked list of visitors.  */
struct visitor
{
  visitfunc inspector;    /* The function to call. */
  void *context;          /* Opaque argument handed to INSPECTOR. */
  struct visitor *next;   /* Following node, or NULL at the end. */
};

/* Head and tail of the visitor list, and the first node that comes
   after the pattern-matching visitors.  */
static struct visitor *inspectors = NULL;
static struct visitor *lastinspector = NULL;
static struct visitor *past_pat_inspector = NULL;
344 /* 0 or 1 pattern(s) */
345 static int
346 process_simple(struct process_data *procdata)
348 int result = VISIT_CONTINUE;
349 const struct visitor *p = inspectors;
351 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
353 result = (p->inspector)(procdata, p->context);
354 p = p->next;
357 return result;
360 /* Accept if any pattern matches. */
361 static int
362 process_or (struct process_data *procdata)
364 int result = VISIT_CONTINUE;
365 const struct visitor *p = inspectors;
367 while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
369 result = (p->inspector)(procdata, p->context);
370 p = p->next;
373 if (result == VISIT_CONTINUE)
374 result = VISIT_REJECTED;
375 if (result & (VISIT_ABORT | VISIT_REJECTED))
376 return result;
378 p = past_pat_inspector;
379 result = VISIT_CONTINUE;
381 while ( (VISIT_CONTINUE == result) && (NULL != p) )
383 result = (p->inspector)(procdata, p->context);
384 p = p->next;
387 if (VISIT_CONTINUE == result)
388 return VISIT_ACCEPTED;
389 else
390 return result;
393 /* Accept if all pattern match. */
394 static int
395 process_and (struct process_data *procdata)
397 int result = VISIT_CONTINUE;
398 const struct visitor *p = inspectors;
400 while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
402 result = (p->inspector)(procdata, p->context);
403 p = p->next;
406 if (result == VISIT_CONTINUE)
407 result = VISIT_REJECTED;
408 if (result & (VISIT_ABORT | VISIT_REJECTED))
409 return result;
411 p = past_pat_inspector;
412 result = VISIT_CONTINUE;
414 while ( (VISIT_CONTINUE == result) && (NULL != p) )
416 result = (p->inspector)(procdata, p->context);
417 p = p->next;
420 if (VISIT_CONTINUE == result)
421 return VISIT_ACCEPTED;
422 else
423 return result;
/* Signature of the per-entry driver that runs the visitor list.  */
typedef int (*processfunc)(struct process_data *procdata);

/* The driver selected for the current database scan.  */
static processfunc mainprocessor;
430 static void
431 add_visitor(visitfunc fn, void *context)
433 struct visitor *p = xmalloc(sizeof(struct visitor));
434 p->inspector = fn;
435 p->context = context;
436 p->next = NULL;
438 if (NULL == lastinspector)
440 lastinspector = inspectors = p;
442 else
444 lastinspector->next = p;
445 lastinspector = p;
451 static int
452 visit_justprint_quoted(struct process_data *procdata, void *context)
454 (void) context;
455 print_quoted (stdout, quote_opts, stdout_is_a_tty,
456 "%s",
457 procdata->original_filename);
458 putchar(separator);
459 return VISIT_CONTINUE;
462 static int
463 visit_justprint_unquoted(struct process_data *procdata, void *context)
465 (void) context;
466 fputs(procdata->original_filename, stdout);
467 putchar(separator);
468 return VISIT_CONTINUE;
471 static int
472 visit_old_format(struct process_data *procdata, void *context)
474 register char *s;
475 (void) context;
477 /* Get the offset in the path where this path info starts. */
478 if (procdata->c == LOCATEDB_OLD_ESCAPE)
479 procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
480 else
481 procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
483 /* Overlay the old path with the remainder of the new. */
484 for (s = procdata->original_filename + procdata->count;
485 (procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
486 if (procdata->c < 0200)
487 *s++ = procdata->c; /* An ordinary character. */
488 else
490 /* Bigram markers have the high bit set. */
491 procdata->c &= 0177;
492 *s++ = procdata->bigram1[procdata->c];
493 *s++ = procdata->bigram2[procdata->c];
495 *s-- = '\0';
497 procdata->munged_filename = procdata->original_filename;
499 return VISIT_CONTINUE;
503 static int
504 visit_locate02_format(struct process_data *procdata, void *context)
506 register char *s;
507 int nread;
508 (void) context;
510 if (procdata->c == LOCATEDB_ESCAPE)
511 procdata->count += (short)get_short (procdata->fp);
512 else if (procdata->c > 127)
513 procdata->count += procdata->c - 256;
514 else
515 procdata->count += procdata->c;
517 if (procdata->count > procdata->len || procdata->count < 0)
519 /* This should not happen generally , but since we're
520 * reading in data which is outside our control, we
521 * cannot prevent it.
523 error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
526 /* Overlay the old path with the remainder of the new. */
527 nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
528 procdata->fp, 0, procdata->count);
529 if (nread < 0)
530 return VISIT_ABORT;
531 procdata->c = getc (procdata->fp);
532 procdata->len = procdata->count + nread;
533 s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
534 assert (s[0] != '\0');
535 assert (s[1] == '\0'); /* Our terminator. */
536 assert (s[2] == '\0'); /* Added by locate_read_str. */
538 procdata->munged_filename = procdata->original_filename;
540 return VISIT_CONTINUE;
543 static int
544 visit_basename(struct process_data *procdata, void *context)
546 (void) context;
547 procdata->munged_filename = base_name(procdata->original_filename);
549 return VISIT_CONTINUE;
553 static int
554 visit_casefold(struct process_data *procdata, void *context)
556 struct stringbuf *b = context;
558 if (*b->preqlen+1 > b->buffersize)
560 b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
561 b->buffersize = *b->preqlen+1;
563 lc_strcpy(b->buffer, procdata->munged_filename);
565 return VISIT_CONTINUE;
568 /* visit_existing_follow implements -L -e */
569 static int
570 visit_existing_follow(struct process_data *procdata, void *context)
572 struct stat st;
573 (void) context;
575 /* munged_filename has been converted in some way (to lower case,
576 * or is just the base name of the file), and original_filename has not.
577 * Hence only original_filename is still actually the name of the file
578 * whose existence we would need to check.
580 if (stat(procdata->original_filename, &st) != 0)
582 return VISIT_REJECTED;
584 else
586 return VISIT_CONTINUE;
590 /* visit_non_existing_follow implements -L -E */
591 static int
592 visit_non_existing_follow(struct process_data *procdata, void *context)
594 struct stat st;
595 (void) context;
597 /* munged_filename has been converted in some way (to lower case,
598 * or is just the base name of the file), and original_filename has not.
599 * Hence only original_filename is still actually the name of the file
600 * whose existence we would need to check.
602 if (stat(procdata->original_filename, &st) == 0)
604 return VISIT_REJECTED;
606 else
608 return VISIT_CONTINUE;
612 /* visit_existing_nofollow implements -P -e */
613 static int
614 visit_existing_nofollow(struct process_data *procdata, void *context)
616 struct stat st;
617 (void) context;
619 /* munged_filename has been converted in some way (to lower case,
620 * or is just the base name of the file), and original_filename has not.
621 * Hence only original_filename is still actually the name of the file
622 * whose existence we would need to check.
624 if (lstat(procdata->original_filename, &st) != 0)
626 return VISIT_REJECTED;
628 else
630 return VISIT_CONTINUE;
634 /* visit_non_existing_nofollow implements -P -E */
635 static int
636 visit_non_existing_nofollow(struct process_data *procdata, void *context)
638 struct stat st;
639 (void) context;
641 /* munged_filename has been converted in some way (to lower case,
642 * or is just the base name of the file), and original_filename has not.
643 * Hence only original_filename is still actually the name of the file
644 * whose existence we would need to check.
646 if (lstat(procdata->original_filename, &st) == 0)
648 return VISIT_REJECTED;
650 else
652 return VISIT_CONTINUE;
656 static int
657 visit_substring_match_nocasefold(struct process_data *procdata, void *context)
659 const char *pattern = context;
661 if (NULL != strstr(procdata->munged_filename, pattern))
662 return VISIT_ACCEPTED;
663 else
664 return VISIT_REJECTED;
667 static int
668 visit_substring_match_casefold(struct process_data *procdata, void *context)
670 const struct casefolder * p = context;
671 const struct stringbuf * b = p->pbuf;
672 (void) procdata;
674 if (NULL != strstr(b->buffer, p->pattern))
675 return VISIT_ACCEPTED;
676 else
677 return VISIT_REJECTED;
681 static int
682 visit_globmatch_nofold(struct process_data *procdata, void *context)
684 const char *glob = context;
685 if (fnmatch(glob, procdata->munged_filename, 0) != 0)
686 return VISIT_REJECTED;
687 else
688 return VISIT_ACCEPTED;
692 static int
693 visit_globmatch_casefold(struct process_data *procdata, void *context)
695 const char *glob = context;
696 if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
697 return VISIT_REJECTED;
698 else
699 return VISIT_ACCEPTED;
703 static int
704 visit_regex(struct process_data *procdata, void *context)
706 struct regular_expression *p = context;
708 if (0 == regexec(&p->re, procdata->munged_filename, 0u, NULL, 0))
709 return VISIT_ACCEPTED; /* match */
710 else
711 return VISIT_REJECTED; /* no match */
715 static int
716 visit_stats(struct process_data *procdata, void *context)
718 struct locate_stats *p = context;
719 size_t len = strlen(procdata->original_filename);
720 const char *s;
721 int highbit, whitespace, newline;
723 ++(p->total_filename_count);
724 p->total_filename_length += len;
726 highbit = whitespace = newline = 0;
727 for (s=procdata->original_filename; *s; ++s)
729 if ( (int)(*s) & 128 )
730 highbit = 1;
731 if ('\n' == *s)
733 newline = whitespace = 1;
735 else if (isspace((unsigned char)*s))
737 whitespace = 1;
741 if (highbit)
742 ++(p->highbit_filename_count);
743 if (whitespace)
744 ++(p->whitespace_count);
745 if (newline)
746 ++(p->newline_count);
748 return VISIT_CONTINUE;
752 static int
753 visit_limit(struct process_data *procdata, void *context)
755 struct locate_limits *p = context;
757 (void) procdata;
759 if (++p->items_accepted >= p->limit)
760 return VISIT_ABORT;
761 else
762 return VISIT_CONTINUE;
765 static int
766 visit_count(struct process_data *procdata, void *context)
768 struct locate_limits *p = context;
770 (void) procdata;
772 ++p->items_accepted;
773 return VISIT_CONTINUE;
776 /* Emit the statistics.
778 static void
779 print_stats(int argc, size_t database_file_size)
781 char hbuf[LONGEST_HUMAN_READABLE + 1];
783 printf(_("Locate database size: %s bytes\n"),
784 human_readable ((uintmax_t) database_file_size,
785 hbuf, human_ceiling, 1, 1));
787 printf(_("Filenames: %s "),
788 human_readable (statistics.total_filename_count,
789 hbuf, human_ceiling, 1, 1));
790 printf(_("with a cumulative length of %s bytes"),
791 human_readable (statistics.total_filename_length,
792 hbuf, human_ceiling, 1, 1));
794 printf(_("\n\tof which %s contain whitespace, "),
795 human_readable (statistics.whitespace_count,
796 hbuf, human_ceiling, 1, 1));
797 printf(_("\n\t%s contain newline characters, "),
798 human_readable (statistics.newline_count,
799 hbuf, human_ceiling, 1, 1));
800 printf(_("\n\tand %s contain characters with the high bit set.\n"),
801 human_readable (statistics.highbit_filename_count,
802 hbuf, human_ceiling, 1, 1));
804 if (!argc)
805 printf(_("Compression ratio %4.2f%%\n"),
806 100.0 * ((double)statistics.total_filename_length
807 - (double) database_file_size)
808 / (double) statistics.total_filename_length);
809 printf("\n");
813 /* Print or count the entries in DBFILE that match shell globbing patterns in
814 ARGV. Return the number of entries matched. */
816 static unsigned long
817 locate (int argc,
818 char **argv,
819 char *dbfile,
820 int ignore_case,
821 int enable_print,
822 int basename_only,
823 int use_limit,
824 struct locate_limits *plimit,
825 int stats,
826 int op_and,
827 int regex)
829 char *pathpart; /* A pattern to consider. */
830 int argn; /* Index to current pattern in argv. */
831 int need_fold; /* Set when folding and any pattern is non-glob. */
832 int nread; /* number of bytes read from an entry. */
833 struct process_data procdata; /* Storage for data shared with visitors. */
835 int old_format = 0; /* true if reading a bigram-encoded database. */
836 static bool did_stdin = false; /* Set to prevent rereading stdin. */
837 struct visitor* pvis; /* temp for determining past_pat_inspector. */
839 /* To check the age of the database. */
840 struct stat st;
841 time_t now;
844 procdata.len = procdata.count = 0;
845 if (!strcmp (dbfile, "-"))
847 if (did_stdin)
849 error (0, 0, _("warning: the locate database can only be read from stdin once."));
850 return 0;
854 procdata.dbfile = "<stdin>";
855 procdata.fp = stdin;
856 did_stdin = true;
858 else
860 if (stat (dbfile, &st) || (procdata.fp = fopen (dbfile, "r")) == NULL)
862 error (0, errno, "%s", dbfile);
863 return 0;
865 time(&now);
866 if (now - st.st_mtime > WARN_SECONDS)
868 /* For example:
869 warning: database `fred' is more than 8 days old */
870 error (0, 0, _("warning: database `%s' is more than %d %s old"),
871 dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
873 procdata.dbfile = dbfile;
876 procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
877 procdata.original_filename = xmalloc (procdata.pathsize);
879 nread = fread (procdata.original_filename, 1, sizeof (LOCATEDB_MAGIC),
880 procdata.fp);
881 if (nread != sizeof (LOCATEDB_MAGIC)
882 || memcmp (procdata.original_filename, LOCATEDB_MAGIC,
883 sizeof (LOCATEDB_MAGIC)))
885 int i;
886 /* Read the list of the most common bigrams in the database. */
887 nread = fread (procdata.original_filename + sizeof (LOCATEDB_MAGIC), 1,
888 256 - sizeof (LOCATEDB_MAGIC), procdata.fp);
889 for (i = 0; i < 128; i++)
891 procdata.bigram1[i] = procdata.original_filename[i << 1];
892 procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
894 old_format = 1;
897 /* Set up the inspection regime */
898 inspectors = NULL;
899 lastinspector = NULL;
900 past_pat_inspector = NULL;
902 if (old_format)
903 add_visitor(visit_old_format, NULL);
904 else
905 add_visitor(visit_locate02_format, NULL);
907 if (basename_only)
908 add_visitor(visit_basename, NULL);
910 /* See if we need fold. */
911 if (ignore_case && !regex)
912 for ( argn = 0; argn < argc; argn++ )
914 pathpart = argv[argn];
915 if (!contains_metacharacter(pathpart))
917 need_fold = 1;
918 break;
922 if (need_fold)
924 add_visitor(visit_casefold, &casebuf);
925 casebuf.preqlen = &procdata.pathsize;
926 casebuf.soffs = &procdata.count;
929 /* Add an inspector for each pattern we're looking for. */
930 for ( argn = 0; argn < argc; argn++ )
932 pathpart = argv[argn];
933 if (regex)
935 struct regular_expression *p = xmalloc(sizeof(*p));
936 int cflags = REG_EXTENDED | REG_NOSUB
937 | (ignore_case ? REG_ICASE : 0);
938 errno = 0;
939 if (0 == regcomp(&p->re, pathpart, cflags))
941 add_visitor(visit_regex, p);
943 else
945 error (1, errno, "Invalid regular expression; %s", pathpart);
948 else if (contains_metacharacter(pathpart))
950 if (ignore_case)
951 add_visitor(visit_globmatch_casefold, pathpart);
952 else
953 add_visitor(visit_globmatch_nofold, pathpart);
955 else
957 /* No glob characters used. Hence we match on
958 * _any part_ of the filename, not just the
959 * basename. This seems odd to me, but it is the
960 * traditional behaviour.
961 * James Youngman <jay@gnu.org>
963 if (ignore_case)
965 struct casefolder * cf = xmalloc(sizeof(*cf));
966 cf->pattern = pathpart;
967 cf->pbuf = &casebuf;
968 add_visitor(visit_substring_match_casefold, cf);
969 /* If we ignore case, convert it to lower now so we don't have to
970 * do it every time
972 lc_strcpy(pathpart, pathpart);
974 else
976 add_visitor(visit_substring_match_nocasefold, pathpart);
981 pvis = lastinspector;
983 /* We add visit_existing_*() as late as possible to reduce the
984 * number of stat() calls.
986 switch (check_existence)
988 case ACCEPT_EXISTING:
989 if (follow_symlinks) /* -L, default */
990 add_visitor(visit_existing_follow, NULL);
991 else /* -P */
992 add_visitor(visit_existing_nofollow, NULL);
993 break;
995 case ACCEPT_NON_EXISTING:
996 if (follow_symlinks) /* -L, default */
997 add_visitor(visit_non_existing_follow, NULL);
998 else /* -P */
999 add_visitor(visit_non_existing_nofollow, NULL);
1000 break;
1002 case ACCEPT_EITHER: /* Default, neither -E nor -e */
1003 /* do nothing; no extra processing. */
1004 break;
1007 if (stats)
1008 add_visitor(visit_stats, &statistics);
1010 if (enable_print)
1012 if (print_quoted_filename)
1013 add_visitor(visit_justprint_quoted, NULL);
1014 else
1015 add_visitor(visit_justprint_unquoted, NULL);
1019 if (use_limit)
1020 add_visitor(visit_limit, plimit);
1021 else
1022 add_visitor(visit_count, plimit);
1025 if (argc > 1)
1027 past_pat_inspector = pvis->next;
1028 if (op_and)
1029 mainprocessor = process_and;
1030 else
1031 mainprocessor = process_or;
1033 else
1034 mainprocessor = process_simple;
1036 if (stats)
1038 printf(_("Database %s is in the %s format.\n"),
1039 procdata.dbfile,
1040 old_format ? _("old") : "LOCATE02");
1044 procdata.c = getc (procdata.fp);
1045 /* If we are searching for filename patterns, the inspector list
1046 * will contain an entry for each pattern for which we are searching.
1048 while ( (procdata.c != EOF) &&
1049 (VISIT_ABORT != (mainprocessor)(&procdata)) )
1051 /* Do nothing; all the work is done in the visitor functions. */
1054 if (stats)
1056 print_stats(argc, st.st_size);
1059 if (ferror (procdata.fp))
1061 error (0, errno, "%s", procdata.dbfile);
1062 return 0;
1064 if (procdata.fp != stdin && fclose (procdata.fp) == EOF)
1066 error (0, errno, "%s", dbfile);
1067 return 0;
1070 return plimit->items_accepted;
/* Version text printed by --version; defined in another file.  */
extern char *version_string;

/* The name this program was run with. */
char *program_name;
1081 static void
1082 usage (FILE *stream)
1084 fprintf (stream, _("\
1085 Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1086 [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1087 [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1088 [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1089 [-A | --all] [-p | --print] [-r | --regex ] [--version] [--help]\n\
1090 pattern...\n"),
1091 program_name);
1092 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
/* Long options understood by main().  Each maps to the character that
   the option switch in main() handles; the table ends with an all-zero
   entry as getopt_long() requires.  */
static struct option const longopts[] =
{
  {"database",     required_argument, NULL, 'd'},
  {"existing",     no_argument,       NULL, 'e'},
  {"non-existing", no_argument,       NULL, 'E'},
  {"ignore-case",  no_argument,       NULL, 'i'},
  {"all",          no_argument,       NULL, 'A'},
  {"help",         no_argument,       NULL, 'h'},
  {"version",      no_argument,       NULL, 'v'},
  {"null",         no_argument,       NULL, '0'},
  {"count",        no_argument,       NULL, 'c'},
  {"wholename",    no_argument,       NULL, 'w'},
  {"wholepath",    no_argument,       NULL, 'w'}, /* Synonym. */
  {"basename",     no_argument,       NULL, 'b'},
  {"print",        no_argument,       NULL, 'p'},
  {"stdio",        no_argument,       NULL, 's'},
  {"mmap",         no_argument,       NULL, 'm'},
  {"limit",        required_argument, NULL, 'l'},
  {"regex",        no_argument,       NULL, 'r'},
  {"statistics",   no_argument,       NULL, 'S'},
  {"follow",       no_argument,       NULL, 'L'},
  {"nofollow",     no_argument,       NULL, 'P'},
  {NULL,           no_argument,       NULL, 0}
};
1121 main (int argc, char **argv)
1123 char *dbpath;
1124 unsigned long int found = 0uL;
1125 int optc;
1126 int ignore_case = 0;
1127 int print = 0;
1128 int just_count = 0;
1129 int basename_only = 0;
1130 int use_limit = 0;
1131 int regex = 0;
1132 int stats = 0;
1133 int op_and = 0;
1134 char *e;
1136 program_name = argv[0];
1138 #ifdef HAVE_SETLOCALE
1139 setlocale (LC_ALL, "");
1140 #endif
1141 bindtextdomain (PACKAGE, LOCALEDIR);
1142 textdomain (PACKAGE);
1143 atexit (close_stdout);
1145 limits.limit = 0;
1146 limits.items_accepted = 0;
1148 quote_opts = clone_quoting_options (NULL);
1149 print_quoted_filename = true;
1151 dbpath = getenv ("LOCATE_PATH");
1152 if (dbpath == NULL)
1153 dbpath = LOCATE_DB;
1155 check_existence = ACCEPT_EITHER;
1157 while ((optc = getopt_long (argc, argv, "Abcd:eEil:prsm0SwHPL", longopts, (int *) 0)) != -1)
1158 switch (optc)
1160 case '0':
1161 separator = 0;
1162 print_quoted_filename = false; /* print filename 'raw'. */
1163 break;
1165 case 'A':
1166 op_and = 1;
1167 break;
1169 case 'b':
1170 basename_only = 1;
1171 break;
1173 case 'c':
1174 just_count = 1;
1175 break;
1177 case 'd':
1178 dbpath = optarg;
1179 break;
1181 case 'e':
1182 check_existence = ACCEPT_EXISTING;
1183 break;
1185 case 'E':
1186 check_existence = ACCEPT_NON_EXISTING;
1187 break;
1189 case 'i':
1190 ignore_case = 1;
1191 break;
1193 case 'h':
1194 usage (stdout);
1195 return 0;
1197 case 'p':
1198 print = 1;
1199 break;
1201 case 'v':
1202 printf (_("GNU locate version %s\n"), version_string);
1203 return 0;
1205 case 'w':
1206 basename_only = 0;
1207 break;
1209 case 'r':
1210 regex = 1;
1211 break;
1213 case 'S':
1214 stats = 1;
1215 break;
1217 case 'L':
1218 follow_symlinks = 1;
1219 break;
1221 /* In find, -P and -H differ in the way they handle paths
1222 * given on the command line. This is not relevant for
1223 * locate, but the -H option is supported because it is
1224 * probably more intuitive to do so.
1226 case 'P':
1227 case 'H':
1228 follow_symlinks = 0;
1229 break;
1231 case 'l':
1233 char *end = optarg;
1234 strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
1235 if (LONGINT_OK != err)
1237 STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
1239 use_limit = 1;
1241 break;
1243 case 's': /* use stdio */
1244 case 'm': /* use mmap */
1245 /* These options are implemented simply for
1246 * compatibility with FreeBSD
1248 break;
1250 default:
1251 usage (stderr);
1252 return 1;
1255 if (!just_count && !stats)
1256 print = 1;
1258 if (stats)
1260 if (optind == argc)
1261 use_limit = 0;
1263 else
1265 if (!just_count && optind == argc)
1267 usage (stderr);
1268 return 1;
1273 if (1 == isatty(STDOUT_FILENO))
1274 stdout_is_a_tty = true;
1275 else
1276 stdout_is_a_tty = false;
1278 next_element (dbpath, 0); /* Initialize. */
1280 /* Bail out early if limit already reached. */
1281 while ((e = next_element ((char *) NULL, 0)) != NULL &&
1282 (!use_limit || limits.limit > limits.items_accepted))
1284 statistics.compressed_bytes =
1285 statistics.total_filename_count =
1286 statistics.total_filename_length =
1287 statistics.whitespace_count =
1288 statistics.newline_count =
1289 statistics.highbit_filename_count = 0u;
1291 if (0 == strlen(e) || 0 == strcmp(e, "."))
1293 /* Use the default database name instead (note: we
1294 * don't use 'dbpath' since that might itself contain a
1295 * colon-separated list.
1297 e = LOCATE_DB;
1300 found = locate (argc - optind, &argv[optind], e, ignore_case, print, basename_only, use_limit, &limits, stats, op_and, regex);
1303 if (just_count)
1305 printf("%ld\n", found);
1308 if (found || (use_limit && (limits.limit==0)) || stats )
1309 return 0;
1310 else
1311 return 1;