usbmodeswitch: Updated to v.1.2.6 from shibby's branch.
[tomato.git] / release / src / router / pcre / pcregrep.c
blob2e0dc03f4c589ccd16431603c5db279bdf3e5b63
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
/* This is a grep program that uses the PCRE regular expression library to do
its pattern matching. On a Unix or Win32 system it can recurse into
directories.

           Copyright (c) 1997-2012 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
51 #include <sys/types.h>
52 #include <sys/stat.h>
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
66 #include "pcre.h"
#define FALSE 0
#define TRUE 1

typedef int BOOL;

#define OFFSET_SIZE 99

#if BUFSIZ > 8192
#define MAXPATLEN BUFSIZ
#else
#define MAXPATLEN 8192
#endif

#define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };

/* Binary file options */

enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };

/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf(). */

#define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
122 /*************************************************
123 * Global variables *
124 *************************************************/
126 /* Jeffrey Friedl has some debugging requirements that are not part of the
127 regular code. */
129 #ifdef JFRIEDL_DEBUG
130 static int S_arg = -1;
131 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
132 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
133 static const char *jfriedl_prefix = "";
134 static const char *jfriedl_postfix = "";
135 #endif
137 static int endlinetype;
139 static char *colour_string = (char *)"1;31";
140 static char *colour_option = NULL;
141 static char *dee_option = NULL;
142 static char *DEE_option = NULL;
143 static char *locale = NULL;
144 static char *main_buffer = NULL;
145 static char *newline = NULL;
146 static char *om_separator = (char *)"";
147 static char *stdin_name = (char *)"(standard input)";
149 static const unsigned char *pcretables = NULL;
151 static int after_context = 0;
152 static int before_context = 0;
153 static int binary_files = BIN_BINARY;
154 static int both_context = 0;
155 static int bufthird = PCREGREP_BUFSIZE;
156 static int bufsize = 3*PCREGREP_BUFSIZE;
158 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
159 static int dee_action = dee_SKIP;
160 #else
161 static int dee_action = dee_READ;
162 #endif
164 static int DEE_action = DEE_READ;
165 static int error_count = 0;
166 static int filenames = FN_DEFAULT;
167 static int pcre_options = 0;
168 static int process_options = 0;
170 #ifdef SUPPORT_PCREGREP_JIT
171 static int study_options = PCRE_STUDY_JIT_COMPILE;
172 #else
173 static int study_options = 0;
174 #endif
176 static unsigned long int match_limit = 0;
177 static unsigned long int match_limit_recursion = 0;
179 static BOOL count_only = FALSE;
180 static BOOL do_colour = FALSE;
181 static BOOL file_offsets = FALSE;
182 static BOOL hyphenpending = FALSE;
183 static BOOL invert = FALSE;
184 static BOOL line_buffered = FALSE;
185 static BOOL line_offsets = FALSE;
186 static BOOL multiline = FALSE;
187 static BOOL number = FALSE;
188 static BOOL omit_zero_count = FALSE;
189 static BOOL resource_error = FALSE;
190 static BOOL quiet = FALSE;
191 static BOOL show_only_matching = FALSE;
192 static BOOL silent = FALSE;
193 static BOOL utf8 = FALSE;
/* Structure for list of --only-matching capturing numbers. */

typedef struct omstr {
  struct omstr *next;
  int groupnum;
} omstr;

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;

/* Structure for holding the two variables that describe a number chain. */

typedef struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
} omdatastr;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };
/* Structure for list of file names (for -f and --{in,ex}clude-from) */

typedef struct fnstr {
  struct fnstr *next;
  char *name;
} fnstr;

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;
static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;
static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* Structure for holding the two variables that describe a file name chain. */

typedef struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
} fndatastr;

static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
static fndatastr include_from_data = { &include_from, &include_from_last };
static fndatastr file_lists_data = { &file_lists, &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
243 /* Structure for pattern and its compiled form; used for matching patterns and
244 also for include/exclude patterns. */
246 typedef struct patstr {
247 struct patstr *next;
248 char *string;
249 pcre *compiled;
250 pcre_extra *hint;
251 } patstr;
253 static patstr *patterns = NULL;
254 static patstr *patterns_last = NULL;
255 static patstr *include_patterns = NULL;
256 static patstr *include_patterns_last = NULL;
257 static patstr *exclude_patterns = NULL;
258 static patstr *exclude_patterns_last = NULL;
259 static patstr *include_dir_patterns = NULL;
260 static patstr *include_dir_patterns_last = NULL;
261 static patstr *exclude_dir_patterns = NULL;
262 static patstr *exclude_dir_patterns_last = NULL;
264 /* Structure holding the two variables that describe a pattern chain. A pointer
265 to such structures is used for each appropriate option. */
267 typedef struct patdatastr {
268 patstr **anchor;
269 patstr **lastptr;
270 } patdatastr;
272 static patdatastr match_patdata = { &patterns, &patterns_last };
273 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
274 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
275 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
276 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
278 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
279 &include_dir_patterns, &exclude_dir_patterns };
281 static const char *incexname[4] = { "--include", "--exclude",
282 "--include-dir", "--exclude-dir" };
/* Structure for options and list of them */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
       OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };

typedef struct option_item {
  int type;               /* one of the OP_xxx values above */
  int one_char;           /* short option letter, or a negative N_xxx id */
  void *dataptr;          /* where the option's value is stored, if any */
  const char *long_name;
  const char *help_text;
} option_item;
297 /* Options without a single-letter equivalent get a negative value. This can be
298 used to identify them. */
300 #define N_COLOUR (-1)
301 #define N_EXCLUDE (-2)
302 #define N_EXCLUDE_DIR (-3)
303 #define N_HELP (-4)
304 #define N_INCLUDE (-5)
305 #define N_INCLUDE_DIR (-6)
306 #define N_LABEL (-7)
307 #define N_LOCALE (-8)
308 #define N_NULL (-9)
309 #define N_LOFFSETS (-10)
310 #define N_FOFFSETS (-11)
311 #define N_LBUFFER (-12)
312 #define N_M_LIMIT (-13)
313 #define N_M_LIMIT_REC (-14)
314 #define N_BUFSIZE (-15)
315 #define N_NOJIT (-16)
316 #define N_FILE_LIST (-17)
317 #define N_BINARY_FILES (-18)
318 #define N_EXCLUDE_FROM (-19)
319 #define N_INCLUDE_FROM (-20)
320 #define N_OM_SEPARATOR (-21)
322 static option_item optionlist[] = {
323 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
324 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
325 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
326 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
327 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
328 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
329 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
330 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
331 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
332 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
333 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
334 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
335 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
336 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
337 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
338 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
339 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
340 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
341 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
342 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
343 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
344 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
345 #ifdef SUPPORT_PCREGREP_JIT
346 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
347 #else
348 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
349 #endif
350 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
351 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
352 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
353 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
354 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
355 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
356 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
357 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
358 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
359 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
360 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
361 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
362 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
363 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
364 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
365 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
366 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
367 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
368 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
369 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
370 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
372 /* These two were accidentally implemented with underscores instead of
373 hyphens in the option names. As this was not discovered for several releases,
374 the incorrect versions are left in the table for compatibility. However, the
375 --help function misses out any option that has an underscore in its name. */
377 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
378 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
380 #ifdef JFRIEDL_DEBUG
381 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
382 #endif
383 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
384 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
385 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
386 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
387 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
388 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
389 { OP_NODATA, 0, NULL, NULL, NULL }
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Note that the MAXPATLEN macro assumes the longest
prefix+suffix is 10 characters; if anything longer is added, it must be
adjusted. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };

/* UTF-8 tables - used only when the newline setting is "any". */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
417 /*************************************************
418 * Exit from the program *
419 *************************************************/
421 /* If there has been a resource error, give a suitable message.
423 Argument: the return code
424 Returns: does not return
427 static void
428 pcregrep_exit(int rc)
430 if (resource_error)
432 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
433 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
434 PCRE_ERROR_JIT_STACKLIMIT);
435 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
437 exit(rc);
441 /*************************************************
442 * Add item to chain of patterns *
443 *************************************************/
445 /* Used to add an item onto a chain, or just return an unconnected item if the
446 "after" argument is NULL.
448 Arguments:
449 s pattern string to add
450 after if not NULL points to item to insert after
452 Returns: new pattern block
455 static patstr *
456 add_pattern(char *s, patstr *after)
458 patstr *p = (patstr *)malloc(sizeof(patstr));
459 if (p == NULL)
461 fprintf(stderr, "pcregrep: malloc failed\n");
462 pcregrep_exit(2);
464 if (strlen(s) > MAXPATLEN)
466 fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
467 MAXPATLEN);
468 return NULL;
470 p->next = NULL;
471 p->string = s;
472 p->compiled = NULL;
473 p->hint = NULL;
475 if (after != NULL)
477 p->next = after->next;
478 after->next = p;
480 return p;
484 /*************************************************
485 * Free chain of patterns *
486 *************************************************/
488 /* Used for several chains of patterns.
490 Argument: pointer to start of chain
491 Returns: nothing
494 static void
495 free_pattern_chain(patstr *pc)
497 while (pc != NULL)
499 patstr *p = pc;
500 pc = p->next;
501 if (p->hint != NULL) pcre_free_study(p->hint);
502 if (p->compiled != NULL) pcre_free(p->compiled);
503 free(p);
508 /*************************************************
509 * Free chain of file names *
510 *************************************************/
513 Argument: pointer to start of chain
514 Returns: nothing
517 static void
518 free_file_chain(fnstr *fn)
520 while (fn != NULL)
522 fnstr *f = fn;
523 fn = f->next;
524 free(f);
529 /*************************************************
530 * OS-specific functions *
531 *************************************************/
533 /* These functions are defined so that they can be made system specific,
534 although at present the only ones are for Unix, Win32, and for "no support". */
537 /************* Directory scanning in Unix ***********/
539 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
540 #include <sys/types.h>
541 #include <sys/stat.h>
542 #include <dirent.h>
544 typedef DIR directory_type;
545 #define FILESEP '/'
547 static int
548 isdirectory(char *filename)
550 struct stat statbuf;
551 if (stat(filename, &statbuf) < 0)
552 return 0; /* In the expectation that opening as a file will fail */
553 return (statbuf.st_mode & S_IFMT) == S_IFDIR;
556 static directory_type *
557 opendirectory(char *filename)
559 return opendir(filename);
562 static char *
563 readdirectory(directory_type *dir)
565 for (;;)
567 struct dirent *dent = readdir(dir);
568 if (dent == NULL) return NULL;
569 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
570 return dent->d_name;
572 /* Control never reaches here */
575 static void
576 closedirectory(directory_type *dir)
578 closedir(dir);
582 /************* Test for regular file in Unix **********/
584 static int
585 isregfile(char *filename)
587 struct stat statbuf;
588 if (stat(filename, &statbuf) < 0)
589 return 1; /* In the expectation that opening as a file will fail */
590 return (statbuf.st_mode & S_IFMT) == S_IFREG;
594 /************* Test for a terminal in Unix **********/
596 static BOOL
597 is_stdout_tty(void)
599 return isatty(fileno(stdout));
602 static BOOL
603 is_file_tty(FILE *f)
605 return isatty(fileno(f));
609 /************* Directory scanning in Win32 ***********/
611 /* I (Philip Hazel) have no means of testing this code. It was contributed by
612 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
613 when it did not exist. David Byron added a patch that moved the #include of
614 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
615 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
616 undefined when it is indeed undefined. */
618 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
620 #ifndef STRICT
621 # define STRICT
622 #endif
623 #ifndef WIN32_LEAN_AND_MEAN
624 # define WIN32_LEAN_AND_MEAN
625 #endif
627 #include <windows.h>
629 #ifndef INVALID_FILE_ATTRIBUTES
630 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
631 #endif
633 typedef struct directory_type
635 HANDLE handle;
636 BOOL first;
637 WIN32_FIND_DATA data;
638 } directory_type;
640 #define FILESEP '/'
643 isdirectory(char *filename)
645 DWORD attr = GetFileAttributes(filename);
646 if (attr == INVALID_FILE_ATTRIBUTES)
647 return 0;
648 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
651 directory_type *
652 opendirectory(char *filename)
654 size_t len;
655 char *pattern;
656 directory_type *dir;
657 DWORD err;
658 len = strlen(filename);
659 pattern = (char *)malloc(len + 3);
660 dir = (directory_type *)malloc(sizeof(*dir));
661 if ((pattern == NULL) || (dir == NULL))
663 fprintf(stderr, "pcregrep: malloc failed\n");
664 pcregrep_exit(2);
666 memcpy(pattern, filename, len);
667 memcpy(&(pattern[len]), "\\*", 3);
668 dir->handle = FindFirstFile(pattern, &(dir->data));
669 if (dir->handle != INVALID_HANDLE_VALUE)
671 free(pattern);
672 dir->first = TRUE;
673 return dir;
675 err = GetLastError();
676 free(pattern);
677 free(dir);
678 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
679 return NULL;
682 char *
683 readdirectory(directory_type *dir)
685 for (;;)
687 if (!dir->first)
689 if (!FindNextFile(dir->handle, &(dir->data)))
690 return NULL;
692 else
694 dir->first = FALSE;
696 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
697 return dir->data.cFileName;
699 #ifndef _MSC_VER
700 return NULL; /* Keep compiler happy; never executed */
701 #endif
704 void
705 closedirectory(directory_type *dir)
707 FindClose(dir->handle);
708 free(dir);
712 /************* Test for regular file in Win32 **********/
714 /* I don't know how to do this, or if it can be done; assume all paths are
715 regular if they are not directories. */
int isregfile(char *filename)
{
return !isdirectory(filename);
}
723 /************* Test for a terminal in Win32 **********/
725 /* I don't know how to do this; assume never */
727 static BOOL
728 is_stdout_tty(void)
730 return FALSE;
733 static BOOL
734 is_file_tty(FILE *f)
736 return FALSE;
740 /************* Directory scanning when we can't do it ***********/
742 /* The type is void, and apart from isdirectory(), the functions do nothing. */
744 #else
#define FILESEP 0
typedef void directory_type;

int isdirectory(char *filename) { return 0; }
directory_type * opendirectory(char *filename) { return (directory_type*)0;}
char *readdirectory(directory_type *dir) { return (char*)0;}
void closedirectory(directory_type *dir) {}
755 /************* Test for regular when we can't do it **********/
757 /* Assume all files are regular. */
int isregfile(char *filename) { return 1; }
762 /************* Test for a terminal when we can't do it **********/
764 static BOOL
765 is_stdout_tty(void)
767 return FALSE;
770 static BOOL
771 is_file_tty(FILE *f)
773 return FALSE;
776 #endif
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n < 0 || n >= sys_nerr) return "unknown error number";
return sys_errlist[n];
}
#endif /* HAVE_STRERROR */
802 /*************************************************
803 * Usage function *
804 *************************************************/
806 static int
807 usage(int rc)
809 option_item *op;
810 fprintf(stderr, "Usage: pcregrep [-");
811 for (op = optionlist; op->one_char != 0; op++)
813 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
815 fprintf(stderr, "] [long options] [pattern] [files]\n");
816 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
817 "options.\n");
818 return rc;
823 /*************************************************
824 * Help function *
825 *************************************************/
827 static void
828 help(void)
830 option_item *op;
832 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
833 printf("Search for PATTERN in each FILE or standard input.\n");
834 printf("PATTERN must be present if neither -e nor -f is used.\n");
835 printf("\"-\" can be used as a file name to mean STDIN.\n");
837 #ifdef SUPPORT_LIBZ
838 printf("Files whose names end in .gz are read using zlib.\n");
839 #endif
841 #ifdef SUPPORT_LIBBZ2
842 printf("Files whose names end in .bz2 are read using bzlib2.\n");
843 #endif
845 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
846 printf("Other files and the standard input are read as plain files.\n\n");
847 #else
848 printf("All files are read as plain files, without any interpretation.\n\n");
849 #endif
851 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
852 printf("Options:\n");
854 for (op = optionlist; op->one_char != 0; op++)
856 int n;
857 char s[4];
859 /* Two options were accidentally implemented and documented with underscores
860 instead of hyphens in their names, something that was not noticed for quite a
861 few releases. When fixing this, I left the underscored versions in the list
862 in case people were using them. However, we don't want to display them in the
863 help data. There are no other options that contain underscores, and we do not
864 expect ever to implement such options. Therefore, just omit any option that
865 contains an underscore. */
867 if (strchr(op->long_name, '_') != NULL) continue;
869 if (op->one_char > 0 && (op->long_name)[0] == 0)
870 n = 31 - printf(" -%c", op->one_char);
871 else
873 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
874 else strcpy(s, " ");
875 n = 31 - printf(" %s --%s", s, op->long_name);
878 if (n < 1) n = 1;
879 printf("%.*s%s\n", n, " ", op->help_text);
882 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
883 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
884 printf("When reading patterns or file names from a file, trailing white\n");
885 printf("space is removed and blank lines are ignored.\n");
886 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
888 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
889 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
894 /*************************************************
895 * Test exclude/includes *
896 *************************************************/
898 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
899 there are no includes, the path must match an include pattern.
901 Arguments:
902 path the path to be matched
903 ip the chain of include patterns
904 ep the chain of exclude patterns
906 Returns: TRUE if the path is not excluded
909 static BOOL
910 test_incexc(char *path, patstr *ip, patstr *ep)
912 int plen = strlen(path);
914 for (; ep != NULL; ep = ep->next)
916 if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
917 return FALSE;
920 if (ip == NULL) return TRUE;
922 for (; ip != NULL; ip = ip->next)
924 if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
925 return TRUE;
928 return FALSE;
933 /*************************************************
934 * Decode integer argument value *
935 *************************************************/
937 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
938 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
939 just keep it simple.
941 Arguments:
942 option_data the option data string
943 op the option item (for error messages)
944 longop TRUE if option given in long form
946 Returns: a long integer
949 static long int
950 decode_number(char *option_data, option_item *op, BOOL longop)
952 unsigned long int n = 0;
953 char *endptr = option_data;
954 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
955 while (isdigit((unsigned char)(*endptr)))
956 n = n * 10 + (int)(*endptr++ - '0');
957 if (toupper(*endptr) == 'K')
959 n *= 1024;
960 endptr++;
962 else if (toupper(*endptr) == 'M')
964 n *= 1024*1024;
965 endptr++;
968 if (*endptr != 0) /* Error */
970 if (longop)
972 char *equals = strchr(op->long_name, '=');
973 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
974 (int)(equals - op->long_name);
975 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
976 option_data, nlen, op->long_name);
978 else
979 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
980 option_data, op->one_char);
981 pcregrep_exit(usage(2));
984 return n;
989 /*************************************************
990 * Add item to a chain of numbers *
991 *************************************************/
993 /* Used to add an item onto a chain, or just return an unconnected item if the
994 "after" argument is NULL.
996 Arguments:
997 n the number to add
998 after if not NULL points to item to insert after
1000 Returns: new number block
1003 static omstr *
1004 add_number(int n, omstr *after)
1006 omstr *om = (omstr *)malloc(sizeof(omstr));
1008 if (om == NULL)
1010 fprintf(stderr, "pcregrep: malloc failed\n");
1011 pcregrep_exit(2);
1013 om->next = NULL;
1014 om->groupnum = n;
1016 if (after != NULL)
1018 om->next = after->next;
1019 after->next = om;
1021 return om;
1026 /*************************************************
1027 * Read one line of input *
1028 *************************************************/
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file. No terminating zero is added.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* With no room at all, nothing may be stored; without this test one
character would be written before the limit was checked. */

if (length <= 0) return 0;

while ((c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n' || yield >= length) break;
  }
return yield;
}
1060 /*************************************************
1061 * Find end of line *
1062 *************************************************/
1064 /* The length of the endline sequence that is found is set via lenptr. This may
1065 be zero at the very end of the file if there is no line-ending sequence there.
1067 Arguments:
1068 p current position in line
1069 endptr end of available data
1070 lenptr where to put the length of the eol sequence
1072 Returns: pointer after the last byte of the line,
1073 including the newline byte(s)
1076 static char *
1077 end_of_line(char *p, char *endptr, int *lenptr)
1079 switch(endlinetype)
1081 default: /* Just in case */
1082 case EL_LF:
1083 while (p < endptr && *p != '\n') p++;
1084 if (p < endptr)
1086 *lenptr = 1;
1087 return p + 1;
1089 *lenptr = 0;
1090 return endptr;
1092 case EL_CR:
1093 while (p < endptr && *p != '\r') p++;
1094 if (p < endptr)
1096 *lenptr = 1;
1097 return p + 1;
1099 *lenptr = 0;
1100 return endptr;
1102 case EL_CRLF:
1103 for (;;)
1105 while (p < endptr && *p != '\r') p++;
1106 if (++p >= endptr)
1108 *lenptr = 0;
1109 return endptr;
1111 if (*p == '\n')
1113 *lenptr = 2;
1114 return p + 1;
1117 break;
1119 case EL_ANYCRLF:
1120 while (p < endptr)
1122 int extra = 0;
1123 register int c = *((unsigned char *)p);
1125 if (utf8 && c >= 0xc0)
1127 int gcii, gcss;
1128 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1129 gcss = 6*extra;
1130 c = (c & utf8_table3[extra]) << gcss;
1131 for (gcii = 1; gcii <= extra; gcii++)
1133 gcss -= 6;
1134 c |= (p[gcii] & 0x3f) << gcss;
1138 p += 1 + extra;
1140 switch (c)
1142 case '\n':
1143 *lenptr = 1;
1144 return p;
1146 case '\r':
1147 if (p < endptr && *p == '\n')
1149 *lenptr = 2;
1150 p++;
1152 else *lenptr = 1;
1153 return p;
1155 default:
1156 break;
1158 } /* End of loop for ANYCRLF case */
1160 *lenptr = 0; /* Must have hit the end */
1161 return endptr;
1163 case EL_ANY:
1164 while (p < endptr)
1166 int extra = 0;
1167 register int c = *((unsigned char *)p);
1169 if (utf8 && c >= 0xc0)
1171 int gcii, gcss;
1172 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1173 gcss = 6*extra;
1174 c = (c & utf8_table3[extra]) << gcss;
1175 for (gcii = 1; gcii <= extra; gcii++)
1177 gcss -= 6;
1178 c |= (p[gcii] & 0x3f) << gcss;
1182 p += 1 + extra;
1184 switch (c)
1186 case '\n': /* LF */
1187 case '\v': /* VT */
1188 case '\f': /* FF */
1189 *lenptr = 1;
1190 return p;
1192 case '\r': /* CR */
1193 if (p < endptr && *p == '\n')
1195 *lenptr = 2;
1196 p++;
1198 else *lenptr = 1;
1199 return p;
1201 #ifndef EBCDIC
1202 case 0x85: /* Unicode NEL */
1203 *lenptr = utf8? 2 : 1;
1204 return p;
1206 case 0x2028: /* Unicode LS */
1207 case 0x2029: /* Unicode PS */
1208 *lenptr = 3;
1209 return p;
1210 #endif /* Not EBCDIC */
1212 default:
1213 break;
1215 } /* End of loop for ANY case */
1217 *lenptr = 0; /* Must have hit the end */
1218 return endptr;
1219 } /* End of overall switch */
1224 /*************************************************
1225 * Find start of previous line *
1226 *************************************************/
1228 /* This is called when looking back for before lines to print.
1230 Arguments:
1231 p start of the subsequent line
1232 startptr start of available data
1234 Returns: pointer to the start of the previous line
1237 static char *
1238 previous_line(char *p, char *startptr)
1240 switch(endlinetype)
1242 default: /* Just in case */
1243 case EL_LF:
1244 p--;
1245 while (p > startptr && p[-1] != '\n') p--;
1246 return p;
1248 case EL_CR:
1249 p--;
1250 while (p > startptr && p[-1] != '\n') p--;
1251 return p;
1253 case EL_CRLF:
1254 for (;;)
1256 p -= 2;
1257 while (p > startptr && p[-1] != '\n') p--;
1258 if (p <= startptr + 1 || p[-2] == '\r') return p;
1260 return p; /* But control should never get here */
1262 case EL_ANY:
1263 case EL_ANYCRLF:
1264 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1265 if (utf8) while ((*p & 0xc0) == 0x80) p--;
1267 while (p > startptr)
1269 register unsigned int c;
1270 char *pp = p - 1;
1272 if (utf8)
1274 int extra = 0;
1275 while ((*pp & 0xc0) == 0x80) pp--;
1276 c = *((unsigned char *)pp);
1277 if (c >= 0xc0)
1279 int gcii, gcss;
1280 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1281 gcss = 6*extra;
1282 c = (c & utf8_table3[extra]) << gcss;
1283 for (gcii = 1; gcii <= extra; gcii++)
1285 gcss -= 6;
1286 c |= (pp[gcii] & 0x3f) << gcss;
1290 else c = *((unsigned char *)pp);
1292 if (endlinetype == EL_ANYCRLF) switch (c)
1294 case '\n': /* LF */
1295 case '\r': /* CR */
1296 return p;
1298 default:
1299 break;
1302 else switch (c)
1304 case '\n': /* LF */
1305 case '\v': /* VT */
1306 case '\f': /* FF */
1307 case '\r': /* CR */
1308 #ifndef EBCDIE
1309 case 0x85: /* Unicode NEL */
1310 case 0x2028: /* Unicode LS */
1311 case 0x2029: /* Unicode PS */
1312 #endif /* Not EBCDIC */
1313 return p;
1315 default:
1316 break;
1319 p = pp; /* Back one character */
1320 } /* End of loop for ANY case */
1322 return startptr; /* Hit start of data */
1323 } /* End of overall switch */
1330 /*************************************************
1331 * Print the previous "after" lines *
1332 *************************************************/
1334 /* This is called if we are about to lose said lines because of buffer filling,
1335 and at the end of the file. The data in the line is written using fwrite() so
1336 that a binary zero does not terminate it.
1338 Arguments:
1339 lastmatchnumber the number of the last matching line, plus one
1340 lastmatchrestart where we restarted after the last match
1341 endptr end of available data
1342 printname filename for printing
1344 Returns: nothing
1347 static void
1348 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1349 char *printname)
1351 if (after_context > 0 && lastmatchnumber > 0)
1353 int count = 0;
1354 while (lastmatchrestart < endptr && count++ < after_context)
1356 int ellength;
1357 char *pp = lastmatchrestart;
1358 if (printname != NULL) fprintf(stdout, "%s-", printname);
1359 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1360 pp = end_of_line(pp, endptr, &ellength);
1361 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1362 lastmatchrestart = pp;
1364 hyphenpending = TRUE;
1370 /*************************************************
1371 * Apply patterns to subject till one matches *
1372 *************************************************/
1374 /* This function is called to run through all patterns, looking for a match. It
1375 is used multiple times for the same subject when colouring is enabled, in order
1376 to find all possible matches.
1378 Arguments:
1379 matchptr the start of the subject
1380 length the length of the subject to match
1381 startoffset where to start matching
1382 offsets the offets vector to fill in
1383 mrc address of where to put the result of pcre_exec()
1385 Returns: TRUE if there was a match
1386 FALSE if there was no match
1387 invert if there was a non-fatal error
1390 static BOOL
1391 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1392 int *mrc)
1394 int i;
1395 size_t slen = length;
1396 patstr *p = patterns;
1397 const char *msg = "this text:\n\n";
1399 if (slen > 200)
1401 slen = 200;
1402 msg = "text that starts:\n\n";
1404 for (i = 1; p != NULL; p = p->next, i++)
1406 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1407 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1408 if (*mrc >= 0) return TRUE;
1409 if (*mrc == PCRE_ERROR_NOMATCH) continue;
1410 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1411 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1412 fprintf(stderr, "%s", msg);
1413 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1414 fprintf(stderr, "\n\n");
1415 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1416 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1417 resource_error = TRUE;
1418 if (error_count++ > 20)
1420 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1421 pcregrep_exit(2);
1423 return invert; /* No more matching; don't show the line again */
1426 return FALSE; /* No match, no errors */
1431 /*************************************************
1432 * Grep an individual file *
1433 *************************************************/
1435 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1436 times the value of bufthird. The matching point is never allowed to stray into
1437 the top third of the buffer, thus keeping more of the file available for
1438 context printing or for multiline scanning. For large files, the pointer will
1439 be in the middle third most of the time, so the bottom third is available for
1440 "before" context printing.
1442 Arguments:
1443 handle the fopened FILE stream for a normal file
1444 the gzFile pointer when reading is via libz
1445 the BZFILE pointer when reading is via libbz2
1446 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1447 filename the file name or NULL (for errors)
1448 printname the file name if it is to be printed for each match
1449 or NULL if the file name is not to be printed
1450 it cannot be NULL if filenames[_nomatch]_only is set
1452 Returns: 0 if there was at least one match
1453 1 otherwise (no matches)
1454 2 if an overlong line is encountered
1455 3 if there is a read error on a .bz2 file
1458 static int
1459 pcregrep(void *handle, int frtype, char *filename, char *printname)
1461 int rc = 1;
1462 int linenumber = 1;
1463 int lastmatchnumber = 0;
1464 int count = 0;
1465 int filepos = 0;
1466 int offsets[OFFSET_SIZE];
1467 char *lastmatchrestart = NULL;
1468 char *ptr = main_buffer;
1469 char *endptr;
1470 size_t bufflength;
1471 BOOL binary = FALSE;
1472 BOOL endhyphenpending = FALSE;
1473 BOOL input_line_buffered = line_buffered;
1474 FILE *in = NULL; /* Ensure initialized */
1476 #ifdef SUPPORT_LIBZ
1477 gzFile ingz = NULL;
1478 #endif
1480 #ifdef SUPPORT_LIBBZ2
1481 BZFILE *inbz2 = NULL;
1482 #endif
1485 /* Do the first read into the start of the buffer and set up the pointer to end
1486 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1487 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1488 fail. */
1490 (void)frtype;
1492 #ifdef SUPPORT_LIBZ
1493 if (frtype == FR_LIBZ)
1495 ingz = (gzFile)handle;
1496 bufflength = gzread (ingz, main_buffer, bufsize);
1498 else
1499 #endif
1501 #ifdef SUPPORT_LIBBZ2
1502 if (frtype == FR_LIBBZ2)
1504 inbz2 = (BZFILE *)handle;
1505 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1506 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1507 } /* without the cast it is unsigned. */
1508 else
1509 #endif
1512 in = (FILE *)handle;
1513 if (is_file_tty(in)) input_line_buffered = TRUE;
1514 bufflength = input_line_buffered?
1515 read_one_line(main_buffer, bufsize, in) :
1516 fread(main_buffer, 1, bufsize, in);
1519 endptr = main_buffer + bufflength;
1521 /* Unless binary-files=text, see if we have a binary file. This uses the same
1522 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1523 file. */
1525 if (binary_files != BIN_TEXT)
1527 binary =
1528 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1529 if (binary && binary_files == BIN_NOMATCH) return 1;
1532 /* Loop while the current pointer is not at the end of the file. For large
1533 files, endptr will be at the end of the buffer when we are in the middle of the
1534 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1535 way, the buffer is shifted left and re-filled. */
1537 while (ptr < endptr)
1539 int endlinelength;
1540 int mrc = 0;
1541 int startoffset = 0;
1542 BOOL match;
1543 char *matchptr = ptr;
1544 char *t = ptr;
1545 size_t length, linelength;
1547 /* At this point, ptr is at the start of a line. We need to find the length
1548 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1549 length remainder of the data in the buffer. Otherwise, it is the length of
1550 the next line, excluding the terminating newline. After matching, we always
1551 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1552 option is used for compiling, so that any match is constrained to be in the
1553 first line. */
1555 t = end_of_line(t, endptr, &endlinelength);
1556 linelength = t - ptr - endlinelength;
1557 length = multiline? (size_t)(endptr - ptr) : linelength;
1559 /* Check to see if the line we are looking at extends right to the very end
1560 of the buffer without a line terminator. This means the line is too long to
1561 handle. */
1563 if (endlinelength == 0 && t == main_buffer + bufsize)
1565 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1566 "pcregrep: check the --buffer-size option\n",
1567 linenumber,
1568 (filename == NULL)? "" : " of file ",
1569 (filename == NULL)? "" : filename);
1570 return 2;
1573 /* Extra processing for Jeffrey Friedl's debugging. */
1575 #ifdef JFRIEDL_DEBUG
1576 if (jfriedl_XT || jfriedl_XR)
1578 # include <sys/time.h>
1579 # include <time.h>
1580 struct timeval start_time, end_time;
1581 struct timezone dummy;
1582 int i;
1584 if (jfriedl_XT)
1586 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1587 const char *orig = ptr;
1588 ptr = malloc(newlen + 1);
1589 if (!ptr) {
1590 printf("out of memory");
1591 pcregrep_exit(2);
1593 endptr = ptr;
1594 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1595 for (i = 0; i < jfriedl_XT; i++) {
1596 strncpy(endptr, orig, length);
1597 endptr += length;
1599 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1600 length = newlen;
1603 if (gettimeofday(&start_time, &dummy) != 0)
1604 perror("bad gettimeofday");
1607 for (i = 0; i < jfriedl_XR; i++)
1608 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1609 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1611 if (gettimeofday(&end_time, &dummy) != 0)
1612 perror("bad gettimeofday");
1614 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1616 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1618 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1619 return 0;
1621 #endif
1623 /* We come back here after a match when show_only_matching is set, in order
1624 to find any further matches in the same line. This applies to
1625 --only-matching, --file-offsets, and --line-offsets. */
1627 ONLY_MATCHING_RESTART:
1629 /* Run through all the patterns until one matches or there is an error other
1630 than NOMATCH. This code is in a subroutine so that it can be re-used for
1631 finding subsequent matches when colouring matched lines. */
1633 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1635 /* If it's a match or a not-match (as required), do what's wanted. */
1637 if (match != invert)
1639 BOOL hyphenprinted = FALSE;
1641 /* We've failed if we want a file that doesn't have any matches. */
1643 if (filenames == FN_NOMATCH_ONLY) return 1;
1645 /* Just count if just counting is wanted. */
1647 if (count_only) count++;
1649 /* When handling a binary file and binary-files==binary, the "binary"
1650 variable will be set true (it's false in all other cases). In this
1651 situation we just want to output the file name. No need to scan further. */
1653 else if (binary)
1655 fprintf(stdout, "Binary file %s matches\n", filename);
1656 return 0;
1659 /* If all we want is a file name, there is no need to scan any more lines
1660 in the file. */
1662 else if (filenames == FN_MATCH_ONLY)
1664 fprintf(stdout, "%s\n", printname);
1665 return 0;
1668 /* Likewise, if all we want is a yes/no answer. */
1670 else if (quiet) return 0;
1672 /* The --only-matching option prints just the substring that matched,
1673 and/or one or more captured portions of it, as long as these strings are
1674 not empty. The --file-offsets and --line-offsets options output offsets for
1675 the matching substring (all three set show_only_matching). None of these
1676 mutually exclusive options prints any context. Afterwards, adjust the start
1677 and then jump back to look for further matches in the same line. If we are
1678 in invert mode, however, nothing is printed and we do not restart - this
1679 could still be useful because the return code is set. */
1681 else if (show_only_matching)
1683 if (!invert)
1685 if (printname != NULL) fprintf(stdout, "%s:", printname);
1686 if (number) fprintf(stdout, "%d:", linenumber);
1688 /* Handle --line-offsets */
1690 if (line_offsets)
1691 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1692 offsets[1] - offsets[0]);
1694 /* Handle --file-offsets */
1696 else if (file_offsets)
1697 fprintf(stdout, "%d,%d\n",
1698 (int)(filepos + matchptr + offsets[0] - ptr),
1699 offsets[1] - offsets[0]);
1701 /* Handle --only-matching, which may occur many times */
1703 else
1705 BOOL printed = FALSE;
1706 omstr *om;
1708 for (om = only_matching; om != NULL; om = om->next)
1710 int n = om->groupnum;
1711 if (n < mrc)
1713 int plen = offsets[2*n + 1] - offsets[2*n];
1714 if (plen > 0)
1716 if (printed) fprintf(stdout, "%s", om_separator);
1717 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1718 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1719 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1720 printed = TRUE;
1725 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1728 /* Prepare to repeat to find the next match */
1730 match = FALSE;
1731 if (line_buffered) fflush(stdout);
1732 rc = 0; /* Had some success */
1733 startoffset = offsets[1]; /* Restart after the match */
1734 goto ONLY_MATCHING_RESTART;
1738 /* This is the default case when none of the above options is set. We print
1739 the matching lines(s), possibly preceded and/or followed by other lines of
1740 context. */
1742 else
1744 /* See if there is a requirement to print some "after" lines from a
1745 previous match. We never print any overlaps. */
1747 if (after_context > 0 && lastmatchnumber > 0)
1749 int ellength;
1750 int linecount = 0;
1751 char *p = lastmatchrestart;
1753 while (p < ptr && linecount < after_context)
1755 p = end_of_line(p, ptr, &ellength);
1756 linecount++;
1759 /* It is important to advance lastmatchrestart during this printing so
1760 that it interacts correctly with any "before" printing below. Print
1761 each line's data using fwrite() in case there are binary zeroes. */
1763 while (lastmatchrestart < p)
1765 char *pp = lastmatchrestart;
1766 if (printname != NULL) fprintf(stdout, "%s-", printname);
1767 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1768 pp = end_of_line(pp, endptr, &ellength);
1769 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1770 lastmatchrestart = pp;
1772 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1775 /* If there were non-contiguous lines printed above, insert hyphens. */
1777 if (hyphenpending)
1779 fprintf(stdout, "--\n");
1780 hyphenpending = FALSE;
1781 hyphenprinted = TRUE;
1784 /* See if there is a requirement to print some "before" lines for this
1785 match. Again, don't print overlaps. */
1787 if (before_context > 0)
1789 int linecount = 0;
1790 char *p = ptr;
1792 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1793 linecount < before_context)
1795 linecount++;
1796 p = previous_line(p, main_buffer);
1799 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1800 fprintf(stdout, "--\n");
1802 while (p < ptr)
1804 int ellength;
1805 char *pp = p;
1806 if (printname != NULL) fprintf(stdout, "%s-", printname);
1807 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1808 pp = end_of_line(pp, endptr, &ellength);
1809 FWRITE(p, 1, pp - p, stdout);
1810 p = pp;
1814 /* Now print the matching line(s); ensure we set hyphenpending at the end
1815 of the file if any context lines are being output. */
1817 if (after_context > 0 || before_context > 0)
1818 endhyphenpending = TRUE;
1820 if (printname != NULL) fprintf(stdout, "%s:", printname);
1821 if (number) fprintf(stdout, "%d:", linenumber);
1823 /* In multiline mode, we want to print to the end of the line in which
1824 the end of the matched string is found, so we adjust linelength and the
1825 line number appropriately, but only when there actually was a match
1826 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1827 the match will always be before the first newline sequence. */
1829 if (multiline & !invert)
1831 char *endmatch = ptr + offsets[1];
1832 t = ptr;
1833 while (t < endmatch)
1835 t = end_of_line(t, endptr, &endlinelength);
1836 if (t < endmatch) linenumber++; else break;
1838 linelength = t - ptr - endlinelength;
1841 /*** NOTE: Use only fwrite() to output the data line, so that binary
1842 zeroes are treated as just another data character. */
1844 /* This extra option, for Jeffrey Friedl's debugging requirements,
1845 replaces the matched string, or a specific captured string if it exists,
1846 with X. When this happens, colouring is ignored. */
1848 #ifdef JFRIEDL_DEBUG
1849 if (S_arg >= 0 && S_arg < mrc)
1851 int first = S_arg * 2;
1852 int last = first + 1;
1853 FWRITE(ptr, 1, offsets[first], stdout);
1854 fprintf(stdout, "X");
1855 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1857 else
1858 #endif
1860 /* We have to split the line(s) up if colouring, and search for further
1861 matches, but not of course if the line is a non-match. */
1863 if (do_colour && !invert)
1865 int plength;
1866 FWRITE(ptr, 1, offsets[0], stdout);
1867 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1868 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1869 fprintf(stdout, "%c[00m", 0x1b);
1870 for (;;)
1872 startoffset = offsets[1];
1873 if (startoffset >= (int)linelength + endlinelength ||
1874 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1875 break;
1876 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1877 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1878 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1879 fprintf(stdout, "%c[00m", 0x1b);
1882 /* In multiline mode, we may have already printed the complete line
1883 and its line-ending characters (if they matched the pattern), so there
1884 may be no more to print. */
1886 plength = (int)((linelength + endlinelength) - startoffset);
1887 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1890 /* Not colouring; no need to search for further matches */
1892 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1895 /* End of doing what has to be done for a match. If --line-buffered was
1896 given, flush the output. */
1898 if (line_buffered) fflush(stdout);
1899 rc = 0; /* Had some success */
1901 /* Remember where the last match happened for after_context. We remember
1902 where we are about to restart, and that line's number. */
1904 lastmatchrestart = ptr + linelength + endlinelength;
1905 lastmatchnumber = linenumber + 1;
1908 /* For a match in multiline inverted mode (which of course did not cause
1909 anything to be printed), we have to move on to the end of the match before
1910 proceeding. */
1912 if (multiline && invert && match)
1914 int ellength;
1915 char *endmatch = ptr + offsets[1];
1916 t = ptr;
1917 while (t < endmatch)
1919 t = end_of_line(t, endptr, &ellength);
1920 if (t <= endmatch) linenumber++; else break;
1922 endmatch = end_of_line(endmatch, endptr, &ellength);
1923 linelength = endmatch - ptr - ellength;
1926 /* Advance to after the newline and increment the line number. The file
1927 offset to the current line is maintained in filepos. */
1929 ptr += linelength + endlinelength;
1930 filepos += (int)(linelength + endlinelength);
1931 linenumber++;
1933 /* If input is line buffered, and the buffer is not yet full, read another
1934 line and add it into the buffer. */
1936 if (input_line_buffered && bufflength < (size_t)bufsize)
1938 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1939 bufflength += add;
1940 endptr += add;
1943 /* If we haven't yet reached the end of the file (the buffer is full), and
1944 the current point is in the top 1/3 of the buffer, slide the buffer down by
1945 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1946 about to be lost, print them. */
1948 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1950 if (after_context > 0 &&
1951 lastmatchnumber > 0 &&
1952 lastmatchrestart < main_buffer + bufthird)
1954 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1955 lastmatchnumber = 0;
1958 /* Now do the shuffle */
1960 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1961 ptr -= bufthird;
1963 #ifdef SUPPORT_LIBZ
1964 if (frtype == FR_LIBZ)
1965 bufflength = 2*bufthird +
1966 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1967 else
1968 #endif
1970 #ifdef SUPPORT_LIBBZ2
1971 if (frtype == FR_LIBBZ2)
1972 bufflength = 2*bufthird +
1973 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1974 else
1975 #endif
1977 bufflength = 2*bufthird +
1978 (input_line_buffered?
1979 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1980 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1981 endptr = main_buffer + bufflength;
1983 /* Adjust any last match point */
1985 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1987 } /* Loop through the whole file */
1989 /* End of file; print final "after" lines if wanted; do_after_lines sets
1990 hyphenpending if it prints something. */
1992 if (!show_only_matching && !count_only)
1994 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1995 hyphenpending |= endhyphenpending;
1998 /* Print the file name if we are looking for those without matches and there
1999 were none. If we found a match, we won't have got this far. */
2001 if (filenames == FN_NOMATCH_ONLY)
2003 fprintf(stdout, "%s\n", printname);
2004 return 0;
2007 /* Print the match count if wanted */
2009 if (count_only)
2011 if (count > 0 || !omit_zero_count)
2013 if (printname != NULL && filenames != FN_NONE)
2014 fprintf(stdout, "%s:", printname);
2015 fprintf(stdout, "%d\n", count);
2019 return rc;
2024 /*************************************************
2025 * Grep a file or recurse into a directory *
2026 *************************************************/
2028 /* Given a path name, if it's a directory, scan all the files if we are
2029 recursing; if it's a file, grep it.
2031 Arguments:
2032 pathname the path to investigate
2033 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2034 only_one_at_top TRUE if the path is the only one at toplevel
2036 Returns: -1 the file/directory was skipped
2037 0 if there was at least one match
2038 1 if there were no matches
2039 2 there was some kind of error
2041 However, file opening failures are suppressed if "silent" is set.
2044 static int
2045 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2047 int rc = 1;
2048 int frtype;
2049 void *handle;
2050 char *lastcomp;
2051 FILE *in = NULL; /* Ensure initialized */
2053 #ifdef SUPPORT_LIBZ
2054 gzFile ingz = NULL;
2055 #endif
2057 #ifdef SUPPORT_LIBBZ2
2058 BZFILE *inbz2 = NULL;
2059 #endif
2061 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2062 int pathlen;
2063 #endif
2065 /* If the file name is "-" we scan stdin */
2067 if (strcmp(pathname, "-") == 0)
2069 return pcregrep(stdin, FR_PLAIN, stdin_name,
2070 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2071 stdin_name : NULL);
2074 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2075 directories, whereas --include and --exclude apply to everything else. The test
2076 is against the final component of the path. */
2078 lastcomp = strrchr(pathname, FILESEP);
2079 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2081 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2082 Otherwise, scan the directory and recurse for each path within it. The scanning
2083 code is localized so it can be made system-specific. */
2085 if (isdirectory(pathname))
2087 if (dee_action == dee_SKIP ||
2088 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2089 return -1;
2091 if (dee_action == dee_RECURSE)
2093 char buffer[1024];
2094 char *nextfile;
2095 directory_type *dir = opendirectory(pathname);
2097 if (dir == NULL)
2099 if (!silent)
2100 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2101 strerror(errno));
2102 return 2;
2105 while ((nextfile = readdirectory(dir)) != NULL)
2107 int frc;
2108 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2109 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2110 if (frc > 1) rc = frc;
2111 else if (frc == 0 && rc == 1) rc = 0;
2114 closedirectory(dir);
2115 return rc;
2119 /* If the file is not a directory and not a regular file, skip it if that's
2120 been requested. Otherwise, check for explicit include/exclude. */
2122 else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2123 !test_incexc(lastcomp, include_patterns, exclude_patterns))
2124 return -1;
2126 /* Control reaches here if we have a regular file, or if we have a directory
2127 and recursion or skipping was not requested, or if we have anything else and
2128 skipping was not requested. The scan proceeds. If this is the first and only
2129 argument at top level, we don't show the file name, unless we are only showing
2130 the file name, or the filename was forced (-H). */
2132 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2133 pathlen = (int)(strlen(pathname));
2134 #endif
2136 /* Open using zlib if it is supported and the file name ends with .gz. */
2138 #ifdef SUPPORT_LIBZ
2139 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2141 ingz = gzopen(pathname, "rb");
2142 if (ingz == NULL)
2144 if (!silent)
2145 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2146 strerror(errno));
2147 return 2;
2149 handle = (void *)ingz;
2150 frtype = FR_LIBZ;
2152 else
2153 #endif
2155 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2157 #ifdef SUPPORT_LIBBZ2
2158 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2160 inbz2 = BZ2_bzopen(pathname, "rb");
2161 handle = (void *)inbz2;
2162 frtype = FR_LIBBZ2;
2164 else
2165 #endif
2167 /* Otherwise use plain fopen(). The label is so that we can come back here if
2168 an attempt to read a .bz2 file indicates that it really is a plain file. */
2170 #ifdef SUPPORT_LIBBZ2
2171 PLAIN_FILE:
2172 #endif
2174 in = fopen(pathname, "rb");
2175 handle = (void *)in;
2176 frtype = FR_PLAIN;
2179 /* All the opening methods return errno when they fail. */
2181 if (handle == NULL)
2183 if (!silent)
2184 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2185 strerror(errno));
2186 return 2;
2189 /* Now grep the file */
2191 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2192 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2194 /* Close in an appropriate manner. */
2196 #ifdef SUPPORT_LIBZ
2197 if (frtype == FR_LIBZ)
2198 gzclose(ingz);
2199 else
2200 #endif
2202 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2203 read failed. If the error indicates that the file isn't in fact bzipped, try
2204 again as a normal file. */
2206 #ifdef SUPPORT_LIBBZ2
2207 if (frtype == FR_LIBBZ2)
2209 if (rc == 3)
2211 int errnum;
2212 const char *err = BZ2_bzerror(inbz2, &errnum);
2213 if (errnum == BZ_DATA_ERROR_MAGIC)
2215 BZ2_bzclose(inbz2);
2216 goto PLAIN_FILE;
2218 else if (!silent)
2219 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2220 pathname, err);
2221 rc = 2; /* The normal "something went wrong" code */
2223 BZ2_bzclose(inbz2);
2225 else
2226 #endif
2228 /* Normal file close */
2230 fclose(in);
2232 /* Pass back the yield from pcregrep(). */
2234 return rc;
2239 /*************************************************
2240 * Handle a single-letter, no data option *
2241 *************************************************/
2243 static int
2244 handle_option(int letter, int options)
2246 switch(letter)
2248 case N_FOFFSETS: file_offsets = TRUE; break;
2249 case N_HELP: help(); pcregrep_exit(0);
2250 case N_LBUFFER: line_buffered = TRUE; break;
2251 case N_LOFFSETS: line_offsets = number = TRUE; break;
2252 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2253 case 'a': binary_files = BIN_TEXT; break;
2254 case 'c': count_only = TRUE; break;
2255 case 'F': process_options |= PO_FIXED_STRINGS; break;
2256 case 'H': filenames = FN_FORCE; break;
2257 case 'I': binary_files = BIN_NOMATCH; break;
2258 case 'h': filenames = FN_NONE; break;
2259 case 'i': options |= PCRE_CASELESS; break;
2260 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2261 case 'L': filenames = FN_NOMATCH_ONLY; break;
2262 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2263 case 'n': number = TRUE; break;
2265 case 'o':
2266 only_matching_last = add_number(0, only_matching_last);
2267 if (only_matching == NULL) only_matching = only_matching_last;
2268 break;
2270 case 'q': quiet = TRUE; break;
2271 case 'r': dee_action = dee_RECURSE; break;
2272 case 's': silent = TRUE; break;
2273 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2274 case 'v': invert = TRUE; break;
2275 case 'w': process_options |= PO_WORD_MATCH; break;
2276 case 'x': process_options |= PO_LINE_MATCH; break;
2278 case 'V':
2279 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2280 pcregrep_exit(0);
2281 break;
2283 default:
2284 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2285 pcregrep_exit(usage(2));
2288 return options;
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc.

The suffix is chosen from the last digit, except that numbers whose last two
digits are 11, 12, or 13 always take "th". Negative numbers also get "th".

Argument:   n   the number to format
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
  /* Large enough for the longest decimal int (sign included), the two-letter
  suffix, and the terminating zero, whatever the width of int. */

  static char buffer[3 * sizeof(int) + 4];
  const char *ending = "th";
  int last_two = n % 100;

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1: ending = "st"; break;
      case 2: ending = "nd"; break;
      case 3: ending = "rd"; break;
      default: break;
    }
  }

  sprintf(buffer, "%d%s", n, ending);
  return buffer;
}
2319 /*************************************************
2320 * Compile a single pattern *
2321 *************************************************/
2323 /* Do nothing if the pattern has already been compiled. This is the case for
2324 include/exclude patterns read from a file.
2326 When the -F option has been used, each "pattern" may be a list of strings,
2327 separated by line breaks. They will be matched literally. We split such a
2328 string and compile the first substring, inserting an additional block into the
2329 pattern chain.
2331 Arguments:
2332 p points to the pattern block
2333 options the PCRE options
2334 popts the processing options
2335 fromfile TRUE if the pattern was read from a file
2336 fromtext file name or identifying text (e.g. "include")
2337 count 0 if this is the only command line pattern, or
2338 number of the command line pattern, or
2339 linenumber for a pattern from a file
2341 Returns: TRUE on success, FALSE after an error
2344 static BOOL
2345 compile_pattern(patstr *p, int options, int popts, int fromfile,
2346 const char *fromtext, int count)
2348 char buffer[PATBUFSIZE];
2349 const char *error;
2350 char *ps = p->string;
2351 int patlen = strlen(ps);
2352 int errptr;
2354 if (p->compiled != NULL) return TRUE;
2356 if ((popts & PO_FIXED_STRINGS) != 0)
2358 int ellength;
2359 char *eop = ps + patlen;
2360 char *pe = end_of_line(ps, eop, &ellength);
2362 if (ellength != 0)
2364 if (add_pattern(pe, p) == NULL) return FALSE;
2365 patlen = (int)(pe - ps - ellength);
2369 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2370 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2371 if (p->compiled != NULL) return TRUE;
2373 /* Handle compile errors */
2375 errptr -= (int)strlen(prefix[popts]);
2376 if (errptr > patlen) errptr = patlen;
2378 if (fromfile)
2380 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2381 "at offset %d: %s\n", count, fromtext, errptr, error);
2383 else
2385 if (count == 0)
2386 fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2387 fromtext, errptr, error);
2388 else
2389 fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2390 ordin(count), fromtext, errptr, error);
2393 return FALSE;
2398 /*************************************************
2399 * Read and compile a file of patterns *
2400 *************************************************/
2402 /* This is used for --filelist, --include-from, and --exclude-from.
2404 Arguments:
2405 name the name of the file; "-" is stdin
2406 patptr pointer to the pattern chain anchor
2407 patlastptr pointer to the last pattern pointer
2408 popts the process options to pass to pattern_compile()
2410 Returns: TRUE if all went well
2413 static BOOL
2414 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2416 int linenumber = 0;
2417 FILE *f;
2418 char *filename;
2419 char buffer[PATBUFSIZE];
2421 if (strcmp(name, "-") == 0)
2423 f = stdin;
2424 filename = stdin_name;
2426 else
2428 f = fopen(name, "r");
2429 if (f == NULL)
2431 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2432 return FALSE;
2434 filename = name;
2437 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2439 char *s = buffer + (int)strlen(buffer);
2440 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2441 *s = 0;
2442 linenumber++;
2443 if (buffer[0] == 0) continue; /* Skip blank lines */
2445 /* Note: this call to add_pattern() puts a pointer to the local variable
2446 "buffer" into the pattern chain. However, that pointer is used only when
2447 compiling the pattern, which happens immediately below, so we flatten it
2448 afterwards, as a precaution against any later code trying to use it. */
2450 *patlastptr = add_pattern(buffer, *patlastptr);
2451 if (*patlastptr == NULL) return FALSE;
2452 if (*patptr == NULL) *patptr = *patlastptr;
2454 /* This loop is needed because compiling a "pattern" when -F is set may add
2455 on additional literal patterns if the original contains a newline. In the
2456 common case, it never will, because fgets() stops at a newline. However,
2457 the -N option can be used to give pcregrep a different newline setting. */
2459 for(;;)
2461 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2462 linenumber))
2463 return FALSE;
2464 (*patlastptr)->string = NULL; /* Insurance */
2465 if ((*patlastptr)->next == NULL) break;
2466 *patlastptr = (*patlastptr)->next;
2470 if (f != stdin) fclose(f);
2471 return TRUE;
2476 /*************************************************
2477 * Main program *
2478 *************************************************/
2480 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2483 main(int argc, char **argv)
2485 int i, j;
2486 int rc = 1;
2487 BOOL only_one_at_top;
2488 patstr *cp;
2489 fnstr *fn;
2490 const char *locale_from = "--locale";
2491 const char *error;
2493 #ifdef SUPPORT_PCREGREP_JIT
2494 pcre_jit_stack *jit_stack = NULL;
2495 #endif
2497 /* Set the default line ending value from the default in the PCRE library;
2498 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2499 Note that the return values from pcre_config(), though derived from the ASCII
2500 codes, are the same in EBCDIC environments, so we must use the actual values
2501 rather than escapes such as as '\r'. */
2503 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2504 switch(i)
2506 default: newline = (char *)"lf"; break;
2507 case 13: newline = (char *)"cr"; break;
2508 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2509 case -1: newline = (char *)"any"; break;
2510 case -2: newline = (char *)"anycrlf"; break;
2513 /* Process the options */
2515 for (i = 1; i < argc; i++)
2517 option_item *op = NULL;
2518 char *option_data = (char *)""; /* default to keep compiler happy */
2519 BOOL longop;
2520 BOOL longopwasequals = FALSE;
2522 if (argv[i][0] != '-') break;
2524 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2525 but only if we have previously had -e or -f to define the patterns. */
2527 if (argv[i][1] == 0)
2529 if (pattern_files != NULL || patterns != NULL) break;
2530 else pcregrep_exit(usage(2));
2533 /* Handle a long name option, or -- to terminate the options */
2535 if (argv[i][1] == '-')
2537 char *arg = argv[i] + 2;
2538 char *argequals = strchr(arg, '=');
2540 if (*arg == 0) /* -- terminates options */
2542 i++;
2543 break; /* out of the options-handling loop */
2546 longop = TRUE;
2548 /* Some long options have data that follows after =, for example file=name.
2549 Some options have variations in the long name spelling: specifically, we
2550 allow "regexp" because GNU grep allows it, though I personally go along
2551 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2552 These options are entered in the table as "regex(p)". Options can be in
2553 both these categories. */
2555 for (op = optionlist; op->one_char != 0; op++)
2557 char *opbra = strchr(op->long_name, '(');
2558 char *equals = strchr(op->long_name, '=');
2560 /* Handle options with only one spelling of the name */
2562 if (opbra == NULL) /* Does not contain '(' */
2564 if (equals == NULL) /* Not thing=data case */
2566 if (strcmp(arg, op->long_name) == 0) break;
2568 else /* Special case xxx=data */
2570 int oplen = (int)(equals - op->long_name);
2571 int arglen = (argequals == NULL)?
2572 (int)strlen(arg) : (int)(argequals - arg);
2573 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2575 option_data = arg + arglen;
2576 if (*option_data == '=')
2578 option_data++;
2579 longopwasequals = TRUE;
2581 break;
2586 /* Handle options with an alternate spelling of the name */
2588 else
2590 char buff1[24];
2591 char buff2[24];
2593 int baselen = (int)(opbra - op->long_name);
2594 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2595 int arglen = (argequals == NULL || equals == NULL)?
2596 (int)strlen(arg) : (int)(argequals - arg);
2598 sprintf(buff1, "%.*s", baselen, op->long_name);
2599 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2601 if (strncmp(arg, buff1, arglen) == 0 ||
2602 strncmp(arg, buff2, arglen) == 0)
2604 if (equals != NULL && argequals != NULL)
2606 option_data = argequals;
2607 if (*option_data == '=')
2609 option_data++;
2610 longopwasequals = TRUE;
2613 break;
2618 if (op->one_char == 0)
2620 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2621 pcregrep_exit(usage(2));
2625 /* Jeffrey Friedl's debugging harness uses these additional options which
2626 are not in the right form for putting in the option table because they use
2627 only one hyphen, yet are more than one character long. By putting them
2628 separately here, they will not get displayed as part of the help() output,
2629 but I don't think Jeffrey will care about that. */
2631 #ifdef JFRIEDL_DEBUG
2632 else if (strcmp(argv[i], "-pre") == 0) {
2633 jfriedl_prefix = argv[++i];
2634 continue;
2635 } else if (strcmp(argv[i], "-post") == 0) {
2636 jfriedl_postfix = argv[++i];
2637 continue;
2638 } else if (strcmp(argv[i], "-XT") == 0) {
2639 sscanf(argv[++i], "%d", &jfriedl_XT);
2640 continue;
2641 } else if (strcmp(argv[i], "-XR") == 0) {
2642 sscanf(argv[++i], "%d", &jfriedl_XR);
2643 continue;
2645 #endif
2648 /* One-char options; many that have no data may be in a single argument; we
2649 continue till we hit the last one or one that needs data. */
2651 else
2653 char *s = argv[i] + 1;
2654 longop = FALSE;
2656 while (*s != 0)
2658 for (op = optionlist; op->one_char != 0; op++)
2660 if (*s == op->one_char) break;
2662 if (op->one_char == 0)
2664 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2665 *s, argv[i]);
2666 pcregrep_exit(usage(2));
2669 option_data = s+1;
2671 /* Break out if this is the last character in the string; it's handled
2672 below like a single multi-char option. */
2674 if (*option_data == 0) break;
2676 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2677 are used for ones that either have a numerical number or defaults, i.e.
2678 the data is optional. If a digit follows, there is data; if not, carry on
2679 with other single-character options in the same string. */
2681 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2683 if (isdigit((unsigned char)s[1])) break;
2685 else /* Check for an option with data */
2687 if (op->type != OP_NODATA) break;
2690 /* Handle a single-character option with no data, then loop for the
2691 next character in the string. */
2693 pcre_options = handle_option(*s++, pcre_options);
2697 /* At this point we should have op pointing to a matched option. If the type
2698 is NO_DATA, it means that there is no data, and the option might set
2699 something in the PCRE options. */
2701 if (op->type == OP_NODATA)
2703 pcre_options = handle_option(op->one_char, pcre_options);
2704 continue;
2707 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2708 either has a value or defaults to something. It cannot have data in a
2709 separate item. At the moment, the only such options are "colo(u)r",
2710 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2712 if (*option_data == 0 &&
2713 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2714 op->type == OP_OP_NUMBERS))
2716 switch (op->one_char)
2718 case N_COLOUR:
2719 colour_option = (char *)"auto";
2720 break;
2722 case 'o':
2723 only_matching_last = add_number(0, only_matching_last);
2724 if (only_matching == NULL) only_matching = only_matching_last;
2725 break;
2727 #ifdef JFRIEDL_DEBUG
2728 case 'S':
2729 S_arg = 0;
2730 break;
2731 #endif
2733 continue;
2736 /* Otherwise, find the data string for the option. */
2738 if (*option_data == 0)
2740 if (i >= argc - 1 || longopwasequals)
2742 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2743 pcregrep_exit(usage(2));
2745 option_data = argv[++i];
2748 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2749 added to a chain of numbers. */
2751 if (op->type == OP_OP_NUMBERS)
2753 unsigned long int n = decode_number(option_data, op, longop);
2754 omdatastr *omd = (omdatastr *)op->dataptr;
2755 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2756 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2759 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2760 include/exclude options, which can be called multiple times to create lists
2761 of patterns. */
2763 else if (op->type == OP_PATLIST)
2765 patdatastr *pd = (patdatastr *)op->dataptr;
2766 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2767 if (*(pd->lastptr) == NULL) goto EXIT2;
2768 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2771 /* If the option type is OP_FILELIST, it's one of the options that names a
2772 file. */
2774 else if (op->type == OP_FILELIST)
2776 fndatastr *fd = (fndatastr *)op->dataptr;
2777 fn = (fnstr *)malloc(sizeof(fnstr));
2778 if (fn == NULL)
2780 fprintf(stderr, "pcregrep: malloc failed\n");
2781 goto EXIT2;
2783 fn->next = NULL;
2784 fn->name = option_data;
2785 if (*(fd->anchor) == NULL)
2786 *(fd->anchor) = fn;
2787 else
2788 (*(fd->lastptr))->next = fn;
2789 *(fd->lastptr) = fn;
2792 /* Handle OP_BINARY_FILES */
2794 else if (op->type == OP_BINFILES)
2796 if (strcmp(option_data, "binary") == 0)
2797 binary_files = BIN_BINARY;
2798 else if (strcmp(option_data, "without-match") == 0)
2799 binary_files = BIN_NOMATCH;
2800 else if (strcmp(option_data, "text") == 0)
2801 binary_files = BIN_TEXT;
2802 else
2804 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2805 option_data);
2806 pcregrep_exit(usage(2));
2810 /* Otherwise, deal with a single string or numeric data value. */
2812 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2813 op->type != OP_OP_NUMBER)
2815 *((char **)op->dataptr) = option_data;
2817 else
2819 unsigned long int n = decode_number(option_data, op, longop);
2820 if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2821 else *((int *)op->dataptr) = n;
2825 /* Options have been decoded. If -C was used, its value is used as a default
2826 for -A and -B. */
2828 if (both_context > 0)
2830 if (after_context == 0) after_context = both_context;
2831 if (before_context == 0) before_context = both_context;
2834 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2835 However, all three set show_only_matching because they display, each in their
2836 own way, only the data that has matched. */
2838 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2839 (file_offsets && line_offsets))
2841 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2842 "and/or --line-offsets\n");
2843 pcregrep_exit(usage(2));
2846 if (only_matching != NULL || file_offsets || line_offsets)
2847 show_only_matching = TRUE;
2849 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2850 LC_ALL environment variable is set, and if so, use it. */
2852 if (locale == NULL)
2854 locale = getenv("LC_ALL");
2855 locale_from = "LCC_ALL";
2858 if (locale == NULL)
2860 locale = getenv("LC_CTYPE");
2861 locale_from = "LC_CTYPE";
2864 /* If a locale has been provided, set it, and generate the tables the PCRE
2865 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2867 if (locale != NULL)
2869 if (setlocale(LC_CTYPE, locale) == NULL)
2871 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2872 locale, locale_from);
2873 return 2;
2875 pcretables = pcre_maketables();
2878 /* Sort out colouring */
2880 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2882 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2883 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2884 else
2886 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2887 colour_option);
2888 return 2;
2890 if (do_colour)
2892 char *cs = getenv("PCREGREP_COLOUR");
2893 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2894 if (cs != NULL) colour_string = cs;
2898 /* Interpret the newline type; the default settings are Unix-like. */
2900 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2902 pcre_options |= PCRE_NEWLINE_CR;
2903 endlinetype = EL_CR;
2905 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2907 pcre_options |= PCRE_NEWLINE_LF;
2908 endlinetype = EL_LF;
2910 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2912 pcre_options |= PCRE_NEWLINE_CRLF;
2913 endlinetype = EL_CRLF;
2915 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2917 pcre_options |= PCRE_NEWLINE_ANY;
2918 endlinetype = EL_ANY;
2920 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2922 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2923 endlinetype = EL_ANYCRLF;
2925 else
2927 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2928 return 2;
2931 /* Interpret the text values for -d and -D */
2933 if (dee_option != NULL)
2935 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2936 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2937 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2938 else
2940 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2941 return 2;
2945 if (DEE_option != NULL)
2947 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2948 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2949 else
2951 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2952 return 2;
2956 /* Check the values for Jeffrey Friedl's debugging options. */
2958 #ifdef JFRIEDL_DEBUG
2959 if (S_arg > 9)
2961 fprintf(stderr, "pcregrep: bad value for -S option\n");
2962 return 2;
2964 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2966 if (jfriedl_XT == 0) jfriedl_XT = 1;
2967 if (jfriedl_XR == 0) jfriedl_XR = 1;
2969 #endif
2971 /* Get memory for the main buffer. */
2973 bufsize = 3*bufthird;
2974 main_buffer = (char *)malloc(bufsize);
2976 if (main_buffer == NULL)
2978 fprintf(stderr, "pcregrep: malloc failed\n");
2979 goto EXIT2;
2982 /* If no patterns were provided by -e, and there are no files provided by -f,
2983 the first argument is the one and only pattern, and it must exist. */
2985 if (patterns == NULL && pattern_files == NULL)
2987 if (i >= argc) return usage(2);
2988 patterns = patterns_last = add_pattern(argv[i++], NULL);
2989 if (patterns == NULL) goto EXIT2;
2992 /* Compile the patterns that were provided on the command line, either by
2993 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2994 after all the command-line options are read so that we know which PCRE options
2995 to use. When -F is used, compile_pattern() may add another block into the
2996 chain, so we must not access the next pointer till after the compile. */
2998 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3000 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3001 (j == 1 && patterns->next == NULL)? 0 : j))
3002 goto EXIT2;
3005 /* Read and compile the regular expressions that are provided in files. */
3007 for (fn = pattern_files; fn != NULL; fn = fn->next)
3009 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3010 goto EXIT2;
3013 /* Study the regular expressions, as we will be running them many times. If an
3014 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3015 returned, even if studying produces no data. */
3017 if (match_limit > 0 || match_limit_recursion > 0)
3018 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3020 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3022 #ifdef SUPPORT_PCREGREP_JIT
3023 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3024 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3025 #endif
3027 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3029 cp->hint = pcre_study(cp->compiled, study_options, &error);
3030 if (error != NULL)
3032 char s[16];
3033 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3034 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3035 goto EXIT2;
3037 #ifdef SUPPORT_PCREGREP_JIT
3038 if (jit_stack != NULL && cp->hint != NULL)
3039 pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3040 #endif
3043 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3044 pcre_extra block for each pattern. There will always be an extra block because
3045 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3047 for (cp = patterns; cp != NULL; cp = cp->next)
3049 if (match_limit > 0)
3051 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3052 cp->hint->match_limit = match_limit;
3055 if (match_limit_recursion > 0)
3057 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3058 cp->hint->match_limit_recursion = match_limit_recursion;
3062 /* If there are include or exclude patterns read from the command line, compile
3063 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3064 0. */
3066 for (j = 0; j < 4; j++)
3068 int k;
3069 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3071 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3072 (k == 1 && cp->next == NULL)? 0 : k))
3073 goto EXIT2;
3077 /* Read and compile include/exclude patterns from files. */
3079 for (fn = include_from; fn != NULL; fn = fn->next)
3081 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3082 goto EXIT2;
3085 for (fn = exclude_from; fn != NULL; fn = fn->next)
3087 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3088 goto EXIT2;
3091 /* If there are no files that contain lists of files to search, and there are
3092 no file arguments, search stdin, and then exit. */
3094 if (file_lists == NULL && i >= argc)
3096 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3097 (filenames > FN_DEFAULT)? stdin_name : NULL);
3098 goto EXIT;
3101 /* If any files that contains a list of files to search have been specified,
3102 read them line by line and search the given files. */
3104 for (fn = file_lists; fn != NULL; fn = fn->next)
3106 char buffer[PATBUFSIZE];
3107 FILE *fl;
3108 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3110 fl = fopen(fn->name, "rb");
3111 if (fl == NULL)
3113 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3114 strerror(errno));
3115 goto EXIT2;
3118 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3120 int frc;
3121 char *end = buffer + (int)strlen(buffer);
3122 while (end > buffer && isspace(end[-1])) end--;
3123 *end = 0;
3124 if (*buffer != 0)
3126 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3127 if (frc > 1) rc = frc;
3128 else if (frc == 0 && rc == 1) rc = 0;
3131 if (fl != stdin) fclose(fl);
3134 /* After handling file-list, work through remaining arguments. Pass in the fact
3135 that there is only one argument at top level - this suppresses the file name if
3136 the argument is not a directory and filenames are not otherwise forced. */
3138 only_one_at_top = i == argc - 1 && file_lists == NULL;
3140 for (; i < argc; i++)
3142 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3143 only_one_at_top);
3144 if (frc > 1) rc = frc;
3145 else if (frc == 0 && rc == 1) rc = 0;
3148 EXIT:
3149 #ifdef SUPPORT_PCREGREP_JIT
3150 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3151 #endif
3153 if (main_buffer != NULL) free(main_buffer);
3155 free_pattern_chain(patterns);
3156 free_pattern_chain(include_patterns);
3157 free_pattern_chain(include_dir_patterns);
3158 free_pattern_chain(exclude_patterns);
3159 free_pattern_chain(exclude_dir_patterns);
3161 free_file_chain(exclude_from);
3162 free_file_chain(include_from);
3163 free_file_chain(pattern_files);
3164 free_file_chain(file_lists);
3166 while (only_matching != NULL)
3168 omstr *this = only_matching;
3169 only_matching = this->next;
3170 free(this);
3173 pcregrep_exit(rc);
3175 EXIT2:
3176 rc = 2;
3177 goto EXIT;
3180 /* End of pcregrep */