csh: Stop ignoring -Wformat warnings.
[dragonfly.git] / contrib / grep / src / pcresearch.c
blob820dd0096b3161f77b5c546aa5508c4ea7ed605c
1 /* pcresearch.c - searching subroutines using PCRE for grep.
2 Copyright 2000, 2007, 2009-2014 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
19 /* Written August 1992 by Mike Haertel. */
21 #include <config.h>
22 #include "search.h"
23 #if HAVE_PCRE_H
24 # include <pcre.h>
25 #elif HAVE_PCRE_PCRE_H
26 # include <pcre/pcre.h>
27 #endif
#if HAVE_LIBPCRE

/* The pattern in PCRE's compiled form; assigned by Pcompile and
   consumed by Pexecute.  */
static pcre *cre;

/* Result of studying the compiled pattern (pcre_study output).  */
static pcre_extra *extra;

# ifndef PCRE_STUDY_JIT_COMPILE
/* This PCRE does not offer the JIT study option: define the flag as 0
   so it can still be passed to pcre_study and tested with #if.  */
#  define PCRE_STUDY_JIT_COMPILE 0
# else
/* Stack handed to JIT-compiled matching code.  */
static pcre_jit_stack *jit_stack;
# endif

#endif
43 void
44 Pcompile (char const *pattern, size_t size)
46 #if !HAVE_LIBPCRE
47 error (EXIT_TROUBLE, 0, "%s",
48 _("support for the -P option is not compiled into "
49 "this --disable-perl-regexp binary"));
50 #else
51 int e;
52 char const *ep;
53 char *re = xnmalloc (4, size + 7);
54 int flags = (PCRE_MULTILINE
55 | (match_icase ? PCRE_CASELESS : 0)
56 | (using_utf8 () ? PCRE_UTF8 : 0));
57 char const *patlim = pattern + size;
58 char *n = re;
59 char const *p;
60 char const *pnul;
62 /* FIXME: Remove these restrictions. */
63 if (memchr (pattern, '\n', size))
64 error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
66 *n = '\0';
67 if (match_lines)
68 strcpy (n, "^(?:");
69 if (match_words)
70 strcpy (n, "(?<!\\w)(?:");
71 n += strlen (n);
73 /* The PCRE interface doesn't allow NUL bytes in the pattern, so
74 replace each NUL byte in the pattern with the four characters
75 "\000", removing a preceding backslash if there are an odd
76 number of backslashes before the NUL.
78 FIXME: This method does not work with some multibyte character
79 encodings, notably Shift-JIS, where a multibyte character can end
80 in a backslash byte. */
81 for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
83 memcpy (n, p, pnul - p);
84 n += pnul - p;
85 for (p = pnul; pattern < p && p[-1] == '\\'; p--)
86 continue;
87 n -= (pnul - p) & 1;
88 strcpy (n, "\\000");
89 n += 4;
92 memcpy (n, p, patlim - p);
93 n += patlim - p;
94 *n = '\0';
95 if (match_words)
96 strcpy (n, ")(?!\\w)");
97 if (match_lines)
98 strcpy (n, ")$");
100 cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
101 if (!cre)
102 error (EXIT_TROUBLE, 0, "%s", ep);
104 extra = pcre_study (cre, PCRE_STUDY_JIT_COMPILE, &ep);
105 if (ep)
106 error (EXIT_TROUBLE, 0, "%s", ep);
108 # if PCRE_STUDY_JIT_COMPILE
109 if (pcre_fullinfo (cre, extra, PCRE_INFO_JIT, &e))
110 error (EXIT_TROUBLE, 0, _("internal error (should never happen)"));
112 if (e)
114 /* A 32K stack is allocated for the machine code by default, which
115 can grow to 512K if necessary. Since JIT uses far less memory
116 than the interpreter, this should be enough in practice. */
117 jit_stack = pcre_jit_stack_alloc (32 * 1024, 512 * 1024);
118 if (!jit_stack)
119 error (EXIT_TROUBLE, 0,
120 _("failed to allocate memory for the PCRE JIT stack"));
121 pcre_assign_jit_stack (extra, NULL, jit_stack);
123 # endif
124 free (re);
125 #endif /* HAVE_LIBPCRE */
128 size_t
129 Pexecute (char const *buf, size_t size, size_t *match_size,
130 char const *start_ptr)
132 #if !HAVE_LIBPCRE
133 /* We can't get here, because Pcompile would have been called earlier. */
134 error (EXIT_TROUBLE, 0, _("internal error"));
135 return -1;
136 #else
137 /* This array must have at least two elements; everything after that
138 is just for performance improvement in pcre_exec. */
139 int sub[300];
141 const char *line_buf, *line_end, *line_next;
142 int e = PCRE_ERROR_NOMATCH;
143 ptrdiff_t start_ofs = start_ptr ? start_ptr - buf : 0;
145 /* PCRE can't limit the matching to single lines, therefore we have to
146 match each line in the buffer separately. */
147 for (line_next = buf;
148 e == PCRE_ERROR_NOMATCH && line_next < buf + size;
149 start_ofs -= line_next - line_buf)
151 line_buf = line_next;
152 line_end = memchr (line_buf, eolbyte, (buf + size) - line_buf);
153 if (line_end == NULL)
154 line_next = line_end = buf + size;
155 else
156 line_next = line_end + 1;
158 if (start_ptr && start_ptr >= line_end)
159 continue;
161 if (INT_MAX < line_end - line_buf)
162 error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
164 e = pcre_exec (cre, extra, line_buf, line_end - line_buf,
165 start_ofs < 0 ? 0 : start_ofs, 0,
166 sub, sizeof sub / sizeof *sub);
169 if (e <= 0)
171 switch (e)
173 case PCRE_ERROR_NOMATCH:
174 return -1;
176 case PCRE_ERROR_NOMEMORY:
177 error (EXIT_TROUBLE, 0, _("memory exhausted"));
179 case PCRE_ERROR_MATCHLIMIT:
180 error (EXIT_TROUBLE, 0,
181 _("exceeded PCRE's backtracking limit"));
183 case PCRE_ERROR_BADUTF8:
184 error (EXIT_TROUBLE, 0,
185 _("invalid UTF-8 byte sequence in input"));
187 default:
188 /* For now, we lump all remaining PCRE failures into this basket.
189 If anyone cares to provide sample grep usage that can trigger
190 particular PCRE errors, we can add to the list (above) of more
191 detailed diagnostics. */
192 error (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e);
195 /* NOTREACHED */
196 return -1;
198 else
200 /* Narrow down to the line we've found. */
201 char const *beg = line_buf + sub[0];
202 char const *end = line_buf + sub[1];
203 char const *buflim = buf + size;
204 char eol = eolbyte;
205 if (!start_ptr)
207 /* FIXME: The case when '\n' is not found indicates a bug:
208 Since grep is line oriented, the match should never contain
209 a newline, so there _must_ be a newline following.
211 if (!(end = memchr (end, eol, buflim - end)))
212 end = buflim;
213 else
214 end++;
215 while (buf < beg && beg[-1] != eol)
216 --beg;
219 *match_size = end - beg;
220 return beg - buf;
222 #endif