exp2l: Work around a NetBSD 10.0/i386 bug.
[gnulib.git] / lib / regex-quote.c
blob3ed239ebbf665923b2b8ea47684efd7b9dc600b1
1 /* Construct a regular expression from a literal string.
2 Copyright (C) 1995, 2010-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2010.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 #include <config.h>
20 /* Specification. */
21 #include "regex-quote.h"
23 #include <string.h>
25 #if GNULIB_MCEL_PREFER
26 # include "mcel.h"
27 #else
28 # include "mbuiter.h"
29 #endif
31 #include "xalloc.h"
/* Characters that are special in a BRE: '$', '^', '.', '*', '[' and the
   backslash.  This must stay an array (not a pointer), because
   sizeof (bre_special) is used to obtain its length.  */
static char const bre_special[] = "$^.*[\\";

/* Characters that are special in an ERE: the BRE characters, followed by
   '+', '?', '{', '(', ')' and '|'.  */
static char const ere_special[] = "$^.*[\\+?{()|";
39 struct regex_quote_spec
40 regex_quote_spec_posix (int cflags, bool anchored)
42 struct regex_quote_spec result;
44 strcpy (result.special, cflags != 0 ? ere_special : bre_special);
45 result.multibyte = true;
46 result.anchored = anchored;
48 return result;
/* Syntax bit values, as defined in GNU <regex.h>.  They are repeated here
   instead of including that header, because otherwise this module would
   need to depend on gnulib module 'regex'.
   regex_quote_spec_gnu tests these bits to decide which characters it adds
   to the list of special characters.  */
#define RE_BK_PLUS_QM    0x00000002
#define RE_INTERVALS     0x00000200
#define RE_LIMITED_OPS   0x00000400
#define RE_NEWLINE_ALT   0x00000800
#define RE_NO_BK_BRACES  0x00001000
#define RE_NO_BK_PARENS  0x00002000
#define RE_NO_BK_VBAR    0x00008000
61 struct regex_quote_spec
62 regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax, bool anchored)
64 struct regex_quote_spec result;
65 char *p;
67 p = result.special;
68 memcpy (p, bre_special, sizeof (bre_special) - 1);
69 p += sizeof (bre_special) - 1;
70 if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_BK_PLUS_QM) == 0)
72 *p++ = '+';
73 *p++ = '?';
75 if ((syntax & RE_INTERVALS) != 0 && (syntax & RE_NO_BK_BRACES) != 0)
77 *p++ = '{';
78 *p++ = '}';
80 if ((syntax & RE_NO_BK_PARENS) != 0)
82 *p++ = '(';
83 *p++ = ')';
85 if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_NO_BK_VBAR) != 0)
86 *p++ = '|';
87 if ((syntax & RE_NEWLINE_ALT) != 0)
88 *p++ = '\n';
89 *p = '\0';
91 result.multibyte = true;
92 result.anchored = anchored;
94 return result;
/* Characters that are special in a PCRE.  This must stay an array (not a
   pointer), because sizeof (pcre_special) is used to obtain its length.  */
static char const pcre_special[] = "$^.*[]\\+?{}()|";

/* Options bit values, as defined in <pcre.h>.  That header is not included
   here, because it is not a standard header.  */
#define PCRE_EXTENDED  0x00000008
#define PCRE_ANCHORED  0x00000010
105 struct regex_quote_spec
106 regex_quote_spec_pcre (int options, bool anchored)
108 struct regex_quote_spec result;
109 char *p;
111 p = result.special;
112 memcpy (p, pcre_special, sizeof (pcre_special) - 1);
113 p += sizeof (pcre_special) - 1;
114 if (options & PCRE_EXTENDED)
116 *p++ = ' ';
117 *p++ = '\t';
118 *p++ = '\n';
119 *p++ = '\v';
120 *p++ = '\f';
121 *p++ = '\r';
122 *p++ = '#';
124 *p = '\0';
126 /* PCRE regular expressions consist of UTF-8 characters of options contains
127 PCRE_UTF8 and of single bytes otherwise. */
128 result.multibyte = false;
129 /* If options contains PCRE_ANCHORED, the anchoring is implicit. */
130 result.anchored = (options & PCRE_ANCHORED ? 0 : anchored);
132 return result;
135 size_t
136 regex_quote_length (const char *string, const struct regex_quote_spec *spec)
138 const char *special = spec->special;
139 size_t length;
141 length = 0;
142 if (spec->anchored)
143 length += 2; /* for '^' at the beginning and '$' at the end */
144 if (spec->multibyte)
146 #if GNULIB_MCEL_PREFER
147 char const *iter = string;
148 for (mcel_t g; *iter; iter += g.len)
150 g = mcel_scanz (iter);
151 /* We know that special contains only ASCII characters. */
152 length += g.len == 1 && strchr (special, *iter);
154 length += iter - string;
155 #else
156 mbui_iterator_t iter;
158 for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
160 /* We know that special contains only ASCII characters. */
161 if (mb_len (mbui_cur (iter)) == 1
162 && strchr (special, * mbui_cur_ptr (iter)))
163 length += 1;
164 length += mb_len (mbui_cur (iter));
166 #endif
168 else
170 const char *iter;
172 for (iter = string; *iter != '\0'; iter++)
174 if (strchr (special, *iter))
175 length += 1;
176 length += 1;
180 return length;
183 char *
184 regex_quote_copy (char *p, const char *string, const struct regex_quote_spec *spec)
186 const char *special = spec->special;
188 if (spec->anchored)
189 *p++ = '^';
190 if (spec->multibyte)
192 #if GNULIB_MCEL_PREFER
193 for (char const *iter = string; *iter; )
195 mcel_t g = mcel_scanz (iter);
196 *p = '\\';
197 /* We know that special contains only ASCII characters. */
198 p += g.len == 1 && strchr (special, *iter);
199 p = mempcpy (p, iter, g.len);
200 iter += g.len;
202 #else
203 mbui_iterator_t iter;
205 for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
207 /* We know that special contains only ASCII characters. */
208 if (mb_len (mbui_cur (iter)) == 1
209 && strchr (special, * mbui_cur_ptr (iter)))
210 *p++ = '\\';
211 memcpy (p, mbui_cur_ptr (iter), mb_len (mbui_cur (iter)));
212 p += mb_len (mbui_cur (iter));
214 #endif
216 else
218 const char *iter;
220 for (iter = string; *iter != '\0'; iter++)
222 if (strchr (special, *iter))
223 *p++ = '\\';
224 *p++ = *iter++;
227 if (spec->anchored)
228 *p++ = '$';
230 return p;
233 char *
234 regex_quote (const char *string, const struct regex_quote_spec *spec)
236 size_t length = regex_quote_length (string, spec);
237 char *result = XNMALLOC (length + 1, char);
238 char *p;
240 p = result;
241 p = regex_quote_copy (p, string, spec);
242 *p = '\0';
243 return result;