1 /* Construct a regular expression from a literal string.
2 Copyright (C) 1995, 2010-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2010.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
21 #include "regex-quote.h"
25 #if GNULIB_MCEL_PREFER
33 /* Characters that are special in a BRE:
   '$', '^', '.', '*', '[' and the backslash. */
34 static const char bre_special
[] = "$^.*[\\";
36 /* Characters that are special in an ERE:
   the BRE set above followed by '+', '?', '{', '(', ')' and '|'. */
37 static const char ere_special
[] = "$^.*[\\+?{()|";
/* Build a quoting specification for POSIX regular expressions.
   CFLAGS selects the set of special characters: ere_special when it is
   nonzero, bre_special when it is 0.  ANCHORED is stored unchanged in the
   'anchored' member, and 'multibyte' is always set to true.
   NOTE(review): any nonzero CFLAGS selects the ERE set, not only an
   "extended syntax" flag -- confirm that callers pass nothing else.
   NOTE(review): the braces and the return statement of this function are
   not visible in this excerpt.  */
39 struct regex_quote_spec
40 regex_quote_spec_posix (int cflags
, bool anchored
)
42 struct regex_quote_spec result
;
/* strcpy also copies the terminating NUL; this assumes result.special is
   large enough for ere_special -- TODO confirm against the declaration of
   struct regex_quote_spec.  */
44 strcpy (result
.special
, cflags
!= 0 ? ere_special
: bre_special
);
45 result
.multibyte
= true;
46 result
.anchored
= anchored
;
51 /* Syntax bit values, defined in GNU <regex.h>. We don't include it here,
52 otherwise this module would need to depend on gnulib module 'regex'. */
/* These are the bits that regex_quote_spec_gnu tests in its 'syntax'
   argument.  NOTE(review): the numeric values are copied by hand; verify
   them against the <regex.h> in use.  */
53 #define RE_BK_PLUS_QM 0x00000002
54 #define RE_INTERVALS 0x00000200
55 #define RE_LIMITED_OPS 0x00000400
56 #define RE_NEWLINE_ALT 0x00000800
57 #define RE_NO_BK_BRACES 0x00001000
58 #define RE_NO_BK_PARENS 0x00002000
59 #define RE_NO_BK_VBAR 0x00008000
/* Build a quoting specification for a GNU regex syntax bit mask SYNTAX.
   The set of special characters starts as a copy of bre_special; a series
   of tests on SYNTAX follows.  ANCHORED is stored unchanged in the
   'anchored' member, and 'multibyte' is always set to true.
   NOTE(review): the declaration and initialization of 'p', the statements
   guarded by each 'if', the NUL termination of the special set and the
   return statement are not visible in this excerpt.  Presumably each test
   appends the operator characters that the tested bits make special --
   confirm against the complete source.  */
61 struct regex_quote_spec
62 regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax
, bool anchored
)
64 struct regex_quote_spec result
;
/* Copy the BRE specials; sizeof - 1 leaves out the terminating NUL of
   bre_special so that more characters can follow.  */
68 memcpy (p
, bre_special
, sizeof (bre_special
) - 1);
/* Advance p past the bytes just copied.  */
69 p
+= sizeof (bre_special
) - 1;
/* Taken when neither RE_LIMITED_OPS nor RE_BK_PLUS_QM is set.  */
70 if ((syntax
& RE_LIMITED_OPS
) == 0 && (syntax
& RE_BK_PLUS_QM
) == 0)
/* Taken when both RE_INTERVALS and RE_NO_BK_BRACES are set.  */
75 if ((syntax
& RE_INTERVALS
) != 0 && (syntax
& RE_NO_BK_BRACES
) != 0)
/* Taken when RE_NO_BK_PARENS is set.  */
80 if ((syntax
& RE_NO_BK_PARENS
) != 0)
/* Taken when RE_LIMITED_OPS is clear and RE_NO_BK_VBAR is set.  */
85 if ((syntax
& RE_LIMITED_OPS
) == 0 && (syntax
& RE_NO_BK_VBAR
) != 0)
/* Taken when RE_NEWLINE_ALT is set.  */
87 if ((syntax
& RE_NEWLINE_ALT
) != 0)
91 result
.multibyte
= true;
92 result
.anchored
= anchored
;
97 /* Characters that are special in a PCRE.
   Unlike ere_special, this set also lists the closing ']' and '}'. */
98 static const char pcre_special
[] = "$^.*[]\\+?{}()|";
100 /* Options bit values, defined in <pcre.h>. We don't include it here, because
101 it is not a standard header. */
/* These are the option bits that regex_quote_spec_pcre tests.
   NOTE(review): the numeric values are copied by hand; verify them against
   the <pcre.h> in use.  */
102 #define PCRE_ANCHORED 0x00000010
103 #define PCRE_EXTENDED 0x00000008
/* Build a quoting specification for PCRE, given the OPTIONS bit mask.
   The set of special characters starts as a copy of pcre_special, followed
   by a test of PCRE_EXTENDED.  'multibyte' is always set to false.
   'anchored' is set to 0 when OPTIONS contains PCRE_ANCHORED and to
   ANCHORED otherwise.
   NOTE(review): the declaration and initialization of 'p', the statements
   guarded by the PCRE_EXTENDED test, the NUL termination of the special
   set and the return statement are not visible in this excerpt.  */
105 struct regex_quote_spec
106 regex_quote_spec_pcre (int options
, bool anchored
)
108 struct regex_quote_spec result
;
/* Copy the PCRE specials; sizeof - 1 leaves out the terminating NUL of
   pcre_special so that more characters can follow.  */
112 memcpy (p
, pcre_special
, sizeof (pcre_special
) - 1);
/* Advance p past the bytes just copied.  */
113 p
+= sizeof (pcre_special
) - 1;
/* Taken when PCRE_EXTENDED is set; presumably the guarded statements add
   the characters that are significant in extended mode -- TODO confirm.  */
114 if (options
& PCRE_EXTENDED
)
126 /* PCRE regular expressions consist of UTF-8 characters if options contains
127 PCRE_UTF8 and of single bytes otherwise.
   NOTE(review): no PCRE_UTF8 bit is tested here; multibyte is set to false
   unconditionally.  Confirm that this is intended. */
128 result
.multibyte
= false;
129 /* If options contains PCRE_ANCHORED, the anchoring is implicit. */
130 result
.anchored
= (options
& PCRE_ANCHORED
? 0 : anchored
);
/* Accumulate in 'length' a size derived from STRING and the special
   character set SPEC->special; regex_quote uses the result, plus one, as
   its allocation size.
   Three scanning loops are visible: one based on mcel_scanz (under
   GNULIB_MCEL_PREFER), one based on the mbui_* iterator, and one that
   walks STRING byte by byte.
   NOTE(review): the return type, the declaration and initialization of
   'length', the conditions that select the anchored and the multibyte
   paths, the #else/#endif lines, the statements guarded by the two
   strchr tests and the return statement are not visible in this
   excerpt.  */
136 regex_quote_length (const char *string
, const struct regex_quote_spec
*spec
)
138 const char *special
= spec
->special
;
143 length
+= 2; /* for '^' at the beginning and '$' at the end */
146 #if GNULIB_MCEL_PREFER
147 char const *iter
= string
;
/* Walk STRING one character at a time; g.len is the number of bytes by
   which iter advances for the current character.  */
148 for (mcel_t g
; *iter
; iter
+= g
.len
)
150 g
= mcel_scanz (iter
);
151 /* We know that special contains only ASCII characters. */
/* The && expression has the value 0 or 1, so this adds 1 exactly when the
   character is a single byte that occurs in special.  */
152 length
+= g
.len
== 1 && strchr (special
, *iter
);
/* Add the bytes of the characters themselves in one step: iter now points
   at the terminating NUL, so iter - string is the byte length scanned.  */
154 length
+= iter
- string
;
156 mbui_iterator_t iter
;
158 for (mbui_init (iter
, string
); mbui_avail (iter
); mbui_advance (iter
))
160 /* We know that special contains only ASCII characters. */
/* True for a character of length 1 whose byte occurs in special.  */
161 if (mb_len (mbui_cur (iter
)) == 1
162 && strchr (special
, * mbui_cur_ptr (iter
)))
/* Add the length of the current character as given by mb_len.  */
164 length
+= mb_len (mbui_cur (iter
));
/* Byte-wise scan up to the terminating NUL.  */
172 for (iter
= string
; *iter
!= '\0'; iter
++)
/* True when the current byte occurs in special.  */
174 if (strchr (special
, *iter
))
/* Copy the characters of STRING to the memory at P, treating the
   characters listed in SPEC->special specially.
   Three scanning loops are visible, mirroring those of regex_quote_length:
   one based on mcel_scanz (under GNULIB_MCEL_PREFER), one based on the
   mbui_* iterator, and one that walks STRING byte by byte.
   NOTE(review): the return type, the conditions that select the anchored
   and the multibyte paths, the #else/#endif lines, the stores that write
   the escape and anchor characters, the advance of iter in the mcel loop
   and the return statement are not visible in this excerpt.  */
184 regex_quote_copy (char *p
, const char *string
, const struct regex_quote_spec
*spec
)
186 const char *special
= spec
->special
;
192 #if GNULIB_MCEL_PREFER
/* The for header has no increment expression; iter is presumably advanced
   by a statement of the loop body that is not visible here.  */
193 for (char const *iter
= string
; *iter
; )
195 mcel_t g
= mcel_scanz (iter
);
197 /* We know that special contains only ASCII characters. */
/* The && expression has the value 0 or 1: p moves forward by one byte
   exactly when the character is a single byte that occurs in special.
   NOTE(review): presumably an escape character was stored at *p just
   before, so that it is kept in this case and overwritten otherwise --
   confirm against the complete source.  */
198 p
+= g
.len
== 1 && strchr (special
, *iter
);
/* Copy the g.len bytes of the current character; the value returned by
   mempcpy becomes the new p.  */
199 p
= mempcpy (p
, iter
, g
.len
);
203 mbui_iterator_t iter
;
205 for (mbui_init (iter
, string
); mbui_avail (iter
); mbui_advance (iter
))
207 /* We know that special contains only ASCII characters. */
/* True for a character of length 1 whose byte occurs in special.  */
208 if (mb_len (mbui_cur (iter
)) == 1
209 && strchr (special
, * mbui_cur_ptr (iter
)))
/* Copy the bytes of the current character, then advance p past them.  */
211 memcpy (p
, mbui_cur_ptr (iter
), mb_len (mbui_cur (iter
)));
212 p
+= mb_len (mbui_cur (iter
));
/* Byte-wise scan up to the terminating NUL.  */
220 for (iter
= string
; *iter
!= '\0'; iter
++)
/* True when the current byte occurs in special.  */
222 if (strchr (special
, *iter
))
/* Allocate a buffer sized from regex_quote_length (STRING, SPEC) and fill
   it by calling regex_quote_copy with the same STRING and SPEC.
   NOTE(review): the return type, the declaration and initialization of
   'p', the termination of the buffer and the return statement are not
   visible in this excerpt.  */
234 regex_quote (const char *string
, const struct regex_quote_spec
*spec
)
236 size_t length
= regex_quote_length (string
, spec
);
/* One char more than the computed length -- presumably room for the
   terminating NUL; confirm.  */
237 char *result
= XNMALLOC (length
+ 1, char);
/* regex_quote_copy hands back a pointer, which replaces p.  */
241 p
= regex_quote_copy (p
, string
, spec
);