/*

    MPDM - Minimum Profit Data Manager
    Copyright (C) 2003/2007 Angel Ortega <angel@triptico.com>

    mpdm_r.c - Regular expressions

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

    http://www.triptico.com

*/
36 #include <pcreposix.h>
39 #ifdef CONFOPT_SYSTEM_REGEX
43 #ifdef CONFOPT_INCLUDED_REGEX
44 #include "gnu_regex.h"
/* result of the most recent regex match: character offset of the
   match (-1 when the last match was unsuccessful) and its size
   in characters */
int mpdm_regex_offset = -1;
int mpdm_regex_size = 0;

/* number of substitutions made by the last call to mpdm_sregex() */
int mpdm_sregex_count = 0;
64 static wchar_t *regex_flags(const mpdm_t r
)
66 return wcsrchr((wchar_t *) r
->data
, *(wchar_t *) r
->data
);
/*
 * mpdm_regcomp - compiles the regular expression @r, with caching.
 *
 * What the visible code does:
 *  - fetches the hash stored as "__REGEX_CACHE__" in mpdm_root(),
 *    creating it with MPDM_H(0) the first time;
 *  - looks @r up in that hash; on a miss, converts r->data with
 *    MPDM_2MBS(), locates the flags with strrchr() (last occurrence
 *    of the expression's first character), tests them for 'i' and
 *    'm', compiles with regcomp(), copies the regex_t into malloc()ed
 *    memory, wraps it with mpdm_new(MPDM_FREE, ...) and stores the
 *    new value in the cache under the key @r.
 *
 * NOTE(review): this listing has lost lines. The declarations of
 * c, rmb, re, regex, flags, f and ptr, the statements controlled by
 * the three flag-related tests, the closing braces and the return
 * statement are not visible. Recover them from the original file
 * before building; do not guess them.
 */
70 static mpdm_t
mpdm_regcomp(mpdm_t r
)
73 mpdm_t regex_cache
= NULL
;
75 /* if the cache hash does not exist yet under mpdm_root(), create it */
76 if ((regex_cache
= mpdm_hget_s(mpdm_root(), L
"__REGEX_CACHE__")) == NULL
) {
77 regex_cache
= MPDM_H(0);
78 mpdm_hset_s(mpdm_root(), L
"__REGEX_CACHE__", regex_cache
);
81 /* search the regex in the cache, using the regex value itself as key */
82 if ((c
= mpdm_hget(regex_cache
, r
)) == NULL
) {
89 /* not found; regex must be compiled */
/* regcomp() takes a char string, so work on a converted copy */
92 rmb
= MPDM_2MBS(r
->data
);
94 regex
= (char *) rmb
->data
;
/* flags start at the last occurrence of the first character of the
   expression.
   NOTE(review): the statement run when strrchr() fails is missing
   from this listing */
95 if ((flags
= strrchr(regex
, *regex
)) == NULL
)
/* NOTE(review): the statements controlled by the 'i' and 'm' tests
   are missing from this listing; presumably they adjust the
   regcomp() flags held in f -- confirm against the original */
98 if (strchr(flags
, 'i') != NULL
)
100 if (strchr(flags
, 'm') != NULL
)
/* regcomp() returns 0 on success */
106 if (!regcomp(&re
, regex
, f
)) {
/* NOTE(review): no handling is visible for a failed malloc(); the
   compiled re would then be left unreferenced -- confirm whether a
   regfree() is needed on that path */
109 if ((ptr
= malloc(sizeof(regex_t
))) != NULL
) {
/* move the compiled regex from the stack to the heap copy */
111 memcpy(ptr
, &re
, sizeof(regex_t
));
/* wrap the heap copy in a new value */
114 c
= mpdm_new(MPDM_FREE
, ptr
, sizeof(regex_t
));
/* remember it so the same regex is not compiled again */
117 mpdm_hset(regex_cache
, r
, c
);
/**
 * mpdm_regex - Matches a regular expression.
 * @r: the regular expression
 * @v: the value to be matched
 * @offset: offset from the start of v->data
 *
 * Matches a regular expression against a value. Valid flags are 'i',
 * for case-insensitive matching, 'm', to treat the string as a
 * multiline string (i.e., one containing newline characters), so
 * that ^ and $ match the boundaries of each line instead of the
 * whole string, or 'l', to return the last match instead of
 * the first one.
 *
 * If @r is a string, an ordinary regular expression match is tried
 * over the @v string. If the match succeeds, the matched string
 * is returned, or NULL otherwise.
 *
 * If @r is an array (of strings), each element is tried sequentially
 * as an individual regular expression over the @v string, each one using
 * the offset returned by the previous match. All regular expressions
 * must match to be successful. If this is the case, an array (with
 * the same number of arguments) is returned containing the matched
 * strings, or NULL otherwise.
 *
 * If @r is NULL, the result of the previous regex match
 * is returned as a two-element array. The first element will contain
 * the character offset of the match and the second the number of
 * characters matched. If the previous regex was unsuccessful, NULL
 * is returned.
 * [Regular Expressions]
 */
/*
 * NOTE(review): this listing has lost lines inside the function. The
 * local declarations (w, t, n, cr, rm, ptr, last, o, ...), the tests
 * announced by the "r is NULL" and "v is NULL" comments, the handling
 * of a failed sub-match, the body of the regexec() loop, the closing
 * braces and the return are not visible. Recover them from the
 * original file before building; do not guess them.
 */
157 mpdm_t
mpdm_regex(mpdm_t r
, const mpdm_t v
, int offset
)
162 /* special case: if r is NULL, return previous match */
164 /* if previous regex was successful... */
165 if (mpdm_regex_offset
!= -1) {
/* ...answer with the pair: element 0 is the offset, element 1 the size */
168 mpdm_aset(w
, MPDM_I(mpdm_regex_offset
), 0);
169 mpdm_aset(w
, MPDM_I(mpdm_regex_size
), 1);
175 /* if the string to be tested is NULL, return NULL */
179 if (r
->flags
& MPDM_MULTIPLE
) {
182 /* multiple value; try sequentially all regexes,
183 moving the offset forward */
/* result array, one slot per regex in r */
185 w
= MPDM_A(mpdm_size(r
));
187 for (n
= 0; n
< mpdm_size(r
); n
++) {
/* recursive call with the single regex stored at index n */
188 t
= mpdm_regex(mpdm_aget(r
, n
), v
, offset
);
190 /* if not found, invalid all search and exit */
196 /* found; store and move forward */
/* the next regex starts right after the end of this match; both
   globals were just set by the recursive call above */
198 offset
= mpdm_regex_offset
+ mpdm_regex_size
;
204 /* single value; really do the regex */
206 /* no matching yet */
207 mpdm_regex_offset
= -1;
209 /* compile the regex */
210 if ((cr
= mpdm_regcomp(r
)) != NULL
) {
217 /* takes pointer to 'last' flag */
/* last ends up non-NULL only when an 'l' follows the position
   returned by regex_flags() */
218 if ((last
= regex_flags(r
)) != NULL
)
219 last
= wcschr(last
, 'l');
/* regexec() works on char strings: convert v, skipping the first
   offset wide characters */
222 ptr
= mpdm_wcstombs((wchar_t *) v
->data
+ offset
, NULL
);
/* regexec() returns 0 on a match.
   NOTE(review): REG_NOTBOL depends only on offset, although the
   search starts at ptr + o; how o advances is not visible in this
   listing -- confirm that ^ cannot match at a non-zero o */
225 while (regexec((regex_t
*) cr
->data
, ptr
+ o
, 1,
226 &rm
, offset
> 0 ? REG_NOTBOL
: 0) == 0) {
229 /* if 'last' is not set, it's done */
239 /* converts to wide characters the string from the beginning
   to the start of the match, just to know its size in characters,
   which mpdm_mbstowcs() stores in mpdm_regex_offset
   (the converted string itself is immediately freed) */
242 free(mpdm_mbstowcs(ptr
, &mpdm_regex_offset
, rm
.rm_so
));
/* ptr started offset characters into v, so add that back */
245 mpdm_regex_offset
+= offset
;
247 /* create now the matching string */
248 w
= MPDM_NMBS(ptr
+ rm
.rm_so
, rm
.rm_eo
- rm
.rm_so
);
250 /* and store the size */
251 mpdm_regex_size
= mpdm_size(w
);
/*
 * NOTE(review): this listing has lost lines inside the function. The
 * declarations of wptr, r, n and t2, the two statements inside the
 * "escaped" branch, the statement that advances sptr, the closing
 * braces and the return are not visible. Recover them from the
 * original file before building; do not guess them.
 */
262 static mpdm_t
expand_ampersands(const mpdm_t s
, const mpdm_t t
)
263 /* substitutes all unescaped ampersands in s with t */
265 const wchar_t *sptr
= mpdm_string(s
);
/* the rebuilding below only happens if s has at least one ampersand */
269 if ((wptr
= wcschr(sptr
, L
'&')) != NULL
) {
/* one iteration per ampersand; wptr points to the current one */
273 while (wptr
!= NULL
) {
/* the test on n keeps wptr - 1 from being read when n is zero;
   n is presumably the distance from sptr to wptr -- confirm.
   NOTE(review): only one character is inspected, so a backslash
   that is itself escaped still counts as escaping the ampersand */
277 if (n
&& *(wptr
- 1) == '\\') {
278 /* is it escaped? avoid the \ */
281 /* and set the substitution string to & */
285 /* add the leading part */
286 r
= mpdm_strcat(r
, MPDM_NS(sptr
, n
));
288 /* now add the substitution string */
289 r
= mpdm_strcat(r
, t2
);
/* look for the next ampersand */
292 wptr
= wcschr(sptr
, L
'&');
295 /* add the rest of the string */
296 r
= mpdm_strcat(r
, MPDM_S(sptr
));
/**
 * mpdm_sregex - Matches and substitutes a regular expression.
 * @r: the regular expression
 * @v: the value to be matched
 * @s: the substitution string, hash or code
 * @offset: offset from the start of v->data
 *
 * Matches a regular expression against a value, and substitutes the
 * found substring with @s. Valid flags are 'i', for case-insensitive
 * matching, and 'g', for global replacements (all occurrences in @v
 * will be replaced, instead of just the first one found).
 *
 * If @s is executable, it's executed with the matched part as
 * the only argument and its return value is used as the
 * substitution string.
 *
 * If @s is a hash, the matched string is used as a key to it and
 * its value used as the substitution.
 *
 * If @r is NULL, returns the number of substitutions made in the
 * previous call to mpdm_sregex() (can be zero if none was done).
 *
 * The global variables @mpdm_regex_offset and @mpdm_regex_size are
 * set to the offset of the matched string and the size of the
 * replaced string, respectively.
 *
 * Returns the modified string, or the original one if no substitutions
 * were made.
 * [Regular Expressions]
 */
/*
 * NOTE(review): this listing has lost lines inside the function and
 * its tail is not visible. The local declarations (cr, o, ptr, f, i,
 * global, t, rm), the tests guarding the early returns, the opening
 * of the do loop, the test on f, the hash branch, the statement that
 * advances i, the counter increment, the closing braces and the
 * return are missing. Recover them from the original file before
 * building; do not guess them.
 */
333 mpdm_t
mpdm_sregex(mpdm_t r
, const mpdm_t v
, const mpdm_t s
, int offset
)
339 /* return last count */
340 return MPDM_I(mpdm_sregex_count
);
346 /* compile the regex */
347 if ((cr
= mpdm_regcomp(r
)) != NULL
) {
353 /* takes pointer to global flag */
/* global ends up non-NULL only when a 'g' follows the position
   returned by regex_flags() */
354 if ((global
= regex_flags(r
)) !=NULL
)
355 global
= wcschr(global
, 'g');
357 /* store the first part */
/* i.e. the first offset characters of v, which are not searched */
358 o
= MPDM_NS(v
->data
, offset
);
/* regexec() works on char strings: convert v, skipping the first
   offset wide characters.
   NOTE(review): the statement run when the conversion fails is
   missing from this listing */
361 if ((ptr
= mpdm_wcstombs((wchar_t *) v
->data
+ offset
, NULL
)) == NULL
)
/* reset the globals that report the outcome of this call */
365 mpdm_sregex_count
= 0;
366 mpdm_regex_offset
= -1;
/* f is non-zero when regexec() found a match (it returns 0 then).
   NOTE(review): REG_NOTBOL depends only on offset, although the
   search starts at ptr + i -- confirm that ^ cannot match again
   on later iterations of a global replace */
372 f
= !regexec((regex_t
*) cr
->data
, ptr
+ i
,
373 1, &rm
, offset
> 0 ? REG_NOTBOL
: 0);
376 /* creates a string from the beginning
377 to the start of the match */
378 t
= MPDM_NMBS(ptr
+ i
, rm
.rm_so
);
379 o
= mpdm_strcat(o
, t
);
381 /* store offset of substitution */
/* NOTE(review): t only spans from ptr + i to the match, so on later
   iterations of a global replace this value does not appear to
   include the text before ptr + i -- confirm this is intended */
382 mpdm_regex_offset
= mpdm_size(t
) + offset
;
384 /* get the matched part */
385 t
= MPDM_NMBS(ptr
+ i
+ rm
.rm_so
, rm
.rm_eo
- rm
.rm_so
);
387 /* is s an executable value? */
388 if (MPDM_IS_EXEC(s
)) {
389 /* protect o from sweeping */
392 /* execute s, with t as argument */
393 t
= mpdm_exec_1(s
, t
);
398 /* is s a hash? use match as key */
/* otherwise s is used as the substitution text, with its unescaped
   ampersands replaced by the matched part */
402 t
= expand_ampersands(s
, t
);
404 /* appends the substitution string */
405 o
= mpdm_strcat(o
, t
);
407 /* store size of substitution */
408 mpdm_regex_size
= mpdm_size(t
);
412 /* one more substitution */
/* keep going only after a match and only if the 'g' flag was given */
416 } while (f
&& global
);
418 /* no (more) matches; convert and append the rest */
419 t
= MPDM_MBS(ptr
+ i
);
420 o
= mpdm_strcat(o
, t
);