3 MPDM - Minimum Profit Data Manager
4 Copyright (C) 2003/2010 Angel Ortega <angel@triptico.com>
6 mpdm_r.c - Regular expressions
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2
11 of the License, or (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 http://www.triptico.com
36 #include <pcreposix.h>
39 #ifdef CONFOPT_SYSTEM_REGEX
43 #ifdef CONFOPT_INCLUDED_REGEX
44 #include "gnu_regex.h"
50 /* offset and size recorded by the last regex operation;
   mpdm_regex_offset is reset to -1 before each match attempt,
   so -1 means that nothing was matched */
52 int mpdm_regex_offset
= -1;
53 int mpdm_regex_size
= 0;
55 /* number of substitutions made by the last mpdm_sregex() call */
57 int mpdm_sregex_count
= 0;
62 static wchar_t *regex_flags(const mpdm_t r
)
64 wchar_t *ptr
= mpdm_string(r
);
65 return wcsrchr(ptr
, *ptr
);
/* Returns the compiled form of the regex value r, compiling it on
   first use and keeping it in a hash stored under the key
   "__REGEX_CACHE__" of mpdm_root(), indexed by r itself.
   NOTE(review): several lines of this function (declarations, the
   statements guarded by the flag tests, the return) are not visible
   in this view. */
69 static mpdm_t
mpdm_regcomp(mpdm_t r
)
72 mpdm_t regex_cache
= NULL
;
76 /* if cache does not exist, create it */
78 mpdm_hget_s(mpdm_root(), L
"__REGEX_CACHE__")) == NULL
) {
79 regex_cache
= MPDM_H(0);
80 mpdm_hset_s(mpdm_root(), L
"__REGEX_CACHE__", regex_cache
);
83 /* search the regex in the cache */
84 if ((c
= mpdm_hget(regex_cache
, r
)) == NULL
) {
91 /* not found; regex must be compiled */
/* regcomp() takes a char string, so a multibyte copy of the
   regex text is built (and referenced) first */
94 rmb
= mpdm_ref(MPDM_2MBS(r
->data
));
95 regex
= (char *) rmb
->data
;
/* the first character of the regex is its delimiter; its last
   occurrence marks where the flag letters start */
97 if ((flags
= strrchr(regex
, *regex
)) != NULL
) {
/* 'i' and 'm' are the flag letters tested here; presumably each
   one adjusts the compilation flags in f (not visible here) */
99 if (strchr(flags
, 'i') != NULL
)
101 if (strchr(flags
, 'm') != NULL
)
/* regcomp() returns 0 on success; only then is the compiled
   regex_t wrapped in an MPDM_REGEX value and stored in the cache */
107 if (!regcomp(&re
, regex
, f
)) {
108 c
= MPDM_C(MPDM_REGEX
, &re
, sizeof(regex_t
));
109 mpdm_hset(regex_cache
, r
, c
);
/* Matches the single regex r against the string form of v, starting
   offset characters into it. On a match, the globals
   mpdm_regex_offset and mpdm_regex_size are updated and the matched
   text is built as a new value in w.
   NOTE(review): the declarations, the loop body and the return are
   not visible in this view. */
122 static mpdm_t
regex1(mpdm_t r
, const mpdm_t v
, int offset
)
123 /* test for one regex */
131 /* no matching yet */
132 mpdm_regex_offset
= -1;
134 /* compile the regex */
135 if ((cr
= mpdm_regcomp(r
)) != NULL
) {
142 /* takes pointer to 'last' flag */
143 if ((last
= regex_flags(r
)) != NULL
)
144 last
= wcschr(last
, 'l');
/* regexec() works on char strings: convert the part of v that
   starts at offset to multibyte */
147 ptr
= mpdm_wcstombs((wchar_t *) mpdm_string(v
) + offset
, NULL
);
/* REG_NOTBOL is passed whenever offset > 0, so that ^ does not
   match at the start of a string that is not the real beginning
   of v */
150 while (regexec((regex_t
*) cr
->data
, ptr
+ o
, 1,
151 &rm
, offset
> 0 ? REG_NOTBOL
: 0) == 0) {
154 /* if 'last' is not set, it's done */
164 /* converts to wide chars the multibyte string from the beginning
165 to the start of the match, just to know
166 the size (and immediately frees it) */
167 free(mpdm_mbstowcs(ptr
, &mpdm_regex_offset
, rm
.rm_so
));
/* the size above is relative to ptr, which starts at offset */
170 mpdm_regex_offset
+= offset
;
172 /* create now the matching string */
173 w
= MPDM_NMBS(ptr
+ rm
.rm_so
, rm
.rm_eo
- rm
.rm_so
);
175 /* and store the size */
176 mpdm_regex_size
= mpdm_size(w
);
190 * mpdm_regex - Matches a regular expression.
191 * @v: the value to be matched
192 * @r: the regular expression
193 * @offset: offset from the start of v->data
195 * Matches a regular expression against a value. Valid flags are 'i',
196 * for case-insensitive matching, 'm', to treat the string as a
197 * multiline string (i.e., one containing newline characters), so
198 * that ^ and $ match the boundaries of each line instead of the
199 * whole string, 'l', to return the last matching instead of the
200 * first one, or 'g', to match globally; in that last case, an array
201 * containing all matches is returned instead of a string scalar.
203 * If @r is a string, an ordinary regular expression matching is tried
204 * over the @v string. If the matching is possible, the match result
205 * is returned, or NULL otherwise.
207 * If @r is an array (of strings), each element is tried sequentially
208 * as an individual regular expression over the @v string, each one using
209 * the offset returned by the previous match. All regular expressions
210 * must match to be successful. If this is the case, an array (with
211 * the same number of arguments) is returned containing the matched
212 * strings, or NULL otherwise.
214 * If @r is NULL, the result of the previous regex matching
215 * is returned as a two element array. The first element will contain
216 * the character offset of the matching and the second the number of
217 * characters matched. If the previous regex was unsuccessful, NULL is returned.
219 * [Regular Expressions]
/* Public entry point for regex matching. Three cases are handled:
   r is NULL (report the previous match), r is a multiple value
   (each element is matched in sequence) or r is a single regex
   (matched once, or repeatedly if it carries the 'g' flag).
   NOTE(review): declarations, result building and the return are
   not visible in this view. */
221 mpdm_t
mpdm_regex(const mpdm_t v
, const mpdm_t r
, int offset
)
228 /* special case: if r is NULL, return previous match */
230 /* if previous regex was successful... */
231 if (mpdm_regex_offset
!= -1) {
/* element 0 is the offset of the match, element 1 its size */
235 mpdm_aset(w
, MPDM_I(mpdm_regex_offset
), 0);
236 mpdm_aset(w
, MPDM_I(mpdm_regex_size
), 1);
242 if (r
->flags
& MPDM_MULTIPLE
) {
246 /* multiple value; try sequentially all regexes,
247 moving the offset forward */
252 for (n
= 0; n
< mpdm_size(r
); n
++) {
/* recursive call: each element of r is matched on its own */
253 t
= mpdm_regex(v
, mpdm_aget(r
, n
), offset
);
258 /* found; store and move forward */
/* the next regex starts right after the end of this match */
260 offset
= mpdm_regex_offset
+ mpdm_regex_size
;
268 /* takes pointer to 'global' flag */
269 if ((global
= regex_flags(r
)) !=NULL
)
270 global
= wcschr(global
, 'g');
275 /* match sequentially until done */
/* NOTE(review): offset only advances by mpdm_regex_size; confirm
   that a zero-length match cannot stop this loop from making
   progress */
279 while ((t
= regex1(r
, v
, offset
)) != NULL
) {
282 offset
= mpdm_regex_offset
+ mpdm_regex_size
;
/* non-global case: a single match attempt */
288 w
= regex1(r
, v
, offset
);
/* Builds a new string from s where each '&' is replaced by the
   value t; a backslash followed by '&' or by another backslash is
   treated as an escape. The output is accumulated in optr / osize
   with the mpdm_poke*() helpers.
   NOTE(review): declarations, the computation of n, the advance of
   sptr and the return are not visible in this view. */
299 static mpdm_t
expand_ampersands(const mpdm_t s
, const mpdm_t t
)
300 /* substitutes all unescaped ampersands in s with t */
302 const wchar_t *sptr
= mpdm_string(s
);
304 wchar_t *optr
= NULL
;
/* NOTE(review): the backslash search runs first over the whole
   remaining string, so whenever a backslash exists anywhere ahead
   wptr points to it even if an '&' appears earlier; that earlier
   '&' would presumably be copied verbatim as part of the leading
   text. Confirm whether this is intended (wcspbrk(sptr, L"\\&")
   would pick whichever comes first). */
312 while ((wptr
= wcschr(sptr
, L
'\\')) != NULL
||
313 (wptr
= wcschr(sptr
, L
'&')) != NULL
) {
316 /* add the leading part */
317 optr
= mpdm_pokewsn(optr
, &osize
, sptr
, n
);
319 if (*wptr
== L
'\\') {
/* a backslash followed by '&' or '\' is an escape sequence */
320 if (*(wptr
+ 1) == L
'&' || *(wptr
+ 1) == L
'\\')
/* copy one literal character */
323 optr
= mpdm_pokewsn(optr
, &osize
, wptr
, 1);
/* append the replacement value t */
327 optr
= mpdm_pokev(optr
, &osize
, t
);
332 /* add the rest of the string */
333 optr
= mpdm_pokews(optr
, &osize
, sptr
);
/* append the terminating null character; it is counted in osize,
   hence the osize - 1 below */
334 optr
= mpdm_pokewsn(optr
, &osize
, L
"", 1);
335 r
= MPDM_ENS(optr
, osize
- 1);
346 * mpdm_sregex - Matches and substitutes a regular expression.
347 * @v: the value to be matched
348 * @r: the regular expression
349 * @s: the substitution string, hash or code
350 * @offset: offset from the start of v->data
352 * Matches a regular expression against a value, and substitutes the
353 * found substring with @s. Valid flags are 'i', for case-insensitive
354 * matching, and 'g', for global replacements (all occurrences in @v
355 * will be replaced, instead of just the first found one).
357 * If @s is executable, it's executed with the matched part as
358 * the only argument and its return value is used as the
359 * substitution string.
361 * If @s is a hash, the matched string is used as a key to it and
362 * its value used as the substitution. If this value itself is
363 * executable, it's executed with the matched string as its only
364 * argument and its return value used as the substitution.
366 * If @r is NULL, returns the number of substitutions made in the
367 * previous call to mpdm_sregex() (can be zero if none was done).
369 * The global variables @mpdm_regex_offset and @mpdm_regex_size are
370 * set to the offset of the matched string and the size of the
371 * replaced string, respectively.
373 * Always returns a new string (either modified or an exact copy).
374 * [Regular Expressions]
376 mpdm_t
mpdm_sregex(mpdm_t v
, const mpdm_t r
, const mpdm_t s
, int offset
)
379 wchar_t *optr
= NULL
;
388 /* return last count */
389 o
= MPDM_I(mpdm_sregex_count
);
393 /* compile the regex */
394 if ((cr
= mpdm_regcomp(r
)) != NULL
) {
400 /* takes pointer to global flag */
401 if ((global
= regex_flags(r
)) !=NULL
)
402 global
= wcschr(global
, 'g');
404 /* store the first part */
405 optr
= mpdm_pokewsn(optr
, &osize
, v
->data
, offset
);
408 ptr
= mpdm_wcstombs((wchar_t *) v
->data
+ offset
, NULL
);
411 mpdm_sregex_count
= 0;
412 mpdm_regex_offset
= -1;
418 f
= !regexec((regex_t
*) cr
->data
, ptr
+ i
,
419 1, &rm
, offset
> 0 ? REG_NOTBOL
: 0);
422 /* creates a string from the beginning
423 to the start of the match */
424 t
= mpdm_ref(MPDM_NMBS(ptr
+ i
, rm
.rm_so
));
425 optr
= mpdm_pokev(optr
, &osize
, t
);
427 /* store offset of substitution */
428 mpdm_regex_offset
= mpdm_size(t
) + offset
;
432 /* get the matched part */
433 t
= MPDM_NMBS(ptr
+ i
+ rm
.rm_so
, rm
.rm_eo
- rm
.rm_so
);
435 /* is s an executable value? */
436 if (MPDM_IS_EXEC(s
)) {
437 /* execute s, with t as argument */
438 t
= mpdm_exec_1(s
, t
, NULL
);
441 /* is s a hash? use match as key */
442 if (MPDM_IS_HASH(s
)) {
448 /* is the value executable? */
449 if (MPDM_IS_EXEC(v
)) {
450 mpdm_t w
= mpdm_ref(v
);
452 v
= mpdm_exec_1(w
, t
, NULL
);
462 t
= expand_ampersands(s
, t
);
464 /* appends the substitution string */
467 optr
= mpdm_pokev(optr
, &osize
, t
);
469 /* store size of substitution */
470 mpdm_regex_size
= mpdm_size(t
);
476 /* one more substitution */
479 } while (f
&& global
);
481 /* no (more) matches; convert and append the rest */
482 t
= MPDM_MBS(ptr
+ i
);
483 optr
= mpdm_pokev(optr
, &osize
, t
);
489 optr
= mpdm_pokewsn(optr
, &osize
, L
"", 1);
491 o
= MPDM_ENS(optr
, osize
- 1);