New functions mpdm_set_ival() and mpdm_set_rval() (Closes: #1107).
[mpdm.git] / mpdm_r.c
blob20f4927c4b8765f70965b12ac760e29fc7a108ce
1 /*
3 MPDM - Minimum Profit Data Manager
4 Copyright (C) 2003/2007 Angel Ortega <angel@triptico.com>
6 mpdm_r.c - Regular expressions
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2
11 of the License, or (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 http://www.triptico.com
26 #include "config.h"
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <wchar.h>
33 #include "mpdm.h"
35 #ifdef CONFOPT_PCRE
36 #include <pcreposix.h>
37 #endif
39 #ifdef CONFOPT_SYSTEM_REGEX
40 #include <regex.h>
41 #endif
43 #ifdef CONFOPT_INCLUDED_REGEX
44 #include "gnu_regex.h"
45 #endif
47 /*******************
48 Data
49 ********************/
/* outcome of the last regex matching: mpdm_regex_offset is the
   offset of the match (-1 meaning there was no match) and
   mpdm_regex_size its size */
int mpdm_regex_offset = -1;
int mpdm_regex_size = 0;

/* how many substitutions the last sregex made */
int mpdm_sregex_count = 0;
60 /*******************
61 Code
62 ********************/
64 static wchar_t *regex_flags(mpdm_t r)
66 return wcsrchr((wchar_t *) r->data, *(wchar_t *) r->data);
70 mpdm_t mpdm_regcomp(mpdm_t r)
72 mpdm_t c = NULL;
73 mpdm_t regex_cache = NULL;
75 /* if cache does not exist, create it */
76 if ((regex_cache = mpdm_hget_s(mpdm_root(), L"__REGEX_CACHE__")) == NULL) {
77 regex_cache = MPDM_H(0);
78 mpdm_hset_s(mpdm_root(), L"__REGEX_CACHE__", regex_cache);
81 /* search the regex in the cache */
82 if ((c = mpdm_hget(regex_cache, r)) == NULL) {
83 mpdm_t rmb;
84 regex_t re;
85 char *regex;
86 char *flags;
87 int f = REG_EXTENDED;
89 /* not found; regex must be compiled */
91 /* convert to mbs */
92 rmb = MPDM_2MBS(r->data);
94 regex = (char *) rmb->data;
95 if ((flags = strrchr(regex, *regex)) == NULL)
96 return NULL;
98 if (strchr(flags, 'i') != NULL)
99 f |= REG_ICASE;
100 if (strchr(flags, 'm') != NULL)
101 f |= REG_NEWLINE;
103 regex++;
104 *flags = '\0';
106 if (!regcomp(&re, regex, f)) {
107 void *ptr;
109 if ((ptr = malloc(sizeof(regex_t))) != NULL) {
110 /* copies */
111 memcpy(ptr, &re, sizeof(regex_t));
113 /* create value */
114 c = mpdm_new(MPDM_FREE, ptr, sizeof(regex_t));
116 /* stores */
117 mpdm_hset(regex_cache, r, c);
122 return c;
127 * mpdm_regex - Matches a regular expression.
128 * @r: the regular expression
129 * @v: the value to be matched
130 * @offset: offset from the start of v->data
132 * Matches a regular expression against a value. Valid flags are 'i',
133 * for case-insensitive matching, 'm', to treat the string as a
134 * multiline string (i.e., one containing newline characters), so
135 * that ^ and $ match the boundaries of each line instead of the
136 * whole string, or 'l', to return the last matching instead of
137 * the first one.
139 * If @r is a string, an ordinary regular expression matching is tried
140 * over the @v string. If the matching is possible, the matched string
141 * is returned, or NULL otherwise.
143 * If @r is an array (of strings), each element is tried sequentially
144 * as an individual regular expression over the @v string, each one using
145 * the offset returned by the previous match. All regular expressions
146 * must match to be successful. If this is the case, an array (with
147 * the same number of arguments) is returned containing the matched
148 * strings, or NULL otherwise.
150 * If @r is NULL, the result of the previous regex matching
151 * is returned as a two element array. The first element will contain
152 * the character offset of the matching and the second the number of
153 * characters matched. If the previous regex was unsuccessful, NULL
154 * is returned.
155 * [Regular Expressions]
157 mpdm_t mpdm_regex(mpdm_t r, mpdm_t v, int offset)
159 mpdm_t w = NULL;
160 mpdm_t t;
162 /* special case: if r is NULL, return previous match */
163 if (r == NULL) {
164 /* if previous regex was successful... */
165 if (mpdm_regex_offset != -1) {
166 w = MPDM_A(2);
168 mpdm_aset(w, MPDM_I(mpdm_regex_offset), 0);
169 mpdm_aset(w, MPDM_I(mpdm_regex_size), 1);
172 return w;
175 /* if the string to be tested is NULL, return NULL */
176 if (v == NULL)
177 return NULL;
179 if (r->flags & MPDM_MULTIPLE) {
180 int n;
182 /* multiple value; try sequentially all regexes,
183 moving the offset forward */
185 w = MPDM_A(mpdm_size(r));
187 for (n = 0; n < mpdm_size(r); n++) {
188 t = mpdm_regex(mpdm_aget(r, n), v, offset);
190 /* if not found, invalid all search and exit */
191 if (t == NULL) {
192 w = NULL;
193 break;
196 /* found; store and move forward */
197 mpdm_aset(w, t, n);
198 offset = mpdm_regex_offset + mpdm_regex_size;
201 else {
202 mpdm_t cr;
204 /* single value; really do the regex */
206 /* no matching yet */
207 mpdm_regex_offset = -1;
209 /* compile the regex */
210 if ((cr = mpdm_regcomp(r)) != NULL) {
211 regmatch_t rm;
212 char *ptr;
213 wchar_t *last;
214 int o = 0;
215 int f = 0;
217 /* takes pointer to 'last' flag */
218 if ((last = regex_flags(r)) != NULL)
219 last = wcschr(last, 'l');
221 /* convert to mbs */
222 ptr = mpdm_wcstombs((wchar_t *) v->data + offset, NULL);
224 /* match? */
225 while (regexec((regex_t *) cr->data, ptr + o, 1,
226 &rm, offset > 0 ? REG_NOTBOL : 0) == 0) {
227 f++;
229 /* if 'last' is not set, it's done */
230 if (last == NULL)
231 break;
233 rm.rm_so += o;
234 rm.rm_eo += o;
235 o = rm.rm_eo;
238 if (f) {
239 /* converts to mbs the string from the beginning
240 to the start of the match, just to know
241 the size (and immediately frees it) */
242 free(mpdm_mbstowcs(ptr, &mpdm_regex_offset, rm.rm_so));
244 /* add the offset */
245 mpdm_regex_offset += offset;
247 /* create now the matching string */
248 w = MPDM_NMBS(ptr + rm.rm_so, rm.rm_eo - rm.rm_so);
250 /* and store the size */
251 mpdm_regex_size = mpdm_size(w);
254 free(ptr);
258 return w;
262 static mpdm_t expand_ampersands(mpdm_t s, mpdm_t t)
263 /* substitutes all unescaped ampersands in s with t */
265 wchar_t *sptr = mpdm_string(s);
266 wchar_t *wptr;
268 if ((wptr = wcschr(sptr, L'&')) != NULL) {
269 mpdm_t v = NULL;
271 while (wptr != NULL) {
272 int n = wptr - sptr;
273 mpdm_t t2 = t;
275 if (n && *(wptr - 1) == '\\') {
276 /* is it escaped? avoid the \ */
277 n--;
279 /* and set the substitution string to & */
280 t2 = MPDM_LS(L"&");
283 /* add the leading part */
284 v = mpdm_strcat(v, MPDM_NS(sptr, n));
286 /* now add the substitution string */
287 v = mpdm_strcat(v, t2);
289 sptr = wptr + 1;
290 wptr = wcschr(sptr, L'&');
293 /* add the rest of the string */
294 s = mpdm_strcat(v, MPDM_S(sptr));
297 return s;
302 * mpdm_sregex - Matches and substitutes a regular expression.
303 * @r: the regular expression
304 * @v: the value to be matched
305 * @s: the substitution string, hash or code
306 * @offset: offset from the start of v->data
308 * Matches a regular expression against a value, and substitutes the
309 * found substring with @s. Valid flags are 'i', for case-insensitive
310 * matching, and 'g', for global replacements (all ocurrences in @v
311 * will be replaced, instead of just the first found one).
313 * If @s is executable, it's executed with the matched part as
314 * the only argument and its return value is used as the
315 * substitution string.
317 * If @s is a hash, the matched string is used as a key to it and
318 * its value used as the substitution.
320 * If @r is NULL, returns the number of substitutions made in the
321 * previous call to mpdm_sregex() (can be zero if none was done).
323 * The global variables @mpdm_regex_offset and @mpdm_regex_size are
324 * set to the offset of the matched string and the size of the
325 * replaced string, respectively.
327 * Returns the modified string, or the original one if no substitutions
328 * were done.
329 * [Regular Expressions]
331 mpdm_t mpdm_sregex(mpdm_t r, mpdm_t v, mpdm_t s, int offset)
333 mpdm_t cr;
334 mpdm_t o = v;
336 if (r == NULL) {
337 /* return last count */
338 return MPDM_I(mpdm_sregex_count);
341 if (v == NULL)
342 return NULL;
344 /* compile the regex */
345 if ((cr = mpdm_regcomp(r)) != NULL) {
346 char *ptr;
347 int f, i = 0;
348 wchar_t *global;
349 mpdm_t t;
351 /* takes pointer to global flag */
352 if ((global = regex_flags(r)) !=NULL)
353 global = wcschr(global, 'g');
355 /* store the first part */
356 o = MPDM_NS(v->data, offset);
358 /* convert to mbs */
359 if ((ptr = mpdm_wcstombs((wchar_t *) v->data + offset, NULL)) == NULL)
360 return NULL;
362 /* reset count */
363 mpdm_sregex_count = 0;
364 mpdm_regex_offset = -1;
366 do {
367 regmatch_t rm;
369 /* try match */
370 f = !regexec((regex_t *) cr->data, ptr + i,
371 1, &rm, offset > 0 ? REG_NOTBOL : 0);
373 if (f) {
374 /* creates a string from the beginning
375 to the start of the match */
376 t = MPDM_NMBS(ptr + i, rm.rm_so);
377 o = mpdm_strcat(o, t);
379 /* store offset of substitution */
380 mpdm_regex_offset = mpdm_size(t) + offset;
382 /* get the matched part */
383 t = MPDM_NMBS(ptr + i + rm.rm_so, rm.rm_eo - rm.rm_so);
385 /* is s an executable value? */
386 if (MPDM_IS_EXEC(s)) {
387 /* protect o from sweeping */
388 mpdm_ref(o);
390 /* execute s, with t as argument */
391 t = mpdm_exec_1(s, t);
393 mpdm_unref(o);
395 else
396 /* is s a hash? use match as key */
397 if (MPDM_IS_HASH(s))
398 t = mpdm_hget(s, t);
399 else
400 t = expand_ampersands(s, t);
402 /* appends the substitution string */
403 o = mpdm_strcat(o, t);
405 /* store size of substitution */
406 mpdm_regex_size = mpdm_size(t);
408 i += rm.rm_eo;
410 /* one more substitution */
411 mpdm_sregex_count++;
414 } while (f && global);
416 /* no (more) matches; convert and append the rest */
417 t = MPDM_MBS(ptr + i);
418 o = mpdm_strcat(o, t);
420 free(ptr);
423 return o;