Fixed file permissions.
[mpdm.git] / mpdm_r.c
blobeed826660303ee573c9ffcfe1f665add53547213
1 /*
3 MPDM - Minimum Profit Data Manager
4 Copyright (C) 2003/2010 Angel Ortega <angel@triptico.com>
6 mpdm_r.c - Regular expressions
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License
10 as published by the Free Software Foundation; either version 2
11 of the License, or (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 http://www.triptico.com
26 #include "config.h"
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <wchar.h>
33 #include "mpdm.h"
35 #ifdef CONFOPT_PCRE
36 #include <pcreposix.h>
37 #endif
39 #ifdef CONFOPT_SYSTEM_REGEX
40 #include <regex.h>
41 #endif
43 #ifdef CONFOPT_INCLUDED_REGEX
44 #include "gnu_regex.h"
45 #endif
48 /** data **/
50 /* outcome of the last match: offset of the matched text (-1 when nothing matched) and size of the match (after mpdm_sregex(), size of the substitution) */
52 int mpdm_regex_offset = -1;
53 int mpdm_regex_size = 0;
55 /* number of substitutions made by the last mpdm_sregex() call */
57 int mpdm_sregex_count = 0;
60 /** code **/
62 static wchar_t *regex_flags(const mpdm_t r)
64 return wcsrchr((wchar_t *) r->data, *(wchar_t *) r->data);
68 static mpdm_t mpdm_regcomp(mpdm_t r)
70 mpdm_t c = NULL;
71 mpdm_t regex_cache = NULL;
73 mpdm_ref(r);
75 /* if cache does not exist, create it */
76 if ((regex_cache = mpdm_hget_s(mpdm_root(), L"__REGEX_CACHE__")) == NULL) {
77 regex_cache = MPDM_H(0);
78 mpdm_hset_s(mpdm_root(), L"__REGEX_CACHE__", regex_cache);
81 /* search the regex in the cache */
82 if ((c = mpdm_hget(regex_cache, r)) == NULL) {
83 mpdm_t rmb;
84 regex_t re;
85 char *regex;
86 char *flags;
87 int f = REG_EXTENDED;
89 /* not found; regex must be compiled */
91 /* convert to mbs */
92 rmb = mpdm_ref(MPDM_2MBS(r->data));
93 regex = (char *) rmb->data;
95 if ((flags = strrchr(regex, *regex)) != NULL) {
97 if (strchr(flags, 'i') != NULL)
98 f |= REG_ICASE;
99 if (strchr(flags, 'm') != NULL)
100 f |= REG_NEWLINE;
102 regex++;
103 *flags = '\0';
105 if (!regcomp(&re, regex, f)) {
106 void *ptr;
108 if ((ptr = malloc(sizeof(regex_t))) != NULL) {
109 /* copies */
110 memcpy(ptr, &re, sizeof(regex_t));
112 /* create value */
113 c = mpdm_new(MPDM_FREE, ptr, sizeof(regex_t));
115 /* stores */
116 mpdm_hset(regex_cache, r, c);
121 mpdm_unref(rmb);
124 mpdm_unref(r);
126 return c;
130 static mpdm_t regex1(mpdm_t r, const mpdm_t v, int offset)
131 /* test for one regex */
133 mpdm_t w = NULL;
134 mpdm_t cr;
136 mpdm_ref(r);
137 mpdm_ref(v);
139 /* no matching yet */
140 mpdm_regex_offset = -1;
142 /* compile the regex */
143 if ((cr = mpdm_regcomp(r)) != NULL) {
144 regmatch_t rm;
145 char *ptr;
146 wchar_t *last;
147 int o = 0;
148 int f = 0;
150 /* takes pointer to 'last' flag */
151 if ((last = regex_flags(r)) != NULL)
152 last = wcschr(last, 'l');
154 /* convert to mbs */
155 ptr = mpdm_wcstombs((wchar_t *) v->data + offset, NULL);
157 /* match? */
158 while (regexec((regex_t *) cr->data, ptr + o, 1,
159 &rm, offset > 0 ? REG_NOTBOL : 0) == 0) {
160 f++;
162 /* if 'last' is not set, it's done */
163 if (last == NULL)
164 break;
166 rm.rm_so += o;
167 rm.rm_eo += o;
168 o = rm.rm_eo;
171 if (f) {
172 /* converts to mbs the string from the beginning
173 to the start of the match, just to know
174 the size (and immediately frees it) */
175 free(mpdm_mbstowcs(ptr, &mpdm_regex_offset, rm.rm_so));
177 /* add the offset */
178 mpdm_regex_offset += offset;
180 /* create now the matching string */
181 w = MPDM_NMBS(ptr + rm.rm_so, rm.rm_eo - rm.rm_so);
183 /* and store the size */
184 mpdm_regex_size = mpdm_size(w);
187 free(ptr);
190 mpdm_unref(v);
191 mpdm_unref(r);
193 return w;
198 * mpdm_regex - Matches a regular expression.
199 * @r: the regular expression
200 * @v: the value to be matched
201 * @offset: offset from the start of v->data
203 * Matches a regular expression against a value. Valid flags are 'i',
204 * for case-insensitive matching, 'm', to treat the string as a
205 * multiline string (i.e., one containing newline characters), so
206 * that ^ and $ match the boundaries of each line instead of the
207 * whole string, 'l', to return the last matching instead of the
208 * first one, or 'g', to match globally; in that last case, an array
209 * containing all matches is returned instead of a string scalar.
211 * If @r is a string, an ordinary regular expression matching is tried
212 * over the @v string. If the matching is possible, the match result
213 * is returned, or NULL otherwise.
215 * If @r is an array (of strings), each element is tried sequentially
216 * as an individual regular expression over the @v string, each one using
217 * the offset returned by the previous match. All regular expressions
218 * must match to be successful. If this is the case, an array (with
219 * the same number of arguments) is returned containing the matched
220 * strings, or NULL otherwise.
222 * If @r is NULL, the result of the previous regex matching
223 * is returned as a two element array. The first element will contain
224 * the character offset of the matching and the second the number of
225 * characters matched. If the previous regex was unsuccessful, NULL
226 * is returned.
227 * [Regular Expressions]
229 mpdm_t mpdm_regex(mpdm_t r, const mpdm_t v, int offset)
231 mpdm_t w = NULL;
233 mpdm_ref(r);
234 mpdm_ref(v);
236 /* special case: if r is NULL, return previous match */
237 if (r == NULL) {
238 /* if previous regex was successful... */
239 if (mpdm_regex_offset != -1) {
240 w = MPDM_A(2);
242 mpdm_ref(w);
243 mpdm_aset(w, MPDM_I(mpdm_regex_offset), 0);
244 mpdm_aset(w, MPDM_I(mpdm_regex_size), 1);
245 mpdm_unrefnd(w);
248 else
249 if (v != NULL) {
250 if (r->flags & MPDM_MULTIPLE) {
251 int n;
252 mpdm_t t;
254 /* multiple value; try sequentially all regexes,
255 moving the offset forward */
257 w = MPDM_A(0);
258 mpdm_ref(w);
260 for (n = 0; n < mpdm_size(r); n++) {
261 t = mpdm_regex(mpdm_aget(r, n), v, offset);
263 if (t == NULL)
264 break;
266 /* found; store and move forward */
267 mpdm_push(w, t);
268 offset = mpdm_regex_offset + mpdm_regex_size;
271 mpdm_unrefnd(w);
273 else {
274 wchar_t *global;
276 /* takes pointer to 'global' flag */
277 if ((global = regex_flags(r)) != NULL)
278 global = wcschr(global, 'g');
280 if (global != NULL) {
281 mpdm_t t;
283 /* match sequentially until done */
284 w = MPDM_A(0);
285 mpdm_ref(w);
287 while ((t = regex1(r, v, offset)) != NULL) {
288 mpdm_push(w, t);
290 offset = mpdm_regex_offset + mpdm_regex_size;
293 mpdm_unrefnd(w);
295 else
296 w = regex1(r, v, offset);
300 mpdm_unref(v);
301 mpdm_unref(r);
303 return w;
307 static mpdm_t expand_ampersands(const mpdm_t s, const mpdm_t t)
308 /* substitutes all unescaped ampersands in s with t */
310 const wchar_t *sptr = mpdm_string(s);
311 wchar_t *wptr;
312 wchar_t *optr = NULL;
313 int osize = 0;
314 mpdm_t r = NULL;
316 mpdm_ref(s);
317 mpdm_ref(t);
319 if (s != NULL) {
320 while ((wptr = wcschr(sptr, L'\\')) != NULL ||
321 (wptr = wcschr(sptr, L'&')) != NULL) {
322 int n = wptr - sptr;
324 /* add the leading part */
325 optr = mpdm_pokewsn(optr, &osize, sptr, n);
327 if (*wptr == L'\\') {
328 if (*(wptr + 1) == L'&' || *(wptr + 1) == L'\\')
329 wptr++;
331 optr = mpdm_pokewsn(optr, &osize, wptr, 1);
333 else
334 if (*wptr == '&')
335 optr = mpdm_pokev(optr, &osize, t);
337 sptr = wptr + 1;
340 /* add the rest of the string */
341 optr = mpdm_pokews(optr, &osize, sptr);
342 optr = mpdm_pokewsn(optr, &osize, L"", 1);
343 r = MPDM_ENS(optr, osize - 1);
346 mpdm_unref(t);
347 mpdm_unref(s);
349 return r;
354 * mpdm_sregex - Matches and substitutes a regular expression.
355 * @r: the regular expression
356 * @v: the value to be matched
357 * @s: the substitution string, hash or code
358 * @offset: offset from the start of v->data
360 * Matches a regular expression against a value, and substitutes the
361 * found substring with @s. Valid flags are 'i', for case-insensitive
362 * matching, and 'g', for global replacements (all ocurrences in @v
363 * will be replaced, instead of just the first found one).
365 * If @s is executable, it's executed with the matched part as
366 * the only argument and its return value is used as the
367 * substitution string.
369 * If @s is a hash, the matched string is used as a key to it and
370 * its value used as the substitution. If this value itself is
371 * executable, it's executed with the matched string as its only
372 * argument and its return value used as the substitution.
374 * If @r is NULL, returns the number of substitutions made in the
375 * previous call to mpdm_sregex() (can be zero if none was done).
377 * The global variables @mpdm_regex_offset and @mpdm_regex_size are
378 * set to the offset of the matched string and the size of the
379 * replaced string, respectively.
381 * Always returns a new string (either modified or an exact copy).
382 * [Regular Expressions]
384 mpdm_t mpdm_sregex(mpdm_t r, const mpdm_t v, const mpdm_t s, int offset)
386 mpdm_t cr;
387 wchar_t *optr = NULL;
388 int osize = 0;
389 mpdm_t o = NULL;
391 mpdm_ref(r);
392 mpdm_ref(v);
393 mpdm_ref(s);
395 if (r == NULL) {
396 /* return last count */
397 o = MPDM_I(mpdm_sregex_count);
399 else
400 if (v != NULL) {
401 /* compile the regex */
402 if ((cr = mpdm_regcomp(r)) != NULL) {
403 char *ptr;
404 int f, i = 0;
405 wchar_t *global;
406 mpdm_t t;
408 /* takes pointer to global flag */
409 if ((global = regex_flags(r)) !=NULL)
410 global = wcschr(global, 'g');
412 /* store the first part */
413 optr = mpdm_pokewsn(optr, &osize, v->data, offset);
415 /* convert to mbs */
416 ptr = mpdm_wcstombs((wchar_t *) v->data + offset, NULL);
418 /* reset count */
419 mpdm_sregex_count = 0;
420 mpdm_regex_offset = -1;
422 do {
423 regmatch_t rm;
425 /* try match */
426 f = !regexec((regex_t *) cr->data, ptr + i,
427 1, &rm, offset > 0 ? REG_NOTBOL : 0);
429 if (f) {
430 /* creates a string from the beginning
431 to the start of the match */
432 t = mpdm_ref(MPDM_NMBS(ptr + i, rm.rm_so));
433 optr = mpdm_pokev(optr, &osize, t);
435 /* store offset of substitution */
436 mpdm_regex_offset = mpdm_size(t) + offset;
438 mpdm_unref(t);
440 /* get the matched part */
441 t = MPDM_NMBS(ptr + i + rm.rm_so, rm.rm_eo - rm.rm_so);
443 /* is s an executable value? */
444 if (MPDM_IS_EXEC(s)) {
445 /* execute s, with t as argument */
446 t = mpdm_exec_1(s, t);
448 else
449 /* is s a hash? use match as key */
450 if (MPDM_IS_HASH(s)) {
451 mpdm_t v = mpdm_hget(s, t);
453 mpdm_ref(t);
455 /* is the value executable? */
456 if (MPDM_IS_EXEC(v)) {
457 mpdm_t w = mpdm_ref(v);
459 v = mpdm_exec_1(w, t);
461 mpdm_unref(w);
464 mpdm_unref(t);
466 t = v;
468 else
469 t = expand_ampersands(s, t);
471 /* appends the substitution string */
472 mpdm_ref(t);
474 optr = mpdm_pokev(optr, &osize, t);
476 /* store size of substitution */
477 mpdm_regex_size = mpdm_size(t);
479 mpdm_unref(t);
481 i += rm.rm_eo;
483 /* one more substitution */
484 mpdm_sregex_count++;
486 } while (f && global);
488 /* no (more) matches; convert and append the rest */
489 t = MPDM_MBS(ptr + i);
490 optr = mpdm_pokev(optr, &osize, t);
492 free(ptr);
495 /* NULL-terminate */
496 optr = mpdm_pokewsn(optr, &osize, L"", 1);
498 o = MPDM_ENS(optr, osize - 1);
501 mpdm_unref(s);
502 mpdm_unref(v);
503 mpdm_unref(r);
505 return o;