Add an uninstall target
[jimtcl.git] / jim-regexp.c
blob702fdfe295a25e4a28d4fa352019c08fdc0ac4c1
1 /*
2 * Implements the regexp and regsub commands for Jim
4 * (c) 2008 Steve Bennett <steveb@workware.net.au>
6 * Uses C library regcomp()/regexec() for the matching.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer in the documentation and/or other materials
17 * provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE JIM TCL PROJECT ``AS IS'' AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * JIM TCL PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
24 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
30 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 * The views and conclusions contained in the software and documentation
33 * are those of the authors and should not be interpreted as representing
34 * official policies, either expressed or implied, of the Jim Tcl Project.
36 * Based on code originally from Tcl 6.7:
38 * Copyright 1987-1991 Regents of the University of California
39 * Permission to use, copy, modify, and distribute this
40 * software and its documentation for any purpose and without
41 * fee is hereby granted, provided that the above copyright
42 * notice appear in all copies. The University of California
43 * makes no representations about the suitability of this
44 * software for any purpose. It is provided "as is" without
45 * express or implied warranty.
48 #include <stdlib.h>
49 #include <string.h>
51 #include "jim.h"
52 #include "jimautoconf.h"
53 #include "jimregexp.h"
55 static void FreeRegexpInternalRep(Jim_Interp *interp, Jim_Obj *objPtr)
57 regfree(objPtr->internalRep.regexpValue.compre);
58 Jim_Free(objPtr->internalRep.regexpValue.compre);
61 static const Jim_ObjType regexpObjType = {
62 "regexp",
63 FreeRegexpInternalRep,
64 NULL,
65 NULL,
66 JIM_TYPE_NONE
69 static regex_t *SetRegexpFromAny(Jim_Interp *interp, Jim_Obj *objPtr, unsigned flags)
71 regex_t *compre;
72 const char *pattern;
73 int ret;
75 /* Check if the object is already an uptodate variable */
76 if (objPtr->typePtr == &regexpObjType &&
77 objPtr->internalRep.regexpValue.compre && objPtr->internalRep.regexpValue.flags == flags) {
78 /* nothing to do */
79 return objPtr->internalRep.regexpValue.compre;
82 /* Not a regexp or the flags do not match */
83 if (objPtr->typePtr == &regexpObjType) {
84 FreeRegexpInternalRep(interp, objPtr);
85 objPtr->typePtr = NULL;
88 /* Get the string representation */
89 pattern = Jim_String(objPtr);
90 compre = Jim_Alloc(sizeof(regex_t));
92 if ((ret = regcomp(compre, pattern, REG_EXTENDED | flags)) != 0) {
93 char buf[100];
95 regerror(ret, compre, buf, sizeof(buf));
96 Jim_SetResultFormatted(interp, "couldn't compile regular expression pattern: %s", buf);
97 regfree(compre);
98 Jim_Free(compre);
99 return NULL;
102 objPtr->typePtr = &regexpObjType;
103 objPtr->internalRep.regexpValue.flags = flags;
104 objPtr->internalRep.regexpValue.compre = compre;
106 return compre;
109 int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
111 int opt_indices = 0;
112 int opt_all = 0;
113 int opt_inline = 0;
114 regex_t *regex;
115 int match, i, j;
116 int offset = 0;
117 regmatch_t *pmatch = NULL;
118 int source_len;
119 int result = JIM_OK;
120 const char *pattern;
121 const char *source_str;
122 int num_matches = 0;
123 int num_vars;
124 Jim_Obj *resultListObj = NULL;
125 int regcomp_flags = 0;
126 int eflags = 0;
127 int option;
128 enum {
129 OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_END
131 static const char * const options[] = {
132 "-indices", "-nocase", "-line", "-all", "-inline", "-start", "--", NULL
135 if (argc < 3) {
136 wrongNumArgs:
137 Jim_WrongNumArgs(interp, 1, argv,
138 "?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
139 return JIM_ERR;
142 for (i = 1; i < argc; i++) {
143 const char *opt = Jim_String(argv[i]);
145 if (*opt != '-') {
146 break;
148 if (Jim_GetEnum(interp, argv[i], options, &option, "switch", JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
149 return JIM_ERR;
151 if (option == OPT_END) {
152 i++;
153 break;
155 switch (option) {
156 case OPT_INDICES:
157 opt_indices = 1;
158 break;
160 case OPT_NOCASE:
161 regcomp_flags |= REG_ICASE;
162 break;
164 case OPT_LINE:
165 regcomp_flags |= REG_NEWLINE;
166 break;
168 case OPT_ALL:
169 opt_all = 1;
170 break;
172 case OPT_INLINE:
173 opt_inline = 1;
174 break;
176 case OPT_START:
177 if (++i == argc) {
178 goto wrongNumArgs;
180 if (Jim_GetIndex(interp, argv[i], &offset) != JIM_OK) {
181 return JIM_ERR;
183 break;
186 if (argc - i < 2) {
187 goto wrongNumArgs;
190 regex = SetRegexpFromAny(interp, argv[i], regcomp_flags);
191 if (!regex) {
192 return JIM_ERR;
195 pattern = Jim_String(argv[i]);
196 source_str = Jim_GetString(argv[i + 1], &source_len);
198 num_vars = argc - i - 2;
200 if (opt_inline) {
201 if (num_vars) {
202 Jim_SetResultString(interp, "regexp match variables not allowed when using -inline",
203 -1);
204 result = JIM_ERR;
205 goto done;
207 num_vars = regex->re_nsub + 1;
210 pmatch = Jim_Alloc((num_vars + 1) * sizeof(*pmatch));
212 /* If an offset has been specified, adjust for that now.
213 * If it points past the end of the string, point to the terminating null
215 if (offset) {
216 if (offset < 0) {
217 offset += source_len + 1;
219 if (offset > source_len) {
220 source_str += source_len;
222 else if (offset > 0) {
223 source_str += offset;
225 eflags |= REG_NOTBOL;
228 if (opt_inline) {
229 resultListObj = Jim_NewListObj(interp, NULL, 0);
232 next_match:
233 match = regexec(regex, source_str, num_vars + 1, pmatch, eflags);
234 if (match >= REG_BADPAT) {
235 char buf[100];
237 regerror(match, regex, buf, sizeof(buf));
238 Jim_SetResultFormatted(interp, "error while matching pattern: %s", buf);
239 result = JIM_ERR;
240 goto done;
243 if (match == REG_NOMATCH) {
244 goto done;
247 num_matches++;
249 if (opt_all && !opt_inline) {
250 /* Just count the number of matches, so skip the substitution h */
251 goto try_next_match;
255 * If additional variable names have been specified, return
256 * index information in those variables.
259 j = 0;
260 for (i += 2; opt_inline ? j < num_vars : i < argc; i++, j++) {
261 Jim_Obj *resultObj;
263 if (opt_indices) {
264 resultObj = Jim_NewListObj(interp, NULL, 0);
266 else {
267 resultObj = Jim_NewStringObj(interp, "", 0);
270 if (pmatch[j].rm_so == -1) {
271 if (opt_indices) {
272 Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
273 Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
276 else {
277 int len = pmatch[j].rm_eo - pmatch[j].rm_so;
279 if (opt_indices) {
280 Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp,
281 offset + pmatch[j].rm_so));
282 Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp,
283 offset + pmatch[j].rm_so + len - 1));
285 else {
286 Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, len);
290 if (opt_inline) {
291 Jim_ListAppendElement(interp, resultListObj, resultObj);
293 else {
294 /* And now set the result variable */
295 result = Jim_SetVariable(interp, argv[i], resultObj);
297 if (result != JIM_OK) {
298 Jim_FreeObj(interp, resultObj);
299 break;
304 try_next_match:
305 if (opt_all && (pattern[0] != '^' || (regcomp_flags & REG_NEWLINE)) && *source_str) {
306 if (pmatch[0].rm_eo) {
307 offset += pmatch[0].rm_eo;
308 source_str += pmatch[0].rm_eo;
310 else {
311 source_str++;
312 offset++;
314 if (*source_str) {
315 eflags = REG_NOTBOL;
316 goto next_match;
320 done:
321 if (result == JIM_OK) {
322 if (opt_inline) {
323 Jim_SetResult(interp, resultListObj);
325 else {
326 Jim_SetResultInt(interp, num_matches);
330 Jim_Free(pmatch);
331 return result;
334 #define MAX_SUB_MATCHES 50
336 int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
338 int regcomp_flags = 0;
339 int regexec_flags = 0;
340 int opt_all = 0;
341 int offset = 0;
342 regex_t *regex;
343 const char *p;
344 int result;
345 regmatch_t pmatch[MAX_SUB_MATCHES + 1];
346 int num_matches = 0;
348 int i, j, n;
349 Jim_Obj *varname;
350 Jim_Obj *resultObj;
351 const char *source_str;
352 int source_len;
353 const char *replace_str;
354 int replace_len;
355 const char *pattern;
356 int option;
357 enum {
358 OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_END
360 static const char * const options[] = {
361 "-nocase", "-line", "-all", "-start", "--", NULL
364 if (argc < 4) {
365 wrongNumArgs:
366 Jim_WrongNumArgs(interp, 1, argv,
367 "?switches? exp string subSpec ?varName?");
368 return JIM_ERR;
371 for (i = 1; i < argc; i++) {
372 const char *opt = Jim_String(argv[i]);
374 if (*opt != '-') {
375 break;
377 if (Jim_GetEnum(interp, argv[i], options, &option, "switch", JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
378 return JIM_ERR;
380 if (option == OPT_END) {
381 i++;
382 break;
384 switch (option) {
385 case OPT_NOCASE:
386 regcomp_flags |= REG_ICASE;
387 break;
389 case OPT_LINE:
390 regcomp_flags |= REG_NEWLINE;
391 break;
393 case OPT_ALL:
394 opt_all = 1;
395 break;
397 case OPT_START:
398 if (++i == argc) {
399 goto wrongNumArgs;
401 if (Jim_GetIndex(interp, argv[i], &offset) != JIM_OK) {
402 return JIM_ERR;
404 break;
407 if (argc - i != 3 && argc - i != 4) {
408 goto wrongNumArgs;
411 regex = SetRegexpFromAny(interp, argv[i], regcomp_flags);
412 if (!regex) {
413 return JIM_ERR;
415 pattern = Jim_String(argv[i]);
417 source_str = Jim_GetString(argv[i + 1], &source_len);
418 replace_str = Jim_GetString(argv[i + 2], &replace_len);
419 varname = argv[i + 3];
421 /* Create the result string */
422 resultObj = Jim_NewStringObj(interp, "", 0);
424 /* If an offset has been specified, adjust for that now.
425 * If it points past the end of the string, point to the terminating null
427 if (offset) {
428 if (offset < 0) {
429 offset += source_len + 1;
431 if (offset > source_len) {
432 offset = source_len;
434 else if (offset < 0) {
435 offset = 0;
439 /* Copy the part before -start */
440 Jim_AppendString(interp, resultObj, source_str, offset);
443 * The following loop is to handle multiple matches within the
444 * same source string; each iteration handles one match and its
445 * corresponding substitution. If "-all" hasn't been specified
446 * then the loop body only gets executed once.
449 n = source_len - offset;
450 p = source_str + offset;
451 do {
452 int match = regexec(regex, p, MAX_SUB_MATCHES, pmatch, regexec_flags);
454 if (match >= REG_BADPAT) {
455 char buf[100];
457 regerror(match, regex, buf, sizeof(buf));
458 Jim_SetResultFormatted(interp, "error while matching pattern: %s", buf);
459 return JIM_ERR;
461 if (match == REG_NOMATCH) {
462 break;
465 num_matches++;
468 * Copy the portion of the source string before the match to the
469 * result variable.
471 Jim_AppendString(interp, resultObj, p, pmatch[0].rm_so);
474 * Append the subSpec (replace_str) argument to the variable, making appropriate
475 * substitutions. This code is a bit hairy because of the backslash
476 * conventions and because the code saves up ranges of characters in
477 * subSpec to reduce the number of calls to Jim_SetVar.
480 for (j = 0; j < replace_len; j++) {
481 int idx;
482 int c = replace_str[j];
484 if (c == '&') {
485 idx = 0;
487 else if (c == '\\' && j < replace_len) {
488 c = replace_str[++j];
489 if ((c >= '0') && (c <= '9')) {
490 idx = c - '0';
492 else if ((c == '\\') || (c == '&')) {
493 Jim_AppendString(interp, resultObj, replace_str + j, 1);
494 continue;
496 else {
497 Jim_AppendString(interp, resultObj, replace_str + j - 1, 2);
498 continue;
501 else {
502 Jim_AppendString(interp, resultObj, replace_str + j, 1);
503 continue;
505 if ((idx < MAX_SUB_MATCHES) && pmatch[idx].rm_so != -1 && pmatch[idx].rm_eo != -1) {
506 Jim_AppendString(interp, resultObj, p + pmatch[idx].rm_so,
507 pmatch[idx].rm_eo - pmatch[idx].rm_so);
511 p += pmatch[0].rm_eo;
512 n -= pmatch[0].rm_eo;
514 /* If -all is not specified, or there is no source left, we are done */
515 if (!opt_all || n == 0) {
516 break;
519 /* An anchored pattern without -line must be done */
520 if ((regcomp_flags & REG_NEWLINE) == 0 && pattern[0] == '^') {
521 break;
524 /* If the pattern is empty, need to step forwards */
525 if (pattern[0] == '\0' && n) {
526 /* Need to copy the char we are moving over */
527 Jim_AppendString(interp, resultObj, p, 1);
528 p++;
529 n--;
532 regexec_flags |= REG_NOTBOL;
533 } while (n);
536 * Copy the portion of the string after the last match to the
537 * result variable.
539 Jim_AppendString(interp, resultObj, p, -1);
541 /* And now set or return the result variable */
542 if (argc - i == 4) {
543 result = Jim_SetVariable(interp, varname, resultObj);
545 if (result == JIM_OK) {
546 Jim_SetResultInt(interp, num_matches);
548 else {
549 Jim_FreeObj(interp, resultObj);
552 else {
553 Jim_SetResult(interp, resultObj);
554 result = JIM_OK;
557 return result;
560 int Jim_regexpInit(Jim_Interp *interp)
562 if (Jim_PackageProvide(interp, "regexp", "1.0", JIM_ERRMSG))
563 return JIM_ERR;
565 Jim_CreateCommand(interp, "regexp", Jim_RegexpCmd, NULL, NULL);
566 Jim_CreateCommand(interp, "regsub", Jim_RegsubCmd, NULL, NULL);
567 return JIM_OK;