aio recvfrom was not null terminating the result
[jimtcl.git] / jim-regexp.c
blob8744ef17d20f51fbc0507ed5e44794827358790a
2 /*
3 * (c) 2008 Steve Bennett <steveb@workware.net.au>
5 * Implements the regexp and regsub commands for Jim
7 * Uses C library regcomp()/regexec() for the matching.
9 * The FreeBSD license
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials
20 * provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE JIM TCL PROJECT ``AS IS'' AND ANY
23 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
25 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * JIM TCL PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
27 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
33 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 * The views and conclusions contained in the software and documentation
36 * are those of the authors and should not be interpreted as representing
37 * official policies, either expressed or implied, of the Jim Tcl Project.
39 * Based on code originally from Tcl 6.7:
41 * Copyright 1987-1991 Regents of the University of California
42 * Permission to use, copy, modify, and distribute this
43 * software and its documentation for any purpose and without
44 * fee is hereby granted, provided that the above copyright
45 * notice appear in all copies. The University of California
46 * makes no representations about the suitability of this
47 * software for any purpose. It is provided "as is" without
48 * express or implied warranty.
51 #include <stdlib.h>
52 #include <string.h>
53 #include <regex.h>
55 #include "jim.h"
57 void FreeRegexpInternalRep(Jim_Interp *interp, Jim_Obj *objPtr)
59 regfree(objPtr->internalRep.regexpValue.compre);
60 Jim_Free(objPtr->internalRep.regexpValue.compre);
63 static Jim_ObjType regexpObjType = {
64 "regexp",
65 FreeRegexpInternalRep,
66 NULL,
67 NULL,
68 JIM_TYPE_NONE
71 static regex_t *SetRegexpFromAny(Jim_Interp *interp, Jim_Obj *objPtr, unsigned flags)
73 regex_t *compre;
74 const char *pattern;
75 int ret;
77 /* Check if the object is already an uptodate variable */
78 if (objPtr->typePtr == &regexpObjType &&
79 objPtr->internalRep.regexpValue.compre && objPtr->internalRep.regexpValue.flags == flags) {
80 /* nothing to do */
81 return objPtr->internalRep.regexpValue.compre;
84 /* Not a regexp or the flags do not match */
85 if (objPtr->typePtr == &regexpObjType) {
86 FreeRegexpInternalRep(interp, objPtr);
87 objPtr->typePtr = NULL;
90 /* Get the string representation */
91 pattern = Jim_GetString(objPtr, NULL);
92 compre = Jim_Alloc(sizeof(regex_t));
94 if ((ret = regcomp(compre, pattern, REG_EXTENDED | flags)) != 0) {
95 char buf[100];
97 regerror(ret, compre, buf, sizeof(buf));
98 Jim_SetResultFormatted(interp, "couldn't compile regular expression pattern: %s", buf);
99 regfree(compre);
100 Jim_Free(compre);
101 return NULL;
104 objPtr->typePtr = &regexpObjType;
105 objPtr->internalRep.regexpValue.flags = flags;
106 objPtr->internalRep.regexpValue.compre = compre;
108 return compre;
111 int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
113 int opt_indices = 0;
114 int opt_all = 0;
115 int opt_inline = 0;
116 regex_t *regex;
117 int match, i, j;
118 long offset = 0;
119 regmatch_t *pmatch = NULL;
120 int source_len;
121 int result = JIM_OK;
122 const char *pattern;
123 const char *source_str;
124 int num_matches = 0;
125 int num_vars;
126 Jim_Obj *resultListObj = NULL;
127 int regcomp_flags = 0;
129 if (argc < 3) {
130 wrongNumArgs:
131 Jim_WrongNumArgs(interp, 1, argv,
132 "?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? ?--? exp string ?matchVar? ?subMatchVar ...?");
133 return JIM_ERR;
136 for (i = 1; i < argc; i++) {
137 if (Jim_CompareStringImmediate(interp, argv[i], "-indices")) {
138 opt_indices = 1;
140 else if (Jim_CompareStringImmediate(interp, argv[i], "-nocase")) {
141 regcomp_flags |= REG_ICASE;
143 else if (Jim_CompareStringImmediate(interp, argv[i], "-line")) {
144 regcomp_flags |= REG_NEWLINE;
146 else if (Jim_CompareStringImmediate(interp, argv[i], "-all")) {
147 opt_all = 1;
149 else if (Jim_CompareStringImmediate(interp, argv[i], "-inline")) {
150 opt_inline = 1;
152 else if (Jim_CompareStringImmediate(interp, argv[i], "-start")) {
153 if (++i == argc) {
154 goto wrongNumArgs;
156 if (Jim_GetLong(interp, argv[i], &offset) != JIM_OK) {
157 return JIM_ERR;
160 else if (Jim_CompareStringImmediate(interp, argv[i], "--")) {
161 i++;
162 break;
164 else {
165 const char *opt = Jim_GetString(argv[i], NULL);
167 if (*opt == '-') {
168 /* Bad option */
169 goto wrongNumArgs;
171 break;
174 if (argc - i < 2) {
175 goto wrongNumArgs;
178 regex = SetRegexpFromAny(interp, argv[i], regcomp_flags);
179 if (!regex) {
180 return JIM_ERR;
183 pattern = Jim_GetString(argv[i], NULL);
184 source_str = Jim_GetString(argv[i + 1], &source_len);
186 num_vars = argc - i - 2;
188 if (opt_inline) {
189 if (num_vars) {
190 Jim_SetResultString(interp, "regexp match variables not allowed when using -inline",
191 -1);
192 result = JIM_ERR;
193 goto done;
195 /* REVISIT: Ugly! */
196 num_vars = 100;
199 pmatch = Jim_Alloc((num_vars + 1) * sizeof(*pmatch));
201 /* If an offset has been specified, adjust for that now.
202 * If it points past the end of the string, point to the terminating null
204 if (offset) {
205 if (offset > source_len) {
206 source_str += source_len;
208 else if (offset > 0) {
209 source_str += offset;
213 if (opt_inline) {
214 resultListObj = Jim_NewListObj(interp, NULL, 0);
217 next_match:
218 match = regexec(regex, source_str, num_vars + 1, pmatch, 0);
219 if (match >= REG_BADPAT) {
220 char buf[100];
222 regerror(match, regex, buf, sizeof(buf));
223 Jim_SetResultFormatted(interp, "error while matching pattern: %s", buf);
224 result = JIM_ERR;
225 goto done;
228 if (match == REG_NOMATCH) {
229 goto done;
232 num_matches++;
234 if (opt_all && !opt_inline) {
235 /* Just count the number of matches, so skip the substitution h */
236 goto try_next_match;
240 * If additional variable names have been specified, return
241 * index information in those variables.
244 j = 0;
245 for (i += 2; opt_inline ? pmatch[j].rm_so != -1 : i < argc; i++, j++) {
246 Jim_Obj *resultObj;
248 if (opt_indices) {
249 resultObj = Jim_NewListObj(interp, NULL, 0);
251 else {
252 resultObj = Jim_NewStringObj(interp, "", 0);
255 if (pmatch[j].rm_so == -1) {
256 if (opt_indices) {
257 Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
258 Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
261 else {
262 int len = pmatch[j].rm_eo - pmatch[j].rm_so;
264 if (opt_indices) {
265 Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp,
266 offset + pmatch[j].rm_so));
267 Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp,
268 offset + pmatch[j].rm_so + len - 1));
270 else {
271 Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, len);
275 if (opt_inline) {
276 Jim_ListAppendElement(interp, resultListObj, resultObj);
278 else {
279 /* And now set the result variable */
280 result = Jim_SetVariable(interp, argv[i], resultObj);
282 if (result != JIM_OK) {
283 Jim_FreeObj(interp, resultObj);
284 break;
289 try_next_match:
290 if (opt_all && pattern[0] != '^' && *source_str) {
291 if (pmatch[0].rm_eo) {
292 source_str += pmatch[0].rm_eo;
294 else {
295 source_str++;
297 if (*source_str) {
298 goto next_match;
302 done:
303 if (result == JIM_OK) {
304 if (opt_inline) {
305 Jim_SetResult(interp, resultListObj);
307 else {
308 Jim_SetResultInt(interp, num_matches);
312 Jim_Free(pmatch);
313 return result;
316 #define MAX_SUB_MATCHES 50
318 int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
320 int regcomp_flags = 0;
321 int opt_all = 0;
322 long offset = 0;
323 regex_t *regex;
324 const char *p;
325 int result;
326 regmatch_t pmatch[MAX_SUB_MATCHES + 1];
327 int num_matches = 0;
329 int i;
330 Jim_Obj *varname;
331 Jim_Obj *resultObj;
332 const char *source_str;
333 int source_len;
334 const char *replace_str;
335 const char *pattern;
337 if (argc < 4) {
338 wrongNumArgs:
339 Jim_WrongNumArgs(interp, 1, argv,
340 "?-nocase? ?-all? ?-line? ?-start offset? ?--? exp string subSpec ?varName?");
341 return JIM_ERR;
344 for (i = 1; i < argc; i++) {
345 if (Jim_CompareStringImmediate(interp, argv[i], "-nocase")) {
346 regcomp_flags |= REG_ICASE;
348 else if (Jim_CompareStringImmediate(interp, argv[i], "-line")) {
349 regcomp_flags |= REG_NEWLINE;
351 else if (Jim_CompareStringImmediate(interp, argv[i], "-all")) {
352 opt_all = 1;
354 else if (Jim_CompareStringImmediate(interp, argv[i], "-start")) {
355 if (++i == argc) {
356 goto wrongNumArgs;
358 if (Jim_GetLong(interp, argv[i], &offset) != JIM_OK) {
359 return JIM_ERR;
362 else if (Jim_CompareStringImmediate(interp, argv[i], "--")) {
363 i++;
364 break;
366 else {
367 const char *opt = Jim_GetString(argv[i], NULL);
369 if (*opt == '-') {
370 /* Bad option */
371 goto wrongNumArgs;
373 break;
376 if (argc - i != 3 && argc - i != 4) {
377 goto wrongNumArgs;
380 regex = SetRegexpFromAny(interp, argv[i], regcomp_flags);
381 if (!regex) {
382 return JIM_ERR;
384 pattern = Jim_GetString(argv[i], NULL);
386 source_str = Jim_GetString(argv[i + 1], &source_len);
387 replace_str = Jim_GetString(argv[i + 2], NULL);
388 varname = argv[i + 3];
390 /* Create the result string */
391 resultObj = Jim_NewStringObj(interp, "", 0);
393 /* If an offset has been specified, adjust for that now.
394 * If it points past the end of the string, point to the terminating null
396 if (offset) {
397 if (offset > source_len) {
398 offset = source_len;
400 else if (offset < 0) {
401 offset = 0;
405 /* Copy the part before -start */
406 Jim_AppendString(interp, resultObj, source_str, offset);
409 * The following loop is to handle multiple matches within the
410 * same source string; each iteration handles one match and its
411 * corresponding substitution. If "-all" hasn't been specified
412 * then the loop body only gets executed once.
415 for (p = source_str + offset; *p != 0;) {
416 const char *src;
417 int match = regexec(regex, p, MAX_SUB_MATCHES, pmatch, 0);
419 if (match >= REG_BADPAT) {
420 char buf[100];
422 regerror(match, regex, buf, sizeof(buf));
423 Jim_SetResultFormatted(interp, "error while matching pattern: %s", buf);
424 return JIM_ERR;
426 if (match == REG_NOMATCH) {
427 break;
430 num_matches++;
433 * Copy the portion of the source string before the match to the
434 * result variable.
436 Jim_AppendString(interp, resultObj, p, pmatch[0].rm_so);
439 * Append the subSpec (replace_str) argument to the variable, making appropriate
440 * substitutions. This code is a bit hairy because of the backslash
441 * conventions and because the code saves up ranges of characters in
442 * subSpec to reduce the number of calls to Jim_SetVar.
445 for (src = replace_str; *src; src++) {
446 int index;
447 int c = *src;
449 if (c == '&') {
450 index = 0;
452 else if (c == '\\') {
453 c = *++src;
454 if ((c >= '0') && (c <= '9')) {
455 index = c - '0';
457 else if ((c == '\\') || (c == '&')) {
458 Jim_AppendString(interp, resultObj, src, 1);
459 continue;
461 else {
462 Jim_AppendString(interp, resultObj, src - 1, 2);
463 continue;
466 else {
467 Jim_AppendString(interp, resultObj, src, 1);
468 continue;
470 if ((index < MAX_SUB_MATCHES) && pmatch[index].rm_so != -1 && pmatch[index].rm_eo != -1) {
471 Jim_AppendString(interp, resultObj, p + pmatch[index].rm_so,
472 pmatch[index].rm_eo - pmatch[index].rm_so);
476 p += pmatch[0].rm_eo;
478 if (!opt_all || pmatch[0].rm_eo == 0 || pattern[0] == '^') {
479 /* If we are doing a single match, or we haven't moved with this match
480 * or this is an anchored match, we stop */
481 break;
486 * Copy the portion of the string after the last match to the
487 * result variable.
489 Jim_AppendString(interp, resultObj, p, -1);
491 /* And now set or return the result variable */
492 if (argc - i == 4) {
493 result = Jim_SetVariable(interp, varname, resultObj);
495 if (result == JIM_OK) {
496 Jim_SetResultInt(interp, num_matches);
498 else {
499 Jim_FreeObj(interp, resultObj);
502 else {
503 Jim_SetResult(interp, resultObj);
504 result = JIM_OK;
507 return result;
510 int Jim_regexpInit(Jim_Interp *interp)
512 Jim_CreateCommand(interp, "regexp", Jim_RegexpCmd, NULL, NULL);
513 Jim_CreateCommand(interp, "regsub", Jim_RegsubCmd, NULL, NULL);
514 return JIM_OK;