2 * (c) 2008 Steve Bennett <steveb@workware.net.au>
4 * Implements the regexp and regsub commands for Jim
6 * Uses C library regcomp()/regexec() for the matching.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials
19 * provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE JIM TCL PROJECT ``AS IS'' AND ANY
22 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * JIM TCL PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
26 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
30 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
32 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * The views and conclusions contained in the software and documentation
35 * are those of the authors and should not be interpreted as representing
36 * official policies, either expressed or implied, of the Jim Tcl Project.
38 * Based on code originally from Tcl 6.7:
40 * Copyright 1987-1991 Regents of the University of California
41 * Permission to use, copy, modify, and distribute this
42 * software and its documentation for any purpose and without
43 * fee is hereby granted, provided that the above copyright
44 * notice appear in all copies. The University of California
45 * makes no representations about the suitability of this
46 * software for any purpose. It is provided "as is" without
47 * express or implied warranty.
56 /* REVISIT: Would be useful in jim.h */
57 static void Jim_SetIntResult(Jim_Interp
*interp
, jim_wide wide
)
59 Jim_SetResult(interp
, Jim_NewIntObj(interp
, wide
));
63 * REVISIT: Should cache a number of compiled regexps for performance reasons.
66 compile_regexp(Jim_Interp
*interp
, const char *pattern
, int flags
)
70 regex_t
*result
= (regex_t
*)Jim_Alloc(sizeof(*result
));
72 if ((ret
= regcomp(result
, pattern
, REG_EXTENDED
| flags
)) != 0) {
74 regerror(ret
, result
, buf
, sizeof(buf
));
75 Jim_SetResult(interp
, Jim_NewEmptyStringObj(interp
));
76 Jim_AppendStrings(interp
, Jim_GetResult(interp
), "couldn't compile regular expression pattern: ", buf
, NULL
);
83 int Jim_RegexpCmd(Jim_Interp
*interp
, int argc
, Jim_Obj
*const *argv
)
91 regmatch_t
*pmatch
= NULL
;
95 const char *source_str
;
98 Jim_Obj
*resultListObj
= NULL
;
99 int regcomp_flags
= 0;
103 Jim_WrongNumArgs(interp
, 1, argv
, "?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? exp string ?matchVar? ?subMatchVar ...?");
107 for (i
= 1; i
< argc
; i
++) {
108 if (Jim_CompareStringImmediate(interp
, argv
[i
], "-indices")) {
111 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-nocase")) {
112 regcomp_flags
|= REG_ICASE
;
114 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-line")) {
115 regcomp_flags
|= REG_NEWLINE
;
117 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-all")) {
120 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-inline")) {
123 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-start")) {
127 if (Jim_GetLong(interp
, argv
[i
], &offset
) != JIM_OK
) {
131 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "--")) {
136 const char *opt
= Jim_GetString(argv
[i
], NULL
);
148 pattern
= Jim_GetString(argv
[i
], NULL
);
149 regex
= compile_regexp(interp
, pattern
, regcomp_flags
);
154 source_str
= Jim_GetString(argv
[i
+ 1], &source_len
);
156 num_vars
= argc
- i
- 2;
160 Jim_SetResultString(interp
, "regexp match variables not allowed when using -inline", -1);
168 pmatch
= Jim_Alloc((num_vars
+ 1) * sizeof(*pmatch
));
170 /* If an offset has been specified, adjust for that now.
171 * If it points past the end of the string, point to the terminating null
174 if (offset
> source_len
) {
175 source_str
+= source_len
;
176 } else if (offset
> 0) {
177 source_str
+= offset
;
182 resultListObj
= Jim_NewListObj(interp
, NULL
, 0);
186 match
= regexec(regex
, source_str
, num_vars
+ 1, pmatch
, 0);
187 if (match
>= REG_BADPAT
) {
189 regerror(match
, regex
, buf
, sizeof(buf
));
190 Jim_SetResultString(interp
, "", 0);
191 Jim_AppendStrings(interp
, Jim_GetResult(interp
), "error while matching pattern: ", buf
, NULL
);
196 if (match
== REG_NOMATCH
) {
202 if (opt_all
&& !opt_inline
) {
203 /* Just count the number of matches, so skip the substitution h*/
208 * If additional variable names have been specified, return
209 * index information in those variables.
212 //fprintf(stderr, "source_str=%s, [0].rm_eo=%d\n", source_str, pmatch[0].rm_eo);
215 for (i
+= 2; opt_inline
? pmatch
[j
].rm_so
!= -1 : i
< argc
; i
++, j
++) {
219 resultObj
= Jim_NewListObj(interp
, NULL
, 0);
222 resultObj
= Jim_NewStringObj(interp
, "", 0);
225 if (pmatch
[j
].rm_so
== -1) {
227 Jim_ListAppendElement(interp
, resultObj
, Jim_NewIntObj(interp
, -1));
228 Jim_ListAppendElement(interp
, resultObj
, Jim_NewIntObj(interp
, -1));
231 int len
= pmatch
[j
].rm_eo
- pmatch
[j
].rm_so
;
233 Jim_ListAppendElement(interp
, resultObj
, Jim_NewIntObj(interp
, offset
+ pmatch
[j
].rm_so
));
234 Jim_ListAppendElement(interp
, resultObj
, Jim_NewIntObj(interp
, offset
+ pmatch
[j
].rm_so
+ len
- 1));
236 Jim_AppendString(interp
, resultObj
, source_str
+ pmatch
[j
].rm_so
, len
);
241 Jim_ListAppendElement(interp
, resultListObj
, resultObj
);
244 /* And now set the result variable */
245 result
= Jim_SetVariable(interp
, argv
[i
], resultObj
);
247 if (result
!= JIM_OK
) {
248 Jim_SetResult(interp
, Jim_NewEmptyStringObj(interp
));
249 Jim_AppendStrings(interp
, Jim_GetResult(interp
), "couldn't set variable \"", Jim_GetString(argv
[i
], NULL
), "\"", NULL
);
250 Jim_FreeObj(interp
, resultObj
);
257 if (opt_all
&& pattern
[0] != '^' && *source_str
) {
258 if (pmatch
[0].rm_eo
) {
259 source_str
+= pmatch
[0].rm_eo
;
270 if (result
== JIM_OK
) {
272 Jim_SetResult(interp
, resultListObj
);
275 Jim_SetIntResult(interp
, num_matches
);
285 #define MAX_SUB_MATCHES 10
287 int Jim_RegsubCmd(Jim_Interp
*interp
, int argc
, Jim_Obj
*const *argv
)
289 int regcomp_flags
= 0;
294 int result
= JIM_ERR
;
295 regmatch_t pmatch
[MAX_SUB_MATCHES
+ 1];
301 const char *source_str
;
303 const char *replace_str
;
308 Jim_WrongNumArgs(interp
, 1, argv
, "?-nocase? ?-all? exp string subSpec varName");
312 for (i
= 1; i
< argc
; i
++) {
313 if (Jim_CompareStringImmediate(interp
, argv
[i
], "-nocase")) {
314 regcomp_flags
|= REG_ICASE
;
316 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-line")) {
317 regcomp_flags
|= REG_NEWLINE
;
319 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-all")) {
322 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-start")) {
326 if (Jim_GetLong(interp
, argv
[i
], &offset
) != JIM_OK
) {
330 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "--")) {
335 const char *opt
= Jim_GetString(argv
[i
], NULL
);
347 pattern
= Jim_GetString(argv
[i
], NULL
);
348 regex
= compile_regexp(interp
, pattern
, regcomp_flags
);
353 source_str
= Jim_GetString(argv
[i
+ 1], &source_len
);
354 replace_str
= Jim_GetString(argv
[i
+ 2], NULL
);
355 varname
= argv
[i
+ 3];
357 /* Create the result string */
358 resultObj
= Jim_NewStringObj(interp
, "", 0);
360 /* If an offset has been specified, adjust for that now.
361 * If it points past the end of the string, point to the terminating null
364 if (offset
> source_len
) {
366 } else if (offset
< 0) {
371 /* Copy the part before -start */
372 Jim_AppendString(interp
, resultObj
, source_str
, offset
);
375 * The following loop is to handle multiple matches within the
376 * same source string; each iteration handles one match and its
377 * corresponding substitution. If "-all" hasn't been specified
378 * then the loop body only gets executed once.
381 for (p
= source_str
+ offset
; *p
!= 0; ) {
383 int match
= regexec(regex
, p
, MAX_SUB_MATCHES
, pmatch
, 0);
384 if (match
>= REG_BADPAT
) {
386 regerror(match
, regex
, buf
, sizeof(buf
));
387 Jim_SetResultString(interp
, "", 0);
388 Jim_AppendStrings(interp
, Jim_GetResult(interp
), "error while matching pattern: ", buf
, NULL
);
391 if (match
== REG_NOMATCH
) {
398 * Copy the portion of the source string before the match to the
401 Jim_AppendString(interp
, resultObj
, p
, pmatch
[0].rm_so
);
404 * Append the subSpec (replace_str) argument to the variable, making appropriate
405 * substitutions. This code is a bit hairy because of the backslash
406 * conventions and because the code saves up ranges of characters in
407 * subSpec to reduce the number of calls to Jim_SetVar.
410 for (src
= replace_str
; *src
; src
++) {
417 else if (c
== '\\') {
419 if ((c
>= '0') && (c
<= '9')) {
422 else if ((c
== '\\') || (c
== '&')) {
423 Jim_AppendString(interp
, resultObj
, src
, 1);
427 Jim_AppendString(interp
, resultObj
, src
- 1, 2);
432 Jim_AppendString(interp
, resultObj
, src
, 1);
435 if ((index
< MAX_SUB_MATCHES
) && pmatch
[index
].rm_so
!= -1 && pmatch
[index
].rm_eo
!= -1) {
436 Jim_AppendString(interp
, resultObj
, p
+ pmatch
[index
].rm_so
, pmatch
[index
].rm_eo
- pmatch
[index
].rm_so
);
440 p
+= pmatch
[0].rm_eo
;
442 if (!opt_all
|| pmatch
[0].rm_eo
== 0 || pattern
[0] == '^') {
443 /* If we are doing a single match, or we haven't moved with this match
444 * or this is an anchored match, we stop */
450 * Copy the portion of the string after the last match to the
453 Jim_AppendString(interp
, resultObj
, p
, -1);
455 /* And now set the result variable */
456 result
= Jim_SetVariable(interp
, varname
, resultObj
);
458 if (result
== JIM_OK
) {
459 Jim_SetIntResult(interp
, num_matches
);
462 Jim_SetResult(interp
, Jim_NewEmptyStringObj(interp
));
463 Jim_AppendStrings(interp
, Jim_GetResult(interp
), "couldn't set variable \"", Jim_GetString(varname
, NULL
), "\"", NULL
);
464 Jim_FreeObj(interp
, resultObj
);
473 int Jim_OnLoad(Jim_Interp
*interp
)
475 Jim_InitExtension(interp
);
476 if (Jim_PackageProvide(interp
, "regexp", "1.0", JIM_ERRMSG
) != JIM_OK
) {
479 Jim_CreateCommand(interp
, "regexp", Jim_RegexpCmd
, NULL
, NULL
);
480 Jim_CreateCommand(interp
, "regsub", Jim_RegsubCmd
, NULL
, NULL
);