3 * (c) 2008 Steve Bennett <steveb@workware.net.au>
5 * Implements the regexp and regsub commands for Jim
7 * Uses C library regcomp()/regexec() for the matching.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials
20 * provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE JIM TCL PROJECT ``AS IS'' AND ANY
23 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
25 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * JIM TCL PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
27 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
33 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 * The views and conclusions contained in the software and documentation
36 * are those of the authors and should not be interpreted as representing
37 * official policies, either expressed or implied, of the Jim Tcl Project.
39 * Based on code originally from Tcl 6.7:
41 * Copyright 1987-1991 Regents of the University of California
42 * Permission to use, copy, modify, and distribute this
43 * software and its documentation for any purpose and without
44 * fee is hereby granted, provided that the above copyright
45 * notice appear in all copies. The University of California
46 * makes no representations about the suitability of this
47 * software for any purpose. It is provided "as is" without
48 * express or implied warranty.
57 void FreeRegexpInternalRep(Jim_Interp
*interp
, Jim_Obj
*objPtr
)
59 regfree(objPtr
->internalRep
.regexpValue
.compre
);
60 Jim_Free(objPtr
->internalRep
.regexpValue
.compre
);
63 static Jim_ObjType regexpObjType
= {
65 FreeRegexpInternalRep
,
71 static regex_t
*SetRegexpFromAny(Jim_Interp
*interp
, Jim_Obj
*objPtr
, unsigned flags
)
77 /* Check if the object is already an uptodate variable */
78 if (objPtr
->typePtr
== ®expObjType
&&
79 objPtr
->internalRep
.regexpValue
.compre
&& objPtr
->internalRep
.regexpValue
.flags
== flags
) {
81 return objPtr
->internalRep
.regexpValue
.compre
;
84 /* Not a regexp or the flags do not match */
85 if (objPtr
->typePtr
== ®expObjType
) {
86 FreeRegexpInternalRep(interp
, objPtr
);
87 objPtr
->typePtr
= NULL
;
90 /* Get the string representation */
91 pattern
= Jim_GetString(objPtr
, NULL
);
92 compre
= Jim_Alloc(sizeof(regex_t
));
94 if ((ret
= regcomp(compre
, pattern
, REG_EXTENDED
| flags
)) != 0) {
97 regerror(ret
, compre
, buf
, sizeof(buf
));
98 Jim_SetResultFormatted(interp
, "couldn't compile regular expression pattern: %s", buf
);
104 objPtr
->typePtr
= ®expObjType
;
105 objPtr
->internalRep
.regexpValue
.flags
= flags
;
106 objPtr
->internalRep
.regexpValue
.compre
= compre
;
111 int Jim_RegexpCmd(Jim_Interp
*interp
, int argc
, Jim_Obj
*const *argv
)
119 regmatch_t
*pmatch
= NULL
;
123 const char *source_str
;
126 Jim_Obj
*resultListObj
= NULL
;
127 int regcomp_flags
= 0;
131 Jim_WrongNumArgs(interp
, 1, argv
,
132 "?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? ?--? exp string ?matchVar? ?subMatchVar ...?");
136 for (i
= 1; i
< argc
; i
++) {
137 if (Jim_CompareStringImmediate(interp
, argv
[i
], "-indices")) {
140 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-nocase")) {
141 regcomp_flags
|= REG_ICASE
;
143 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-line")) {
144 regcomp_flags
|= REG_NEWLINE
;
146 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-all")) {
149 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-inline")) {
152 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-start")) {
156 if (Jim_GetLong(interp
, argv
[i
], &offset
) != JIM_OK
) {
160 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "--")) {
165 const char *opt
= Jim_GetString(argv
[i
], NULL
);
178 regex
= SetRegexpFromAny(interp
, argv
[i
], regcomp_flags
);
183 pattern
= Jim_GetString(argv
[i
], NULL
);
184 source_str
= Jim_GetString(argv
[i
+ 1], &source_len
);
186 num_vars
= argc
- i
- 2;
190 Jim_SetResultString(interp
, "regexp match variables not allowed when using -inline",
199 pmatch
= Jim_Alloc((num_vars
+ 1) * sizeof(*pmatch
));
201 /* If an offset has been specified, adjust for that now.
202 * If it points past the end of the string, point to the terminating null
205 if (offset
> source_len
) {
206 source_str
+= source_len
;
208 else if (offset
> 0) {
209 source_str
+= offset
;
214 resultListObj
= Jim_NewListObj(interp
, NULL
, 0);
218 match
= regexec(regex
, source_str
, num_vars
+ 1, pmatch
, 0);
219 if (match
>= REG_BADPAT
) {
222 regerror(match
, regex
, buf
, sizeof(buf
));
223 Jim_SetResultFormatted(interp
, "error while matching pattern: %s", buf
);
228 if (match
== REG_NOMATCH
) {
234 if (opt_all
&& !opt_inline
) {
235 /* Just count the number of matches, so skip the substitution h */
240 * If additional variable names have been specified, return
241 * index information in those variables.
245 for (i
+= 2; opt_inline
? pmatch
[j
].rm_so
!= -1 : i
< argc
; i
++, j
++) {
249 resultObj
= Jim_NewListObj(interp
, NULL
, 0);
252 resultObj
= Jim_NewStringObj(interp
, "", 0);
255 if (pmatch
[j
].rm_so
== -1) {
257 Jim_ListAppendElement(interp
, resultObj
, Jim_NewIntObj(interp
, -1));
258 Jim_ListAppendElement(interp
, resultObj
, Jim_NewIntObj(interp
, -1));
262 int len
= pmatch
[j
].rm_eo
- pmatch
[j
].rm_so
;
265 Jim_ListAppendElement(interp
, resultObj
, Jim_NewIntObj(interp
,
266 offset
+ pmatch
[j
].rm_so
));
267 Jim_ListAppendElement(interp
, resultObj
, Jim_NewIntObj(interp
,
268 offset
+ pmatch
[j
].rm_so
+ len
- 1));
271 Jim_AppendString(interp
, resultObj
, source_str
+ pmatch
[j
].rm_so
, len
);
276 Jim_ListAppendElement(interp
, resultListObj
, resultObj
);
279 /* And now set the result variable */
280 result
= Jim_SetVariable(interp
, argv
[i
], resultObj
);
282 if (result
!= JIM_OK
) {
283 Jim_FreeObj(interp
, resultObj
);
290 if (opt_all
&& pattern
[0] != '^' && *source_str
) {
291 if (pmatch
[0].rm_eo
) {
292 source_str
+= pmatch
[0].rm_eo
;
303 if (result
== JIM_OK
) {
305 Jim_SetResult(interp
, resultListObj
);
308 Jim_SetResultInt(interp
, num_matches
);
316 #define MAX_SUB_MATCHES 50
318 int Jim_RegsubCmd(Jim_Interp
*interp
, int argc
, Jim_Obj
*const *argv
)
320 int regcomp_flags
= 0;
326 regmatch_t pmatch
[MAX_SUB_MATCHES
+ 1];
332 const char *source_str
;
334 const char *replace_str
;
339 Jim_WrongNumArgs(interp
, 1, argv
,
340 "?-nocase? ?-all? ?-line? ?-start offset? ?--? exp string subSpec ?varName?");
344 for (i
= 1; i
< argc
; i
++) {
345 if (Jim_CompareStringImmediate(interp
, argv
[i
], "-nocase")) {
346 regcomp_flags
|= REG_ICASE
;
348 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-line")) {
349 regcomp_flags
|= REG_NEWLINE
;
351 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-all")) {
354 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "-start")) {
358 if (Jim_GetLong(interp
, argv
[i
], &offset
) != JIM_OK
) {
362 else if (Jim_CompareStringImmediate(interp
, argv
[i
], "--")) {
367 const char *opt
= Jim_GetString(argv
[i
], NULL
);
376 if (argc
- i
!= 3 && argc
- i
!= 4) {
380 regex
= SetRegexpFromAny(interp
, argv
[i
], regcomp_flags
);
384 pattern
= Jim_GetString(argv
[i
], NULL
);
386 source_str
= Jim_GetString(argv
[i
+ 1], &source_len
);
387 replace_str
= Jim_GetString(argv
[i
+ 2], NULL
);
388 varname
= argv
[i
+ 3];
390 /* Create the result string */
391 resultObj
= Jim_NewStringObj(interp
, "", 0);
393 /* If an offset has been specified, adjust for that now.
394 * If it points past the end of the string, point to the terminating null
397 if (offset
> source_len
) {
400 else if (offset
< 0) {
405 /* Copy the part before -start */
406 Jim_AppendString(interp
, resultObj
, source_str
, offset
);
409 * The following loop is to handle multiple matches within the
410 * same source string; each iteration handles one match and its
411 * corresponding substitution. If "-all" hasn't been specified
412 * then the loop body only gets executed once.
415 for (p
= source_str
+ offset
; *p
!= 0;) {
417 int match
= regexec(regex
, p
, MAX_SUB_MATCHES
, pmatch
, 0);
419 if (match
>= REG_BADPAT
) {
422 regerror(match
, regex
, buf
, sizeof(buf
));
423 Jim_SetResultFormatted(interp
, "error while matching pattern: %s", buf
);
426 if (match
== REG_NOMATCH
) {
433 * Copy the portion of the source string before the match to the
436 Jim_AppendString(interp
, resultObj
, p
, pmatch
[0].rm_so
);
439 * Append the subSpec (replace_str) argument to the variable, making appropriate
440 * substitutions. This code is a bit hairy because of the backslash
441 * conventions and because the code saves up ranges of characters in
442 * subSpec to reduce the number of calls to Jim_SetVar.
445 for (src
= replace_str
; *src
; src
++) {
452 else if (c
== '\\') {
454 if ((c
>= '0') && (c
<= '9')) {
457 else if ((c
== '\\') || (c
== '&')) {
458 Jim_AppendString(interp
, resultObj
, src
, 1);
462 Jim_AppendString(interp
, resultObj
, src
- 1, 2);
467 Jim_AppendString(interp
, resultObj
, src
, 1);
470 if ((index
< MAX_SUB_MATCHES
) && pmatch
[index
].rm_so
!= -1 && pmatch
[index
].rm_eo
!= -1) {
471 Jim_AppendString(interp
, resultObj
, p
+ pmatch
[index
].rm_so
,
472 pmatch
[index
].rm_eo
- pmatch
[index
].rm_so
);
476 p
+= pmatch
[0].rm_eo
;
478 if (!opt_all
|| pmatch
[0].rm_eo
== 0 || pattern
[0] == '^') {
479 /* If we are doing a single match, or we haven't moved with this match
480 * or this is an anchored match, we stop */
486 * Copy the portion of the string after the last match to the
489 Jim_AppendString(interp
, resultObj
, p
, -1);
491 /* And now set or return the result variable */
493 result
= Jim_SetVariable(interp
, varname
, resultObj
);
495 if (result
== JIM_OK
) {
496 Jim_SetResultInt(interp
, num_matches
);
499 Jim_FreeObj(interp
, resultObj
);
503 Jim_SetResult(interp
, resultObj
);
510 int Jim_regexpInit(Jim_Interp
*interp
)
512 Jim_CreateCommand(interp
, "regexp", Jim_RegexpCmd
, NULL
, NULL
);
513 Jim_CreateCommand(interp
, "regsub", Jim_RegsubCmd
, NULL
, NULL
);