2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
5 * %sccs.include.redist.c%
9 static char sccsid
[] = "$Id: ex_subst.c,v 8.33 1994/01/09 17:56:14 bostic Exp $ (Berkeley) $Date: 1994/01/09 17:56:14 $";
12 #include <sys/types.h>
22 #include "interrupt.h"
24 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
25 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
27 static int checkmatchsize
__P((SCR
*, regex_t
*));
28 static inline int regsub
__P((SCR
*,
29 char *, char **, size_t *, size_t *));
30 static void subst_intr
__P((int));
31 static int substitute
__P((SCR
*, EXF
*,
32 EXCMDARG
*, char *, regex_t
*, u_int
));
36 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
38 * Substitute on lines matching a pattern.
41 ex_substitute(sp
, ep
, cmdp
)
49 int delim
, eval
, reflags
, replaced
;
50 char *bp
, *ptrn
, *rep
, *p
, *t
;
53 * Skip leading white space.
56 * Historic vi allowed any non-alphanumeric to serve as the
57 * substitution command delimiter.
60 * If the arguments are empty, it's the same as &, i.e. we
61 * repeat the last substitution.
63 for (p
= cmdp
->argv
[0]->bp
,
64 len
= cmdp
->argv
[0]->len
; len
> 0; --len
, ++p
) {
69 return (ex_subagain(sp
, ep
, cmdp
));
72 return (substitute(sp
, ep
,
73 cmdp
, p
, &sp
->subre
, SUB_MUSTSETR
));
76 * Get the pattern string, toss escaped characters.
79 * Historic vi accepted any of the following forms:
81 * :s/abc/def/ change "abc" to "def"
82 * :s/abc/def change "abc" to "def"
83 * :s/abc/ delete "abc"
88 * Only toss an escape character if it escapes a delimiter.
89 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
90 * would be nice to be more regular, i.e. for each layer of
91 * escaping a single escape character is removed, but that's
92 * not how the historic vi worked.
94 for (ptrn
= t
= p
;;) {
95 if (p
[0] == '\0' || p
[0] == delim
) {
100 * Nul terminate the pattern string -- it's passed
101 * to regcomp which doesn't understand anything else.
106 if (p
[0] == '\\' && p
[1] == delim
)
111 /* If the pattern string is empty, use the last one. */
113 if (!F_ISSET(sp
, S_SUBRE_SET
)) {
115 "No previous regular expression.");
123 if (O_ISSET(sp
, O_EXTENDED
))
124 reflags
|= REG_EXTENDED
;
125 if (O_ISSET(sp
, O_IGNORECASE
))
126 reflags
|= REG_ICASE
;
128 /* Convert vi-style RE's to POSIX 1003.2 RE's. */
129 if (re_conv(sp
, &ptrn
, &replaced
))
132 /* Compile the RE. */
133 eval
= regcomp(&lre
, (char *)ptrn
, reflags
);
135 /* Free up any allocated memory. */
137 FREE_SPACE(sp
, ptrn
, 0);
140 re_error(sp
, eval
, &lre
);
148 * Historic practice is that substitutes set the search
149 * direction as well as both substitute and search RE's.
151 sp
->searchdir
= FORWARD
;
153 F_SET(sp
, S_SRE_SET
);
155 F_SET(sp
, S_SUBRE_SET
);
162 * Get the replacement string.
164 * The special character ~ (\~ if O_MAGIC not set) inserts the
165 * previous replacement string into this replacement string.
167 * The special character & (\& if O_MAGIC not set) matches the
168 * entire RE. No handling of & is required here, it's done by
173 * Only toss an escape character if it escapes a delimiter or
174 * an escape character, or if O_MAGIC is set and it escapes a
178 if (sp
->repl
!= NULL
)
179 FREE(sp
->repl
, sp
->repl_len
);
184 * Count ~'s to figure out how much space we need. We could
185 * special case nonexistent last patterns or whether or not
186 * O_MAGIC is set, but it's probably not worth the effort.
188 for (rep
= p
, len
= 0;
189 p
[0] != '\0' && p
[0] != delim
; ++p
, ++len
)
192 GET_SPACE_RET(sp
, bp
, blen
, len
);
193 for (t
= bp
, len
= 0, p
= rep
;;) {
194 if (p
[0] == '\0' || p
[0] == delim
) {
200 if (p
[1] == '\\' || p
[1] == delim
)
202 else if (p
[1] == '~') {
204 if (!O_ISSET(sp
, O_MAGIC
))
207 } else if (p
[0] == '~' && O_ISSET(sp
, O_MAGIC
)) {
209 memmove(t
, sp
->repl
, sp
->repl_len
);
217 if (sp
->repl
!= NULL
)
218 FREE(sp
->repl
, sp
->repl_len
);
219 if ((sp
->repl
= malloc(len
)) == NULL
) {
220 msgq(sp
, M_SYSERR
, NULL
);
221 FREE_SPACE(sp
, bp
, blen
);
224 memmove(sp
->repl
, bp
, len
);
226 FREE_SPACE(sp
, bp
, blen
);
229 if (checkmatchsize(sp
, &sp
->subre
))
231 return (substitute(sp
, ep
, cmdp
, p
, re
, flags
));
236 * [line [,line]] & [cgr] [count] [#lp]
238 * Substitute using the last substitute RE and replacement pattern.
241 ex_subagain(sp
, ep
, cmdp
)
246 if (!F_ISSET(sp
, S_SUBRE_SET
)) {
247 msgq(sp
, M_ERR
, "No previous regular expression.");
250 return (substitute(sp
, ep
, cmdp
, cmdp
->argv
[0]->bp
, &sp
->subre
, 0));
255 * [line [,line]] ~ [cgr] [count] [#lp]
257 * Substitute using the last RE and last substitute replacement pattern.
260 ex_subtilde(sp
, ep
, cmdp
)
265 if (!F_ISSET(sp
, S_SRE_SET
)) {
266 msgq(sp
, M_ERR
, "No previous regular expression.");
269 return (substitute(sp
, ep
, cmdp
, cmdp
->argv
[0]->bp
, &sp
->sre
, 0));
273 * The nasty part of the substitution is what happens when the replacement
274 * string contains newlines. It's a bit tricky -- consider the information
275 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
276 * to build a set of newline offsets which we use to break the line up later,
277 * when the replacement is done. Don't change it unless you're pretty damned
280 #define NEEDNEWLINE(sp) { \
281 if (sp->newl_len == sp->newl_cnt) { \
282 sp->newl_len += 25; \
283 REALLOC(sp, sp->newl, size_t *, \
284 sp->newl_len * sizeof(size_t)); \
285 if (sp->newl == NULL) { \
292 #define BUILD(sp, l, len) { \
293 if (lbclen + (len) > lblen) { \
294 lblen += MAX(lbclen + (len), 256); \
295 REALLOC(sp, lb, char *, lblen); \
301 memmove(lb + lbclen, l, len); \
305 #define NEEDSP(sp, len, pnt) { \
306 if (lbclen + (len) > lblen) { \
307 lblen += MAX(lbclen + (len), 256); \
308 REALLOC(sp, lb, char *, lblen); \
319 * Do the substitution. This stuff is *really* tricky. There are
320 * lots of special cases, and general nastiness. Don't mess with it
321 * unless you're pretty confident.
324 substitute(sp
, ep
, cmdp
, s
, re
, flags
)
335 size_t blen
, cnt
, last
, lbclen
, lblen
, len
, llen
, offset
, saved_offset
;
336 int didsub
, do_eol_match
, eflags
, empty_ok
, eval
;
337 int linechanged
, matched
, quit
, rval
;
338 int cflag
, gflag
, lflag
, nflag
, pflag
, rflag
;
342 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
343 * it only displayed the last change. I'd disallow them, but they are
344 * useful in combination with the [v]global commands. In the current
345 * model the problem is combining them with the 'c' flag -- the screen
346 * would have to flip back and forth between the confirm screen and the
347 * ex print screen, which would be pretty awful. We do display all
348 * changes, though, for what that's worth.
351 * Historic vi was fairly strict about the order of "options", the
352 * count, and "flags". I'm somewhat fuzzy on the difference between
353 * options and flags, anyway, so this is a simpler approach, and we
354 * just take them in whatever order the user gives them. (The ex
355 * usage statement doesn't reflect this.)
357 cflag
= gflag
= lflag
= nflag
= pflag
= rflag
= 0;
358 for (lno
= OOBLNO
; *s
!= '\0'; ++s
)
363 case '0': case '1': case '2': case '3': case '4':
364 case '5': case '6': case '7': case '8': case '9':
368 lno
= strtoul(s
, &s
, 10);
369 if (*s
== '\0') /* Loop increment correction. */
371 if (errno
== ERANGE
) {
373 msgq(sp
, M_ERR
, "Count overflow.");
374 else if (lno
== LONG_MIN
)
375 msgq(sp
, M_ERR
, "Count underflow.");
377 msgq(sp
, M_SYSERR
, NULL
);
381 * In historic vi, the count was inclusive from the
384 cmdp
->addr1
.lno
= cmdp
->addr2
.lno
;
385 cmdp
->addr2
.lno
+= lno
- 1;
403 if (LF_ISSET(SUB_FIRST
)) {
405 "Regular expression specified; r flag meaningless.");
408 if (!F_ISSET(sp
, S_SUBRE_SET
)) {
410 "No previous regular expression.");
419 if (*s
!= '\0' || !rflag
&& LF_ISSET(SUB_MUSTSETR
)) {
420 usage
: msgq(sp
, M_ERR
, "Usage: %s", cmdp
->cmd
->usage
);
424 if (IN_VI_MODE(sp
) && cflag
&& (lflag
|| nflag
|| pflag
)) {
426 "The #, l and p flags may not be combined with the c flag in vi mode.");
430 if (!F_ISSET(sp
, S_GLOBAL
))
431 SET_UP_INTERRUPTS(subst_intr
);
434 * bp: if interactive, line cache
435 * blen: if interactive, line cache length
436 * lb: build buffer pointer.
437 * lbclen: current length of built buffer.
438 * lblen: length of build buffer.
441 blen
= lbclen
= lblen
= 0;
443 /* For each line... */
444 for (matched
= quit
= 0, lno
= cmdp
->addr1
.lno
,
445 elno
= cmdp
->addr2
.lno
; !quit
&& lno
<= elno
; ++lno
) {
447 /* Someone's unhappy, time to stop. */
448 if (F_ISSET(sp
, S_INTERRUPTED
)) {
449 if (!F_ISSET(sp
, S_GLOBAL
))
450 msgq(sp
, M_INFO
, "Interrupted.");
455 if ((s
= file_gline(sp
, ep
, lno
, &llen
)) == NULL
) {
456 GETLINE_ERR(sp
, lno
);
461 * Make a local copy if doing confirmation -- when calling
462 * the confirm routine we're likely to lose the cached copy.
466 GET_SPACE_RET(sp
, bp
, blen
, llen
);
468 ADD_SPACE_RET(sp
, bp
, blen
, llen
);
469 memmove(bp
, s
, llen
);
473 /* Start searching from the beginning. */
477 /* Reset the build buffer offset. */
480 /* Reset empty match flag. */
484 * We don't want to have to do a setline if the line didn't
485 * change -- keep track of whether or not this line changed.
486 * If doing confirmations, don't want to keep setting the
487 * line if change is refused -- keep track of substitutions.
489 didsub
= linechanged
= 0;
491 /* New line, do an EOL match. */
494 /* It's not nul terminated, but we pretend it is. */
495 eflags
= REG_STARTEND
;
498 * The search area is from s + offset to the EOL.
500 * Generally, sp->match[0].rm_so is the offset of the start
501 * of the match from the start of the search, and offset is
502 * the offset of the start of the last search.
504 nextmatch
: sp
->match
[0].rm_so
= 0;
505 sp
->match
[0].rm_eo
= len
;
507 /* Get the next match. */
509 (char *)s
+ offset
, re
->re_nsub
+ 1, sp
->match
, eflags
);
512 * If there wasn't a match or there was an error, deal with
513 * it. If there was a previous match in this line, resolve
514 * the changes into the database. Otherwise, just move on.
516 if (eval
== REG_NOMATCH
)
519 re_error(sp
, eval
, re
);
524 /* Only the first search can match an anchored expression. */
525 eflags
|= REG_NOTBOL
;
529 * It's possible to match 0-length strings -- for example, the
530 * command s;a*;X;, when matched against the string "aabb" will
531 * result in "XbXbX", i.e. the matches are "aa", the space
532 * between the b's and the space between the b's and the end of
533 * the string. There is a similar space between the beginning
534 * of the string and the a's. The rule that we use (because vi
535 * historically used it) is that any 0-length match, occurring
536 * immediately after a match, is ignored. Otherwise, the above
537 * example would have resulted in "XXbXbX". Another example is
538 * incorrectly using " *" to replace groups of spaces with one
541 * The way we do this is that if we just had a successful match,
542 * the starting offset does not skip characters, and the match
543 * is empty, ignore the match and move forward. If there are no
544 * more characters in the string, we were attempting to match
545 * after the last character, so quit.
548 sp
->match
[0].rm_so
== 0 && sp
->match
[0].rm_eo
== 0) {
552 BUILD(sp
, s
+ offset
, 1)
558 /* Confirm change. */
561 * Set the cursor position for confirmation. Note,
562 * if we matched on a '$', the cursor may be past
566 * We may want to "fix" this in the confirm routine,
567 * if the confirm routine should be able to display
570 from
.lno
= to
.lno
= lno
;
571 from
.cno
= sp
->match
[0].rm_so
+ offset
;
572 to
.cno
= sp
->match
[0].rm_eo
;
574 from
.cno
= to
.cno
= 0;
578 if (from
.cno
>= llen
)
581 switch (sp
->s_confirm(sp
, ep
, &from
, &to
)) {
586 BUILD(sp
, s
+offset
, sp
->match
[0].rm_eo
);
589 /* Set the quit flag. */
592 /* If interruptible, pass the info back. */
593 if (F_ISSET(sp
, S_INTERRUPTIBLE
))
594 F_SET(sp
, S_INTERRUPTED
);
597 * If any changes, resolve them, otherwise
598 * return to the main loop.
604 /* Copy the bytes before the match into the build buffer. */
605 BUILD(sp
, s
+ offset
, sp
->match
[0].rm_so
);
608 * Cursor moves to last line changed, unless doing confirm,
609 * in which case don't move it.
612 * Historic vi just put the cursor on the first non-blank
613 * of the last line changed. This might be better.
617 sp
->cno
= sp
->match
[0].rm_so
+ offset
;
620 /* Substitute the matching bytes. */
622 if (regsub(sp
, s
+ offset
, &lb
, &lbclen
, &lblen
))
625 /* Set the change flag so we know this line was modified. */
628 /* Move past the matched bytes. */
629 skip
: offset
+= sp
->match
[0].rm_eo
;
630 len
-= sp
->match
[0].rm_eo
;
632 /* A match cannot be followed by an empty pattern. */
636 * If doing a global change with confirmation, we have to
637 * update the screen. The basic idea is to store the line
638 * so the screen update routines can find it, and restart.
640 if (didsub
&& cflag
&& gflag
) {
642 * The new search offset will be the end of the
645 saved_offset
= lbclen
;
647 /* Copy the rest of the line. */
649 BUILD(sp
, s
+ offset
, len
)
651 /* Set the new offset. */
652 offset
= saved_offset
;
654 /* Store inserted lines, adjusting the build buffer. */
658 cnt
< sp
->newl_cnt
; ++cnt
, ++lno
, ++elno
) {
659 if (file_iline(sp
, ep
, lno
,
660 lb
+ last
, sp
->newl
[cnt
] - last
))
662 last
= sp
->newl
[cnt
] + 1;
663 ++sp
->rptlines
[L_ADDED
];
670 /* Store and retrieve the line. */
671 if (file_sline(sp
, ep
, lno
, lb
+ last
, lbclen
))
673 if ((s
= file_gline(sp
, ep
, lno
, &llen
)) == NULL
) {
674 GETLINE_ERR(sp
, lno
);
677 ADD_SPACE_RET(sp
, bp
, blen
, llen
)
678 memmove(bp
, s
, llen
);
682 /* Restart the build. */
684 BUILD(sp
, s
, offset
);
687 * If we haven't already done the after-the-string
688 * match, do one. Set REG_NOTEOL so the '$' pattern
695 eflags
|= REG_NOTEOL
;
703 * If at the end of the string, do a test for the after
704 * the string match. Set REG_NOTEOL so the '$' pattern
707 if (gflag
&& do_eol_match
) {
710 eflags
|= REG_NOTEOL
;
715 endmatch
: if (!linechanged
)
718 /* Copy any remaining bytes into the build buffer. */
720 BUILD(sp
, s
+ offset
, len
)
722 /* Store inserted lines, adjusting the build buffer. */
726 cnt
< sp
->newl_cnt
; ++cnt
, ++lno
, ++elno
) {
727 if (file_iline(sp
, ep
,
728 lno
, lb
+ last
, sp
->newl
[cnt
] - last
))
730 last
= sp
->newl
[cnt
] + 1;
731 ++sp
->rptlines
[L_ADDED
];
737 /* Store the changed line. */
738 if (file_sline(sp
, ep
, lno
, lb
+ last
, lbclen
))
741 /* Update changed line counter. */
742 ++sp
->rptlines
[L_CHANGED
];
744 /* Display as necessary. */
745 if (lflag
|| nflag
|| pflag
) {
746 from
.lno
= to
.lno
= lno
;
747 from
.cno
= to
.cno
= 0;
749 ex_print(sp
, ep
, &from
, &to
, E_F_LIST
);
751 ex_print(sp
, ep
, &from
, &to
, E_F_HASH
);
753 ex_print(sp
, ep
, &from
, &to
, E_F_PRINT
);
758 * If not in a global command, and nothing matched, say so.
759 * Else, if none of the lines displayed, put something up.
762 if (!F_ISSET(sp
, S_GLOBAL
))
763 msgq(sp
, M_INFO
, "No match found.");
764 } else if (!lflag
&& !nflag
&& !pflag
)
765 F_SET(EXP(sp
), EX_AUTOPRINT
);
773 if (!F_ISSET(sp
, S_GLOBAL
))
774 TEAR_DOWN_INTERRUPTS
;
777 FREE_SPACE(sp
, bp
, blen
);
783 * Do the substitution for a regular expression.
786 regsub(sp
, ip
, lbp
, lbclenp
, lblenp
)
788 char *ip
; /* Input line. */
790 size_t *lbclenp
, *lblenp
;
792 enum { C_NOTSET
, C_LOWER
, C_ONELOWER
, C_ONEUPPER
, C_UPPER
} conv
;
793 size_t lbclen
, lblen
; /* Local copies. */
794 size_t mlen
; /* Match length. */
795 size_t rpl
; /* Remaining replacement length. */
796 char *rp
; /* Replacement pointer. */
798 int no
; /* Match replacement offset. */
799 char *p
, *t
; /* Buffer pointers. */
800 char *lb
; /* Local copies. */
802 lb
= *lbp
; /* Get local copies. */
809 * There are some special sequences that vi provides in the
810 * replacement patterns.
811 * & string the RE matched (\& if nomagic set)
812 * \# n-th regular subexpression
813 * \E end \U, \L conversion
814 * \e end \U, \L conversion
815 * \l convert the next character to lower-case
816 * \L convert to lower-case, until \E, \e, or end of replacement
817 * \u convert the next character to upper-case
818 * \U convert to upper-case, until \E, \e, or end of replacement
820 * Otherwise, since this is the lowest level of replacement, discard
821 * all escape characters. This (hopefully) follows historic practice.
823 #define ADDCH(ch) { \
824 CHAR_T __ch = (ch); \
825 u_int __value = term_key_val(sp, __ch); \
826 if (__value == K_CR || __value == K_NL) { \
828 sp->newl[sp->newl_cnt++] = lbclen; \
829 } else if (conv != C_NOTSET) { \
836 __ch = tolower(__ch); \
843 __ch = toupper(__ch); \
854 for (rp
= sp
->repl
, rpl
= sp
->repl_len
, p
= lb
+ lbclen
; rpl
--;) {
855 switch (ch
= *rp
++) {
857 if (O_ISSET(sp
, O_MAGIC
)) {
868 if (!O_ISSET(sp
, O_MAGIC
)) {
874 case '0': case '1': case '2': case '3': case '4':
875 case '5': case '6': case '7': case '8': case '9':
877 subzero
: if (sp
->match
[no
].rm_so
== -1 ||
878 sp
->match
[no
].rm_eo
== -1)
881 sp
->match
[no
].rm_eo
- sp
->match
[no
].rm_so
;
882 for (t
= ip
+ sp
->match
[no
].rm_so
; mlen
--; ++t
)
914 *lbp
= lb
; /* Update caller's information. */
921 checkmatchsize(sp
, re
)
925 /* Build nsub array as necessary. */
926 if (sp
->matchsize
< re
->re_nsub
+ 1) {
927 sp
->matchsize
= re
->re_nsub
+ 1;
928 REALLOC(sp
, sp
->match
,
929 regmatch_t
*, sp
->matchsize
* sizeof(regmatch_t
));
930 if (sp
->match
== NULL
) {
940 * Set the interrupt bit in any screen that is interruptible.
943 * In the future this may be a problem. The user should be able to move to
944 * another screen and keep typing while this runs. If so, and the user has
945 * more than one substitute running, it will be hard to decide which one to
954 for (sp
= __global_list
->dq
.cqh_first
;
955 sp
!= (void *)&__global_list
->dq
; sp
= sp
->q
.cqe_next
)
956 if (F_ISSET(sp
, S_INTERRUPTIBLE
))
957 F_SET(sp
, S_INTERRUPTED
);