2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
5 * %sccs.include.redist.c%
9 static char sccsid
[] = "$Id: ex_subst.c,v 8.19 1993/11/29 14:15:19 bostic Exp $ (Berkeley) $Date: 1993/11/29 14:15:19 $";
12 #include <sys/types.h>
22 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
23 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
25 static int checkmatchsize
__P((SCR
*, regex_t
*));
26 static inline int regsub
__P((SCR
*,
27 char *, char **, size_t *, size_t *));
28 static int substitute
__P((SCR
*, EXF
*,
29 EXCMDARG
*, char *, regex_t
*, u_int
));
32 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
34 * Substitute on lines matching a pattern.
37 ex_substitute(sp
, ep
, cmdp
)
45 int delim
, eval
, reflags
, replaced
;
46 char *bp
, *ptrn
, *rep
, *p
, *t
;
49 * Skip leading white space. Historic vi allowed any non-
50 * alphanumeric to serve as the substitution command delimiter.
52 for (p
= cmdp
->argv
[0]; isblank(*p
); ++p
);
55 return (substitute(sp
, ep
,
56 cmdp
, p
, &sp
->subre
, SUB_MUSTSETR
));
59 * Get the pattern string, toss escaped characters.
62 * Historic vi accepted any of the following forms:
64 * :s/abc/def/ change "abc" to "def"
65 * :s/abc/def change "abc" to "def"
66 * :s/abc/ delete "abc"
71 * Only toss an escape character if it escapes a delimiter.
72 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
73 * would be nice to be more regular, i.e. for each layer of
74 * escaping a single escape character is removed, but that's
75 * not how the historic vi worked.
77 for (ptrn
= t
= p
;;) {
78 if (p
[0] == '\0' || p
[0] == delim
) {
83 * Nul terminate the pattern string -- it's passed
84 * to regcomp which doesn't understand anything else.
89 if (p
[0] == '\\' && p
[1] == delim
)
94 /* If the pattern string is empty, use the last one. */
96 if (!F_ISSET(sp
, S_SUBRE_SET
)) {
98 "No previous regular expression.");
106 if (O_ISSET(sp
, O_EXTENDED
))
107 reflags
|= REG_EXTENDED
;
108 if (O_ISSET(sp
, O_IGNORECASE
))
109 reflags
|= REG_ICASE
;
111 /* Convert vi-style RE's to POSIX 1003.2 RE's. */
112 if (re_conv(sp
, &ptrn
, &replaced
))
115 /* Compile the RE. */
116 eval
= regcomp(&lre
, (char *)ptrn
, reflags
);
118 /* Free up any allocated memory. */
120 FREE_SPACE(sp
, ptrn
, 0);
123 re_error(sp
, eval
, &lre
);
128 * Set saved RE. Historic practice is that
129 * substitutes set direction as well as the RE.
132 sp
->searchdir
= FORWARD
;
133 F_SET(sp
, S_SUBRE_SET
);
140 * Get the replacement string.
142 * The special character ~ (\~ if O_MAGIC not set) inserts the
143 * previous replacement string into this replacement string.
145 * The special character & (\& if O_MAGIC not set) matches the
146 * entire RE. No handling of & is required here, it's done by
151 * Only toss an escape character if it escapes a delimiter or
152 * if O_MAGIC is set and it escapes a tilde.
155 if (sp
->repl
!= NULL
)
156 FREE(sp
->repl
, sp
->repl_len
);
161 * Count ~'s to figure out how much space we need. We could
162 * special case nonexistent last patterns or whether or not
163 * O_MAGIC is set, but it's probably not worth the effort.
165 for (rep
= p
, len
= 0;
166 p
[0] != '\0' && p
[0] != delim
; ++p
, ++len
)
169 GET_SPACE(sp
, bp
, blen
, len
);
170 for (t
= bp
, len
= 0, p
= rep
;;) {
171 if (p
[0] == '\0' || p
[0] == delim
) {
179 else if (p
[1] == '~') {
181 if (!O_ISSET(sp
, O_MAGIC
))
184 } else if (p
[0] == '~' && O_ISSET(sp
, O_MAGIC
)) {
186 memmove(t
, sp
->repl
, sp
->repl_len
);
194 if (sp
->repl
!= NULL
)
195 FREE(sp
->repl
, sp
->repl_len
);
196 if ((sp
->repl
= malloc(len
)) == NULL
) {
197 msgq(sp
, M_SYSERR
, NULL
);
198 FREE_SPACE(sp
, bp
, blen
);
201 memmove(sp
->repl
, bp
, len
);
203 FREE_SPACE(sp
, bp
, blen
);
206 if (checkmatchsize(sp
, &sp
->subre
))
208 return (substitute(sp
, ep
, cmdp
, p
, re
, flags
));
213 * [line [,line]] & [cgr] [count] [#lp]
215 * Substitute using the last substitute RE and replacement pattern.
218 ex_subagain(sp
, ep
, cmdp
)
223 if (!F_ISSET(sp
, S_SUBRE_SET
)) {
224 msgq(sp
, M_ERR
, "No previous regular expression.");
227 return (substitute(sp
, ep
, cmdp
, cmdp
->argv
[0], &sp
->subre
, 0));
232 * [line [,line]] ~ [cgr] [count] [#lp]
234 * Substitute using the last RE and last substitute replacement pattern.
237 ex_subtilde(sp
, ep
, cmdp
)
242 if (!F_ISSET(sp
, S_SRE_SET
)) {
243 msgq(sp
, M_ERR
, "No previous regular expression.");
246 return (substitute(sp
, ep
, cmdp
, cmdp
->argv
[0], &sp
->sre
, 0));
250 * The nasty part of the substitution is what happens when the replacement
251 * string contains newlines. It's a bit tricky -- consider the information
252 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
253 * to build a set of newline offsets which we use to break the line up later,
254 * when the replacement is done. Don't change it unless you're pretty damned
257 #define NEEDNEWLINE(sp) { \
258 if (sp->newl_len == sp->newl_cnt) { \
259 sp->newl_len += 25; \
260 if ((sp->newl = realloc(sp->newl, \
261 sp->newl_len * sizeof(size_t))) == NULL) { \
262 msgq(sp, M_SYSERR, NULL); \
269 #define BUILD(sp, l, len) { \
270 if (lbclen + (len) > lblen) { \
271 lblen += MAX(lbclen + (len), 256); \
272 if ((lb = realloc(lb, lblen)) == NULL) { \
273 msgq(sp, M_SYSERR, NULL); \
278 memmove(lb + lbclen, l, len); \
282 #define NEEDSP(sp, len, pnt) { \
283 if (lbclen + (len) > lblen) { \
284 lblen += MAX(lbclen + (len), 256); \
285 if ((lb = realloc(lb, lblen)) == NULL) { \
286 msgq(sp, M_SYSERR, NULL); \
296 * Do the substitution. This stuff is *really* tricky. There are
297 * lots of special cases, and general nastiness. Don't mess with it
298 * unless you're pretty confident.
301 substitute(sp
, ep
, cmdp
, s
, re
, flags
)
310 recno_t elno
, lno
, lastline
;
311 size_t blen
, cnt
, last
, lbclen
, lblen
, len
, offset
;
312 int do_eol_match
, eflags
, eval
, linechanged
, quit
;
313 int cflag
, gflag
, lflag
, nflag
, pflag
, rflag
;
317 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
318 * it only displayed the last change. I'd disallow them, but they are
319 * useful in combination with the [v]global commands. In the current
320 * model the problem is combining them with the 'c' flag -- the screen
321 * would have to flip back and forth between the confirm screen and the
322 * ex print screen, which would be pretty awful. We do display all
323 * changes, though, for what that's worth.
326 * Historic vi was fairly strict about the order of "options", the
327 * count, and "flags". I'm somewhat fuzzy on the difference between
328 * options and flags, anyway, so this is a simpler approach, and we
329 * just take them in whatever order the user gives them. (The ex
330 * usage statement doesn't reflect this.)
332 cflag
= gflag
= lflag
= nflag
= pflag
= rflag
= 0;
333 for (lno
= OOBLNO
; *s
!= '\0'; ++s
)
338 case '0': case '1': case '2': case '3': case '4':
339 case '5': case '6': case '7': case '8': case '9':
343 lno
= strtoul(s
, &s
, 10);
344 if (errno
== ERANGE
) {
346 msgq(sp
, M_ERR
, "Count overflow.");
347 else if (lno
== LONG_MIN
)
348 msgq(sp
, M_ERR
, "Count underflow.");
350 msgq(sp
, M_SYSERR
, NULL
);
354 * In historic vi, the count was inclusive from the
357 cmdp
->addr1
.lno
= cmdp
->addr2
.lno
;
358 cmdp
->addr2
.lno
+= lno
- 1;
376 if (LF_ISSET(SUB_FIRST
)) {
378 "Regular expression specified; r flag meaningless.");
381 if (!F_ISSET(sp
, S_SUBRE_SET
)) {
383 "No previous regular expression.");
392 if (*s
!= '\0' || !rflag
&& LF_ISSET(SUB_MUSTSETR
)) {
393 usage
: msgq(sp
, M_ERR
, "Usage: %s", cmdp
->cmd
->usage
);
397 if (IN_VI_MODE(sp
) && cflag
&& (lflag
|| nflag
|| pflag
)) {
399 "The #, l and p flags may not be combined with the c flag in vi mode.");
403 /* Get some space. */
404 GET_SPACE(sp
, bp
, blen
, 512);
407 * lb: build buffer pointer.
408 * lbclen: current length of built buffer.
409 * lblen: length of build buffer.
415 * Since multiple changes can happen in a line, we only increment
416 * the change count on the first change to a line.
420 /* For each line... */
421 for (quit
= 0, lno
= cmdp
->addr1
.lno
,
422 elno
= cmdp
->addr2
.lno
; !quit
&& lno
<= elno
; ++lno
) {
425 if ((s
= file_gline(sp
, ep
, lno
, &len
)) == NULL
) {
426 GETLINE_ERR(sp
, lno
);
431 * Make a local copy if doing confirmation -- when calling
432 * the confirm routine we're likely to lose our cached copy.
435 ADD_SPACE(sp
, bp
, blen
, len
)
440 /* Reset the buffer pointer. */
444 * We don't want to have to do a setline if the line didn't
445 * change -- keep track of whether or not this line changed.
449 /* New line, do EOL match. */
452 /* It's not nul terminated, but we pretend it is. */
453 eflags
= REG_STARTEND
;
455 /* The search area is from 's' to the end of the line. */
456 nextmatch
: sp
->match
[0].rm_so
= 0;
457 sp
->match
[0].rm_eo
= len
;
459 /* Get the next match. */
460 skipmatch
: eval
= regexec(re
,
461 (char *)s
, re
->re_nsub
+ 1, sp
->match
, eflags
);
464 * There wasn't a match -- if there was an error, deal with
465 * it. If there was a previous match in this line, resolve
466 * the changes into the database. Otherwise, just move on.
468 if (eval
== REG_NOMATCH
) {
474 re_error(sp
, eval
, re
);
478 /* Confirm change. */
481 * Set the cursor position for confirmation. Note,
482 * if we matched on a '$', the cursor may be past
486 * May want to "fix" this in the confirm routine;
487 * the confirm routine may be able to display a
491 from
.cno
= sp
->match
[0].rm_so
;
493 to
.cno
= sp
->match
[0].rm_eo
;
501 switch (sp
->s_confirm(sp
, ep
, &from
, &to
)) {
506 * Copy the bytes before the match and the
507 * bytes in the match into the build buffer.
509 BUILD(sp
, s
, sp
->match
[0].rm_eo
);
512 /* Set the quit flag. */
515 /* If interruptible, pass the info back. */
516 if (F_ISSET(sp
, S_INTERRUPTIBLE
))
517 F_SET(sp
, S_INTERRUPTED
);
520 * If any changes, resolve them, otherwise
521 * return to the main loop.
529 /* Copy the bytes before the match into the build buffer. */
530 BUILD(sp
, s
, sp
->match
[0].rm_so
);
533 * Update the cursor to the start of the change.
536 * Historic vi just put the cursor on the first non-blank
537 * of the last line changed. This might be better.
540 sp
->cno
= sp
->match
[0].rm_so
;
542 /* Substitute the matching bytes. */
543 if (regsub(sp
, s
, &lb
, &lbclen
, &lblen
))
546 /* Set the change flag so we know this line was modified. */
550 * Move the pointers past the matched bytes. One very special
551 * case is that it's possible to match strings of 0 length.
552 * A common example is trying to use " *" to replace groups of
553 * spaces with a single space. Guarantee that we move forward,
554 * but not if we're matching the 0 length string after the last
555 * character in the line.
557 skip
: s
+= sp
->match
[0].rm_eo
;
558 len
-= sp
->match
[0].rm_eo
;
559 if (len
&& sp
->match
[0].rm_so
== sp
->match
[0].rm_eo
) {
565 /* Only the first search matches anchored expression. */
566 eflags
|= REG_NOTBOL
;
569 * If doing a global change with confirmation, we have to
570 * update the screen. The basic idea is to store the line
571 * so the screen update routines can find it, but start at
574 if (linechanged
&& cflag
&& gflag
) {
578 /* Copy the suffix. */
582 /* Store inserted lines, adjusting the build buffer. */
585 for (cnt
= 0; cnt
< sp
->newl_cnt
;
586 ++cnt
, ++lno
, ++elno
, ++lastline
) {
587 if (file_iline(sp
, ep
, lno
,
588 lb
+ last
, sp
->newl
[cnt
] - last
))
590 last
= sp
->newl
[cnt
] + 1;
591 ++sp
->rptlines
[L_ADDED
];
599 /* Store and retrieve the line. */
600 if (file_sline(sp
, ep
, lno
, lb
+ last
, lbclen
))
602 if ((s
= file_gline(sp
, ep
, lno
, &len
)) == NULL
) {
603 GETLINE_ERR(sp
, lno
);
606 ADD_SPACE(sp
, bp
, blen
, len
)
610 /* Restart the build. */
613 /* Update changed line counter. */
614 if (lastline
!= lno
) {
615 ++sp
->rptlines
[L_CHANGED
];
619 /* Start in the middle of the line. */
620 sp
->match
[0].rm_so
= offset
;
621 sp
->match
[0].rm_eo
= len
;
624 * If it's global, continue.
626 * NB: It's possible to match 0-length strings, and we
627 * behave as if such a string matches the space before
628 * the first character in the string and after the last
629 * character in the string. (This is how the historic
630 * vi did it.) So, do one more check after reaching
631 * the end of the string. Set REG_NOTEOL so the '$'
632 * pattern only matches once. One possible bug is that
633 * the '$' pattern will match BEFORE the empty match
634 * after the last character matches. (The '^' matching
635 * doesn't share the problem because the first match
636 * will force you past the matching location.) I don't
637 * see any reasonable way to fix this now.
642 eflags
|= REG_NOTEOL
;
648 /* If it's global ... (see comment immediately above). */
649 if (gflag
&& do_eol_match
) {
652 eflags
|= REG_NOTEOL
;
658 /* Copy any remaining bytes into the build buffer. */
662 /* Store inserted lines, adjusting the build buffer. */
665 for (cnt
= 0; cnt
< sp
->newl_cnt
;
666 ++cnt
, ++lno
, ++elno
, ++lastline
) {
667 if (file_iline(sp
, ep
,
668 lno
, lb
+ last
, sp
->newl
[cnt
] - last
))
670 last
= sp
->newl
[cnt
] + 1;
671 ++sp
->rptlines
[L_ADDED
];
679 /* Store the changed line. */
681 if (file_sline(sp
, ep
, lno
, lb
+ last
, lbclen
))
684 /* Update changed line counter. */
685 if (lastline
!= lno
) {
686 ++sp
->rptlines
[L_CHANGED
];
690 /* Display as necessary. */
691 if (lflag
|| nflag
|| pflag
) {
692 from
.lno
= to
.lno
= lno
;
693 from
.cno
= to
.cno
= 0;
695 ex_print(sp
, ep
, &from
, &to
, E_F_LIST
);
697 ex_print(sp
, ep
, &from
, &to
, E_F_HASH
);
699 ex_print(sp
, ep
, &from
, &to
, E_F_PRINT
);
704 * Cursor moves to last line changed, unless doing confirm,
705 * in which case don't move it.
707 if (!cflag
&& lastline
!= OOBLNO
)
711 * Note if nothing found. Else, if nothing displayed to the
712 * screen, put something up.
714 if (sp
->rptlines
[L_CHANGED
] == 0 && !F_ISSET(sp
, S_GLOBAL
))
715 msgq(sp
, M_INFO
, "No match found.");
716 else if (!lflag
&& !nflag
&& !pflag
)
717 F_SET(sp
, S_AUTOPRINT
);
719 FREE_SPACE(sp
, bp
, blen
);
722 ret1
: FREE_SPACE(sp
, bp
, blen
);
728 * Do the substitution for a regular expression.
731 regsub(sp
, ip
, lbp
, lbclenp
, lblenp
)
733 char *ip
; /* Input line. */
735 size_t *lbclenp
, *lblenp
;
737 enum { C_NOTSET
, C_LOWER
, C_ONELOWER
, C_ONEUPPER
, C_UPPER
} conv
;
738 size_t lbclen
, lblen
; /* Local copies. */
739 size_t mlen
; /* Match length. */
740 size_t rpl
; /* Remaining replacement length. */
741 char *rp
; /* Replacement pointer. */
743 int no
; /* Match replacement offset. */
744 char *p
, *t
; /* Buffer pointers. */
745 char *lb
; /* Local copies. */
747 lb
= *lbp
; /* Get local copies. */
754 * There are some special sequences that vi provides in the
755 * replacement patterns.
756 * & string the RE matched (\& if nomagic set)
757 * \# n-th regular subexpression
758 * \E end \U, \L conversion
759 * \e end \U, \L conversion
760 * \l convert the next character to lower-case
761 * \L convert to lower-case, until \E, \e, or end of replacement
762 * \u convert the next character to upper-case
763 * \U convert to upper-case, until \E, \e, or end of replacement
765 * Otherwise, since this is the lowest level of replacement, discard
766 * all escape characters. This (hopefully) follows historic practice.
768 #define ADDCH(ch) { \
770 __value = term_key_val(sp, ch); \
771 if (__value == K_CR || __value == K_NL) { \
773 sp->newl[sp->newl_cnt++] = lbclen; \
774 } else if (conv != C_NOTSET) { \
799 for (rp
= sp
->repl
, rpl
= sp
->repl_len
, p
= lb
+ lbclen
; rpl
--;) {
800 switch (ch
= *rp
++) {
802 if (O_ISSET(sp
, O_MAGIC
)) {
813 if (!O_ISSET(sp
, O_MAGIC
)) {
819 case '0': case '1': case '2': case '3': case '4':
820 case '5': case '6': case '7': case '8': case '9':
822 subzero
: if (sp
->match
[no
].rm_so
== -1 ||
823 sp
->match
[no
].rm_eo
== -1)
826 sp
->match
[no
].rm_eo
- sp
->match
[no
].rm_so
;
827 for (t
= ip
+ sp
->match
[no
].rm_so
; mlen
--; ++t
)
859 *lbp
= lb
; /* Update caller's information. */
866 checkmatchsize(sp
, re
)
870 /* Build nsub array as necessary. */
871 if (sp
->matchsize
< re
->re_nsub
+ 1) {
872 sp
->matchsize
= re
->re_nsub
+ 1;
873 if ((sp
->match
= realloc(sp
->match
,
874 sp
->matchsize
* sizeof(regmatch_t
))) == NULL
) {
875 msgq(sp
, M_SYSERR
, NULL
);