MFC:
[dragonfly.git] / contrib / nvi / ex / ex_subst.c
blob0ebb81dd58e7c66231ac8bb7d675fc9e6bb0c80f
1 /*-
2 * Copyright (c) 1992, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
8 */
10 #include "config.h"
12 #ifndef lint
13 static const char sccsid[] = "@(#)ex_subst.c 10.37 (Berkeley) 9/15/96";
14 #endif /* not lint */
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
29 #include "../common/common.h"
30 #include "../vi/vi.h"
32 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
33 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
35 static int re_conv __P((SCR *, char **, size_t *, int *));
36 static int re_cscope_conv __P((SCR *, char **, size_t *, int *));
37 static int re_sub __P((SCR *,
38 char *, char **, size_t *, size_t *, regmatch_t [10]));
39 static int re_tag_conv __P((SCR *, char **, size_t *, int *));
40 static int s __P((SCR *, EXCMD *, char *, regex_t *, u_int));
43 * ex_s --
44 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
46 * Substitute on lines matching a pattern.
48 * PUBLIC: int ex_s __P((SCR *, EXCMD *));
50 int
51 ex_s(sp, cmdp)
52 SCR *sp;
53 EXCMD *cmdp;
55 regex_t *re;
56 size_t blen, len;
57 u_int flags;
58 int delim;
59 char *bp, *ptrn, *rep, *p, *t;
62 * Skip leading white space.
64 * !!!
65 * Historic vi allowed any non-alphanumeric to serve as the
66 * substitution command delimiter.
68 * !!!
69 * If the arguments are empty, it's the same as &, i.e. we
70 * repeat the last substitution.
72 if (cmdp->argc == 0)
73 goto subagain;
74 for (p = cmdp->argv[0]->bp,
75 len = cmdp->argv[0]->len; len > 0; --len, ++p) {
76 if (!isblank(*p))
77 break;
79 if (len == 0)
80 subagain: return (ex_subagain(sp, cmdp));
82 delim = *p++;
83 if (isalnum(delim) || delim == '\\')
84 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
87 * !!!
88 * The full-blown substitute command reset the remembered
89 * state of the 'c' and 'g' suffices.
91 sp->c_suffix = sp->g_suffix = 0;
94 * Get the pattern string, toss escaping characters.
96 * !!!
97 * Historic vi accepted any of the following forms:
99 * :s/abc/def/ change "abc" to "def"
100 * :s/abc/def change "abc" to "def"
101 * :s/abc/ delete "abc"
102 * :s/abc delete "abc"
104 * QUOTING NOTE:
106 * Only toss an escaping character if it escapes a delimiter.
107 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
108 * would be nice to be more regular, i.e. for each layer of
109 * escaping a single escaping character is removed, but that's
110 * not how the historic vi worked.
112 for (ptrn = t = p;;) {
113 if (p[0] == '\0' || p[0] == delim) {
114 if (p[0] == delim)
115 ++p;
117 * !!!
118 * Nul terminate the pattern string -- it's passed
119 * to regcomp which doesn't understand anything else.
121 *t = '\0';
122 break;
124 if (p[0] == '\\')
125 if (p[1] == delim)
126 ++p;
127 else if (p[1] == '\\')
128 *t++ = *p++;
129 *t++ = *p++;
133 * If the pattern string is empty, use the last RE (not just the
134 * last substitution RE).
136 if (*ptrn == '\0') {
137 if (sp->re == NULL) {
138 ex_emsg(sp, NULL, EXM_NOPREVRE);
139 return (1);
142 /* Re-compile the RE if necessary. */
143 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
144 sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
145 return (1);
146 flags = 0;
147 } else {
149 * !!!
150 * Compile the RE. Historic practice is that substitutes set
151 * the search direction as well as both substitute and search
152 * RE's. We compile the RE twice, as we don't want to bother
153 * ref counting the pattern string and (opaque) structure.
155 if (re_compile(sp, ptrn, t - ptrn,
156 &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
157 return (1);
158 if (re_compile(sp, ptrn, t - ptrn,
159 &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
160 return (1);
162 flags = SUB_FIRST;
163 sp->searchdir = FORWARD;
165 re = &sp->re_c;
168 * Get the replacement string.
170 * The special character & (\& if O_MAGIC not set) matches the
171 * entire RE. No handling of & is required here, it's done by
172 * re_sub().
174 * The special character ~ (\~ if O_MAGIC not set) inserts the
175 * previous replacement string into this replacement string.
176 * Count ~'s to figure out how much space we need. We could
177 * special case nonexistent last patterns or whether or not
178 * O_MAGIC is set, but it's probably not worth the effort.
180 * QUOTING NOTE:
182 * Only toss an escaping character if it escapes a delimiter or
183 * if O_MAGIC is set and it escapes a tilde.
185 * !!!
186 * If the entire replacement pattern is "%", then use the last
187 * replacement pattern. This semantic was added to vi in System
188 * V and then percolated elsewhere, presumably around the time
189 * that it was added to their version of ed(1).
191 if (p[0] == '\0' || p[0] == delim) {
192 if (p[0] == delim)
193 ++p;
194 if (sp->repl != NULL)
195 free(sp->repl);
196 sp->repl = NULL;
197 sp->repl_len = 0;
198 } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
199 p += p[1] == delim ? 2 : 1;
200 else {
201 for (rep = p, len = 0;
202 p[0] != '\0' && p[0] != delim; ++p, ++len)
203 if (p[0] == '~')
204 len += sp->repl_len;
205 GET_SPACE_RET(sp, bp, blen, len);
206 for (t = bp, len = 0, p = rep;;) {
207 if (p[0] == '\0' || p[0] == delim) {
208 if (p[0] == delim)
209 ++p;
210 break;
212 if (p[0] == '\\') {
213 if (p[1] == delim)
214 ++p;
215 else if (p[1] == '\\') {
216 *t++ = *p++;
217 ++len;
218 } else if (p[1] == '~') {
219 ++p;
220 if (!O_ISSET(sp, O_MAGIC))
221 goto tilde;
223 } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
224 tilde: ++p;
225 memcpy(t, sp->repl, sp->repl_len);
226 t += sp->repl_len;
227 len += sp->repl_len;
228 continue;
230 *t++ = *p++;
231 ++len;
233 if ((sp->repl_len = len) != 0) {
234 if (sp->repl != NULL)
235 free(sp->repl);
236 if ((sp->repl = malloc(len)) == NULL) {
237 msgq(sp, M_SYSERR, NULL);
238 FREE_SPACE(sp, bp, blen);
239 return (1);
241 memcpy(sp->repl, bp, len);
243 FREE_SPACE(sp, bp, blen);
245 return (s(sp, cmdp, p, re, flags));
249 * ex_subagain --
250 * [line [,line]] & [cgr] [count] [#lp]]
252 * Substitute using the last substitute RE and replacement pattern.
254 * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
257 ex_subagain(sp, cmdp)
258 SCR *sp;
259 EXCMD *cmdp;
261 if (sp->subre == NULL) {
262 ex_emsg(sp, NULL, EXM_NOPREVRE);
263 return (1);
265 if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp,
266 sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST))
267 return (1);
268 return (s(sp,
269 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
273 * ex_subtilde --
274 * [line [,line]] ~ [cgr] [count] [#lp]]
276 * Substitute using the last RE and last substitute replacement pattern.
278 * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
281 ex_subtilde(sp, cmdp)
282 SCR *sp;
283 EXCMD *cmdp;
285 if (sp->re == NULL) {
286 ex_emsg(sp, NULL, EXM_NOPREVRE);
287 return (1);
289 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
290 sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
291 return (1);
292 return (s(sp,
293 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
297 * s --
298 * Do the substitution. This stuff is *really* tricky. There are lots of
299 * special cases, and general nastiness. Don't mess with it unless you're
300 * pretty confident.
302 * The nasty part of the substitution is what happens when the replacement
303 * string contains newlines. It's a bit tricky -- consider the information
304 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
305 * to build a set of newline offsets which we use to break the line up later,
306 * when the replacement is done. Don't change it unless you're *damned*
307 * confident.
/*
 * Build-buffer helpers.  BUILD is used by s(); NEEDNEWLINE and NEEDSP are
 * used by the OUTCH macro inside re_sub().  Each one expands to a brace
 * block and can execute "return (1)" in the enclosing function when the
 * pointer is NULL after REALLOC.
 *
 * NEEDNEWLINE(sp)	When sp->newl_cnt has reached sp->newl_len, grow
 *			sp->newl by 25 entries; on failure newl_len is
 *			reset to 0 before returning.
 * BUILD(sp, l, len)	Append len bytes from l to lb, first growing lb by
 *			MAX(lbclen + len, 256) if lbclen + len exceeds lblen.
 * NEEDSP(sp, len, pnt)	Grow lb the same way when needed and assign
 *			lb + lbclen to pnt.  NOTE(review): the closing
 *			braces are not visible in this view, so whether pnt
 *			is assigned on every use or only after a grow
 *			cannot be confirmed from here.
 *
 * BUILD and NEEDSP use names lb, lbclen and lblen that are not macro
 * parameters, so they must be in scope wherever the macros are expanded.
 */
309 #define NEEDNEWLINE(sp) { \
310 if (sp->newl_len == sp->newl_cnt) { \
311 sp->newl_len += 25; \
312 REALLOC(sp, sp->newl, size_t *, \
313 sp->newl_len * sizeof(size_t)); \
314 if (sp->newl == NULL) { \
315 sp->newl_len = 0; \
316 return (1); \
321 #define BUILD(sp, l, len) { \
322 if (lbclen + (len) > lblen) { \
323 lblen += MAX(lbclen + (len), 256); \
324 REALLOC(sp, lb, char *, lblen); \
325 if (lb == NULL) { \
326 lbclen = 0; \
327 return (1); \
330 memcpy(lb + lbclen, l, len); \
331 lbclen += len; \
334 #define NEEDSP(sp, len, pnt) { \
335 if (lbclen + (len) > lblen) { \
336 lblen += MAX(lbclen + (len), 256); \
337 REALLOC(sp, lb, char *, lblen); \
338 if (lb == NULL) { \
339 lbclen = 0; \
340 return (1); \
342 pnt = lb + lbclen; \
/*
 * s --
 *	Perform the substitution for ex_s, ex_subagain and ex_subtilde.
 *
 *	sp	Screen.  Supplies the remembered c and g suffix state, the
 *		replacement text used by re_sub() and the newline offset
 *		list (newl, newl_cnt).
 *	cmdp	Command.  addr1 and addr2 give the line range; flagoff is
 *		adjusted by the + and - flags; a count rewrites addr1/addr2.
 *	s	Rest of the command text (flags and count), or NULL when
 *		there is none.  After flag parsing the same variable is
 *		reused as the pointer to the text of the current line.
 *	re	Compiled RE to match with; switched to &sp->re_c when the
 *		r flag is given.
 *	flags	SUB_FIRST makes the r flag an error, SUB_MUSTSETR makes its
 *		absence a usage error.
 *
 *	Returns 0 on success and 1 on error, including the "No match found"
 *	case when not running under a global command.
 *
 *	Outline: parse flags and count; then for each line of the range call
 *	regexec() repeatedly from the current offset, optionally asking for
 *	confirmation, and assemble the new line in lb with BUILD() and
 *	re_sub().  At endmatch a changed line is written back: one
 *	db_insert() per recorded newline offset, then db_set() for the
 *	remainder.  The confirm-with-global path stores and re-reads the
 *	line after every accepted change so that the screen can show it.
 *
 *	NOTE(review): tiq is initialized only in the c flag case, but the ex
 *	confirmation path passes it to ex_txt() whenever sp->c_suffix is
 *	set.  c_suffix is cleared here only when O_EDCOMPATIBLE is off, so
 *	it may still be set from an earlier command -- confirm tiq cannot be
 *	used uninitialized.
 *	NOTE(review): the count is parsed with strtoul() but the ERANGE
 *	handling compares the result with LONG_MAX and LONG_MIN -- confirm
 *	which message is really intended on overflow.
 *	NOTE(review): the vi confirmation path computes llen - 1 even when
 *	llen is 0 -- confirm the resulting column value is harmless.
 */
346 static int
347 s(sp, cmdp, s, re, flags)
348 SCR *sp;
349 EXCMD *cmdp;
350 char *s;
351 regex_t *re;
352 u_int flags;
354 EVENT ev;
355 MARK from, to;
356 TEXTH tiq;
357 recno_t elno, lno, slno;
358 regmatch_t match[10];
359 size_t blen, cnt, last, lbclen, lblen, len, llen;
360 size_t offset, saved_offset, scno;
361 int cflag, lflag, nflag, pflag, rflag;
362 int didsub, do_eol_match, eflags, empty_ok, eval;
363 int linechanged, matched, quit, rval;
364 char *bp, *lb;
366 NEEDFILE(sp, cmdp);
368 slno = sp->lno;
369 scno = sp->cno;
372 * !!!
373 * Historically, the 'g' and 'c' suffices were always toggled as flags,
374 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
375 * not set, they were initialized to 0 for all substitute commands. If
376 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
377 * specified substitute/replacement patterns (see ex_s()).
379 if (!O_ISSET(sp, O_EDCOMPATIBLE))
380 sp->c_suffix = sp->g_suffix = 0;
383 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
384 * it only displayed the last change. I'd disallow them, but they are
385 * useful in combination with the [v]global commands. In the current
386 * model the problem is combining them with the 'c' flag -- the screen
387 * would have to flip back and forth between the confirm screen and the
388 * ex print screen, which would be pretty awful. We do display all
389 * changes, though, for what that's worth.
391 * !!!
392 * Historic vi was fairly strict about the order of "options", the
393 * count, and "flags". I'm somewhat fuzzy on the difference between
394 * options and flags, anyway, so this is a simpler approach, and we
395 * just take it them in whatever order the user gives them. (The ex
396 * usage statement doesn't reflect this.)
398 cflag = lflag = nflag = pflag = rflag = 0;
399 if (s == NULL)
400 goto noargs;
401 for (lno = OOBLNO; *s != '\0'; ++s)
402 switch (*s) {
403 case ' ':
404 case '\t':
405 continue;
406 case '+':
407 ++cmdp->flagoff;
408 break;
409 case '-':
410 --cmdp->flagoff;
411 break;
412 case '0': case '1': case '2': case '3': case '4':
413 case '5': case '6': case '7': case '8': case '9':
414 if (lno != OOBLNO)
415 goto usage;
416 errno = 0;
417 lno = strtoul(s, &s, 10);
418 if (*s == '\0') /* Loop increment correction. */
419 --s;
420 if (errno == ERANGE) {
421 if (lno == LONG_MAX)
422 msgq(sp, M_ERR, "153|Count overflow");
423 else if (lno == LONG_MIN)
424 msgq(sp, M_ERR, "154|Count underflow");
425 else
426 msgq(sp, M_SYSERR, NULL);
427 return (1);
430 * In historic vi, the count was inclusive from the
431 * second address.
433 cmdp->addr1.lno = cmdp->addr2.lno;
434 cmdp->addr2.lno += lno - 1;
435 if (!db_exist(sp, cmdp->addr2.lno) &&
436 db_last(sp, &cmdp->addr2.lno))
437 return (1);
438 break;
439 case '#':
440 nflag = 1;
441 break;
442 case 'c':
443 sp->c_suffix = !sp->c_suffix;
445 /* Ex text structure initialization. */
446 if (F_ISSET(sp, SC_EX)) {
447 memset(&tiq, 0, sizeof(TEXTH));
448 CIRCLEQ_INIT(&tiq);
450 break;
451 case 'g':
452 sp->g_suffix = !sp->g_suffix;
453 break;
454 case 'l':
455 lflag = 1;
456 break;
457 case 'p':
458 pflag = 1;
459 break;
460 case 'r':
461 if (LF_ISSET(SUB_FIRST)) {
462 msgq(sp, M_ERR,
463 "155|Regular expression specified; r flag meaningless");
464 return (1);
466 if (!F_ISSET(sp, SC_RE_SEARCH)) {
467 ex_emsg(sp, NULL, EXM_NOPREVRE);
468 return (1);
470 rflag = 1;
471 re = &sp->re_c;
472 break;
473 default:
474 goto usage;
477 if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) {
478 usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
479 return (1);
482 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
483 msgq(sp, M_ERR,
484 "156|The #, l and p flags may not be combined with the c flag in vi mode");
485 return (1);
489 * bp: if interactive, line cache
490 * blen: if interactive, line cache length
491 * lb: build buffer pointer.
492 * lbclen: current length of built buffer.
493 * lblen; length of build buffer.
495 bp = lb = NULL;
496 blen = lbclen = lblen = 0;
498 /* For each line... */
499 for (matched = quit = 0, lno = cmdp->addr1.lno,
500 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
502 /* Someone's unhappy, time to stop. */
503 if (INTERRUPTED(sp))
504 break;
506 /* Get the line. */
507 if (db_get(sp, lno, DBG_FATAL, &s, &llen))
508 goto err;
511 * Make a local copy if doing confirmation -- when calling
512 * the confirm routine we're likely to lose the cached copy.
514 if (sp->c_suffix) {
515 if (bp == NULL) {
516 GET_SPACE_RET(sp, bp, blen, llen);
517 } else
518 ADD_SPACE_RET(sp, bp, blen, llen);
519 memcpy(bp, s, llen);
520 s = bp;
523 /* Start searching from the beginning. */
524 offset = 0;
525 len = llen;
527 /* Reset the build buffer offset. */
528 lbclen = 0;
530 /* Reset empty match flag. */
531 empty_ok = 1;
534 * We don't want to have to do a setline if the line didn't
535 * change -- keep track of whether or not this line changed.
536 * If doing confirmations, don't want to keep setting the
537 * line if change is refused -- keep track of substitutions.
539 didsub = linechanged = 0;
541 /* New line, do an EOL match. */
542 do_eol_match = 1;
544 /* It's not nul terminated, but we pretend it is. */
545 eflags = REG_STARTEND;
548 * The search area is from s + offset to the EOL.
550 * Generally, match[0].rm_so is the offset of the start
551 * of the match from the start of the search, and offset
552 * is the offset of the start of the last search.
554 nextmatch: match[0].rm_so = 0;
555 match[0].rm_eo = len;
557 /* Get the next match. */
558 eval = regexec(re, (char *)s + offset, 10, match, eflags);
561 * There wasn't a match or if there was an error, deal with
562 * it. If there was a previous match in this line, resolve
563 * the changes into the database. Otherwise, just move on.
565 if (eval == REG_NOMATCH)
566 goto endmatch;
567 if (eval != 0) {
568 re_error(sp, eval, re);
569 goto err;
571 matched = 1;
573 /* Only the first search can match an anchored expression. */
574 eflags |= REG_NOTBOL;
577 * !!!
578 * It's possible to match 0-length strings -- for example, the
579 * command s;a*;X;, when matched against the string "aabb" will
580 * result in "XbXbX", i.e. the matches are "aa", the space
581 * between the b's and the space between the b's and the end of
582 * the string. There is a similar space between the beginning
583 * of the string and the a's. The rule that we use (because vi
584 * historically used it) is that any 0-length match, occurring
585 * immediately after a match, is ignored. Otherwise, the above
586 * example would have resulted in "XXbXbX". Another example is
587 * incorrectly using " *" to replace groups of spaces with one
588 * space.
590 * The way we do this is that if we just had a successful match,
591 * the starting offset does not skip characters, and the match
592 * is empty, ignore the match and move forward. If there's no
593 * more characters in the string, we were attempting to match
594 * after the last character, so quit.
596 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
597 empty_ok = 1;
598 if (len == 0)
599 goto endmatch;
600 BUILD(sp, s + offset, 1)
601 ++offset;
602 --len;
603 goto nextmatch;
606 /* Confirm change. */
607 if (sp->c_suffix) {
609 * Set the cursor position for confirmation. Note,
610 * if we matched on a '$', the cursor may be past
611 * the end of line.
613 from.lno = to.lno = lno;
614 from.cno = match[0].rm_so + offset;
615 to.cno = match[0].rm_eo + offset;
617 * Both ex and vi have to correct for a change before
618 * the first character in the line.
620 if (llen == 0)
621 from.cno = to.cno = 0;
622 if (F_ISSET(sp, SC_VI)) {
624 * Only vi has to correct for a change after
625 * the last character in the line.
627 * XXX
628 * It would be nice to change the vi code so
629 * that we could display a cursor past EOL.
631 if (to.cno >= llen)
632 to.cno = llen - 1;
633 if (from.cno >= llen)
634 from.cno = llen - 1;
636 sp->lno = from.lno;
637 sp->cno = from.cno;
638 if (vs_refresh(sp, 1))
639 goto err;
641 vs_update(sp, msg_cat(sp,
642 "169|Confirm change? [n]", NULL), NULL);
644 if (v_event_get(sp, &ev, 0, 0))
645 goto err;
646 switch (ev.e_event) {
647 case E_CHARACTER:
648 break;
649 case E_EOF:
650 case E_ERR:
651 case E_INTERRUPT:
652 goto lquit;
653 default:
654 v_event_err(sp, &ev);
655 goto lquit;
657 } else {
658 if (ex_print(sp, cmdp, &from, &to, 0) ||
659 ex_scprint(sp, &from, &to))
660 goto lquit;
661 if (ex_txt(sp, &tiq, 0, TXT_CR))
662 goto err;
663 ev.e_c = tiq.cqh_first->lb[0];
666 switch (ev.e_c) {
667 case CH_YES:
668 break;
669 default:
670 case CH_NO:
671 didsub = 0;
672 BUILD(sp, s +offset, match[0].rm_eo);
673 goto skip;
674 case CH_QUIT:
675 /* Set the quit/interrupted flags. */
676 lquit: quit = 1;
677 F_SET(sp->gp, G_INTERRUPTED);
680 * Resolve any changes, then return to (and
681 * exit from) the main loop.
683 goto endmatch;
688 * Set the cursor to the last position changed, converting
689 * from 1-based to 0-based.
691 sp->lno = lno;
692 sp->cno = match[0].rm_so;
694 /* Copy the bytes before the match into the build buffer. */
695 BUILD(sp, s + offset, match[0].rm_so);
697 /* Substitute the matching bytes. */
698 didsub = 1;
699 if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
700 goto err;
702 /* Set the change flag so we know this line was modified. */
703 linechanged = 1;
705 /* Move past the matched bytes. */
706 skip: offset += match[0].rm_eo;
707 len -= match[0].rm_eo;
709 /* A match cannot be followed by an empty pattern. */
710 empty_ok = 0;
713 * If doing a global change with confirmation, we have to
714 * update the screen. The basic idea is to store the line
715 * so the screen update routines can find it, and restart.
717 if (didsub && sp->c_suffix && sp->g_suffix) {
719 * The new search offset will be the end of the
720 * modified line.
722 saved_offset = lbclen;
724 /* Copy the rest of the line. */
725 if (len)
726 BUILD(sp, s + offset, len)
728 /* Set the new offset. */
729 offset = saved_offset;
731 /* Store inserted lines, adjusting the build buffer. */
732 last = 0;
733 if (sp->newl_cnt) {
734 for (cnt = 0;
735 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
736 if (db_insert(sp, lno,
737 lb + last, sp->newl[cnt] - last))
738 goto err;
739 last = sp->newl[cnt] + 1;
740 ++sp->rptlines[L_ADDED];
742 lbclen -= last;
743 offset -= last;
744 sp->newl_cnt = 0;
747 /* Store and retrieve the line. */
748 if (db_set(sp, lno, lb + last, lbclen))
749 goto err;
750 if (db_get(sp, lno, DBG_FATAL, &s, &llen))
751 goto err;
752 ADD_SPACE_RET(sp, bp, blen, llen)
753 memcpy(bp, s, llen);
754 s = bp;
755 len = llen - offset;
757 /* Restart the build. */
758 lbclen = 0;
759 BUILD(sp, s, offset);
762 * If we haven't already done the after-the-string
763 * match, do one. Set REG_NOTEOL so the '$' pattern
764 * only matches once.
766 if (!do_eol_match)
767 goto endmatch;
768 if (offset == len) {
769 do_eol_match = 0;
770 eflags |= REG_NOTEOL;
772 goto nextmatch;
776 * If it's a global:
778 * If at the end of the string, do a test for the after
779 * the string match. Set REG_NOTEOL so the '$' pattern
780 * only matches once.
782 if (sp->g_suffix && do_eol_match) {
783 if (len == 0) {
784 do_eol_match = 0;
785 eflags |= REG_NOTEOL;
787 goto nextmatch;
790 endmatch: if (!linechanged)
791 continue;
793 /* Copy any remaining bytes into the build buffer. */
794 if (len)
795 BUILD(sp, s + offset, len)
797 /* Store inserted lines, adjusting the build buffer. */
798 last = 0;
799 if (sp->newl_cnt) {
800 for (cnt = 0;
801 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
802 if (db_insert(sp,
803 lno, lb + last, sp->newl[cnt] - last))
804 goto err;
805 last = sp->newl[cnt] + 1;
806 ++sp->rptlines[L_ADDED];
808 lbclen -= last;
809 sp->newl_cnt = 0;
812 /* Store the changed line. */
813 if (db_set(sp, lno, lb + last, lbclen))
814 goto err;
816 /* Update changed line counter. */
817 if (sp->rptlchange != lno) {
818 sp->rptlchange = lno;
819 ++sp->rptlines[L_CHANGED];
823 * !!!
824 * Display as necessary. Historic practice is to only
825 * display the last line of a line split into multiple
826 * lines.
828 if (lflag || nflag || pflag) {
829 from.lno = to.lno = lno;
830 from.cno = to.cno = 0;
831 if (lflag)
832 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
833 if (nflag)
834 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
835 if (pflag)
836 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
841 * !!!
842 * Historically, vi attempted to leave the cursor at the same place if
843 * the substitution was done at the current cursor position. Otherwise
844 * it moved it to the first non-blank of the last line changed. There
845 * were some problems: for example, :s/$/foo/ with the cursor on the
846 * last character of the line left the cursor on the last character, or
847 * the & command with multiple occurrences of the matching string in the
848 * line usually left the cursor in a fairly random position.
850 * We try to do the same thing, with the exception that if the user is
851 * doing substitution with confirmation, we move to the last line about
852 * which the user was consulted, as opposed to the last line that they
853 * actually changed. This prevents a screen flash if the user doesn't
854 * change many of the possible lines.
856 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
857 sp->cno = 0;
858 (void)nonblank(sp, sp->lno, &sp->cno);
862 * If not in a global command, and nothing matched, say so.
863 * Else, if none of the lines displayed, put something up.
865 rval = 0;
866 if (!matched) {
867 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
868 msgq(sp, M_ERR, "157|No match found");
869 goto err;
871 } else if (!lflag && !nflag && !pflag)
872 F_SET(cmdp, E_AUTOPRINT);
874 if (0) {
875 err: rval = 1;
878 if (bp != NULL)
879 FREE_SPACE(sp, bp, blen);
880 if (lb != NULL)
881 free(lb);
882 return (rval);
886 * re_compile --
887 * Compile the RE.
889 * PUBLIC: int re_compile __P((SCR *,
890 * PUBLIC: char *, size_t, char **, size_t *, regex_t *, u_int));
893 re_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags)
894 SCR *sp;
895 char *ptrn, **ptrnp;
896 size_t plen, *lenp;
897 regex_t *rep;
898 u_int flags;
900 size_t len;
901 int reflags, replaced, rval;
902 char *p;
904 /* Set RE flags. */
905 reflags = 0;
906 if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
907 if (O_ISSET(sp, O_EXTENDED))
908 reflags |= REG_EXTENDED;
909 if (O_ISSET(sp, O_IGNORECASE))
910 reflags |= REG_ICASE;
911 if (O_ISSET(sp, O_ICLOWER)) {
912 for (p = ptrn, len = plen; len > 0; ++p, --len)
913 if (isupper(*p))
914 break;
915 if (len == 0)
916 reflags |= REG_ICASE;
920 /* If we're replacing a saved value, clear the old one. */
921 if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
922 regfree(&sp->re_c);
923 F_CLR(sp, SC_RE_SEARCH);
925 if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
926 regfree(&sp->subre_c);
927 F_CLR(sp, SC_RE_SUBST);
931 * If we're saving the string, it's a pattern we haven't seen before,
932 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
933 * later recompilation. Free any previously saved value.
935 if (ptrnp != NULL) {
936 if (LF_ISSET(RE_C_CSCOPE)) {
937 if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
938 return (1);
940 * XXX
941 * Currently, the match-any-<blank> expression used in
942 * re_cscope_conv() requires extended RE's. This may
943 * not be right or safe.
945 reflags |= REG_EXTENDED;
946 } else if (LF_ISSET(RE_C_TAG)) {
947 if (re_tag_conv(sp, &ptrn, &plen, &replaced))
948 return (1);
949 } else
950 if (re_conv(sp, &ptrn, &plen, &replaced))
951 return (1);
953 /* Discard previous pattern. */
954 if (*ptrnp != NULL) {
955 free(*ptrnp);
956 *ptrnp = NULL;
958 if (lenp != NULL)
959 *lenp = plen;
962 * Copy the string into allocated memory.
964 * XXX
965 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
966 * for now. There's just no other solution.
968 MALLOC(sp, *ptrnp, char *, plen + 1);
969 if (*ptrnp != NULL) {
970 memcpy(*ptrnp, ptrn, plen);
971 (*ptrnp)[plen] = '\0';
974 /* Free up conversion-routine-allocated memory. */
975 if (replaced)
976 FREE_SPACE(sp, ptrn, 0);
978 if (*ptrnp == NULL)
979 return (1);
981 ptrn = *ptrnp;
985 * XXX
986 * Regcomp isn't 8-bit clean, so we just lost if the pattern
987 * contained a nul. Bummer!
989 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
990 if (!LF_ISSET(RE_C_SILENT))
991 re_error(sp, rval, rep);
992 return (1);
995 if (LF_ISSET(RE_C_SEARCH))
996 F_SET(sp, SC_RE_SEARCH);
997 if (LF_ISSET(RE_C_SUBST))
998 F_SET(sp, SC_RE_SUBST);
1000 return (0);
1004 * re_conv --
1005 * Convert vi's regular expressions into something that the
1006 * the POSIX 1003.2 RE functions can handle.
1008 * There are three conversions we make to make vi's RE's (specifically
1009 * the global, search, and substitute patterns) work with POSIX RE's.
1011 * 1: If O_MAGIC is not set, strip backslashes from the magic character
1012 * set (.[*~) that have them, and add them to the ones that don't.
1013 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1014 * from the last substitute command's replacement string. If O_MAGIC
1015 * is set, it's the string "~".
1016 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1017 * new RE escapes.
1019 * !!!/XXX
1020 * This doesn't exactly match the historic behavior of vi because we do
1021 * the ~ substitution before calling the RE engine, so magic characters
1022 * in the replacement string will be expanded by the RE engine, and they
1023 * weren't historically. It's a bug.
1025 static int
1026 re_conv(sp, ptrnp, plenp, replacedp)
1027 SCR *sp;
1028 char **ptrnp;
1029 size_t *plenp;
1030 int *replacedp;
1032 size_t blen, len, needlen;
1033 int magic;
1034 char *bp, *p, *t;
1037 * First pass through, we figure out how much space we'll need.
1038 * We do it in two passes, on the grounds that most of the time
1039 * the user is doing a search and won't have magic characters.
1040 * That way we can skip most of the memory allocation and copies.
1042 magic = 0;
1043 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1044 switch (*p) {
1045 case '\\':
1046 if (len > 1) {
1047 --len;
1048 switch (*++p) {
1049 case '<':
1050 magic = 1;
1051 needlen += sizeof(RE_WSTART);
1052 break;
1053 case '>':
1054 magic = 1;
1055 needlen += sizeof(RE_WSTOP);
1056 break;
1057 case '~':
1058 if (!O_ISSET(sp, O_MAGIC)) {
1059 magic = 1;
1060 needlen += sp->repl_len;
1062 break;
1063 case '.':
1064 case '[':
1065 case '*':
1066 if (!O_ISSET(sp, O_MAGIC)) {
1067 magic = 1;
1068 needlen += 1;
1070 break;
1071 default:
1072 needlen += 2;
1074 } else
1075 needlen += 1;
1076 break;
1077 case '~':
1078 if (O_ISSET(sp, O_MAGIC)) {
1079 magic = 1;
1080 needlen += sp->repl_len;
1082 break;
1083 case '.':
1084 case '[':
1085 case '*':
1086 if (!O_ISSET(sp, O_MAGIC)) {
1087 magic = 1;
1088 needlen += 2;
1090 break;
1091 default:
1092 needlen += 1;
1093 break;
1096 if (!magic) {
1097 *replacedp = 0;
1098 return (0);
1101 /* Get enough memory to hold the final pattern. */
1102 *replacedp = 1;
1103 GET_SPACE_RET(sp, bp, blen, needlen);
1105 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1106 switch (*p) {
1107 case '\\':
1108 if (len > 1) {
1109 --len;
1110 switch (*++p) {
1111 case '<':
1112 memcpy(t,
1113 RE_WSTART, sizeof(RE_WSTART) - 1);
1114 t += sizeof(RE_WSTART) - 1;
1115 break;
1116 case '>':
1117 memcpy(t,
1118 RE_WSTOP, sizeof(RE_WSTOP) - 1);
1119 t += sizeof(RE_WSTOP) - 1;
1120 break;
1121 case '~':
1122 if (O_ISSET(sp, O_MAGIC))
1123 *t++ = '~';
1124 else {
1125 memcpy(t,
1126 sp->repl, sp->repl_len);
1127 t += sp->repl_len;
1129 break;
1130 case '.':
1131 case '[':
1132 case '*':
1133 if (O_ISSET(sp, O_MAGIC))
1134 *t++ = '\\';
1135 *t++ = *p;
1136 break;
1137 default:
1138 *t++ = '\\';
1139 *t++ = *p;
1141 } else
1142 *t++ = '\\';
1143 break;
1144 case '~':
1145 if (O_ISSET(sp, O_MAGIC)) {
1146 memcpy(t, sp->repl, sp->repl_len);
1147 t += sp->repl_len;
1148 } else
1149 *t++ = '~';
1150 break;
1151 case '.':
1152 case '[':
1153 case '*':
1154 if (!O_ISSET(sp, O_MAGIC))
1155 *t++ = '\\';
1156 *t++ = *p;
1157 break;
1158 default:
1159 *t++ = *p;
1160 break;
1163 *ptrnp = bp;
1164 *plenp = t - bp;
1165 return (0);
1169 * re_tag_conv --
1170 * Convert a tags search path into something that the POSIX
1171 * 1003.2 RE functions can handle.
1173 static int
1174 re_tag_conv(sp, ptrnp, plenp, replacedp)
1175 SCR *sp;
1176 char **ptrnp;
1177 size_t *plenp;
1178 int *replacedp;
1180 size_t blen, len;
1181 int lastdollar;
1182 char *bp, *p, *t;
1184 len = *plenp;
1186 /* Max memory usage is 2 times the length of the string. */
1187 *replacedp = 1;
1188 GET_SPACE_RET(sp, bp, blen, len * 2);
1190 p = *ptrnp;
1191 t = bp;
1193 /* If the last character is a '/' or '?', we just strip it. */
1194 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1195 --len;
1197 /* If the next-to-last or last character is a '$', it's magic. */
1198 if (len > 0 && p[len - 1] == '$') {
1199 --len;
1200 lastdollar = 1;
1201 } else
1202 lastdollar = 0;
1204 /* If the first character is a '/' or '?', we just strip it. */
1205 if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1206 ++p;
1207 --len;
1210 /* If the first or second character is a '^', it's magic. */
1211 if (p[0] == '^') {
1212 *t++ = *p++;
1213 --len;
1217 * Escape every other magic character we can find, meanwhile stripping
1218 * the backslashes ctags inserts when escaping the search delimiter
1219 * characters.
1221 for (; len > 0; --len) {
1222 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1223 ++p;
1224 --len;
1225 } else if (strchr("^.[]$*", p[0]))
1226 *t++ = '\\';
1227 *t++ = *p++;
1229 if (lastdollar)
1230 *t++ = '$';
1232 *ptrnp = bp;
1233 *plenp = t - bp;
1234 return (0);
1238 * re_cscope_conv --
1239 * Convert a cscope search path into something that the POSIX
1240 * 1003.2 RE functions can handle.
1242 static int
1243 re_cscope_conv(sp, ptrnp, plenp, replacedp)
1244 SCR *sp;
1245 char **ptrnp;
1246 size_t *plenp;
1247 int *replacedp;
1249 size_t blen, len, nspaces;
1250 char *bp, *p, *t;
1253 * Each space in the source line printed by cscope represents an
1254 * arbitrary sequence of spaces, tabs, and comments.
1256 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1257 for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1258 if (*p == ' ')
1259 ++nspaces;
1262 * Allocate plenty of space:
1263 * the string, plus potential escaping characters;
1264 * nspaces + 2 copies of CSCOPE_RE_SPACE;
1265 * ^, $, nul terminator characters.
1267 *replacedp = 1;
1268 len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1269 GET_SPACE_RET(sp, bp, blen, len);
1271 p = *ptrnp;
1272 t = bp;
1274 *t++ = '^';
1275 memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
1276 t += sizeof(CSCOPE_RE_SPACE) - 1;
1278 for (len = *plenp; len > 0; ++p, --len)
1279 if (*p == ' ') {
1280 memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
1281 t += sizeof(CSCOPE_RE_SPACE) - 1;
1282 } else {
1283 if (strchr("\\^.[]$*+?()|{}", *p))
1284 *t++ = '\\';
1285 *t++ = *p;
1288 memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
1289 t += sizeof(CSCOPE_RE_SPACE) - 1;
1290 *t++ = '$';
1292 *ptrnp = bp;
1293 *plenp = t - bp;
1294 return (0);
1298 * re_error --
1299 * Report a regular expression error.
1301 * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1303 void
1304 re_error(sp, errcode, preg)
1305 SCR *sp;
1306 int errcode;
1307 regex_t *preg;
1309 size_t s;
1310 char *oe;
1312 s = regerror(errcode, preg, "", 0);
1313 if ((oe = malloc(s)) == NULL)
1314 msgq(sp, M_SYSERR, NULL);
1315 else {
1316 (void)regerror(errcode, preg, oe, s);
1317 msgq(sp, M_ERR, "RE error: %s", oe);
1318 free(oe);
1323 * re_sub --
1324 * Do the substitution for a regular expression.
/*
 * re_sub --
 *	Append the expansion of the replacement text (sp->repl, sp->repl_len)
 *	for one match to the build buffer.
 *
 *	ip	Text that the offsets in match are relative to.
 *	lbp, lbclenp, lblenp
 *		Build buffer, its used length and its allocated length.
 *		They are copied into locals on entry and written back just
 *		before the final return (0).
 *	match	regexec() results.  Entry 0 is used for & and entries 0-9
 *		for a backslash followed by a digit; an entry whose rm_so or
 *		rm_eo is -1 contributes nothing.
 *
 *	The replacement is scanned byte by byte.  Case conversion requested
 *	with \l, \L, \u and \U is applied inside OUTCH and ended by \e or \E.
 *	For bytes taken from the replacement text itself (OUTCH called with
 *	nltrans set) a K_CR or K_NL key value is not copied; instead the
 *	current buffer offset is recorded in sp->newl.
 *
 *	Returns 0 on success.  OUTCH expands NEEDNEWLINE and NEEDSP, whose
 *	failure paths execute return (1) directly.
 *
 *	NOTE(review): those early returns skip the write-back of lb, lbclen
 *	and lblen at the end of the function, so the caller may keep values
 *	from before the buffer was grown -- confirm against the REALLOC
 *	definition whether a stale pointer can reach the caller.
 */
1326 static int
1327 re_sub(sp, ip, lbp, lbclenp, lblenp, match)
1328 SCR *sp;
1329 char *ip; /* Input line. */
1330 char **lbp;
1331 size_t *lbclenp, *lblenp;
1332 regmatch_t match[10];
1334 enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1335 size_t lbclen, lblen; /* Local copies. */
1336 size_t mlen; /* Match length. */
1337 size_t rpl; /* Remaining replacement length. */
1338 char *rp; /* Replacement pointer. */
1339 int ch;
1340 int no; /* Match replacement offset. */
1341 char *p, *t; /* Buffer pointers. */
1342 char *lb; /* Local copies. */
1344 lb = *lbp; /* Get local copies. */
1345 lbclen = *lbclenp;
1346 lblen = *lblenp;
1349 * QUOTING NOTE:
1351 * There are some special sequences that vi provides in the
1352 * replacement patterns.
1353 * & string the RE matched (\& if nomagic set)
1354 * \# n-th regular subexpression
1355 * \E end \U, \L conversion
1356 * \e end \U, \L conversion
1357 * \l convert the next character to lower-case
1358 * \L convert to lower-case, until \E, \e, or end of replacement
1359 * \u convert the next character to upper-case
1360 * \U convert to upper-case, until \E, \e, or end of replacement
1362 * Otherwise, since this is the lowest level of replacement, discard
1363 * all escaping characters. This (hopefully) matches historic practice.
1365 #define OUTCH(ch, nltrans) { \
1366 CHAR_T __ch = (ch); \
1367 u_int __value = KEY_VAL(sp, __ch); \
1368 if (nltrans && (__value == K_CR || __value == K_NL)) { \
1369 NEEDNEWLINE(sp); \
1370 sp->newl[sp->newl_cnt++] = lbclen; \
1371 } else if (conv != C_NOTSET) { \
1372 switch (conv) { \
1373 case C_ONELOWER: \
1374 conv = C_NOTSET; \
1375 /* FALLTHROUGH */ \
1376 case C_LOWER: \
1377 if (isupper(__ch)) \
1378 __ch = tolower(__ch); \
1379 break; \
1380 case C_ONEUPPER: \
1381 conv = C_NOTSET; \
1382 /* FALLTHROUGH */ \
1383 case C_UPPER: \
1384 if (islower(__ch)) \
1385 __ch = toupper(__ch); \
1386 break; \
1387 default: \
1388 abort(); \
1391 NEEDSP(sp, 1, p); \
1392 *p++ = __ch; \
1393 ++lbclen; \
1395 conv = C_NOTSET;
1396 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1397 switch (ch = *rp++) {
1398 case '&':
1399 if (O_ISSET(sp, O_MAGIC)) {
1400 no = 0;
1401 goto subzero;
1403 break;
1404 case '\\':
1405 if (rpl == 0)
1406 break;
1407 --rpl;
1408 switch (ch = *rp) {
1409 case '&':
1410 ++rp;
1411 if (!O_ISSET(sp, O_MAGIC)) {
1412 no = 0;
1413 goto subzero;
1415 break;
1416 case '0': case '1': case '2': case '3': case '4':
1417 case '5': case '6': case '7': case '8': case '9':
1418 no = *rp++ - '0';
1419 subzero: if (match[no].rm_so == -1 ||
1420 match[no].rm_eo == -1)
1421 break;
1422 mlen = match[no].rm_eo - match[no].rm_so;
1423 for (t = ip + match[no].rm_so; mlen--; ++t)
1424 OUTCH(*t, 0);
1425 continue;
1426 case 'e':
1427 case 'E':
1428 ++rp;
1429 conv = C_NOTSET;
1430 continue;
1431 case 'l':
1432 ++rp;
1433 conv = C_ONELOWER;
1434 continue;
1435 case 'L':
1436 ++rp;
1437 conv = C_LOWER;
1438 continue;
1439 case 'u':
1440 ++rp;
1441 conv = C_ONEUPPER;
1442 continue;
1443 case 'U':
1444 ++rp;
1445 conv = C_UPPER;
1446 continue;
1447 default:
1448 ++rp;
1449 break;
1452 OUTCH(ch, 1);
1455 *lbp = lb; /* Update caller's information. */
1456 *lbclenp = lbclen;
1457 *lblenp = lblen;
1458 return (0);