common/log.c: minor whitespace change
[nvi.git] / ex / ex_subst.c
blob61711afe310c41572a1baf478e5f76aa28715009
1 /*-
2 * Copyright (c) 1992, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
7 * See the LICENSE file for redistribution information.
8 */
10 #include "config.h"
12 #ifndef lint
13 static const char sccsid[] = "$Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp $ (Berkeley) $Date: 2002/02/09 21:18:23 $";
14 #endif /* not lint */
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
29 #include "../common/common.h"
30 #include "../vi/vi.h"
32 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
33 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
35 static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
36 static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
37 static int re_sub __P((SCR *,
38 CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
39 static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
40 static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));
43 * ex_s --
44 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
46 * Substitute on lines matching a pattern.
48 * PUBLIC: int ex_s __P((SCR *, EXCMD *));
50 int
51 ex_s(SCR *sp, EXCMD *cmdp)
53 regex_t *re;
54 size_t blen, len;
55 u_int flags;
56 int delim;
57 CHAR_T *bp, *p, *ptrn, *rep, *t;
60 * Skip leading white space.
62 * !!!
63 * Historic vi allowed any non-alphanumeric to serve as the
64 * substitution command delimiter.
66 * !!!
67 * If the arguments are empty, it's the same as &, i.e. we
68 * repeat the last substitution.
70 if (cmdp->argc == 0)
71 goto subagain;
72 for (p = cmdp->argv[0]->bp,
73 len = cmdp->argv[0]->len; len > 0; --len, ++p) {
74 if (!isblank(*p))
75 break;
77 if (len == 0)
78 subagain: return (ex_subagain(sp, cmdp));
80 delim = *p++;
81 if (isalnum(delim) || delim == '\\')
82 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
85 * !!!
86 * The full-blown substitute command reset the remembered
87 * state of the 'c' and 'g' suffices.
89 sp->c_suffix = sp->g_suffix = 0;
92 * Get the pattern string, toss escaping characters.
94 * !!!
95 * Historic vi accepted any of the following forms:
97 * :s/abc/def/ change "abc" to "def"
98 * :s/abc/def change "abc" to "def"
99 * :s/abc/ delete "abc"
100 * :s/abc delete "abc"
102 * QUOTING NOTE:
104 * Only toss an escaping character if it escapes a delimiter.
105 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
106 * would be nice to be more regular, i.e. for each layer of
107 * escaping a single escaping character is removed, but that's
108 * not how the historic vi worked.
110 for (ptrn = t = p;;) {
111 if (p[0] == '\0' || p[0] == delim) {
112 if (p[0] == delim)
113 ++p;
115 * !!!
116 * Nul terminate the pattern string -- it's passed
117 * to regcomp which doesn't understand anything else.
119 *t = '\0';
120 break;
122 if (p[0] == '\\')
123 if (p[1] == delim)
124 ++p;
125 else if (p[1] == '\\')
126 *t++ = *p++;
127 *t++ = *p++;
131 * If the pattern string is empty, use the last RE (not just the
132 * last substitution RE).
134 if (*ptrn == '\0') {
135 if (sp->re == NULL) {
136 ex_emsg(sp, NULL, EXM_NOPREVRE);
137 return (1);
140 /* Re-compile the RE if necessary. */
141 if (!F_ISSET(sp, SC_RE_SEARCH) &&
142 re_compile(sp, sp->re, sp->re_len,
143 NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
144 return (1);
145 flags = 0;
146 } else {
148 * !!!
149 * Compile the RE. Historic practice is that substitutes set
150 * the search direction as well as both substitute and search
151 * RE's. We compile the RE twice, as we don't want to bother
152 * ref counting the pattern string and (opaque) structure.
154 if (re_compile(sp, ptrn, t - ptrn, &sp->re,
155 &sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
156 return (1);
157 if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
158 &sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
159 return (1);
161 flags = SUB_FIRST;
162 sp->searchdir = FORWARD;
164 re = &sp->re_c;
167 * Get the replacement string.
169 * The special character & (\& if O_MAGIC not set) matches the
170 * entire RE. No handling of & is required here, it's done by
171 * re_sub().
173 * The special character ~ (\~ if O_MAGIC not set) inserts the
174 * previous replacement string into this replacement string.
175 * Count ~'s to figure out how much space we need. We could
176 * special case nonexistent last patterns or whether or not
177 * O_MAGIC is set, but it's probably not worth the effort.
179 * QUOTING NOTE:
181 * Only toss an escaping character if it escapes a delimiter or
182 * if O_MAGIC is set and it escapes a tilde.
184 * !!!
185 * If the entire replacement pattern is "%", then use the last
186 * replacement pattern. This semantic was added to vi in System
187 * V and then percolated elsewhere, presumably around the time
188 * that it was added to their version of ed(1).
190 if (p[0] == L('\0') || p[0] == delim) {
191 if (p[0] == delim)
192 ++p;
193 if (sp->repl != NULL)
194 free(sp->repl);
195 sp->repl = NULL;
196 sp->repl_len = 0;
197 } else if (p[0] == L('%') && (p[1] == L('\0') || p[1] == delim))
198 p += p[1] == delim ? 2 : 1;
199 else {
200 for (rep = p, len = 0;
201 p[0] != L('\0') && p[0] != delim; ++p, ++len)
202 if (p[0] == L('~'))
203 len += sp->repl_len;
204 GET_SPACE_RETW(sp, bp, blen, len);
205 for (t = bp, len = 0, p = rep;;) {
206 if (p[0] == L('\0') || p[0] == delim) {
207 if (p[0] == delim)
208 ++p;
209 break;
211 if (p[0] == L('\\')) {
212 if (p[1] == delim)
213 ++p;
214 else if (p[1] == L('\\')) {
215 *t++ = *p++;
216 ++len;
217 } else if (p[1] == L('~')) {
218 ++p;
219 if (!O_ISSET(sp, O_MAGIC))
220 goto tilde;
222 } else if (p[0] == L('~') && O_ISSET(sp, O_MAGIC)) {
223 tilde: ++p;
224 MEMCPYW(t, sp->repl, sp->repl_len);
225 t += sp->repl_len;
226 len += sp->repl_len;
227 continue;
229 *t++ = *p++;
230 ++len;
232 if ((sp->repl_len = len) != 0) {
233 if (sp->repl != NULL)
234 free(sp->repl);
235 if ((sp->repl = malloc(len * sizeof(CHAR_T))) == NULL) {
236 msgq(sp, M_SYSERR, NULL);
237 FREE_SPACEW(sp, bp, blen);
238 return (1);
240 MEMCPYW(sp->repl, bp, len);
242 FREE_SPACEW(sp, bp, blen);
244 return (s(sp, cmdp, p, re, flags));
248 * ex_subagain --
249 * [line [,line]] & [cgr] [count] [#lp]]
251 * Substitute using the last substitute RE and replacement pattern.
253 * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
256 ex_subagain(SCR *sp, EXCMD *cmdp)
258 if (sp->subre == NULL) {
259 ex_emsg(sp, NULL, EXM_NOPREVRE);
260 return (1);
262 if (!F_ISSET(sp, SC_RE_SUBST) &&
263 re_compile(sp, sp->subre, sp->subre_len,
264 NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
265 return (1);
266 return (s(sp,
267 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
271 * ex_subtilde --
272 * [line [,line]] ~ [cgr] [count] [#lp]]
274 * Substitute using the last RE and last substitute replacement pattern.
276 * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
279 ex_subtilde(SCR *sp, EXCMD *cmdp)
281 if (sp->re == NULL) {
282 ex_emsg(sp, NULL, EXM_NOPREVRE);
283 return (1);
285 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
286 sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
287 return (1);
288 return (s(sp,
289 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
293 * s --
294 * Do the substitution. This stuff is *really* tricky. There are lots of
295 * special cases, and general nastiness. Don't mess with it unless you're
296 * pretty confident.
298 * The nasty part of the substitution is what happens when the replacement
299 * string contains newlines. It's a bit tricky -- consider the information
300 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
301 * to build a set of newline offsets which we use to break the line up later,
302 * when the replacement is done. Don't change it unless you're *damned*
303 * confident.
/*
 * Helper macros for s() and re_sub().  All three expand to a braced block
 * (call sites may omit the trailing semicolon) and execute "return (1)" in
 * the calling function when the reallocation leaves the pointer NULL.
 * BUILD and NEEDSP operate on the caller's local variables lb (build
 * buffer), lbclen (characters used) and lblen (characters allocated).
 *
 * NEEDNEWLINE: grow sp->newl by 25 slots when it is full.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOC(sp, (sp)->newl, size_t *,			\
		    (sp)->newl_len * sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}

/* BUILD: append len characters starting at l to the build buffer. */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	MEMCPYW(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}

/*
 * NEEDSP: make room for len more characters and set pnt to the current
 * end of the build buffer; lbclen is not advanced.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	(pnt) = lb + lbclen;						\
}
/*
 * s(sp, cmdp, s, re, flags): shared back end of the substitute commands.
 *
 *	s	trailing flag/count text, or NULL for none; after option
 *		parsing the same variable is reused as the current line pointer
 *	re	compiled RE to match with (replaced by &sp->re_c if the 'r'
 *		flag is given)
 *	flags	SUB_FIRST and/or SUB_MUSTSETR, tested with LF_ISSET
 *
 * For every line from cmdp->addr1 through cmdp->addr2 the line is fetched
 * with db_get(), matched repeatedly with regexec(), rebuilt in the lb build
 * buffer and written back with db_set()/db_insert().  Returns 0 on success
 * and 1 on error.
 */
342 static int
343 s(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags)
345 EVENT ev;
346 MARK from, to;
347 TEXTH tiq;
348 db_recno_t elno, lno, slno;
349 u_long ul;
350 regmatch_t match[10];
351 size_t blen, cnt, last, lbclen, lblen, len, llen;
352 size_t offset, saved_offset, scno;
353 int cflag, lflag, nflag, pflag, rflag;
354 int didsub, do_eol_match, eflags, empty_ok, eval;
355 int linechanged, matched, quit, rval;
356 CHAR_T *p, *lb, *bp;
357 enum nresult nret;
359 NEEDFILE(sp, cmdp);
/* Remember the starting cursor so the final cursor fix-up can compare. */
361 slno = sp->lno;
362 scno = sp->cno;
365 * !!!
366 * Historically, the 'g' and 'c' suffices were always toggled as flags,
367 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
368 * not set, they were initialized to 0 for all substitute commands. If
369 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
370 * specified substitute/replacement patterns (see ex_s()).
372 if (!O_ISSET(sp, O_EDCOMPATIBLE))
373 sp->c_suffix = sp->g_suffix = 0;
376 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
377 * it only displayed the last change. I'd disallow them, but they are
378 * useful in combination with the [v]global commands. In the current
379 * model the problem is combining them with the 'c' flag -- the screen
380 * would have to flip back and forth between the confirm screen and the
381 * ex print screen, which would be pretty awful. We do display all
382 * changes, though, for what that's worth.
384 * !!!
385 * Historic vi was fairly strict about the order of "options", the
386 * count, and "flags". I'm somewhat fuzzy on the difference between
387 * options and flags, anyway, so this is a simpler approach, and we
388 * just take it them in whatever order the user gives them. (The ex
389 * usage statement doesn't reflect this.)
391 cflag = lflag = nflag = pflag = rflag = 0;
392 if (s == NULL)
393 goto noargs;
/* lno doubles as the "count already seen" marker while parsing options. */
394 for (lno = OOBLNO; *s != '\0'; ++s)
395 switch (*s) {
396 case ' ':
397 case '\t':
398 continue;
399 case '+':
400 ++cmdp->flagoff;
401 break;
402 case '-':
403 --cmdp->flagoff;
404 break;
405 case '0': case '1': case '2': case '3': case '4':
406 case '5': case '6': case '7': case '8': case '9':
407 if (lno != OOBLNO)
408 goto usage;
409 errno = 0;
410 nret = nget_uslong(sp, &ul, s, &s, 10);
411 lno = ul;
412 if (*s == '\0') /* Loop increment correction. */
413 --s;
414 if (nret != NUM_OK) {
415 if (nret == NUM_OVER)
416 msgq(sp, M_ERR, "153|Count overflow");
417 else if (nret == NUM_UNDER)
418 msgq(sp, M_ERR, "154|Count underflow");
419 else
420 msgq(sp, M_SYSERR, NULL);
421 return (1);
424 * In historic vi, the count was inclusive from the
425 * second address.
427 cmdp->addr1.lno = cmdp->addr2.lno;
428 cmdp->addr2.lno += lno - 1;
429 if (!db_exist(sp, cmdp->addr2.lno) &&
430 db_last(sp, &cmdp->addr2.lno))
431 return (1);
432 break;
433 case '#':
434 nflag = 1;
435 break;
436 case 'c':
437 sp->c_suffix = !sp->c_suffix;
439 /* Ex text structure initialization. */
/*
 * NOTE(review): tiq is only initialized here, i.e. when a 'c' is parsed in
 * this call while SC_EX is set, but the ex confirmation path below passes
 * &tiq to ex_txt() whenever sp->c_suffix is set.  With O_EDCOMPATIBLE the
 * suffix is not cleared on entry -- confirm tiq cannot be used uninitialized.
 */
440 if (F_ISSET(sp, SC_EX)) {
441 memset(&tiq, 0, sizeof(TEXTH));
442 CIRCLEQ_INIT(&tiq);
444 break;
445 case 'g':
446 sp->g_suffix = !sp->g_suffix;
447 break;
448 case 'l':
449 lflag = 1;
450 break;
451 case 'p':
452 pflag = 1;
453 break;
454 case 'r':
455 if (LF_ISSET(SUB_FIRST)) {
456 msgq(sp, M_ERR,
457 "155|Regular expression specified; r flag meaningless");
458 return (1);
460 if (!F_ISSET(sp, SC_RE_SEARCH)) {
461 ex_emsg(sp, NULL, EXM_NOPREVRE);
462 return (1);
464 rflag = 1;
465 re = &sp->re_c;
466 break;
467 default:
468 goto usage;
/* SUB_MUSTSETR callers supplied no pattern, so the 'r' flag is mandatory. */
471 if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) {
472 usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
473 return (1);
476 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
477 msgq(sp, M_ERR,
478 "156|The #, l and p flags may not be combined with the c flag in vi mode");
479 return (1);
483 * bp: if interactive, line cache
484 * blen: if interactive, line cache length
485 * lb: build buffer pointer.
486 * lbclen: current length of built buffer.
487 * lblen; length of build buffer.
489 bp = lb = NULL;
490 blen = lbclen = lblen = 0;
492 /* For each line... */
493 lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
494 for (matched = quit = 0,
495 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
497 /* Someone's unhappy, time to stop. */
498 if (INTERRUPTED(sp))
499 break;
501 /* Get the line. */
502 if (db_get(sp, lno, DBG_FATAL, &s, &llen))
503 goto err;
506 * Make a local copy if doing confirmation -- when calling
507 * the confirm routine we're likely to lose the cached copy.
509 if (sp->c_suffix) {
510 if (bp == NULL) {
511 GET_SPACE_RETW(sp, bp, blen, llen);
512 } else
513 ADD_SPACE_RETW(sp, bp, blen, llen);
514 MEMCPYW(bp, s, llen);
515 s = bp;
518 /* Start searching from the beginning. */
519 offset = 0;
520 len = llen;
522 /* Reset the build buffer offset. */
523 lbclen = 0;
525 /* Reset empty match flag. */
526 empty_ok = 1;
529 * We don't want to have to do a setline if the line didn't
530 * change -- keep track of whether or not this line changed.
531 * If doing confirmations, don't want to keep setting the
532 * line if change is refused -- keep track of substitutions.
534 didsub = linechanged = 0;
536 /* New line, do an EOL match. */
537 do_eol_match = 1;
539 /* It's not nul terminated, but we pretend it is. */
540 eflags = REG_STARTEND;
543 * The search area is from s + offset to the EOL.
545 * Generally, match[0].rm_so is the offset of the start
546 * of the match from the start of the search, and offset
547 * is the offset of the start of the last search.
549 nextmatch: match[0].rm_so = 0;
550 match[0].rm_eo = len;
552 /* Get the next match. */
553 eval = regexec(re, s + offset, 10, match, eflags);
556 * There wasn't a match or if there was an error, deal with
557 * it. If there was a previous match in this line, resolve
558 * the changes into the database. Otherwise, just move on.
560 if (eval == REG_NOMATCH)
561 goto endmatch;
562 if (eval != 0) {
563 re_error(sp, eval, re);
564 goto err;
566 matched = 1;
568 /* Only the first search can match an anchored expression. */
569 eflags |= REG_NOTBOL;
572 * !!!
573 * It's possible to match 0-length strings -- for example, the
574 * command s;a*;X;, when matched against the string "aabb" will
575 * result in "XbXbX", i.e. the matches are "aa", the space
576 * between the b's and the space between the b's and the end of
577 * the string. There is a similar space between the beginning
578 * of the string and the a's. The rule that we use (because vi
579 * historically used it) is that any 0-length match, occurring
580 * immediately after a match, is ignored. Otherwise, the above
581 * example would have resulted in "XXbXbX". Another example is
582 * incorrectly using " *" to replace groups of spaces with one
583 * space.
585 * The way we do this is that if we just had a successful match,
586 * the starting offset does not skip characters, and the match
587 * is empty, ignore the match and move forward. If there's no
588 * more characters in the string, we were attempting to match
589 * after the last character, so quit.
591 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
592 empty_ok = 1;
593 if (len == 0)
594 goto endmatch;
595 BUILD(sp, s + offset, 1)
596 ++offset;
597 --len;
598 goto nextmatch;
601 /* Confirm change. */
602 if (sp->c_suffix) {
604 * Set the cursor position for confirmation. Note,
605 * if we matched on a '$', the cursor may be past
606 * the end of line.
608 from.lno = to.lno = lno;
609 from.cno = match[0].rm_so + offset;
610 to.cno = match[0].rm_eo + offset;
612 * Both ex and vi have to correct for a change before
613 * the first character in the line.
615 if (llen == 0)
616 from.cno = to.cno = 0;
617 if (F_ISSET(sp, SC_VI)) {
619 * Only vi has to correct for a change after
620 * the last character in the line.
622 * XXX
623 * It would be nice to change the vi code so
624 * that we could display a cursor past EOL.
/*
 * NOTE(review): llen is a size_t, so for an empty line (llen == 0) both
 * comparisons below are true and llen - 1 wraps -- confirm whether the
 * clamp should be skipped when llen is 0.
 */
626 if (to.cno >= llen)
627 to.cno = llen - 1;
628 if (from.cno >= llen)
629 from.cno = llen - 1;
631 sp->lno = from.lno;
632 sp->cno = from.cno;
633 if (vs_refresh(sp, 1))
634 goto err;
636 vs_update(sp, msg_cat(sp,
637 "169|Confirm change? [n]", NULL), NULL);
639 if (v_event_get(sp, &ev, 0, 0))
640 goto err;
641 switch (ev.e_event) {
642 case E_CHARACTER:
643 break;
644 case E_EOF:
645 case E_ERR:
646 case E_INTERRUPT:
647 goto lquit;
648 default:
649 v_event_err(sp, &ev);
650 goto lquit;
652 } else {
653 if (ex_print(sp, cmdp, &from, &to, 0) ||
654 ex_scprint(sp, &from, &to))
655 goto lquit;
656 if (ex_txt(sp, &tiq, 0, TXT_CR))
657 goto err;
658 ev.e_c = tiq.cqh_first->lb[0];
/* Anything other than CH_YES or CH_QUIT is treated as a refusal. */
661 switch (ev.e_c) {
662 case CH_YES:
663 break;
664 default:
665 case CH_NO:
666 didsub = 0;
667 BUILD(sp, s +offset, match[0].rm_eo);
668 goto skip;
669 case CH_QUIT:
670 /* Set the quit/interrupted flags. */
671 lquit: quit = 1;
672 F_SET(sp->gp, G_INTERRUPTED);
675 * Resolve any changes, then return to (and
676 * exit from) the main loop.
678 goto endmatch;
683 * Set the cursor to the last position changed, converting
684 * from 1-based to 0-based.
686 sp->lno = lno;
687 sp->cno = match[0].rm_so;
689 /* Copy the bytes before the match into the build buffer. */
690 BUILD(sp, s + offset, match[0].rm_so);
692 /* Substitute the matching bytes. */
693 didsub = 1;
694 if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
695 goto err;
697 /* Set the change flag so we know this line was modified. */
698 linechanged = 1;
700 /* Move past the matched bytes. */
701 skip: offset += match[0].rm_eo;
702 len -= match[0].rm_eo;
704 /* A match cannot be followed by an empty pattern. */
705 empty_ok = 0;
708 * If doing a global change with confirmation, we have to
709 * update the screen. The basic idea is to store the line
710 * so the screen update routines can find it, and restart.
712 if (didsub && sp->c_suffix && sp->g_suffix) {
714 * The new search offset will be the end of the
715 * modified line.
717 saved_offset = lbclen;
719 /* Copy the rest of the line. */
720 if (len)
721 BUILD(sp, s + offset, len)
723 /* Set the new offset. */
724 offset = saved_offset;
726 /* Store inserted lines, adjusting the build buffer. */
727 last = 0;
728 if (sp->newl_cnt) {
729 for (cnt = 0;
730 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
731 if (db_insert(sp, lno,
732 lb + last, sp->newl[cnt] - last))
733 goto err;
734 last = sp->newl[cnt] + 1;
735 ++sp->rptlines[L_ADDED];
737 lbclen -= last;
738 offset -= last;
739 sp->newl_cnt = 0;
742 /* Store and retrieve the line. */
743 if (db_set(sp, lno, lb + last, lbclen))
744 goto err;
745 if (db_get(sp, lno, DBG_FATAL, &s, &llen))
746 goto err;
747 ADD_SPACE_RETW(sp, bp, blen, llen)
748 MEMCPYW(bp, s, llen);
749 s = bp;
750 len = llen - offset;
752 /* Restart the build. */
753 lbclen = 0;
754 BUILD(sp, s, offset);
757 * If we haven't already done the after-the-string
758 * match, do one. Set REG_NOTEOL so the '$' pattern
759 * only matches once.
761 if (!do_eol_match)
762 goto endmatch;
763 if (offset == len) {
764 do_eol_match = 0;
765 eflags |= REG_NOTEOL;
767 goto nextmatch;
771 * If it's a global:
773 * If at the end of the string, do a test for the after
774 * the string match. Set REG_NOTEOL so the '$' pattern
775 * only matches once.
777 if (sp->g_suffix && do_eol_match) {
778 if (len == 0) {
779 do_eol_match = 0;
780 eflags |= REG_NOTEOL;
782 goto nextmatch;
785 endmatch: if (!linechanged)
786 continue;
788 /* Copy any remaining bytes into the build buffer. */
789 if (len)
790 BUILD(sp, s + offset, len)
792 /* Store inserted lines, adjusting the build buffer. */
793 last = 0;
794 if (sp->newl_cnt) {
795 for (cnt = 0;
796 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
797 if (db_insert(sp,
798 lno, lb + last, sp->newl[cnt] - last))
799 goto err;
800 last = sp->newl[cnt] + 1;
801 ++sp->rptlines[L_ADDED];
803 lbclen -= last;
804 sp->newl_cnt = 0;
807 /* Store the changed line. */
808 if (db_set(sp, lno, lb + last, lbclen))
809 goto err;
811 /* Update changed line counter. */
812 if (sp->rptlchange != lno) {
813 sp->rptlchange = lno;
814 ++sp->rptlines[L_CHANGED];
818 * !!!
819 * Display as necessary. Historic practice is to only
820 * display the last line of a line split into multiple
821 * lines.
823 if (lflag || nflag || pflag) {
824 from.lno = to.lno = lno;
825 from.cno = to.cno = 0;
826 if (lflag)
827 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
828 if (nflag)
829 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
830 if (pflag)
831 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
836 * !!!
837 * Historically, vi attempted to leave the cursor at the same place if
838 * the substitution was done at the current cursor position. Otherwise
839 * it moved it to the first non-blank of the last line changed. There
840 * were some problems: for example, :s/$/foo/ with the cursor on the
841 * last character of the line left the cursor on the last character, or
842 * the & command with multiple occurrences of the matching string in the
843 * line usually left the cursor in a fairly random position.
845 * We try to do the same thing, with the exception that if the user is
846 * doing substitution with confirmation, we move to the last line about
847 * which the user was consulted, as opposed to the last line that they
848 * actually changed. This prevents a screen flash if the user doesn't
849 * change many of the possible lines.
851 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
852 sp->cno = 0;
853 (void)nonblank(sp, sp->lno, &sp->cno);
857 * If not in a global command, and nothing matched, say so.
858 * Else, if none of the lines displayed, put something up.
860 rval = 0;
861 if (!matched) {
862 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
863 msgq(sp, M_ERR, "157|No match found");
864 goto err;
866 } else if (!lflag && !nflag && !pflag)
867 F_SET(cmdp, E_AUTOPRINT);
/* The if (0) lets the success path skip the err label and share the cleanup. */
869 if (0) {
870 err: rval = 1;
873 if (bp != NULL)
874 FREE_SPACEW(sp, bp, blen);
875 if (lb != NULL)
876 free(lb);
877 return (rval);
881 * re_compile --
882 * Compile the RE.
884 * PUBLIC: int re_compile __P((SCR *,
885 * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
888 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
890 size_t len;
891 int reflags, replaced, rval;
892 CHAR_T *p;
894 /* Set RE flags. */
895 reflags = 0;
896 if (LF_ISSET(SEARCH_EXTEND))
897 reflags |= REG_EXTENDED;
898 if (LF_ISSET(SEARCH_IC))
899 reflags |= REG_ICASE;
900 if (LF_ISSET(SEARCH_LITERAL))
901 reflags |= REG_NOSPEC;
902 if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
903 if (O_ISSET(sp, O_EXTENDED))
904 reflags |= REG_EXTENDED;
905 if (O_ISSET(sp, O_IGNORECASE))
906 reflags |= REG_ICASE;
907 if (O_ISSET(sp, O_ICLOWER))
908 goto iclower;
910 if (LF_ISSET(SEARCH_ICL)) {
911 iclower: for (p = ptrn, len = plen; len > 0; ++p, --len)
912 if (isupper(*p))
913 break;
914 if (len == 0)
915 reflags |= REG_ICASE;
918 /* If we're replacing a saved value, clear the old one. */
919 if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
920 regfree(&sp->re_c);
921 F_CLR(sp, SC_RE_SEARCH);
923 if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
924 regfree(&sp->subre_c);
925 F_CLR(sp, SC_RE_SUBST);
929 * If we're saving the string, it's a pattern we haven't seen before,
930 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
931 * later recompilation. Free any previously saved value.
933 if (ptrnp != NULL) {
934 replaced = 0;
935 if (LF_ISSET(SEARCH_CSCOPE)) {
936 if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
937 return (1);
939 * XXX
940 * Currently, the match-any-<blank> expression used in
941 * re_cscope_conv() requires extended RE's. This may
942 * not be right or safe.
944 reflags |= REG_EXTENDED;
945 } else if (LF_ISSET(SEARCH_TAG)) {
946 if (re_tag_conv(sp, &ptrn, &plen, &replaced))
947 return (1);
948 } else if (!LF_ISSET(SEARCH_LITERAL))
949 if (re_conv(sp, &ptrn, &plen, &replaced))
950 return (1);
952 /* Discard previous pattern. */
953 if (*ptrnp != NULL) {
954 free(*ptrnp);
955 *ptrnp = NULL;
957 if (lenp != NULL)
958 *lenp = plen;
961 * Copy the string into allocated memory.
963 * XXX
964 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
965 * for now. There's just no other solution.
967 MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
968 if (*ptrnp != NULL) {
969 MEMCPYW(*ptrnp, ptrn, plen);
970 (*ptrnp)[plen] = '\0';
973 /* Free up conversion-routine-allocated memory. */
974 if (replaced)
975 FREE_SPACEW(sp, ptrn, 0);
977 if (*ptrnp == NULL)
978 return (1);
980 ptrn = *ptrnp;
984 * XXX
985 * Regcomp isn't 8-bit clean, so we just lost if the pattern
986 * contained a nul. Bummer!
988 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
989 if (LF_ISSET(SEARCH_MSG))
990 re_error(sp, rval, rep);
991 return (1);
994 if (LF_ISSET(SEARCH_CSEARCH))
995 F_SET(sp, SC_RE_SEARCH);
996 if (LF_ISSET(SEARCH_CSUBST))
997 F_SET(sp, SC_RE_SUBST);
999 return (0);
1003 * re_conv --
1004 * Convert vi's regular expressions into something that the
1005 * the POSIX 1003.2 RE functions can handle.
1007 * There are three conversions we make to make vi's RE's (specifically
1008 * the global, search, and substitute patterns) work with POSIX RE's.
1010 * 1: If O_MAGIC is not set, strip backslashes from the magic character
1011 * set (.[*~) that have them, and add them to the ones that don't.
1012 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1013 * from the last substitute command's replacement string. If O_MAGIC
1014 * is set, it's the string "~".
1015 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1016 * new RE escapes.
1018 * !!!/XXX
1019 * This doesn't exactly match the historic behavior of vi because we do
1020 * the ~ substitution before calling the RE engine, so magic characters
1021 * in the replacement string will be expanded by the RE engine, and they
1022 * weren't historically. It's a bug.
1024 static int
1025 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1027 size_t blen, len, needlen;
1028 int magic;
1029 CHAR_T *bp, *p, *t;
1032 * First pass through, we figure out how much space we'll need.
1033 * We do it in two passes, on the grounds that most of the time
1034 * the user is doing a search and won't have magic characters.
1035 * That way we can skip most of the memory allocation and copies.
1037 magic = 0;
1038 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1039 switch (*p) {
1040 case '\\':
1041 if (len > 1) {
1042 --len;
1043 switch (*++p) {
1044 case '<':
1045 magic = 1;
1046 needlen += RE_WSTART_LEN + 1;
1047 break;
1048 case '>':
1049 magic = 1;
1050 needlen += RE_WSTOP_LEN + 1;
1051 break;
1052 case '~':
1053 if (!O_ISSET(sp, O_MAGIC)) {
1054 magic = 1;
1055 needlen += sp->repl_len;
1057 break;
1058 case '.':
1059 case '[':
1060 case '*':
1061 if (!O_ISSET(sp, O_MAGIC)) {
1062 magic = 1;
1063 needlen += 1;
1065 break;
1066 default:
1067 needlen += 2;
1069 } else
1070 needlen += 1;
1071 break;
1072 case '~':
1073 if (O_ISSET(sp, O_MAGIC)) {
1074 magic = 1;
1075 needlen += sp->repl_len;
1077 break;
1078 case '.':
1079 case '[':
1080 case '*':
1081 if (!O_ISSET(sp, O_MAGIC)) {
1082 magic = 1;
1083 needlen += 2;
1085 break;
1086 default:
1087 needlen += 1;
1088 break;
1091 if (!magic) {
1092 *replacedp = 0;
1093 return (0);
1096 /* Get enough memory to hold the final pattern. */
1097 *replacedp = 1;
1098 GET_SPACE_RETW(sp, bp, blen, needlen);
1100 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1101 switch (*p) {
1102 case '\\':
1103 if (len > 1) {
1104 --len;
1105 switch (*++p) {
1106 case '<':
1107 MEMCPY(t,
1108 RE_WSTART, RE_WSTART_LEN);
1109 t += RE_WSTART_LEN;
1110 break;
1111 case '>':
1112 MEMCPY(t,
1113 RE_WSTOP, RE_WSTOP_LEN);
1114 t += RE_WSTOP_LEN;
1115 break;
1116 case '~':
1117 if (O_ISSET(sp, O_MAGIC))
1118 *t++ = '~';
1119 else {
1120 MEMCPYW(t,
1121 sp->repl, sp->repl_len);
1122 t += sp->repl_len;
1124 break;
1125 case '.':
1126 case '[':
1127 case '*':
1128 if (O_ISSET(sp, O_MAGIC))
1129 *t++ = '\\';
1130 *t++ = *p;
1131 break;
1132 default:
1133 *t++ = '\\';
1134 *t++ = *p;
1136 } else
1137 *t++ = '\\';
1138 break;
1139 case '~':
1140 if (O_ISSET(sp, O_MAGIC)) {
1141 MEMCPYW(t, sp->repl, sp->repl_len);
1142 t += sp->repl_len;
1143 } else
1144 *t++ = '~';
1145 break;
1146 case '.':
1147 case '[':
1148 case '*':
1149 if (!O_ISSET(sp, O_MAGIC))
1150 *t++ = '\\';
1151 *t++ = *p;
1152 break;
1153 default:
1154 *t++ = *p;
1155 break;
1158 *ptrnp = bp;
1159 *plenp = t - bp;
1160 return (0);
1164 * re_tag_conv --
1165 * Convert a tags search path into something that the POSIX
1166 * 1003.2 RE functions can handle.
1168 static int
1169 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1171 size_t blen, len;
1172 int lastdollar;
1173 CHAR_T *bp, *p, *t;
1175 len = *plenp;
1177 /* Max memory usage is 2 times the length of the string. */
1178 *replacedp = 1;
1179 GET_SPACE_RETW(sp, bp, blen, len * 2);
1181 p = *ptrnp;
1182 t = bp;
1184 /* If the last character is a '/' or '?', we just strip it. */
1185 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1186 --len;
1188 /* If the next-to-last or last character is a '$', it's magic. */
1189 if (len > 0 && p[len - 1] == '$') {
1190 --len;
1191 lastdollar = 1;
1192 } else
1193 lastdollar = 0;
1195 /* If the first character is a '/' or '?', we just strip it. */
1196 if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1197 ++p;
1198 --len;
1201 /* If the first or second character is a '^', it's magic. */
1202 if (p[0] == '^') {
1203 *t++ = *p++;
1204 --len;
1208 * Escape every other magic character we can find, meanwhile stripping
1209 * the backslashes ctags inserts when escaping the search delimiter
1210 * characters.
1212 for (; len > 0; --len) {
1213 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1214 ++p;
1215 --len;
1216 } else if (strchr("^.[]$*", p[0]))
1217 *t++ = '\\';
1218 *t++ = *p++;
1220 if (lastdollar)
1221 *t++ = '$';
1223 *ptrnp = bp;
1224 *plenp = t - bp;
1225 return (0);
1229 * re_cscope_conv --
1230 * Convert a cscope search path into something that the POSIX
1231 * 1003.2 RE functions can handle.
1233 static int
1234 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1236 size_t blen, len, nspaces;
1237 CHAR_T *bp, *t;
1238 CHAR_T *p;
1239 CHAR_T *wp;
1240 size_t wlen;
1243 * Each space in the source line printed by cscope represents an
1244 * arbitrary sequence of spaces, tabs, and comments.
1246 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1247 #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1
1248 CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1249 for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1250 if (*p == ' ')
1251 ++nspaces;
1254 * Allocate plenty of space:
1255 * the string, plus potential escaping characters;
1256 * nspaces + 2 copies of CSCOPE_RE_SPACE;
1257 * ^, $, nul terminator characters.
1259 *replacedp = 1;
1260 len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1261 GET_SPACE_RETW(sp, bp, blen, len);
1263 p = *ptrnp;
1264 t = bp;
1266 *t++ = '^';
1267 MEMCPYW(t, wp, wlen);
1268 t += wlen;
1270 for (len = *plenp; len > 0; ++p, --len)
1271 if (*p == ' ') {
1272 MEMCPYW(t, wp, wlen);
1273 t += wlen;
1274 } else {
1275 if (strchr("\\^.[]$*+?()|{}", *p))
1276 *t++ = '\\';
1277 *t++ = *p;
1280 MEMCPYW(t, wp, wlen);
1281 t += wlen;
1282 *t++ = '$';
1284 *ptrnp = bp;
1285 *plenp = t - bp;
1286 return (0);
1290 * re_error --
1291 * Report a regular expression error.
1293 * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1295 void
1296 re_error(SCR *sp, int errcode, regex_t *preg)
1298 size_t s;
1299 char *oe;
1301 s = regerror(errcode, preg, "", 0);
1302 if ((oe = malloc(s)) == NULL)
1303 msgq(sp, M_SYSERR, NULL);
1304 else {
1305 (void)regerror(errcode, preg, oe, s);
1306 msgq(sp, M_ERR, "RE error: %s", oe);
1307 free(oe);
1312 * re_sub --
1313 * Do the substitution for a regular expression.
1315 static int
1316 re_sub(SCR *sp, CHAR_T *ip, CHAR_T **lbp, size_t *lbclenp, size_t *lblenp, regmatch_t *match)
1318 /* Input line. */
1323 enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1324 size_t lbclen, lblen; /* Local copies. */
1325 size_t mlen; /* Match length. */
1326 size_t rpl; /* Remaining replacement length. */
1327 CHAR_T *rp; /* Replacement pointer. */
1328 int ch;
1329 int no; /* Match replacement offset. */
1330 CHAR_T *p, *t; /* Buffer pointers. */
1331 CHAR_T *lb; /* Local copies. */
1333 lb = *lbp; /* Get local copies. */
1334 lbclen = *lbclenp;
1335 lblen = *lblenp;
1338 * QUOTING NOTE:
1340 * There are some special sequences that vi provides in the
1341 * replacement patterns.
1342 * & string the RE matched (\& if nomagic set)
1343 * \# n-th regular subexpression
1344 * \E end \U, \L conversion
1345 * \e end \U, \L conversion
1346 * \l convert the next character to lower-case
1347 * \L convert to lower-case, until \E, \e, or end of replacement
1348 * \u convert the next character to upper-case
1349 * \U convert to upper-case, until \E, \e, or end of replacement
1351 * Otherwise, since this is the lowest level of replacement, discard
1352 * all escaping characters. This (hopefully) matches historic practice.
1354 #define OUTCH(ch, nltrans) { \
1355 CHAR_T __ch = (ch); \
1356 u_int __value = KEY_VAL(sp, __ch); \
1357 if (nltrans && (__value == K_CR || __value == K_NL)) { \
1358 NEEDNEWLINE(sp); \
1359 sp->newl[sp->newl_cnt++] = lbclen; \
1360 } else if (conv != C_NOTSET) { \
1361 switch (conv) { \
1362 case C_ONELOWER: \
1363 conv = C_NOTSET; \
1364 /* FALLTHROUGH */ \
1365 case C_LOWER: \
1366 if (isupper(__ch)) \
1367 __ch = tolower(__ch); \
1368 break; \
1369 case C_ONEUPPER: \
1370 conv = C_NOTSET; \
1371 /* FALLTHROUGH */ \
1372 case C_UPPER: \
1373 if (islower(__ch)) \
1374 __ch = toupper(__ch); \
1375 break; \
1376 default: \
1377 abort(); \
1380 NEEDSP(sp, 1, p); \
1381 *p++ = __ch; \
1382 ++lbclen; \
1384 conv = C_NOTSET;
1385 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1386 switch (ch = *rp++) {
1387 case '&':
1388 if (O_ISSET(sp, O_MAGIC)) {
1389 no = 0;
1390 goto subzero;
1392 break;
1393 case '\\':
1394 if (rpl == 0)
1395 break;
1396 --rpl;
1397 switch (ch = *rp) {
1398 case '&':
1399 ++rp;
1400 if (!O_ISSET(sp, O_MAGIC)) {
1401 no = 0;
1402 goto subzero;
1404 break;
1405 case '0': case '1': case '2': case '3': case '4':
1406 case '5': case '6': case '7': case '8': case '9':
1407 no = *rp++ - '0';
1408 subzero: if (match[no].rm_so == -1 ||
1409 match[no].rm_eo == -1)
1410 break;
1411 mlen = match[no].rm_eo - match[no].rm_so;
1412 for (t = ip + match[no].rm_so; mlen--; ++t)
1413 OUTCH(*t, 0);
1414 continue;
1415 case 'e':
1416 case 'E':
1417 ++rp;
1418 conv = C_NOTSET;
1419 continue;
1420 case 'l':
1421 ++rp;
1422 conv = C_ONELOWER;
1423 continue;
1424 case 'L':
1425 ++rp;
1426 conv = C_LOWER;
1427 continue;
1428 case 'u':
1429 ++rp;
1430 conv = C_ONEUPPER;
1431 continue;
1432 case 'U':
1433 ++rp;
1434 conv = C_UPPER;
1435 continue;
1436 default:
1437 ++rp;
1438 break;
1441 OUTCH(ch, 1);
1444 *lbp = lb; /* Update caller's information. */
1445 *lbclenp = lbclen;
1446 *lblenp = lblen;
1447 return (0);