Sync usage with man page.
[netbsd-mini2440.git] / dist / nvi / ex / ex_subst.c
blobb0698bc2eddacce2dfd892d6489db2ce67bc3e4c
1 /* $NetBSD: ex_subst.c,v 1.2 2008/12/05 22:51:42 christos Exp $ */
3 /*-
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 * Keith Bostic. All rights reserved.
9 * See the LICENSE file for redistribution information.
12 #include "config.h"
14 #ifndef lint
15 static const char sccsid[] = "Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp (Berkeley) Date: 2002/02/09 21:18:23";
16 #endif /* not lint */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/time.h>
22 #include <bitstring.h>
23 #include <ctype.h>
24 #include <errno.h>
25 #include <limits.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
31 #include "../common/common.h"
32 #include "../vi/vi.h"
34 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
35 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
37 static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
38 static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
39 static int re_sub __P((SCR *,
40 CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
41 static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
42 static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));
45 * ex_s --
46 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
48 * Substitute on lines matching a pattern.
50 * PUBLIC: int ex_s __P((SCR *, EXCMD *));
52 int
53 ex_s(SCR *sp, EXCMD *cmdp)
55 regex_t *re;
56 size_t blen, len;
57 u_int flags;
58 int delim;
59 CHAR_T *bp, *p, *ptrn, *rep, *t;
62 * Skip leading white space.
64 * !!!
65 * Historic vi allowed any non-alphanumeric to serve as the
66 * substitution command delimiter.
68 * !!!
69 * If the arguments are empty, it's the same as &, i.e. we
70 * repeat the last substitution.
72 if (cmdp->argc == 0)
73 goto subagain;
74 for (p = cmdp->argv[0]->bp,
75 len = cmdp->argv[0]->len; len > 0; --len, ++p) {
76 if (!isblank(*p))
77 break;
79 if (len == 0)
80 subagain: return (ex_subagain(sp, cmdp));
82 delim = *p++;
83 if (isalnum(delim) || delim == '\\')
84 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
87 * !!!
88 * The full-blown substitute command reset the remembered
89 * state of the 'c' and 'g' suffices.
91 sp->c_suffix = sp->g_suffix = 0;
94 * Get the pattern string, toss escaping characters.
96 * !!!
97 * Historic vi accepted any of the following forms:
99 * :s/abc/def/ change "abc" to "def"
100 * :s/abc/def change "abc" to "def"
101 * :s/abc/ delete "abc"
102 * :s/abc delete "abc"
104 * QUOTING NOTE:
106 * Only toss an escaping character if it escapes a delimiter.
107 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
108 * would be nice to be more regular, i.e. for each layer of
109 * escaping a single escaping character is removed, but that's
110 * not how the historic vi worked.
112 for (ptrn = t = p;;) {
113 if (p[0] == '\0' || p[0] == delim) {
114 if (p[0] == delim)
115 ++p;
117 * !!!
118 * Nul terminate the pattern string -- it's passed
119 * to regcomp which doesn't understand anything else.
121 *t = '\0';
122 break;
124 if (p[0] == '\\') {
125 if (p[1] == delim)
126 ++p;
127 else if (p[1] == '\\')
128 *t++ = *p++;
130 *t++ = *p++;
134 * If the pattern string is empty, use the last RE (not just the
135 * last substitution RE).
137 if (*ptrn == '\0') {
138 if (sp->re == NULL) {
139 ex_emsg(sp, NULL, EXM_NOPREVRE);
140 return (1);
143 /* Re-compile the RE if necessary. */
144 if (!F_ISSET(sp, SC_RE_SEARCH) &&
145 re_compile(sp, sp->re, sp->re_len,
146 NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
147 return (1);
148 flags = 0;
149 } else {
151 * !!!
152 * Compile the RE. Historic practice is that substitutes set
153 * the search direction as well as both substitute and search
154 * RE's. We compile the RE twice, as we don't want to bother
155 * ref counting the pattern string and (opaque) structure.
157 if (re_compile(sp, ptrn, t - ptrn, &sp->re,
158 &sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
159 return (1);
160 if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
161 &sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
162 return (1);
164 flags = SUB_FIRST;
165 sp->searchdir = FORWARD;
167 re = &sp->re_c;
170 * Get the replacement string.
172 * The special character & (\& if O_MAGIC not set) matches the
173 * entire RE. No handling of & is required here, it's done by
174 * re_sub().
176 * The special character ~ (\~ if O_MAGIC not set) inserts the
177 * previous replacement string into this replacement string.
178 * Count ~'s to figure out how much space we need. We could
179 * special case nonexistent last patterns or whether or not
180 * O_MAGIC is set, but it's probably not worth the effort.
182 * QUOTING NOTE:
184 * Only toss an escaping character if it escapes a delimiter or
185 * if O_MAGIC is set and it escapes a tilde.
187 * !!!
188 * If the entire replacement pattern is "%", then use the last
189 * replacement pattern. This semantic was added to vi in System
190 * V and then percolated elsewhere, presumably around the time
191 * that it was added to their version of ed(1).
193 if (p[0] == L('\0') || p[0] == delim) {
194 if (p[0] == delim)
195 ++p;
196 if (sp->repl != NULL)
197 free(sp->repl);
198 sp->repl = NULL;
199 sp->repl_len = 0;
200 } else if (p[0] == L('%') && (p[1] == L('\0') || p[1] == delim))
201 p += p[1] == delim ? 2 : 1;
202 else {
203 for (rep = p, len = 0;
204 p[0] != L('\0') && p[0] != delim; ++p, ++len)
205 if (p[0] == L('~'))
206 len += sp->repl_len;
207 GET_SPACE_RETW(sp, bp, blen, len);
208 for (t = bp, len = 0, p = rep;;) {
209 if (p[0] == L('\0') || p[0] == delim) {
210 if (p[0] == delim)
211 ++p;
212 break;
214 if (p[0] == L('\\')) {
215 if (p[1] == delim)
216 ++p;
217 else if (p[1] == L('\\')) {
218 *t++ = *p++;
219 ++len;
220 } else if (p[1] == L('~')) {
221 ++p;
222 if (!O_ISSET(sp, O_MAGIC))
223 goto tilde;
225 } else if (p[0] == L('~') && O_ISSET(sp, O_MAGIC)) {
226 tilde: ++p;
227 MEMCPYW(t, sp->repl, sp->repl_len);
228 t += sp->repl_len;
229 len += sp->repl_len;
230 continue;
232 *t++ = *p++;
233 ++len;
235 if ((sp->repl_len = len) != 0) {
236 if (sp->repl != NULL)
237 free(sp->repl);
238 if ((sp->repl = malloc(len * sizeof(CHAR_T))) == NULL) {
239 msgq(sp, M_SYSERR, NULL);
240 FREE_SPACEW(sp, bp, blen);
241 return (1);
243 MEMCPYW(sp->repl, bp, len);
245 FREE_SPACEW(sp, bp, blen);
247 return (s(sp, cmdp, p, re, flags));
251 * ex_subagain --
252 * [line [,line]] & [cgr] [count] [#lp]]
254 * Substitute using the last substitute RE and replacement pattern.
256 * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
259 ex_subagain(SCR *sp, EXCMD *cmdp)
261 if (sp->subre == NULL) {
262 ex_emsg(sp, NULL, EXM_NOPREVRE);
263 return (1);
265 if (!F_ISSET(sp, SC_RE_SUBST) &&
266 re_compile(sp, sp->subre, sp->subre_len,
267 NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
268 return (1);
269 return (s(sp,
270 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
274 * ex_subtilde --
275 * [line [,line]] ~ [cgr] [count] [#lp]]
277 * Substitute using the last RE and last substitute replacement pattern.
279 * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
282 ex_subtilde(SCR *sp, EXCMD *cmdp)
284 if (sp->re == NULL) {
285 ex_emsg(sp, NULL, EXM_NOPREVRE);
286 return (1);
288 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
289 sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
290 return (1);
291 return (s(sp,
292 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
/*
 * s --
 *	Do the substitution.  This stuff is *really* tricky.  There are lots of
 *	special cases, and general nastiness.  Don't mess with it unless you're
 *	pretty confident.
 *
 * The nasty part of the substitution is what happens when the replacement
 * string contains newlines.  It's a bit tricky -- consider the information
 * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 * to build a set of newline offsets which we use to break the line up later,
 * when the replacement is done.  Don't change it unless you're *damned*
 * confident.
 */

/*
 * The three macros below are brace blocks rather than do/while(0) wrappers
 * because they are invoked both with and without a trailing semicolon.
 * Each of them executes "return (1)" in the *calling* function if the
 * buffer can't be grown.  BUILD and NEEDSP operate on the caller's local
 * variables lb, lbclen and lblen.
 */

/* NEEDNEWLINE: make room for one more entry in the sp->newl offset array. */
#define	NEEDNEWLINE(sp) {						\
	if (sp->newl_len == sp->newl_cnt) {				\
		sp->newl_len += 25;					\
		REALLOC(sp, sp->newl, size_t *,				\
		    sp->newl_len * sizeof(size_t));			\
		if (sp->newl == NULL) {					\
			sp->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}

/* BUILD: append len characters starting at l to the build buffer. */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	MEMCPYW(lb + lbclen, l, len);					\
	lbclen += (len);						\
}

/*
 * NEEDSP: make sure the build buffer has room for len more characters and
 * set pnt to the current end of the built text.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	pnt = lb + lbclen;						\
}
345 static int
346 s(SCR *sp, EXCMD *cmdp, CHAR_T *st, regex_t *re, u_int flags)
348 EVENT ev;
349 MARK from, to;
350 TEXTH tiq;
351 db_recno_t elno, lno, slno;
352 u_long ul;
353 regmatch_t match[10];
354 size_t blen, cnt, last, lbclen, lblen, len, llen;
355 size_t offset, saved_offset, scno;
356 int cflag, lflag, nflag, pflag, rflag;
357 int didsub, do_eol_match, eflags, empty_ok, eval;
358 int linechanged, matched, quit, rval;
359 CHAR_T *lb, *bp;
360 enum nresult nret;
362 NEEDFILE(sp, cmdp);
364 slno = sp->lno;
365 scno = sp->cno;
368 * !!!
369 * Historically, the 'g' and 'c' suffices were always toggled as flags,
370 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
371 * not set, they were initialized to 0 for all substitute commands. If
372 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
373 * specified substitute/replacement patterns (see ex_s()).
375 if (!O_ISSET(sp, O_EDCOMPATIBLE))
376 sp->c_suffix = sp->g_suffix = 0;
379 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
380 * it only displayed the last change. I'd disallow them, but they are
381 * useful in combination with the [v]global commands. In the current
382 * model the problem is combining them with the 'c' flag -- the screen
383 * would have to flip back and forth between the confirm screen and the
384 * ex print screen, which would be pretty awful. We do display all
385 * changes, though, for what that's worth.
387 * !!!
388 * Historic vi was fairly strict about the order of "options", the
389 * count, and "flags". I'm somewhat fuzzy on the difference between
390 * options and flags, anyway, so this is a simpler approach, and we
391 * just take it them in whatever order the user gives them. (The ex
392 * usage statement doesn't reflect this.)
394 cflag = lflag = nflag = pflag = rflag = 0;
395 if (st == NULL)
396 goto noargs;
397 for (lno = OOBLNO; *st != '\0'; ++st)
398 switch (*st) {
399 case ' ':
400 case '\t':
401 continue;
402 case '+':
403 ++cmdp->flagoff;
404 break;
405 case '-':
406 --cmdp->flagoff;
407 break;
408 case '0': case '1': case '2': case '3': case '4':
409 case '5': case '6': case '7': case '8': case '9':
410 if (lno != OOBLNO)
411 goto usage;
412 errno = 0;
413 nret = nget_uslong(sp, &ul, st, &st, 10);
414 lno = ul;
415 if (*st == '\0') /* Loop increment correction. */
416 --st;
417 if (nret != NUM_OK) {
418 if (nret == NUM_OVER)
419 msgq(sp, M_ERR, "153|Count overflow");
420 else if (nret == NUM_UNDER)
421 msgq(sp, M_ERR, "154|Count underflow");
422 else
423 msgq(sp, M_SYSERR, NULL);
424 return (1);
427 * In historic vi, the count was inclusive from the
428 * second address.
430 cmdp->addr1.lno = cmdp->addr2.lno;
431 cmdp->addr2.lno += lno - 1;
432 if (!db_exist(sp, cmdp->addr2.lno) &&
433 db_last(sp, &cmdp->addr2.lno))
434 return (1);
435 break;
436 case '#':
437 nflag = 1;
438 break;
439 case 'c':
440 sp->c_suffix = !sp->c_suffix;
442 /* Ex text structure initialization. */
443 if (F_ISSET(sp, SC_EX)) {
444 memset(&tiq, 0, sizeof(TEXTH));
445 CIRCLEQ_INIT(&tiq);
447 break;
448 case 'g':
449 sp->g_suffix = !sp->g_suffix;
450 break;
451 case 'l':
452 lflag = 1;
453 break;
454 case 'p':
455 pflag = 1;
456 break;
457 case 'r':
458 if (LF_ISSET(SUB_FIRST)) {
459 msgq(sp, M_ERR,
460 "155|Regular expression specified; r flag meaningless");
461 return (1);
463 if (!F_ISSET(sp, SC_RE_SEARCH)) {
464 ex_emsg(sp, NULL, EXM_NOPREVRE);
465 return (1);
467 rflag = 1;
468 re = &sp->re_c;
469 break;
470 default:
471 goto usage;
474 if (*st != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
475 usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
476 return (1);
479 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
480 msgq(sp, M_ERR,
481 "156|The #, l and p flags may not be combined with the c flag in vi mode");
482 return (1);
486 * bp: if interactive, line cache
487 * blen: if interactive, line cache length
488 * lb: build buffer pointer.
489 * lbclen: current length of built buffer.
490 * lblen; length of build buffer.
492 bp = lb = NULL;
493 blen = lbclen = lblen = 0;
495 /* For each line... */
496 lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
497 for (matched = quit = 0,
498 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
500 /* Someone's unhappy, time to stop. */
501 if (INTERRUPTED(sp))
502 break;
504 /* Get the line. */
505 if (db_get(sp, lno, DBG_FATAL, &st, &llen))
506 goto err;
509 * Make a local copy if doing confirmation -- when calling
510 * the confirm routine we're likely to lose the cached copy.
512 if (sp->c_suffix) {
513 if (bp == NULL) {
514 GET_SPACE_RETW(sp, bp, blen, llen);
515 } else
516 ADD_SPACE_RETW(sp, bp, blen, llen);
517 MEMCPYW(bp, st, llen);
518 st = bp;
521 /* Start searching from the beginning. */
522 offset = 0;
523 len = llen;
525 /* Reset the build buffer offset. */
526 lbclen = 0;
528 /* Reset empty match flag. */
529 empty_ok = 1;
532 * We don't want to have to do a setline if the line didn't
533 * change -- keep track of whether or not this line changed.
534 * If doing confirmations, don't want to keep setting the
535 * line if change is refused -- keep track of substitutions.
537 didsub = linechanged = 0;
539 /* New line, do an EOL match. */
540 do_eol_match = 1;
542 /* It's not nul terminated, but we pretend it is. */
543 eflags = REG_STARTEND;
546 * The search area is from st + offset to the EOL.
548 * Generally, match[0].rm_so is the offset of the start
549 * of the match from the start of the search, and offset
550 * is the offset of the start of the last search.
552 nextmatch: match[0].rm_so = 0;
553 match[0].rm_eo = len;
555 /* Get the next match. */
556 eval = regexec(re, st + offset, 10, match, eflags);
559 * There wasn't a match or if there was an error, deal with
560 * it. If there was a previous match in this line, resolve
561 * the changes into the database. Otherwise, just move on.
563 if (eval == REG_NOMATCH)
564 goto endmatch;
565 if (eval != 0) {
566 re_error(sp, eval, re);
567 goto err;
569 matched = 1;
571 /* Only the first search can match an anchored expression. */
572 eflags |= REG_NOTBOL;
575 * !!!
576 * It's possible to match 0-length strings -- for example, the
577 * command s;a*;X;, when matched against the string "aabb" will
578 * result in "XbXbX", i.e. the matches are "aa", the space
579 * between the b's and the space between the b's and the end of
580 * the string. There is a similar space between the beginning
581 * of the string and the a's. The rule that we use (because vi
582 * historically used it) is that any 0-length match, occurring
583 * immediately after a match, is ignored. Otherwise, the above
584 * example would have resulted in "XXbXbX". Another example is
585 * incorrectly using " *" to replace groups of spaces with one
586 * space.
588 * The way we do this is that if we just had a successful match,
589 * the starting offset does not skip characters, and the match
590 * is empty, ignore the match and move forward. If there's no
591 * more characters in the string, we were attempting to match
592 * after the last character, so quit.
594 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
595 empty_ok = 1;
596 if (len == 0)
597 goto endmatch;
598 BUILD(sp, st + offset, 1)
599 ++offset;
600 --len;
601 goto nextmatch;
604 /* Confirm change. */
605 if (sp->c_suffix) {
607 * Set the cursor position for confirmation. Note,
608 * if we matched on a '$', the cursor may be past
609 * the end of line.
611 from.lno = to.lno = lno;
612 from.cno = match[0].rm_so + offset;
613 to.cno = match[0].rm_eo + offset;
615 * Both ex and vi have to correct for a change before
616 * the first character in the line.
618 if (llen == 0)
619 from.cno = to.cno = 0;
620 if (F_ISSET(sp, SC_VI)) {
622 * Only vi has to correct for a change after
623 * the last character in the line.
625 * XXX
626 * It would be nice to change the vi code so
627 * that we could display a cursor past EOL.
629 if (to.cno >= llen)
630 to.cno = llen - 1;
631 if (from.cno >= llen)
632 from.cno = llen - 1;
634 sp->lno = from.lno;
635 sp->cno = from.cno;
636 if (vs_refresh(sp, 1))
637 goto err;
639 vs_update(sp, msg_cat(sp,
640 "169|Confirm change? [n]", NULL), NULL);
642 if (v_event_get(sp, &ev, 0, 0))
643 goto err;
644 switch (ev.e_event) {
645 case E_CHARACTER:
646 break;
647 case E_EOF:
648 case E_ERR:
649 case E_INTERRUPT:
650 goto lquit;
651 default:
652 v_event_err(sp, &ev);
653 goto lquit;
655 } else {
656 if (ex_print(sp, cmdp, &from, &to, 0) ||
657 ex_scprint(sp, &from, &to))
658 goto lquit;
659 if (ex_txt(sp, &tiq, 0, TXT_CR))
660 goto err;
661 ev.e_c = tiq.cqh_first->lb[0];
664 switch (ev.e_c) {
665 case CH_YES:
666 break;
667 default:
668 case CH_NO:
669 didsub = 0;
670 BUILD(sp, st + offset, match[0].rm_eo);
671 goto skip;
672 case CH_QUIT:
673 /* Set the quit/interrupted flags. */
674 lquit: quit = 1;
675 F_SET(sp->gp, G_INTERRUPTED);
678 * Resolve any changes, then return to (and
679 * exit from) the main loop.
681 goto endmatch;
686 * Set the cursor to the last position changed, converting
687 * from 1-based to 0-based.
689 sp->lno = lno;
690 sp->cno = match[0].rm_so;
692 /* Copy the bytes before the match into the build buffer. */
693 BUILD(sp, st + offset, match[0].rm_so);
695 /* Substitute the matching bytes. */
696 didsub = 1;
697 if (re_sub(sp, st + offset, &lb, &lbclen, &lblen, match))
698 goto err;
700 /* Set the change flag so we know this line was modified. */
701 linechanged = 1;
703 /* Move past the matched bytes. */
704 skip: offset += match[0].rm_eo;
705 len -= match[0].rm_eo;
707 /* A match cannot be followed by an empty pattern. */
708 empty_ok = 0;
711 * If doing a global change with confirmation, we have to
712 * update the screen. The basic idea is to store the line
713 * so the screen update routines can find it, and restart.
715 if (didsub && sp->c_suffix && sp->g_suffix) {
717 * The new search offset will be the end of the
718 * modified line.
720 saved_offset = lbclen;
722 /* Copy the rest of the line. */
723 if (len)
724 BUILD(sp, st + offset, len)
726 /* Set the new offset. */
727 offset = saved_offset;
729 /* Store inserted lines, adjusting the build buffer. */
730 last = 0;
731 if (sp->newl_cnt) {
732 for (cnt = 0;
733 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
734 if (db_insert(sp, lno,
735 lb + last, sp->newl[cnt] - last))
736 goto err;
737 last = sp->newl[cnt] + 1;
738 ++sp->rptlines[L_ADDED];
740 lbclen -= last;
741 offset -= last;
742 sp->newl_cnt = 0;
745 /* Store and retrieve the line. */
746 if (db_set(sp, lno, lb + last, lbclen))
747 goto err;
748 if (db_get(sp, lno, DBG_FATAL, &st, &llen))
749 goto err;
750 ADD_SPACE_RETW(sp, bp, blen, llen)
751 MEMCPYW(bp, st, llen);
752 st = bp;
753 len = llen - offset;
755 /* Restart the build. */
756 lbclen = 0;
757 BUILD(sp, st, offset);
760 * If we haven't already done the after-the-string
761 * match, do one. Set REG_NOTEOL so the '$' pattern
762 * only matches once.
764 if (!do_eol_match)
765 goto endmatch;
766 if (offset == len) {
767 do_eol_match = 0;
768 eflags |= REG_NOTEOL;
770 goto nextmatch;
774 * If it's a global:
776 * If at the end of the string, do a test for the after
777 * the string match. Set REG_NOTEOL so the '$' pattern
778 * only matches once.
780 if (sp->g_suffix && do_eol_match) {
781 if (len == 0) {
782 do_eol_match = 0;
783 eflags |= REG_NOTEOL;
785 goto nextmatch;
788 endmatch: if (!linechanged)
789 continue;
791 /* Copy any remaining bytes into the build buffer. */
792 if (len)
793 BUILD(sp, st + offset, len)
795 /* Store inserted lines, adjusting the build buffer. */
796 last = 0;
797 if (sp->newl_cnt) {
798 for (cnt = 0;
799 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
800 if (db_insert(sp,
801 lno, lb + last, sp->newl[cnt] - last))
802 goto err;
803 last = sp->newl[cnt] + 1;
804 ++sp->rptlines[L_ADDED];
806 lbclen -= last;
807 sp->newl_cnt = 0;
810 /* Store the changed line. */
811 if (db_set(sp, lno, lb + last, lbclen))
812 goto err;
814 /* Update changed line counter. */
815 if (sp->rptlchange != lno) {
816 sp->rptlchange = lno;
817 ++sp->rptlines[L_CHANGED];
821 * !!!
822 * Display as necessary. Historic practice is to only
823 * display the last line of a line split into multiple
824 * lines.
826 if (lflag || nflag || pflag) {
827 from.lno = to.lno = lno;
828 from.cno = to.cno = 0;
829 if (lflag)
830 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
831 if (nflag)
832 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
833 if (pflag)
834 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
839 * !!!
840 * Historically, vi attempted to leave the cursor at the same place if
841 * the substitution was done at the current cursor position. Otherwise
842 * it moved it to the first non-blank of the last line changed. There
843 * were some problems: for example, :s/$/foo/ with the cursor on the
844 * last character of the line left the cursor on the last character, or
845 * the & command with multiple occurrences of the matching string in the
846 * line usually left the cursor in a fairly random position.
848 * We try to do the same thing, with the exception that if the user is
849 * doing substitution with confirmation, we move to the last line about
850 * which the user was consulted, as opposed to the last line that they
851 * actually changed. This prevents a screen flash if the user doesn't
852 * change many of the possible lines.
854 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
855 sp->cno = 0;
856 (void)nonblank(sp, sp->lno, &sp->cno);
860 * If not in a global command, and nothing matched, say so.
861 * Else, if none of the lines displayed, put something up.
863 rval = 0;
864 if (!matched) {
865 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
866 msgq(sp, M_ERR, "157|No match found");
867 goto err;
869 } else if (!lflag && !nflag && !pflag)
870 F_SET(cmdp, E_AUTOPRINT);
872 if (0) {
873 err: rval = 1;
876 if (bp != NULL)
877 FREE_SPACEW(sp, bp, blen);
878 if (lb != NULL)
879 free(lb);
880 return (rval);
884 * re_compile --
885 * Compile the RE.
887 * PUBLIC: int re_compile __P((SCR *,
888 * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
891 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
893 size_t len;
894 int reflags, replaced, rval;
895 CHAR_T *p;
897 /* Set RE flags. */
898 reflags = 0;
899 if (LF_ISSET(SEARCH_EXTEND))
900 reflags |= REG_EXTENDED;
901 if (LF_ISSET(SEARCH_IC))
902 reflags |= REG_ICASE;
903 if (LF_ISSET(SEARCH_LITERAL))
904 reflags |= REG_NOSPEC;
905 if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
906 if (O_ISSET(sp, O_EXTENDED))
907 reflags |= REG_EXTENDED;
908 if (O_ISSET(sp, O_IGNORECASE))
909 reflags |= REG_ICASE;
910 if (O_ISSET(sp, O_ICLOWER))
911 goto iclower;
913 if (LF_ISSET(SEARCH_ICL)) {
914 iclower: for (p = ptrn, len = plen; len > 0; ++p, --len)
915 if (ISUPPER(*p))
916 break;
917 if (len == 0)
918 reflags |= REG_ICASE;
921 /* If we're replacing a saved value, clear the old one. */
922 if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
923 regfree(&sp->re_c);
924 F_CLR(sp, SC_RE_SEARCH);
926 if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
927 regfree(&sp->subre_c);
928 F_CLR(sp, SC_RE_SUBST);
932 * If we're saving the string, it's a pattern we haven't seen before,
933 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
934 * later recompilation. Free any previously saved value.
936 if (ptrnp != NULL) {
937 replaced = 0;
938 if (LF_ISSET(SEARCH_CSCOPE)) {
939 if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
940 return (1);
942 * XXX
943 * Currently, the match-any-<blank> expression used in
944 * re_cscope_conv() requires extended RE's. This may
945 * not be right or safe.
947 reflags |= REG_EXTENDED;
948 } else if (LF_ISSET(SEARCH_TAG)) {
949 if (re_tag_conv(sp, &ptrn, &plen, &replaced))
950 return (1);
951 } else if (!LF_ISSET(SEARCH_LITERAL))
952 if (re_conv(sp, &ptrn, &plen, &replaced))
953 return (1);
955 /* Discard previous pattern. */
956 if (*ptrnp != NULL) {
957 free(*ptrnp);
958 *ptrnp = NULL;
960 if (lenp != NULL)
961 *lenp = plen;
964 * Copy the string into allocated memory.
966 * XXX
967 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
968 * for now. There's just no other solution.
970 MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
971 if (*ptrnp != NULL) {
972 MEMCPYW(*ptrnp, ptrn, plen);
973 (*ptrnp)[plen] = '\0';
976 /* Free up conversion-routine-allocated memory. */
977 if (replaced)
978 FREE_SPACEW(sp, ptrn, 0);
980 if (*ptrnp == NULL)
981 return (1);
983 ptrn = *ptrnp;
987 * XXX
988 * Regcomp isn't 8-bit clean, so we just lost if the pattern
989 * contained a nul. Bummer!
991 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
992 if (LF_ISSET(SEARCH_MSG))
993 re_error(sp, rval, rep);
994 return (1);
997 if (LF_ISSET(SEARCH_CSEARCH))
998 F_SET(sp, SC_RE_SEARCH);
999 if (LF_ISSET(SEARCH_CSUBST))
1000 F_SET(sp, SC_RE_SUBST);
1002 return (0);
1006 * re_conv --
1007 * Convert vi's regular expressions into something that the
1008 * the POSIX 1003.2 RE functions can handle.
1010 * There are three conversions we make to make vi's RE's (specifically
1011 * the global, search, and substitute patterns) work with POSIX RE's.
1013 * 1: If O_MAGIC is not set, strip backslashes from the magic character
1014 * set (.[*~) that have them, and add them to the ones that don't.
1015 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1016 * from the last substitute command's replacement string. If O_MAGIC
1017 * is set, it's the string "~".
1018 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1019 * new RE escapes.
1021 * !!!/XXX
1022 * This doesn't exactly match the historic behavior of vi because we do
1023 * the ~ substitution before calling the RE engine, so magic characters
1024 * in the replacement string will be expanded by the RE engine, and they
1025 * weren't historically. It's a bug.
1027 static int
1028 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1030 size_t blen, len, needlen;
1031 int magic;
1032 CHAR_T *bp, *p, *t;
1035 * First pass through, we figure out how much space we'll need.
1036 * We do it in two passes, on the grounds that most of the time
1037 * the user is doing a search and won't have magic characters.
1038 * That way we can skip most of the memory allocation and copies.
1040 magic = 0;
1041 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1042 switch (*p) {
1043 case '\\':
1044 if (len > 1) {
1045 --len;
1046 switch (*++p) {
1047 case '<':
1048 magic = 1;
1049 needlen += RE_WSTART_LEN + 1;
1050 break;
1051 case '>':
1052 magic = 1;
1053 needlen += RE_WSTOP_LEN + 1;
1054 break;
1055 case '~':
1056 if (!O_ISSET(sp, O_MAGIC)) {
1057 magic = 1;
1058 needlen += sp->repl_len;
1060 break;
1061 case '.':
1062 case '[':
1063 case '*':
1064 if (!O_ISSET(sp, O_MAGIC)) {
1065 magic = 1;
1066 needlen += 1;
1068 break;
1069 default:
1070 needlen += 2;
1072 } else
1073 needlen += 1;
1074 break;
1075 case '~':
1076 if (O_ISSET(sp, O_MAGIC)) {
1077 magic = 1;
1078 needlen += sp->repl_len;
1080 break;
1081 case '.':
1082 case '[':
1083 case '*':
1084 if (!O_ISSET(sp, O_MAGIC)) {
1085 magic = 1;
1086 needlen += 2;
1088 break;
1089 default:
1090 needlen += 1;
1091 break;
1094 if (!magic) {
1095 *replacedp = 0;
1096 return (0);
1099 /* Get enough memory to hold the final pattern. */
1100 *replacedp = 1;
1101 GET_SPACE_RETW(sp, bp, blen, needlen);
1103 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1104 switch (*p) {
1105 case '\\':
1106 if (len > 1) {
1107 --len;
1108 switch (*++p) {
1109 case '<':
1110 MEMCPY(t,
1111 RE_WSTART, RE_WSTART_LEN);
1112 t += RE_WSTART_LEN;
1113 break;
1114 case '>':
1115 MEMCPY(t,
1116 RE_WSTOP, RE_WSTOP_LEN);
1117 t += RE_WSTOP_LEN;
1118 break;
1119 case '~':
1120 if (O_ISSET(sp, O_MAGIC))
1121 *t++ = '~';
1122 else {
1123 MEMCPYW(t,
1124 sp->repl, sp->repl_len);
1125 t += sp->repl_len;
1127 break;
1128 case '.':
1129 case '[':
1130 case '*':
1131 if (O_ISSET(sp, O_MAGIC))
1132 *t++ = '\\';
1133 *t++ = *p;
1134 break;
1135 default:
1136 *t++ = '\\';
1137 *t++ = *p;
1139 } else
1140 *t++ = '\\';
1141 break;
1142 case '~':
1143 if (O_ISSET(sp, O_MAGIC)) {
1144 MEMCPYW(t, sp->repl, sp->repl_len);
1145 t += sp->repl_len;
1146 } else
1147 *t++ = '~';
1148 break;
1149 case '.':
1150 case '[':
1151 case '*':
1152 if (!O_ISSET(sp, O_MAGIC))
1153 *t++ = '\\';
1154 *t++ = *p;
1155 break;
1156 default:
1157 *t++ = *p;
1158 break;
1161 *ptrnp = bp;
1162 *plenp = t - bp;
1163 return (0);
1167 * re_tag_conv --
1168 * Convert a tags search path into something that the POSIX
1169 * 1003.2 RE functions can handle.
1171 static int
1172 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1174 size_t blen, len;
1175 int lastdollar;
1176 CHAR_T *bp, *p, *t;
1178 len = *plenp;
1180 /* Max memory usage is 2 times the length of the string. */
1181 *replacedp = 1;
1182 GET_SPACE_RETW(sp, bp, blen, len * 2);
1184 p = *ptrnp;
1185 t = bp;
1187 /* If the last character is a '/' or '?', we just strip it. */
1188 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1189 --len;
1191 /* If the next-to-last or last character is a '$', it's magic. */
1192 if (len > 0 && p[len - 1] == '$') {
1193 --len;
1194 lastdollar = 1;
1195 } else
1196 lastdollar = 0;
1198 /* If the first character is a '/' or '?', we just strip it. */
1199 if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1200 ++p;
1201 --len;
1204 /* If the first or second character is a '^', it's magic. */
1205 if (p[0] == '^') {
1206 *t++ = *p++;
1207 --len;
1211 * Escape every other magic character we can find, meanwhile stripping
1212 * the backslashes ctags inserts when escaping the search delimiter
1213 * characters.
1215 for (; len > 0; --len) {
1216 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1217 ++p;
1218 --len;
1219 } else if (strchr("^.[]$*", p[0]))
1220 *t++ = '\\';
1221 *t++ = *p++;
1223 if (lastdollar)
1224 *t++ = '$';
1226 *ptrnp = bp;
1227 *plenp = t - bp;
1228 return (0);
1232 * re_cscope_conv --
1233 * Convert a cscope search path into something that the POSIX
1234 * 1003.2 RE functions can handle.
1236 static int
1237 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1239 size_t blen, len, nspaces;
1240 CHAR_T *bp, *t;
1241 CHAR_T *p;
1242 const CHAR_T *wp;
1243 size_t wlen;
1246 * Each space in the source line printed by cscope represents an
1247 * arbitrary sequence of spaces, tabs, and comments.
1249 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1250 #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1
1251 CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1252 for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1253 if (*p == ' ')
1254 ++nspaces;
1257 * Allocate plenty of space:
1258 * the string, plus potential escaping characters;
1259 * nspaces + 2 copies of CSCOPE_RE_SPACE;
1260 * ^, $, nul terminator characters.
1262 *replacedp = 1;
1263 len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1264 GET_SPACE_RETW(sp, bp, blen, len);
1266 p = *ptrnp;
1267 t = bp;
1269 *t++ = '^';
1270 MEMCPYW(t, wp, wlen);
1271 t += wlen;
1273 for (len = *plenp; len > 0; ++p, --len)
1274 if (*p == ' ') {
1275 MEMCPYW(t, wp, wlen);
1276 t += wlen;
1277 } else {
1278 if (strchr("\\^.[]$*+?()|{}", *p))
1279 *t++ = '\\';
1280 *t++ = *p;
1283 MEMCPYW(t, wp, wlen);
1284 t += wlen;
1285 *t++ = '$';
1287 *ptrnp = bp;
1288 *plenp = t - bp;
1289 return (0);
1293 * re_error --
1294 * Report a regular expression error.
1296 * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1298 void
1299 re_error(SCR *sp, int errcode, regex_t *preg)
1301 size_t sz;
1302 char *oe;
1304 sz = regerror(errcode, preg, NULL, 0);
1305 if ((oe = malloc(sz)) == NULL)
1306 msgq(sp, M_SYSERR, NULL);
1307 else {
1308 (void)regerror(errcode, preg, oe, sz);
1309 msgq(sp, M_ERR, "RE error: %s", oe);
1310 free(oe);
1315 * re_sub --
1316 * Do the substitution for a regular expression.
1318 static int
1319 re_sub(SCR *sp, CHAR_T *ip, CHAR_T **lbp, size_t *lbclenp, size_t *lblenp, regmatch_t *match)
1321 /* Input line. */
/*
 * Summary: walks the saved replacement text (sp->repl, sp->repl_len) and
 * appends its expansion at offset *lbclenp of the buffer *lbp.  ip is the
 * text that the match[] offsets index into.  The local copies of the
 * buffer pointer, its used length and its size are stored back through
 * lbp, lbclenp and lblenp just before the function returns 0.
 *
 * NOTE(review): NEEDSP and NEEDNEWLINE are defined outside this chunk;
 * presumably they grow lb / sp->newl and use the locals lb, lblen and
 * lbclen by name -- confirm before renaming anything here.
 */
1326 enum { C_NOT_SET, C_LOWER, C_ONE_LOWER, C_ONE_UPPER, C_UPPER } conv;
1327 size_t lbclen, lblen; /* Local copies. */
1328 size_t mlen; /* Match length. */
1329 size_t rpl; /* Remaining replacement length. */
1330 CHAR_T *rp; /* Replacement pointer. */
1331 int ch; /* Replacement character being examined. */
1332 int no; /* Match replacement offset. */
1333 CHAR_T *p, *t; /* Buffer pointers. */
1334 CHAR_T *lb; /* Local copies. */
1336 lb = *lbp; /* Get local copies. */
1337 lbclen = *lbclenp;
1338 lblen = *lblenp;
1341 * QUOTING NOTE:
1343 * There are some special sequences that vi provides in the
1344 * replacement patterns.
1345 * & string the RE matched (\& if nomagic set)
1346 * \# n-th regular subexpression
1347 * \E end \U, \L conversion
1348 * \e end \U, \L conversion
1349 * \l convert the next character to lower-case
1350 * \L convert to lower-case, until \E, \e, or end of replacement
1351 * \u convert the next character to upper-case
1352 * \U convert to upper-case, until \E, \e, or end of replacement
1354 * Otherwise, since this is the lowest level of replacement, discard
1355 * all escaping characters. This (hopefully) matches historic practice.
1357 #define OUTCH(ch, nltrans) { \
1358 CHAR_T __ch = (ch); \
1359 u_int __value = KEY_VAL(sp, __ch); \
1360 if (nltrans && (__value == K_CR || __value == K_NL)) { \
1361 NEEDNEWLINE(sp); \
1362 sp->newl[sp->newl_cnt++] = lbclen; \
1363 } else if (conv != C_NOT_SET) { \
1364 switch (conv) { \
1365 case C_ONE_LOWER: \
1366 conv = C_NOT_SET; \
1367 /* FALLTHROUGH */ \
1368 case C_LOWER: \
1369 if (ISUPPER(__ch)) \
1370 __ch = TOLOWER(__ch); \
1371 break; \
1372 case C_ONE_UPPER: \
1373 conv = C_NOT_SET; \
1374 /* FALLTHROUGH */ \
1375 case C_UPPER: \
1376 if (ISLOWER(__ch)) \
1377 __ch = TOUPPER(__ch); \
1378 break; \
1379 default: \
1380 abort(); \
1383 NEEDSP(sp, 1, p); \
1384 *p++ = __ch; \
1385 ++lbclen; \
1387 conv = C_NOT_SET;
/*
 * OUTCH, defined just above, stores one character at p and increments
 * lbclen.  When nltrans is non-zero and the character's key value is
 * K_CR or K_NL, the current output offset is also recorded in
 * sp->newl[].  Otherwise, if a case conversion is active, the character
 * is converted first; the C_ONE_* modes reset conv to C_NOT_SET so they
 * affect a single character only.
 */
/* One pass per replacement character; p is where output is appended. */
1388 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1389 switch (ch = *rp++) {
/* Bare '&': the whole match when O_MAGIC is set, otherwise a literal. */
1390 case '&':
1391 if (O_ISSET(sp, O_MAGIC)) {
1392 no = 0;
1393 goto subzero;
1395 break;
/* Backslash: the next replacement character selects the action. */
1396 case '\\':
/* Nothing follows the backslash, so there is no escape to decode. */
1397 if (rpl == 0)
1398 break;
1399 --rpl;
1400 switch (ch = *rp) {
/* "\&": the whole match only when O_MAGIC is NOT set. */
1401 case '&':
1402 ++rp;
1403 if (!O_ISSET(sp, O_MAGIC)) {
1404 no = 0;
1405 goto subzero;
1407 break;
/* "\0".."\9": the digit indexes match[]. */
1408 case '0': case '1': case '2': case '3': case '4':
1409 case '5': case '6': case '7': case '8': case '9':
1410 no = *rp++ - '0';
/*
 * subzero is also the target of the two "goto"s above (no == 0).
 * An unset match (either offset is -1) takes the break instead of
 * copying.  NOTE(review): the closing braces are not visible in this
 * chunk; as written the break appears to reach the OUTCH(ch, 1) below,
 * which would emit ch itself -- confirm.
 */
1411 subzero: if (match[no].rm_so == -1 ||
1412 match[no].rm_eo == -1)
1413 break;
/* Copy the matched text from ip; nltrans is 0 for this copy. */
1414 mlen = match[no].rm_eo - match[no].rm_so;
1415 for (t = ip + match[no].rm_so; mlen--; ++t)
1416 OUTCH(*t, 0);
1417 continue;
/* "\e" / "\E": cancel any active case conversion. */
1418 case 'e':
1419 case 'E':
1420 ++rp;
1421 conv = C_NOT_SET;
1422 continue;
/* "\l" / "\u" select a one-character conversion, "\L" / "\U" a lasting one. */
1423 case 'l':
1424 ++rp;
1425 conv = C_ONE_LOWER;
1426 continue;
1427 case 'L':
1428 ++rp;
1429 conv = C_LOWER;
1430 continue;
1431 case 'u':
1432 ++rp;
1433 conv = C_ONE_UPPER;
1434 continue;
1435 case 'U':
1436 ++rp;
1437 conv = C_UPPER;
1438 continue;
/* Any other escaped character: step past it; ch already holds it. */
1439 default:
1440 ++rp;
1441 break;
/* Emit ch, with newline recording enabled (nltrans is 1). */
1444 OUTCH(ch, 1);
1447 *lbp = lb; /* Update caller's information. */
1448 *lbclenp = lbclen;
1449 *lblenp = lblen;
1450 return (0);