/*
 * Web-view residue from the repository browser (not part of the file):
 *   Fix UTIME_OMIT handling
 *   [dragonfly.git] / contrib / nvi2 / ex / ex_subst.c
 *   blob d32d583f6a3b0bdf91a3d2edbe6526d882d65f71
 */
/*-
 * Copyright (c) 1992, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1992, 1993, 1994, 1995, 1996
 *	Keith Bostic.  All rights reserved.
 *
 * See the LICENSE file for redistribution information.
 */
10 #include "config.h"
12 #include <sys/types.h>
13 #include <sys/queue.h>
14 #include <sys/time.h>
16 #include <bitstring.h>
17 #include <ctype.h>
18 #include <errno.h>
19 #include <limits.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
25 #include "../common/common.h"
26 #include "../vi/vi.h"
28 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
29 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
31 static int re_conv(SCR *, CHAR_T **, size_t *, int *);
32 static int re_cscope_conv(SCR *, CHAR_T **, size_t *, int *);
33 static int re_sub(SCR *,
34 CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]);
35 static int re_tag_conv(SCR *, CHAR_T **, size_t *, int *);
36 static int s(SCR *, EXCMD *, CHAR_T *, regex_t *, u_int);
39 * ex_s --
40 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
42 * Substitute on lines matching a pattern.
44 * PUBLIC: int ex_s(SCR *, EXCMD *);
46 int
47 ex_s(SCR *sp, EXCMD *cmdp)
49 regex_t *re;
50 size_t blen, len;
51 u_int flags;
52 int delim;
53 CHAR_T *bp, *p, *ptrn, *rep, *t;
56 * Skip leading white space.
58 * !!!
59 * Historic vi allowed any non-alphanumeric to serve as the
60 * substitution command delimiter.
62 * !!!
63 * If the arguments are empty, it's the same as &, i.e. we
64 * repeat the last substitution.
66 if (cmdp->argc == 0)
67 goto subagain;
68 for (p = cmdp->argv[0]->bp,
69 len = cmdp->argv[0]->len; len > 0; --len, ++p) {
70 if (!cmdskip(*p))
71 break;
73 if (len == 0)
74 subagain: return (ex_subagain(sp, cmdp));
76 delim = *p++;
77 if (is09azAZ(delim) || delim == '\\')
78 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
81 * !!!
82 * The full-blown substitute command reset the remembered
83 * state of the 'c' and 'g' suffices.
85 sp->c_suffix = sp->g_suffix = 0;
88 * Get the pattern string, toss escaping characters.
90 * !!!
91 * Historic vi accepted any of the following forms:
93 * :s/abc/def/ change "abc" to "def"
94 * :s/abc/def change "abc" to "def"
95 * :s/abc/ delete "abc"
96 * :s/abc delete "abc"
98 * QUOTING NOTE:
100 * Only toss an escaping character if it escapes a delimiter.
101 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
102 * would be nice to be more regular, i.e. for each layer of
103 * escaping a single escaping character is removed, but that's
104 * not how the historic vi worked.
106 for (ptrn = t = p;;) {
107 if (p[0] == '\0' || p[0] == delim) {
108 if (p[0] == delim)
109 ++p;
111 * !!!
112 * Nul terminate the pattern string -- it's passed
113 * to regcomp which doesn't understand anything else.
115 *t = '\0';
116 break;
118 if (p[0] == '\\')
119 if (p[1] == delim)
120 ++p;
121 else if (p[1] == '\\')
122 *t++ = *p++;
123 *t++ = *p++;
127 * If the pattern string is empty, use the last RE (not just the
128 * last substitution RE).
130 if (*ptrn == '\0') {
131 if (sp->re == NULL) {
132 ex_emsg(sp, NULL, EXM_NOPREVRE);
133 return (1);
136 /* Re-compile the RE if necessary. */
137 if (!F_ISSET(sp, SC_RE_SEARCH) &&
138 re_compile(sp, sp->re, sp->re_len,
139 NULL, NULL, &sp->re_c, RE_C_SEARCH))
140 return (1);
141 flags = 0;
142 } else {
144 * !!!
145 * Compile the RE. Historic practice is that substitutes set
146 * the search direction as well as both substitute and search
147 * RE's. We compile the RE twice, as we don't want to bother
148 * ref counting the pattern string and (opaque) structure.
150 if (re_compile(sp, ptrn, t - ptrn, &sp->re,
151 &sp->re_len, &sp->re_c, RE_C_SEARCH))
152 return (1);
153 if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
154 &sp->subre_len, &sp->subre_c, RE_C_SUBST))
155 return (1);
157 flags = SUB_FIRST;
158 sp->searchdir = FORWARD;
160 re = &sp->re_c;
163 * Get the replacement string.
165 * The special character & (\& if O_MAGIC not set) matches the
166 * entire RE. No handling of & is required here, it's done by
167 * re_sub().
169 * The special character ~ (\~ if O_MAGIC not set) inserts the
170 * previous replacement string into this replacement string.
171 * Count ~'s to figure out how much space we need. We could
172 * special case nonexistent last patterns or whether or not
173 * O_MAGIC is set, but it's probably not worth the effort.
175 * QUOTING NOTE:
177 * Only toss an escaping character if it escapes a delimiter or
178 * if O_MAGIC is set and it escapes a tilde.
180 * !!!
181 * If the entire replacement pattern is "%", then use the last
182 * replacement pattern. This semantic was added to vi in System
183 * V and then percolated elsewhere, presumably around the time
184 * that it was added to their version of ed(1).
186 if (p[0] == '\0' || p[0] == delim) {
187 if (p[0] == delim)
188 ++p;
189 free(sp->repl);
190 sp->repl = NULL;
191 sp->repl_len = 0;
192 } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
193 p += p[1] == delim ? 2 : 1;
194 else {
195 for (rep = p, len = 0;
196 p[0] != '\0' && p[0] != delim; ++p, ++len)
197 if (p[0] == '~')
198 len += sp->repl_len;
199 GET_SPACE_RETW(sp, bp, blen, len);
200 for (t = bp, len = 0, p = rep;;) {
201 if (p[0] == '\0' || p[0] == delim) {
202 if (p[0] == delim)
203 ++p;
204 break;
206 if (p[0] == '\\') {
207 if (p[1] == delim)
208 ++p;
209 else if (p[1] == '\\') {
210 *t++ = *p++;
211 ++len;
212 } else if (p[1] == '~') {
213 ++p;
214 if (!O_ISSET(sp, O_MAGIC))
215 goto tilde;
217 } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
218 tilde: ++p;
219 MEMCPY(t, sp->repl, sp->repl_len);
220 t += sp->repl_len;
221 len += sp->repl_len;
222 continue;
224 *t++ = *p++;
225 ++len;
227 if ((sp->repl_len = len) != 0) {
228 free(sp->repl);
229 MALLOC(sp, sp->repl, len * sizeof(CHAR_T));
230 if (sp->repl == NULL) {
231 FREE_SPACEW(sp, bp, blen);
232 return (1);
234 MEMCPY(sp->repl, bp, len);
236 FREE_SPACEW(sp, bp, blen);
238 return (s(sp, cmdp, p, re, flags));
242 * ex_subagain --
243 * [line [,line]] & [cgr] [count] [#lp]]
245 * Substitute using the last substitute RE and replacement pattern.
247 * PUBLIC: int ex_subagain(SCR *, EXCMD *);
250 ex_subagain(SCR *sp, EXCMD *cmdp)
252 if (sp->subre == NULL) {
253 ex_emsg(sp, NULL, EXM_NOPREVRE);
254 return (1);
256 if (!F_ISSET(sp, SC_RE_SUBST) &&
257 re_compile(sp, sp->subre, sp->subre_len,
258 NULL, NULL, &sp->subre_c, RE_C_SUBST))
259 return (1);
260 return (s(sp,
261 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
265 * ex_subtilde --
266 * [line [,line]] ~ [cgr] [count] [#lp]]
268 * Substitute using the last RE and last substitute replacement pattern.
270 * PUBLIC: int ex_subtilde(SCR *, EXCMD *);
273 ex_subtilde(SCR *sp, EXCMD *cmdp)
275 if (sp->re == NULL) {
276 ex_emsg(sp, NULL, EXM_NOPREVRE);
277 return (1);
279 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
280 sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
281 return (1);
282 return (s(sp,
283 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
/*
 * s --
 *	Do the substitution.  This stuff is *really* tricky.  There are lots of
 *	special cases, and general nastiness.  Don't mess with it unless you're
 *	pretty confident.
 *
 * The nasty part of the substitution is what happens when the replacement
 * string contains newlines.  It's a bit tricky -- consider the information
 * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 * to build a set of newline offsets which we use to break the line up later,
 * when the replacement is done.  Don't change it unless you're *damned*
 * confident.
 */

/*
 * NEEDNEWLINE --
 *	Make sure sp->newl has room for one more newline offset, growing it
 *	by 25 entries when full.  On allocation failure the enclosing
 *	function returns 1.  The count is reset along with the length so
 *	that a later use does not index the now-NULL array.
 */
#define	NEEDNEWLINE(sp) {						\
	if (sp->newl_len == sp->newl_cnt) {				\
		sp->newl_len += 25;					\
		REALLOC(sp, sp->newl, size_t *,				\
		    sp->newl_len * sizeof(size_t));			\
		if (sp->newl == NULL) {					\
			sp->newl_len = 0;				\
			sp->newl_cnt = 0;				\
			return (1);					\
		}							\
	}								\
}

/*
 * BUILD --
 *	Append len characters starting at l to the build buffer.  Uses the
 *	caller's locals lb (buffer), lbclen (used length) and lblen
 *	(allocated length).  On allocation failure the enclosing function
 *	returns 1.
 *
 *	Expands to a braced block, and is used both with and without a
 *	trailing semicolon, so it must stay a plain block.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen = p2roundup(MAX(lbclen + (len), 256));		\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	MEMCPY(lb + lbclen, l, len);					\
	lbclen += len;							\
}

/*
 * NEEDSP --
 *	Make sure the build buffer has room for len more characters and set
 *	pnt to the current end of the built text.  Same locals and failure
 *	behavior as BUILD.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen = p2roundup(MAX(lbclen + (len), 256));		\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	pnt = lb + lbclen;						\
}
336 static int
337 s(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags)
339 EVENT ev;
340 MARK from, to;
341 TEXTH tiq[] = {{ 0 }};
342 recno_t elno, lno, slno;
343 u_long ul;
344 regmatch_t match[10];
345 size_t blen, cnt, last, lbclen, lblen, len, llen;
346 size_t offset, saved_offset, scno;
347 int cflag, lflag, nflag, pflag, rflag;
348 int didsub, do_eol_match, eflags, empty_ok, eval;
349 int linechanged, matched, quit, rval;
350 CHAR_T *bp, *lb;
351 enum nresult nret;
353 NEEDFILE(sp, cmdp);
355 slno = sp->lno;
356 scno = sp->cno;
359 * !!!
360 * Historically, the 'g' and 'c' suffices were always toggled as flags,
361 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
362 * not set, they were initialized to 0 for all substitute commands. If
363 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
364 * specified substitute/replacement patterns (see ex_s()).
366 if (!O_ISSET(sp, O_EDCOMPATIBLE))
367 sp->c_suffix = sp->g_suffix = 0;
370 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
371 * it only displayed the last change. I'd disallow them, but they are
372 * useful in combination with the [v]global commands. In the current
373 * model the problem is combining them with the 'c' flag -- the screen
374 * would have to flip back and forth between the confirm screen and the
375 * ex print screen, which would be pretty awful. We do display all
376 * changes, though, for what that's worth.
378 * !!!
379 * Historic vi was fairly strict about the order of "options", the
380 * count, and "flags". I'm somewhat fuzzy on the difference between
381 * options and flags, anyway, so this is a simpler approach, and we
382 * just take it them in whatever order the user gives them. (The ex
383 * usage statement doesn't reflect this.)
385 cflag = lflag = nflag = pflag = rflag = 0;
386 if (s == NULL)
387 goto noargs;
388 for (lno = OOBLNO; *s != '\0'; ++s)
389 switch (*s) {
390 case ' ':
391 case '\t':
392 continue;
393 case '+':
394 ++cmdp->flagoff;
395 break;
396 case '-':
397 --cmdp->flagoff;
398 break;
399 case '0': case '1': case '2': case '3': case '4':
400 case '5': case '6': case '7': case '8': case '9':
401 if (lno != OOBLNO)
402 goto usage;
403 errno = 0;
404 nret = nget_uslong(&ul, s, &s, 10);
405 lno = ul;
406 if (*s == '\0') /* Loop increment correction. */
407 --s;
408 if (nret != NUM_OK) {
409 if (nret == NUM_OVER)
410 msgq(sp, M_ERR, "153|Count overflow");
411 else if (nret == NUM_UNDER)
412 msgq(sp, M_ERR, "154|Count underflow");
413 else
414 msgq(sp, M_SYSERR, NULL);
415 return (1);
418 * In historic vi, the count was inclusive from the
419 * second address.
421 cmdp->addr1.lno = cmdp->addr2.lno;
422 cmdp->addr2.lno += lno - 1;
423 if (!db_exist(sp, cmdp->addr2.lno) &&
424 db_last(sp, &cmdp->addr2.lno))
425 return (1);
426 break;
427 case '#':
428 nflag = 1;
429 break;
430 case 'c':
431 sp->c_suffix = !sp->c_suffix;
433 /* Ex text structure initialization. */
434 if (F_ISSET(sp, SC_EX))
435 TAILQ_INIT(tiq);
436 break;
437 case 'g':
438 sp->g_suffix = !sp->g_suffix;
439 break;
440 case 'l':
441 lflag = 1;
442 break;
443 case 'p':
444 pflag = 1;
445 break;
446 case 'r':
447 if (LF_ISSET(SUB_FIRST)) {
448 msgq(sp, M_ERR,
449 "155|Regular expression specified; r flag meaningless");
450 return (1);
452 if (!F_ISSET(sp, SC_RE_SEARCH)) {
453 ex_emsg(sp, NULL, EXM_NOPREVRE);
454 return (1);
456 rflag = 1;
457 re = &sp->re_c;
458 break;
459 default:
460 goto usage;
463 if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
464 usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
465 return (1);
468 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
469 msgq(sp, M_ERR,
470 "156|The #, l and p flags may not be combined with the c flag in vi mode");
471 return (1);
475 * bp: if interactive, line cache
476 * blen: if interactive, line cache length
477 * lb: build buffer pointer.
478 * lbclen: current length of built buffer.
479 * lblen; length of build buffer.
481 bp = lb = NULL;
482 blen = lbclen = lblen = 0;
484 /* For each line... */
485 lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
486 for (matched = quit = 0,
487 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
489 /* Someone's unhappy, time to stop. */
490 if (INTERRUPTED(sp))
491 break;
493 /* Get the line. */
494 if (db_get(sp, lno, DBG_FATAL, &s, &llen))
495 goto err;
498 * Make a local copy if doing confirmation -- when calling
499 * the confirm routine we're likely to lose the cached copy.
501 if (sp->c_suffix) {
502 if (bp == NULL) {
503 GET_SPACE_RETW(sp, bp, blen, llen);
504 } else
505 ADD_SPACE_RETW(sp, bp, blen, llen);
506 MEMCPY(bp, s, llen);
507 s = bp;
510 /* Start searching from the beginning. */
511 offset = 0;
512 len = llen;
514 /* Reset the build buffer offset. */
515 lbclen = 0;
517 /* Reset empty match flag. */
518 empty_ok = 1;
521 * We don't want to have to do a setline if the line didn't
522 * change -- keep track of whether or not this line changed.
523 * If doing confirmations, don't want to keep setting the
524 * line if change is refused -- keep track of substitutions.
526 didsub = linechanged = 0;
528 /* New line, do an EOL match. */
529 do_eol_match = 1;
531 /* It's not nul terminated, but we pretend it is. */
532 eflags = REG_STARTEND;
535 * The search area is from s + offset to the EOL.
537 * Generally, match[0].rm_so is the offset of the start
538 * of the match from the start of the search, and offset
539 * is the offset of the start of the last search.
541 nextmatch: match[0].rm_so = 0;
542 match[0].rm_eo = len;
544 /* Get the next match. */
545 eval = regexec(re, s + offset, 10, match, eflags);
548 * There wasn't a match or if there was an error, deal with
549 * it. If there was a previous match in this line, resolve
550 * the changes into the database. Otherwise, just move on.
552 if (eval == REG_NOMATCH)
553 goto endmatch;
554 if (eval != 0) {
555 re_error(sp, eval, re);
556 goto err;
558 matched = 1;
560 /* Only the first search can match an anchored expression. */
561 eflags |= REG_NOTBOL;
564 * !!!
565 * It's possible to match 0-length strings -- for example, the
566 * command s;a*;X;, when matched against the string "aabb" will
567 * result in "XbXbX", i.e. the matches are "aa", the space
568 * between the b's and the space between the b's and the end of
569 * the string. There is a similar space between the beginning
570 * of the string and the a's. The rule that we use (because vi
571 * historically used it) is that any 0-length match, occurring
572 * immediately after a match, is ignored. Otherwise, the above
573 * example would have resulted in "XXbXbX". Another example is
574 * incorrectly using " *" to replace groups of spaces with one
575 * space.
577 * The way we do this is that if we just had a successful match,
578 * the starting offset does not skip characters, and the match
579 * is empty, ignore the match and move forward. If there's no
580 * more characters in the string, we were attempting to match
581 * after the last character, so quit.
583 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
584 empty_ok = 1;
585 if (len == 0)
586 goto endmatch;
587 BUILD(sp, s + offset, 1)
588 ++offset;
589 --len;
590 goto nextmatch;
593 /* Confirm change. */
594 if (sp->c_suffix) {
596 * Set the cursor position for confirmation. Note,
597 * if we matched on a '$', the cursor may be past
598 * the end of line.
600 from.lno = to.lno = lno;
601 from.cno = match[0].rm_so + offset;
602 to.cno = match[0].rm_eo + offset;
604 * Both ex and vi have to correct for a change before
605 * the first character in the line.
607 if (llen == 0)
608 from.cno = to.cno = 0;
609 if (F_ISSET(sp, SC_VI)) {
611 * Only vi has to correct for a change after
612 * the last character in the line.
614 * XXX
615 * It would be nice to change the vi code so
616 * that we could display a cursor past EOL.
618 if (to.cno >= llen)
619 to.cno = llen - 1;
620 if (from.cno >= llen)
621 from.cno = llen - 1;
623 sp->lno = from.lno;
624 sp->cno = from.cno;
625 if (vs_refresh(sp, 1))
626 goto err;
628 vs_update(sp, msg_cat(sp,
629 "169|Confirm change? [n]", NULL), NULL);
631 if (v_event_get(sp, &ev, 0, 0))
632 goto err;
633 switch (ev.e_event) {
634 case E_CHARACTER:
635 break;
636 case E_EOF:
637 case E_ERR:
638 case E_INTERRUPT:
639 goto lquit;
640 default:
641 v_event_err(sp, &ev);
642 goto lquit;
644 } else {
645 if (ex_print(sp, cmdp, &from, &to, 0) ||
646 ex_scprint(sp, &from, &to))
647 goto lquit;
648 if (ex_txt(sp, tiq, 0, TXT_CR))
649 goto err;
650 ev.e_c = TAILQ_FIRST(tiq)->lb[0];
653 switch (ev.e_c) {
654 case CH_YES:
655 break;
656 default:
657 case CH_NO:
658 didsub = 0;
659 BUILD(sp, s +offset, match[0].rm_eo);
660 goto skip;
661 case CH_QUIT:
662 /* Set the quit/interrupted flags. */
663 lquit: quit = 1;
664 F_SET(sp->gp, G_INTERRUPTED);
667 * Resolve any changes, then return to (and
668 * exit from) the main loop.
670 goto endmatch;
675 * Set the cursor to the last position changed, converting
676 * from 1-based to 0-based.
678 sp->lno = lno;
679 sp->cno = match[0].rm_so;
681 /* Copy the bytes before the match into the build buffer. */
682 BUILD(sp, s + offset, match[0].rm_so);
684 /* Substitute the matching bytes. */
685 didsub = 1;
686 if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
687 goto err;
689 /* Set the change flag so we know this line was modified. */
690 linechanged = 1;
692 /* Move past the matched bytes. */
693 skip: offset += match[0].rm_eo;
694 len -= match[0].rm_eo;
696 /* A match cannot be followed by an empty pattern. */
697 empty_ok = 0;
700 * If doing a global change with confirmation, we have to
701 * update the screen. The basic idea is to store the line
702 * so the screen update routines can find it, and restart.
704 if (didsub && sp->c_suffix && sp->g_suffix) {
706 * The new search offset will be the end of the
707 * modified line.
709 saved_offset = lbclen;
711 /* Copy the rest of the line. */
712 if (len)
713 BUILD(sp, s + offset, len)
715 /* Set the new offset. */
716 offset = saved_offset;
718 /* Store inserted lines, adjusting the build buffer. */
719 last = 0;
720 if (sp->newl_cnt) {
721 for (cnt = 0;
722 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
723 if (db_insert(sp, lno,
724 lb + last, sp->newl[cnt] - last))
725 goto err;
726 last = sp->newl[cnt] + 1;
727 ++sp->rptlines[L_ADDED];
729 lbclen -= last;
730 offset -= last;
731 sp->newl_cnt = 0;
734 /* Store and retrieve the line. */
735 if (db_set(sp, lno, lb + last, lbclen))
736 goto err;
737 if (db_get(sp, lno, DBG_FATAL, &s, &llen))
738 goto err;
739 ADD_SPACE_RETW(sp, bp, blen, llen)
740 MEMCPY(bp, s, llen);
741 s = bp;
742 len = llen - offset;
744 /* Restart the build. */
745 lbclen = 0;
746 BUILD(sp, s, offset);
749 * If we haven't already done the after-the-string
750 * match, do one. Set REG_NOTEOL so the '$' pattern
751 * only matches once.
753 if (!do_eol_match)
754 goto endmatch;
755 if (offset == len) {
756 do_eol_match = 0;
757 eflags |= REG_NOTEOL;
759 goto nextmatch;
763 * If it's a global:
765 * If at the end of the string, do a test for the after
766 * the string match. Set REG_NOTEOL so the '$' pattern
767 * only matches once.
769 if (sp->g_suffix && do_eol_match) {
770 if (len == 0) {
771 do_eol_match = 0;
772 eflags |= REG_NOTEOL;
774 goto nextmatch;
777 endmatch: if (!linechanged)
778 continue;
780 /* Copy any remaining bytes into the build buffer. */
781 if (len)
782 BUILD(sp, s + offset, len)
784 /* Store inserted lines, adjusting the build buffer. */
785 last = 0;
786 if (sp->newl_cnt) {
787 for (cnt = 0;
788 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
789 if (db_insert(sp,
790 lno, lb + last, sp->newl[cnt] - last))
791 goto err;
792 last = sp->newl[cnt] + 1;
793 ++sp->rptlines[L_ADDED];
795 lbclen -= last;
796 sp->newl_cnt = 0;
799 /* Store the changed line. */
800 if (db_set(sp, lno, lb + last, lbclen))
801 goto err;
803 /* Update changed line counter. */
804 if (sp->rptlchange != lno) {
805 sp->rptlchange = lno;
806 ++sp->rptlines[L_CHANGED];
810 * !!!
811 * Display as necessary. Historic practice is to only
812 * display the last line of a line split into multiple
813 * lines.
815 if (lflag || nflag || pflag) {
816 from.lno = to.lno = lno;
817 from.cno = to.cno = 0;
818 if (lflag)
819 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
820 if (nflag)
821 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
822 if (pflag)
823 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
828 * !!!
829 * Historically, vi attempted to leave the cursor at the same place if
830 * the substitution was done at the current cursor position. Otherwise
831 * it moved it to the first non-blank of the last line changed. There
832 * were some problems: for example, :s/$/foo/ with the cursor on the
833 * last character of the line left the cursor on the last character, or
834 * the & command with multiple occurrences of the matching string in the
835 * line usually left the cursor in a fairly random position.
837 * We try to do the same thing, with the exception that if the user is
838 * doing substitution with confirmation, we move to the last line about
839 * which the user was consulted, as opposed to the last line that they
840 * actually changed. This prevents a screen flash if the user doesn't
841 * change many of the possible lines.
843 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
844 sp->cno = 0;
845 (void)nonblank(sp, sp->lno, &sp->cno);
849 * If not in a global command, and nothing matched, say so.
850 * Else, if none of the lines displayed, put something up.
852 rval = 0;
853 if (!matched) {
854 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
855 msgq(sp, M_ERR, "157|No match found");
856 goto err;
858 } else if (!lflag && !nflag && !pflag)
859 F_SET(cmdp, E_AUTOPRINT);
861 if (0) {
862 err: rval = 1;
865 if (bp != NULL)
866 FREE_SPACEW(sp, bp, blen);
867 free(lb);
868 return (rval);
872 * re_compile --
873 * Compile the RE.
875 * PUBLIC: int re_compile(SCR *,
876 * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int);
879 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
881 size_t len;
882 int reflags, replaced, rval;
883 CHAR_T *p;
885 /* Set RE flags. */
886 reflags = 0;
887 if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
888 if (O_ISSET(sp, O_EXTENDED))
889 reflags |= REG_EXTENDED;
890 if (O_ISSET(sp, O_IGNORECASE))
891 reflags |= REG_ICASE;
892 if (O_ISSET(sp, O_ICLOWER)) {
893 for (p = ptrn, len = plen; len > 0; ++p, --len)
894 if (ISUPPER(*p))
895 break;
896 if (len == 0)
897 reflags |= REG_ICASE;
901 /* If we're replacing a saved value, clear the old one. */
902 if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
903 regfree(&sp->re_c);
904 F_CLR(sp, SC_RE_SEARCH);
906 if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
907 regfree(&sp->subre_c);
908 F_CLR(sp, SC_RE_SUBST);
912 * If we're saving the string, it's a pattern we haven't seen before,
913 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
914 * later recompilation. Free any previously saved value.
916 if (ptrnp != NULL) {
917 replaced = 0;
918 if (LF_ISSET(RE_C_CSCOPE)) {
919 if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
920 return (1);
922 * XXX
923 * Currently, the match-any-<blank> expression used in
924 * re_cscope_conv() requires extended RE's. This may
925 * not be right or safe.
927 reflags |= REG_EXTENDED;
928 } else if (LF_ISSET(RE_C_TAG)) {
929 if (re_tag_conv(sp, &ptrn, &plen, &replaced))
930 return (1);
931 } else
932 if (re_conv(sp, &ptrn, &plen, &replaced))
933 return (1);
935 /* Discard previous pattern. */
936 free(*ptrnp);
937 *ptrnp = NULL;
939 if (lenp != NULL)
940 *lenp = plen;
943 * Copy the string into allocated memory.
945 * XXX
946 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
947 * for now. There's just no other solution.
949 MALLOC(sp, *ptrnp, (plen + 1) * sizeof(CHAR_T));
950 if (*ptrnp != NULL) {
951 MEMCPY(*ptrnp, ptrn, plen);
952 (*ptrnp)[plen] = '\0';
955 /* Free up conversion-routine-allocated memory. */
956 if (replaced)
957 FREE_SPACEW(sp, ptrn, 0);
959 if (*ptrnp == NULL)
960 return (1);
962 ptrn = *ptrnp;
966 * XXX
967 * Regcomp isn't 8-bit clean, so we just lost if the pattern
968 * contained a nul. Bummer!
970 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
971 if (!LF_ISSET(RE_C_SILENT))
972 re_error(sp, rval, rep);
973 return (1);
976 if (LF_ISSET(RE_C_SEARCH))
977 F_SET(sp, SC_RE_SEARCH);
978 if (LF_ISSET(RE_C_SUBST))
979 F_SET(sp, SC_RE_SUBST);
981 return (0);
985 * re_conv --
986 * Convert vi's regular expressions into something that the
987 * the POSIX 1003.2 RE functions can handle.
989 * There are three conversions we make to make vi's RE's (specifically
990 * the global, search, and substitute patterns) work with POSIX RE's.
992 * 1: If O_MAGIC is not set, strip backslashes from the magic character
993 * set (.[*~) that have them, and add them to the ones that don't.
994 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
995 * from the last substitute command's replacement string. If O_MAGIC
996 * is set, it's the string "~".
997 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
998 * new RE escapes.
1000 * !!!/XXX
1001 * This doesn't exactly match the historic behavior of vi because we do
1002 * the ~ substitution before calling the RE engine, so magic characters
1003 * in the replacement string will be expanded by the RE engine, and they
1004 * weren't historically. It's a bug.
1006 static int
1007 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1009 size_t blen, len, needlen;
1010 int magic;
1011 CHAR_T *bp, *p, *t;
1014 * First pass through, we figure out how much space we'll need.
1015 * We do it in two passes, on the grounds that most of the time
1016 * the user is doing a search and won't have magic characters.
1017 * That way we can skip most of the memory allocation and copies.
1019 magic = 0;
1020 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1021 switch (*p) {
1022 case '\\':
1023 if (len > 1) {
1024 --len;
1025 switch (*++p) {
1026 case '<':
1027 magic = 1;
1028 needlen += RE_WSTART_LEN + 1;
1029 break;
1030 case '>':
1031 magic = 1;
1032 needlen += RE_WSTOP_LEN + 1;
1033 break;
1034 case '~':
1035 if (!O_ISSET(sp, O_MAGIC)) {
1036 magic = 1;
1037 needlen += sp->repl_len;
1039 break;
1040 case '.':
1041 case '[':
1042 case '*':
1043 if (!O_ISSET(sp, O_MAGIC)) {
1044 magic = 1;
1045 needlen += 1;
1047 break;
1048 default:
1049 needlen += 2;
1051 } else
1052 needlen += 1;
1053 break;
1054 case '~':
1055 if (O_ISSET(sp, O_MAGIC)) {
1056 magic = 1;
1057 needlen += sp->repl_len;
1059 break;
1060 case '.':
1061 case '[':
1062 case '*':
1063 if (!O_ISSET(sp, O_MAGIC)) {
1064 magic = 1;
1065 needlen += 2;
1067 break;
1068 default:
1069 needlen += 1;
1070 break;
1073 if (!magic) {
1074 *replacedp = 0;
1075 return (0);
1078 /* Get enough memory to hold the final pattern. */
1079 *replacedp = 1;
1080 GET_SPACE_RETW(sp, bp, blen, needlen);
1082 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1083 switch (*p) {
1084 case '\\':
1085 if (len > 1) {
1086 --len;
1087 switch (*++p) {
1088 case '<':
1089 MEMCPY(t,
1090 RE_WSTART, RE_WSTART_LEN);
1091 t += RE_WSTART_LEN;
1092 break;
1093 case '>':
1094 MEMCPY(t,
1095 RE_WSTOP, RE_WSTOP_LEN);
1096 t += RE_WSTOP_LEN;
1097 break;
1098 case '~':
1099 if (O_ISSET(sp, O_MAGIC))
1100 *t++ = '~';
1101 else {
1102 MEMCPY(t,
1103 sp->repl, sp->repl_len);
1104 t += sp->repl_len;
1106 break;
1107 case '.':
1108 case '[':
1109 case '*':
1110 if (O_ISSET(sp, O_MAGIC))
1111 *t++ = '\\';
1112 *t++ = *p;
1113 break;
1114 default:
1115 *t++ = '\\';
1116 *t++ = *p;
1118 } else
1119 *t++ = '\\';
1120 break;
1121 case '~':
1122 if (O_ISSET(sp, O_MAGIC)) {
1123 MEMCPY(t, sp->repl, sp->repl_len);
1124 t += sp->repl_len;
1125 } else
1126 *t++ = '~';
1127 break;
1128 case '.':
1129 case '[':
1130 case '*':
1131 if (!O_ISSET(sp, O_MAGIC))
1132 *t++ = '\\';
1133 *t++ = *p;
1134 break;
1135 default:
1136 *t++ = *p;
1137 break;
1140 *ptrnp = bp;
1141 *plenp = t - bp;
1142 return (0);
1146 * re_tag_conv --
1147 * Convert a tags search path into something that the POSIX
1148 * 1003.2 RE functions can handle.
1150 static int
1151 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1153 size_t blen, len;
1154 int lastdollar;
1155 CHAR_T *bp, *p, *t;
1157 len = *plenp;
1159 /* Max memory usage is 2 times the length of the string. */
1160 *replacedp = 1;
1161 GET_SPACE_RETW(sp, bp, blen, len * 2);
1163 p = *ptrnp;
1164 t = bp;
1166 /* If the last character is a '/' or '?', we just strip it. */
1167 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1168 --len;
1170 /* If the next-to-last or last character is a '$', it's magic. */
1171 if (len > 0 && p[len - 1] == '$') {
1172 --len;
1173 lastdollar = 1;
1174 } else
1175 lastdollar = 0;
1177 /* If the first character is a '/' or '?', we just strip it. */
1178 if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1179 ++p;
1180 --len;
1183 /* If the first or second character is a '^', it's magic. */
1184 if (p[0] == '^') {
1185 *t++ = *p++;
1186 --len;
1190 * Escape every other magic character we can find, meanwhile stripping
1191 * the backslashes ctags inserts when escaping the search delimiter
1192 * characters.
1194 for (; len > 0; --len) {
1195 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1196 ++p;
1197 --len;
1198 } else if (STRCHR(L("^.[]$*"), p[0]))
1199 *t++ = '\\';
1200 *t++ = *p++;
1202 if (lastdollar)
1203 *t++ = '$';
1205 *ptrnp = bp;
1206 *plenp = t - bp;
1207 return (0);
1211 * re_cscope_conv --
1212 * Convert a cscope search path into something that the POSIX
1213 * 1003.2 RE functions can handle.
1215 static int
1216 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1218 size_t blen, len, nspaces;
1219 CHAR_T *bp, *t;
1220 CHAR_T *p;
1221 CHAR_T *wp;
1222 size_t wlen;
1225 * Each space in the source line printed by cscope represents an
1226 * arbitrary sequence of spaces, tabs, and comments.
1228 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1229 #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1
1230 CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1231 for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1232 if (*p == ' ')
1233 ++nspaces;
1236 * Allocate plenty of space:
1237 * the string, plus potential escaping characters;
1238 * nspaces + 2 copies of CSCOPE_RE_SPACE;
1239 * ^, $, nul terminator characters.
1241 *replacedp = 1;
1242 len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1243 GET_SPACE_RETW(sp, bp, blen, len);
1245 p = *ptrnp;
1246 t = bp;
1248 *t++ = '^';
1249 MEMCPY(t, wp, wlen);
1250 t += wlen;
1252 for (len = *plenp; len > 0; ++p, --len)
1253 if (*p == ' ') {
1254 MEMCPY(t, wp, wlen);
1255 t += wlen;
1256 } else {
1257 if (STRCHR(L("\\^.[]$*+?()|{}"), *p))
1258 *t++ = '\\';
1259 *t++ = *p;
1262 MEMCPY(t, wp, wlen);
1263 t += wlen;
1264 *t++ = '$';
1266 *ptrnp = bp;
1267 *plenp = t - bp;
1268 return (0);
1272 * re_error --
1273 * Report a regular expression error.
1275 * PUBLIC: void re_error(SCR *, int, regex_t *);
1277 void
1278 re_error(SCR *sp, int errcode, regex_t *preg)
1280 size_t s;
1281 char *oe;
1283 s = regerror(errcode, preg, "", 0);
1284 MALLOC(sp, oe, s);
1285 if (oe != NULL) {
1286 (void)regerror(errcode, preg, oe, s);
1287 msgq(sp, M_ERR, "RE error: %s", oe);
1288 free(oe);
1293 * re_sub --
1294 * Do the substitution for a regular expression.
1296 static int
1297 re_sub(
1298 SCR *sp,
1299 CHAR_T *ip, /* Input line. */
1300 CHAR_T **lbp,
1301 size_t *lbclenp,
1302 size_t *lblenp,
1303 regmatch_t match[10])
1305 enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1306 size_t lbclen, lblen; /* Local copies. */
1307 size_t mlen; /* Match length. */
1308 size_t rpl; /* Remaining replacement length. */
1309 CHAR_T *rp; /* Replacement pointer. */
1310 int ch;
1311 int no; /* Match replacement offset. */
1312 CHAR_T *p, *t; /* Buffer pointers. */
1313 CHAR_T *lb; /* Local copies. */
1315 lb = *lbp; /* Get local copies. */
1316 lbclen = *lbclenp;
1317 lblen = *lblenp;
1320 * QUOTING NOTE:
1322 * There are some special sequences that vi provides in the
1323 * replacement patterns.
1324 * & string the RE matched (\& if nomagic set)
1325 * \# n-th regular subexpression
1326 * \E end \U, \L conversion
1327 * \e end \U, \L conversion
1328 * \l convert the next character to lower-case
1329 * \L convert to lower-case, until \E, \e, or end of replacement
1330 * \u convert the next character to upper-case
1331 * \U convert to upper-case, until \E, \e, or end of replacement
1333 * Otherwise, since this is the lowest level of replacement, discard
1334 * all escaping characters. This (hopefully) matches historic practice.
1336 #define OUTCH(ch, nltrans) { \
1337 ARG_CHAR_T __ch = (ch); \
1338 e_key_t __value = KEY_VAL(sp, __ch); \
1339 if (nltrans && (__value == K_CR || __value == K_NL)) { \
1340 NEEDNEWLINE(sp); \
1341 sp->newl[sp->newl_cnt++] = lbclen; \
1342 } else if (conv != C_NOTSET) { \
1343 switch (conv) { \
1344 case C_ONELOWER: \
1345 conv = C_NOTSET; \
1346 /* FALLTHROUGH */ \
1347 case C_LOWER: \
1348 if (ISUPPER(__ch)) \
1349 __ch = TOLOWER(__ch); \
1350 break; \
1351 case C_ONEUPPER: \
1352 conv = C_NOTSET; \
1353 /* FALLTHROUGH */ \
1354 case C_UPPER: \
1355 if (ISLOWER(__ch)) \
1356 __ch = TOUPPER(__ch); \
1357 break; \
1358 default: \
1359 abort(); \
1362 NEEDSP(sp, 1, p); \
1363 *p++ = __ch; \
1364 ++lbclen; \
1366 conv = C_NOTSET;
1367 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1368 switch (ch = *rp++) {
1369 case '&':
1370 if (O_ISSET(sp, O_MAGIC)) {
1371 no = 0;
1372 goto subzero;
1374 break;
1375 case '\\':
1376 if (rpl == 0)
1377 break;
1378 --rpl;
1379 switch (ch = *rp) {
1380 case '&':
1381 ++rp;
1382 if (!O_ISSET(sp, O_MAGIC)) {
1383 no = 0;
1384 goto subzero;
1386 break;
1387 case '0': case '1': case '2': case '3': case '4':
1388 case '5': case '6': case '7': case '8': case '9':
1389 no = *rp++ - '0';
1390 subzero: if (match[no].rm_so == -1 ||
1391 match[no].rm_eo == -1)
1392 break;
1393 mlen = match[no].rm_eo - match[no].rm_so;
1394 for (t = ip + match[no].rm_so; mlen--; ++t)
1395 OUTCH(*t, 0);
1396 continue;
1397 case 'e':
1398 case 'E':
1399 ++rp;
1400 conv = C_NOTSET;
1401 continue;
1402 case 'l':
1403 ++rp;
1404 conv = C_ONELOWER;
1405 continue;
1406 case 'L':
1407 ++rp;
1408 conv = C_LOWER;
1409 continue;
1410 case 'u':
1411 ++rp;
1412 conv = C_ONEUPPER;
1413 continue;
1414 case 'U':
1415 ++rp;
1416 conv = C_UPPER;
1417 continue;
1418 case '\r':
1419 OUTCH(ch, 0);
1420 continue;
1421 default:
1422 ++rp;
1423 break;
1426 OUTCH(ch, 1);
1429 *lbp = lb; /* Update caller's information. */
1430 *lbclenp = lbclen;
1431 *lblenp = lblen;
1432 return (0);