ex/ex_subst.c

   1 /*-
   2  * Copyright (c) 1992, 1993, 1994
   3  *      The Regents of the University of California.  All rights reserved.
   4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
   5  *      Keith Bostic.  All rights reserved.
   6  *
   7  * See the LICENSE file for redistribution information.
   8  */
   9
  10 #include "config.h"
  11
  12 #ifndef lint
  13 static const char sccsid[] = "$Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp $ (Berkeley) $Date: 2002/02/09 21:18:23 $";
  14 #endif /* not lint */
  15
  16 #include <sys/types.h>
  17 #include <sys/queue.h>
  18 #include <sys/time.h>
  19
  20 #include <bitstring.h>
  21 #include <ctype.h>
  22 #include <errno.h>
  23 #include <limits.h>
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <unistd.h>
  28
  29 #include "../common/common.h"
  30 #include "../vi/vi.h"
  31
  32 #define SUB_FIRST       0x01            /* The 'r' flag isn't reasonable. */
  33 #define SUB_MUSTSETR    0x02            /* The 'r' flag is required. */
  34
  35 static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
  36 static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
  37 static int re_sub __P((SCR *,
  38                 CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
  39 static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
  40 static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));
  41
  42 /*
  43  * ex_s --
  44  *      [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
  45  *
  46  *      Substitute on lines matching a pattern.
  47  *
  48  * PUBLIC: int ex_s __P((SCR *, EXCMD *));
  49  */
  50 int
  51 ex_s(SCR *sp, EXCMD *cmdp)
  52 {
  53         regex_t *re;
  54         size_t blen, len;
  55         u_int flags;
  56         int delim;
  57         CHAR_T *bp, *p, *ptrn, *rep, *t;
  58
  59         /*
  60          * Skip leading white space.
  61          *
  62          * !!!
  63          * Historic vi allowed any non-alphanumeric to serve as the
  64          * substitution command delimiter.
  65          *
  66          * !!!
  67          * If the arguments are empty, it's the same as &, i.e. we
  68          * repeat the last substitution.
  69          */
  70         if (cmdp->argc == 0)
  71                 goto subagain;
  72         for (p = cmdp->argv[0]->bp,
  73             len = cmdp->argv[0]->len; len > 0; --len, ++p) {
  74                 if (!isblank(*p))
  75                         break;
  76         }
  77         if (len == 0)
  78 subagain:       return (ex_subagain(sp, cmdp));
  79
  80         delim = *p++;
  81         if (isalnum(delim) || delim == '\\')
  82                 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
  83
  84         /*
  85          * !!!
  86          * The full-blown substitute command reset the remembered
  87          * state of the 'c' and 'g' suffices.
  88          */
  89         sp->c_suffix = sp->g_suffix = 0;
  90
  91         /*
  92          * Get the pattern string, toss escaping characters.
  93          *
  94          * !!!
  95          * Historic vi accepted any of the following forms:
  96          *
  97          *      :s/abc/def/             change "abc" to "def"
  98          *      :s/abc/def              change "abc" to "def"
  99          *      :s/abc/                 delete "abc"
 100          *      :s/abc                  delete "abc"
 101          *
 102          * QUOTING NOTE:
 103          *
 104          * Only toss an escaping character if it escapes a delimiter.
 105          * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
 106          * would be nice to be more regular, i.e. for each layer of
 107          * escaping a single escaping character is removed, but that's
 108          * not how the historic vi worked.
 109          */
 110         for (ptrn = t = p;;) {
 111                 if (p[0] == '\0' || p[0] == delim) {
 112                         if (p[0] == delim)
 113                                 ++p;
 114                         /*
 115                          * !!!
 116                          * Nul terminate the pattern string -- it's passed
 117                          * to regcomp which doesn't understand anything else.
 118                          */
 119                         *t = '\0';
 120                         break;
 121                 }
 122                 if (p[0] == '\\')
 123                         if (p[1] == delim)
 124                                 ++p;
 125                         else if (p[1] == '\\')
 126                                 *t++ = *p++;
 127                 *t++ = *p++;
 128         }
 129
 130         /*
 131          * If the pattern string is empty, use the last RE (not just the
 132          * last substitution RE).
 133          */
 134         if (*ptrn == '\0') {
 135                 if (sp->re == NULL) {
 136                         ex_emsg(sp, NULL, EXM_NOPREVRE);
 137                         return (1);
 138                 }
 139
 140                 /* Re-compile the RE if necessary. */
 141                 if (!F_ISSET(sp, SC_RE_SEARCH) &&
 142                     re_compile(sp, sp->re, sp->re_len,
 143                     NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
 144                         return (1);
 145                 flags = 0;
 146         } else {
 147                 /*
 148                  * !!!
 149                  * Compile the RE.  Historic practice is that substitutes set
 150                  * the search direction as well as both substitute and search
 151                  * RE's.  We compile the RE twice, as we don't want to bother
 152                  * ref counting the pattern string and (opaque) structure.
 153                  */
 154                 if (re_compile(sp, ptrn, t - ptrn, &sp->re,
 155                     &sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
 156                         return (1);
 157                 if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
 158                     &sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
 159                         return (1);
 160
 161                 flags = SUB_FIRST;
 162                 sp->searchdir = FORWARD;
 163         }
 164         re = &sp->re_c;
 165
 166         /*
 167          * Get the replacement string.
 168          *
 169          * The special character & (\& if O_MAGIC not set) matches the
 170          * entire RE.  No handling of & is required here, it's done by
 171          * re_sub().
 172          *
 173          * The special character ~ (\~ if O_MAGIC not set) inserts the
 174          * previous replacement string into this replacement string.
 175          * Count ~'s to figure out how much space we need.  We could
 176          * special case nonexistent last patterns or whether or not
 177          * O_MAGIC is set, but it's probably not worth the effort.
 178          *
 179          * QUOTING NOTE:
 180          *
 181          * Only toss an escaping character if it escapes a delimiter or
 182          * if O_MAGIC is set and it escapes a tilde.
 183          *
 184          * !!!
 185          * If the entire replacement pattern is "%", then use the last
 186          * replacement pattern.  This semantic was added to vi in System
 187          * V and then percolated elsewhere, presumably around the time
 188          * that it was added to their version of ed(1).
 189          */
 190         if (p[0] == L('\0') || p[0] == delim) {
 191                 if (p[0] == delim)
 192                         ++p;
 193                 if (sp->repl != NULL)
 194                         free(sp->repl);
 195                 sp->repl = NULL;
 196                 sp->repl_len = 0;
 197         } else if (p[0] == L('%') && (p[1] == L('\0') || p[1] == delim))
 198                 p += p[1] == delim ? 2 : 1;
 199         else {
 200                 for (rep = p, len = 0;
 201                     p[0] != L('\0') && p[0] != delim; ++p, ++len)
 202                         if (p[0] == L('~'))
 203                                 len += sp->repl_len;
 204                 GET_SPACE_RETW(sp, bp, blen, len);
 205                 for (t = bp, len = 0, p = rep;;) {
 206                         if (p[0] == L('\0') || p[0] == delim) {
 207                                 if (p[0] == delim)
 208                                         ++p;
 209                                 break;
 210                         }
 211                         if (p[0] == L('\\')) {
 212                                 if (p[1] == delim)
 213                                         ++p;
 214                                 else if (p[1] == L('\\')) {
 215                                         *t++ = *p++;
 216                                         ++len;
 217                                 } else if (p[1] == L('~')) {
 218                                         ++p;
 219                                         if (!O_ISSET(sp, O_MAGIC))
 220                                                 goto tilde;
 221                                 }
 222                         } else if (p[0] == L('~') && O_ISSET(sp, O_MAGIC)) {
 223 tilde:                          ++p;
 224                                 MEMCPYW(t, sp->repl, sp->repl_len);
 225                                 t += sp->repl_len;
 226                                 len += sp->repl_len;
 227                                 continue;
 228                         }
 229                         *t++ = *p++;
 230                         ++len;
 231                 }
 232                 if ((sp->repl_len = len) != 0) {
 233                         if (sp->repl != NULL)
 234                                 free(sp->repl);
 235                         if ((sp->repl = malloc(len * sizeof(CHAR_T))) == NULL) {
 236                                 msgq(sp, M_SYSERR, NULL);
 237                                 FREE_SPACEW(sp, bp, blen);
 238                                 return (1);
 239                         }
 240                         MEMCPYW(sp->repl, bp, len);
 241                 }
 242                 FREE_SPACEW(sp, bp, blen);
 243         }
 244         return (s(sp, cmdp, p, re, flags));
 245 }
 246
 247 /*
 248  * ex_subagain --
 249  *      [line [,line]] & [cgr] [count] [#lp]]
 250  *
 251  *      Substitute using the last substitute RE and replacement pattern.
 252  *
 253  * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
 254  */
 255 int
 256 ex_subagain(SCR *sp, EXCMD *cmdp)
 257 {
 258         if (sp->subre == NULL) {
 259                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 260                 return (1);
 261         }
 262         if (!F_ISSET(sp, SC_RE_SUBST) &&
 263             re_compile(sp, sp->subre, sp->subre_len,
 264             NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
 265                 return (1);
 266         return (s(sp,
 267             cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
 268 }
 269
 270 /*
 271  * ex_subtilde --
 272  *      [line [,line]] ~ [cgr] [count] [#lp]]
 273  *
 274  *      Substitute using the last RE and last substitute replacement pattern.
 275  *
 276  * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
 277  */
 278 int
 279 ex_subtilde(SCR *sp, EXCMD *cmdp)
 280 {
 281         if (sp->re == NULL) {
 282                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 283                 return (1);
 284         }
 285         if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
 286             sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
 287                 return (1);
 288         return (s(sp,
 289             cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
 290 }
 291
 292 /*
 293  * s --
 294  * Do the substitution.  This stuff is *really* tricky.  There are lots of
 295  * special cases, and general nastiness.  Don't mess with it unless you're
 296  * pretty confident.
 297  *
 298  * The nasty part of the substitution is what happens when the replacement
 299  * string contains newlines.  It's a bit tricky -- consider the information
 300  * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 301  * to build a set of newline offsets which we use to break the line up later,
 302  * when the replacement is done.  Don't change it unless you're *damned*
 303  * confident.
 304  */
 305 #define NEEDNEWLINE(sp) {                                               \
 306         if (sp->newl_len == sp->newl_cnt) {                             \
 307                 sp->newl_len += 25;                                     \
 308                 REALLOC(sp, sp->newl, size_t *,                         \
 309                     sp->newl_len * sizeof(size_t));                     \
 310                 if (sp->newl == NULL) {                                 \
 311                         sp->newl_len = 0;                               \
 312                         return (1);                                     \
 313                 }                                                       \
 314         }                                                               \
 315 }
 316
 317 #define BUILD(sp, l, len) {                                             \
 318         if (lbclen + (len) > lblen) {                                   \
 319                 lblen += MAX(lbclen + (len), 256);                      \
 320                 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));      \
 321                 if (lb == NULL) {                                       \
 322                         lbclen = 0;                                     \
 323                         return (1);                                     \
 324                 }                                                       \
 325         }                                                               \
 326         MEMCPYW(lb + lbclen, l, len);                                   \
 327         lbclen += len;                                                  \
 328 }
 329
 330 #define NEEDSP(sp, len, pnt) {                                          \
 331         if (lbclen + (len) > lblen) {                                   \
 332                 lblen += MAX(lbclen + (len), 256);                      \
 333                 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));      \
 334                 if (lb == NULL) {                                       \
 335                         lbclen = 0;                                     \
 336                         return (1);                                     \
 337                 }                                                       \
 338                 pnt = lb + lbclen;                                      \
 339         }                                                               \
 340 }
 341
 342 static int
 343 s(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags)
 344 {
 345         EVENT ev;
 346         MARK from, to;
 347         TEXTH tiq;
 348         db_recno_t elno, lno, slno;
 349         u_long ul;
 350         regmatch_t match[10];
 351         size_t blen, cnt, last, lbclen, lblen, len, llen;
 352         size_t offset, saved_offset, scno;
 353         int cflag, lflag, nflag, pflag, rflag;
 354         int didsub, do_eol_match, eflags, empty_ok, eval;
 355         int linechanged, matched, quit, rval;
 356         CHAR_T *p, *lb, *bp;
 357         enum nresult nret;
 358
 359         NEEDFILE(sp, cmdp);
 360
 361         slno = sp->lno;
 362         scno = sp->cno;
 363
 364         /*
 365          * !!!
 366          * Historically, the 'g' and 'c' suffices were always toggled as flags,
 367          * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
 368          * not set, they were initialized to 0 for all substitute commands.  If
 369          * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
 370          * specified substitute/replacement patterns (see ex_s()).
 371          */
 372         if (!O_ISSET(sp, O_EDCOMPATIBLE))
 373                 sp->c_suffix = sp->g_suffix = 0;
 374
 375         /*
 376          * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
 377          * it only displayed the last change.  I'd disallow them, but they are
 378          * useful in combination with the [v]global commands.  In the current
 379          * model the problem is combining them with the 'c' flag -- the screen
 380          * would have to flip back and forth between the confirm screen and the
 381          * ex print screen, which would be pretty awful.  We do display all
 382          * changes, though, for what that's worth.
 383          *
 384          * !!!
 385          * Historic vi was fairly strict about the order of "options", the
 386          * count, and "flags".  I'm somewhat fuzzy on the difference between
 387          * options and flags, anyway, so this is a simpler approach, and we
  388          * just take them in whatever order the user gives them.  (The ex
 389          * usage statement doesn't reflect this.)
 390          */
 391         cflag = lflag = nflag = pflag = rflag = 0;
 392         if (s == NULL)
 393                 goto noargs;
 394         for (lno = OOBLNO; *s != '\0'; ++s)
 395                 switch (*s) {
 396                 case ' ':
 397                 case '\t':
 398                         continue;
 399                 case '+':
 400                         ++cmdp->flagoff;
 401                         break;
 402                 case '-':
 403                         --cmdp->flagoff;
 404                         break;
 405                 case '0': case '1': case '2': case '3': case '4':
 406                 case '5': case '6': case '7': case '8': case '9':
 407                         if (lno != OOBLNO)
 408                                 goto usage;
 409                         errno = 0;
 410                         nret = nget_uslong(sp, &ul, s, &s, 10);
 411                         lno = ul;
 412                         if (*s == '\0')         /* Loop increment correction. */
 413                                 --s;
 414                         if (nret != NUM_OK) {
 415                                 if (nret == NUM_OVER)
 416                                         msgq(sp, M_ERR, "153|Count overflow");
 417                                 else if (nret == NUM_UNDER)
 418                                         msgq(sp, M_ERR, "154|Count underflow");
 419                                 else
 420                                         msgq(sp, M_SYSERR, NULL);
 421                                 return (1);
 422                         }
 423                         /*
 424                          * In historic vi, the count was inclusive from the
 425                          * second address.
 426                          */
 427                         cmdp->addr1.lno = cmdp->addr2.lno;
 428                         cmdp->addr2.lno += lno - 1;
 429                         if (!db_exist(sp, cmdp->addr2.lno) &&
 430                             db_last(sp, &cmdp->addr2.lno))
 431                                 return (1);
 432                         break;
 433                 case '#':
 434                         nflag = 1;
 435                         break;
 436                 case 'c':
 437                         sp->c_suffix = !sp->c_suffix;
 438
 439                         /* Ex text structure initialization. */
 440                         if (F_ISSET(sp, SC_EX)) {
 441                                 memset(&tiq, 0, sizeof(TEXTH));
 442                                 CIRCLEQ_INIT(&tiq);
 443                         }
 444                         break;
 445                 case 'g':
 446                         sp->g_suffix = !sp->g_suffix;
 447                         break;
 448                 case 'l':
 449                         lflag = 1;
 450                         break;
 451                 case 'p':
 452                         pflag = 1;
 453                         break;
 454                 case 'r':
 455                         if (LF_ISSET(SUB_FIRST)) {
 456                                 msgq(sp, M_ERR,
 457                     "155|Regular expression specified; r flag meaningless");
 458                                 return (1);
 459                         }
 460                         if (!F_ISSET(sp, SC_RE_SEARCH)) {
 461                                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 462                                 return (1);
 463                         }
 464                         rflag = 1;
 465                         re = &sp->re_c;
 466                         break;
 467                 default:
 468                         goto usage;
 469                 }
 470
 471         if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) {
 472 usage:          ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
 473                 return (1);
 474         }
 475
 476 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
 477                 msgq(sp, M_ERR,
 478 "156|The #, l and p flags may not be combined with the c flag in vi mode");
 479                 return (1);
 480         }
 481
 482         /*
 483          * bp:          if interactive, line cache
 484          * blen:        if interactive, line cache length
 485          * lb:          build buffer pointer.
 486          * lbclen:      current length of built buffer.
 487          * lblen;       length of build buffer.
 488          */
 489         bp = lb = NULL;
 490         blen = lbclen = lblen = 0;
 491
 492         /* For each line... */
 493         lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
 494         for (matched = quit = 0,
 495             elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
 496
 497                 /* Someone's unhappy, time to stop. */
 498                 if (INTERRUPTED(sp))
 499                         break;
 500
 501                 /* Get the line. */
 502                 if (db_get(sp, lno, DBG_FATAL, &s, &llen))
 503                         goto err;
 504
 505                 /*
 506                  * Make a local copy if doing confirmation -- when calling
 507                  * the confirm routine we're likely to lose the cached copy.
 508                  */
 509                 if (sp->c_suffix) {
 510                         if (bp == NULL) {
 511                                 GET_SPACE_RETW(sp, bp, blen, llen);
 512                         } else
 513                                 ADD_SPACE_RETW(sp, bp, blen, llen);
 514                         MEMCPYW(bp, s, llen);
 515                         s = bp;
 516                 }
 517
 518                 /* Start searching from the beginning. */
 519                 offset = 0;
 520                 len = llen;
 521
 522                 /* Reset the build buffer offset. */
 523                 lbclen = 0;
 524
 525                 /* Reset empty match flag. */
 526                 empty_ok = 1;
 527
 528                 /*
 529                  * We don't want to have to do a setline if the line didn't
 530                  * change -- keep track of whether or not this line changed.
 531                  * If doing confirmations, don't want to keep setting the
 532                  * line if change is refused -- keep track of substitutions.
 533                  */
 534                 didsub = linechanged = 0;
 535
 536                 /* New line, do an EOL match. */
 537                 do_eol_match = 1;
 538
 539                 /* It's not nul terminated, but we pretend it is. */
 540                 eflags = REG_STARTEND;
 541
 542                 /*
 543                  * The search area is from s + offset to the EOL.
 544                  *
 545                  * Generally, match[0].rm_so is the offset of the start
 546                  * of the match from the start of the search, and offset
 547                  * is the offset of the start of the last search.
 548                  */
 549 nextmatch:      match[0].rm_so = 0;
 550                 match[0].rm_eo = len;
 551
 552                 /* Get the next match. */
 553                 eval = regexec(re, s + offset, 10, match, eflags);
 554
 555                 /*
 556                  * There wasn't a match or if there was an error, deal with
 557                  * it.  If there was a previous match in this line, resolve
 558                  * the changes into the database.  Otherwise, just move on.
 559                  */
 560                 if (eval == REG_NOMATCH)
 561                         goto endmatch;
 562                 if (eval != 0) {
 563                         re_error(sp, eval, re);
 564                         goto err;
 565                 }
 566                 matched = 1;
 567
 568                 /* Only the first search can match an anchored expression. */
 569                 eflags |= REG_NOTBOL;
 570
 571                 /*
 572                  * !!!
 573                  * It's possible to match 0-length strings -- for example, the
 574                  * command s;a*;X;, when matched against the string "aabb" will
 575                  * result in "XbXbX", i.e. the matches are "aa", the space
 576                  * between the b's and the space between the b's and the end of
 577                  * the string.  There is a similar space between the beginning
 578                  * of the string and the a's.  The rule that we use (because vi
 579                  * historically used it) is that any 0-length match, occurring
 580                  * immediately after a match, is ignored.  Otherwise, the above
 581                  * example would have resulted in "XXbXbX".  Another example is
 582                  * incorrectly using " *" to replace groups of spaces with one
 583                  * space.
 584                  *
 585                  * The way we do this is that if we just had a successful match,
 586                  * the starting offset does not skip characters, and the match
 587                  * is empty, ignore the match and move forward.  If there's no
 588                  * more characters in the string, we were attempting to match
 589                  * after the last character, so quit.
 590                  */
 591                 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
 592                         empty_ok = 1;
 593                         if (len == 0)
 594                                 goto endmatch;
 595                         BUILD(sp, s + offset, 1)
 596                         ++offset;
 597                         --len;
 598                         goto nextmatch;
 599                 }
 600
 601                 /* Confirm change. */
 602                 if (sp->c_suffix) {
 603                         /*
 604                          * Set the cursor position for confirmation.  Note,
 605                          * if we matched on a '$', the cursor may be past
 606                          * the end of line.
 607                          */
 608                         from.lno = to.lno = lno;
 609                         from.cno = match[0].rm_so + offset;
 610                         to.cno = match[0].rm_eo + offset;
 611                         /*
 612                          * Both ex and vi have to correct for a change before
 613                          * the first character in the line.
 614                          */
 615                         if (llen == 0)
 616                                 from.cno = to.cno = 0;
 617                         if (F_ISSET(sp, SC_VI)) {
 618                                 /*
 619                                  * Only vi has to correct for a change after
 620                                  * the last character in the line.
 621                                  *
 622                                  * XXX
 623                                  * It would be nice to change the vi code so
 624                                  * that we could display a cursor past EOL.
 625                                  */
 626                                 if (to.cno >= llen)
 627                                         to.cno = llen - 1;
 628                                 if (from.cno >= llen)
 629                                         from.cno = llen - 1;
 630
 631                                 sp->lno = from.lno;
 632                                 sp->cno = from.cno;
 633                                 if (vs_refresh(sp, 1))
 634                                         goto err;
 635
 636                                 vs_update(sp, msg_cat(sp,
 637                                     "169|Confirm change? [n]", NULL), NULL);
 638
 639                                 if (v_event_get(sp, &ev, 0, 0))
 640                                         goto err;
 641                                 switch (ev.e_event) {
 642                                 case E_CHARACTER:
 643                                         break;
 644                                 case E_EOF:
 645                                 case E_ERR:
 646                                 case E_INTERRUPT:
 647                                         goto lquit;
 648                                 default:
 649                                         v_event_err(sp, &ev);
 650                                         goto lquit;
 651                                 }
 652                         } else {
 653                                 if (ex_print(sp, cmdp, &from, &to, 0) ||
 654                                     ex_scprint(sp, &from, &to))
 655                                         goto lquit;
 656                                 if (ex_txt(sp, &tiq, 0, TXT_CR))
 657                                         goto err;
 658                                 ev.e_c = tiq.cqh_first->lb[0];
 659                         }
 660
 661                         switch (ev.e_c) {
 662                         case CH_YES:
 663                                 break;
 664                         default:
 665                         case CH_NO:
 666                                 didsub = 0;
 667                                 BUILD(sp, s +offset, match[0].rm_eo);
 668                                 goto skip;
 669                         case CH_QUIT:
 670                                 /* Set the quit/interrupted flags. */
 671 lquit:                          quit = 1;
 672                                 F_SET(sp->gp, G_INTERRUPTED);
 673
 674                                 /*
 675                                  * Resolve any changes, then return to (and
 676                                  * exit from) the main loop.
 677                                  */
 678                                 goto endmatch;
 679                         }
 680                 }
 681
 682                 /*
 683                  * Set the cursor to the last position changed, converting
 684                  * from 1-based to 0-based.
 685                  */
 686                 sp->lno = lno;
 687                 sp->cno = match[0].rm_so;
 688
 689                 /* Copy the bytes before the match into the build buffer. */
 690                 BUILD(sp, s + offset, match[0].rm_so);
 691
 692                 /* Substitute the matching bytes. */
 693                 didsub = 1;
 694                 if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
 695                         goto err;
 696
 697                 /* Set the change flag so we know this line was modified. */
 698                 linechanged = 1;
 699
 700                 /* Move past the matched bytes. */
 701 skip:           offset += match[0].rm_eo;
 702                 len -= match[0].rm_eo;
 703
 704                 /* A match cannot be followed by an empty pattern. */
 705                 empty_ok = 0;
 706
 707                 /*
 708                  * If doing a global change with confirmation, we have to
 709                  * update the screen.  The basic idea is to store the line
 710                  * so the screen update routines can find it, and restart.
 711                  */
 712                 if (didsub && sp->c_suffix && sp->g_suffix) {
 713                         /*
 714                          * The new search offset will be the end of the
 715                          * modified line.
 716                          */
 717                         saved_offset = lbclen;
 718
 719                         /* Copy the rest of the line. */
 720                         if (len)
 721                                 BUILD(sp, s + offset, len)
 722
 723                         /* Set the new offset. */
 724                         offset = saved_offset;
 725
 726                         /* Store inserted lines, adjusting the build buffer. */
 727                         last = 0;
 728                         if (sp->newl_cnt) {
 729                                 for (cnt = 0;
 730                                     cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
 731                                         if (db_insert(sp, lno,
 732                                             lb + last, sp->newl[cnt] - last))
 733                                                 goto err;
 734                                         last = sp->newl[cnt] + 1;
 735                                         ++sp->rptlines[L_ADDED];
 736                                 }
 737                                 lbclen -= last;
 738                                 offset -= last;
 739                                 sp->newl_cnt = 0;
 740                         }
 741
 742                         /* Store and retrieve the line. */
 743                         if (db_set(sp, lno, lb + last, lbclen))
 744                                 goto err;
 745                         if (db_get(sp, lno, DBG_FATAL, &s, &llen))
 746                                 goto err;
 747                         ADD_SPACE_RETW(sp, bp, blen, llen)
 748                         MEMCPYW(bp, s, llen);
 749                         s = bp;
 750                         len = llen - offset;
 751
 752                         /* Restart the build. */
 753                         lbclen = 0;
 754                         BUILD(sp, s, offset);
 755
 756                         /*
 757                          * If we haven't already done the after-the-string
 758                          * match, do one.  Set REG_NOTEOL so the '$' pattern
 759                          * only matches once.
 760                          */
 761                         if (!do_eol_match)
 762                                 goto endmatch;
 763                         if (offset == len) {
 764                                 do_eol_match = 0;
 765                                 eflags |= REG_NOTEOL;
 766                         }
 767                         goto nextmatch;
 768                 }
 769
 770                 /*
 771                  * If it's a global:
 772                  *
 773                  * If at the end of the string, do a test for the after
 774                  * the string match.  Set REG_NOTEOL so the '$' pattern
 775                  * only matches once.
 776                  */
 777                 if (sp->g_suffix && do_eol_match) {
 778                         if (len == 0) {
 779                                 do_eol_match = 0;
 780                                 eflags |= REG_NOTEOL;
 781                         }
 782                         goto nextmatch;
 783                 }
 784
 785 endmatch:       if (!linechanged)
 786                         continue;
 787
 788                 /* Copy any remaining bytes into the build buffer. */
 789                 if (len)
 790                         BUILD(sp, s + offset, len)
 791
 792                 /* Store inserted lines, adjusting the build buffer. */
 793                 last = 0;
 794                 if (sp->newl_cnt) {
 795                         for (cnt = 0;
 796                             cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
 797                                 if (db_insert(sp,
 798                                     lno, lb + last, sp->newl[cnt] - last))
 799                                         goto err;
 800                                 last = sp->newl[cnt] + 1;
 801                                 ++sp->rptlines[L_ADDED];
 802                         }
 803                         lbclen -= last;
 804                         sp->newl_cnt = 0;
 805                 }
 806
 807                 /* Store the changed line. */
 808                 if (db_set(sp, lno, lb + last, lbclen))
 809                         goto err;
 810
 811                 /* Update changed line counter. */
 812                 if (sp->rptlchange != lno) {
 813                         sp->rptlchange = lno;
 814                         ++sp->rptlines[L_CHANGED];
 815                 }
 816
 817                 /*
 818                  * !!!
 819                  * Display as necessary.  Historic practice is to only
 820                  * display the last line of a line split into multiple
 821                  * lines.
 822                  */
 823                 if (lflag || nflag || pflag) {
 824                         from.lno = to.lno = lno;
 825                         from.cno = to.cno = 0;
 826                         if (lflag)
 827                                 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
 828                         if (nflag)
 829                                 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
 830                         if (pflag)
 831                                 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
 832                 }
 833         }
 834
 835         /*
 836          * !!!
 837          * Historically, vi attempted to leave the cursor at the same place if
 838          * the substitution was done at the current cursor position.  Otherwise
 839          * it moved it to the first non-blank of the last line changed.  There
 840          * were some problems: for example, :s/$/foo/ with the cursor on the
 841          * last character of the line left the cursor on the last character, or
 842          * the & command with multiple occurrences of the matching string in the
 843          * line usually left the cursor in a fairly random position.
 844          *
 845          * We try to do the same thing, with the exception that if the user is
 846          * doing substitution with confirmation, we move to the last line about
 847          * which the user was consulted, as opposed to the last line that they
 848          * actually changed.  This prevents a screen flash if the user doesn't
 849          * change many of the possible lines.
 850          */
 851         if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
 852                 sp->cno = 0;
 853                 (void)nonblank(sp, sp->lno, &sp->cno);
 854         }
 855
 856         /*
 857          * If not in a global command, and nothing matched, say so.
 858          * Else, if none of the lines displayed, put something up.
 859          */
 860         rval = 0;
 861         if (!matched) {
 862                 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
 863                         msgq(sp, M_ERR, "157|No match found");
 864                         goto err;
 865                 }
 866         } else if (!lflag && !nflag && !pflag)
 867                 F_SET(cmdp, E_AUTOPRINT);
 868
 869         if (0) {
 870 err:            rval = 1;
 871         }
 872
 873         if (bp != NULL)
 874                 FREE_SPACEW(sp, bp, blen);
 875         if (lb != NULL)
 876                 free(lb);
 877         return (rval);
 878 }
 879
 880 /*
 881  * re_compile --
 882  *      Compile the RE.
 883  *
 884  * PUBLIC: int re_compile __P((SCR *,
 885  * PUBLIC:     CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
 886  */
 887 int
 888 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
 889 {
 890         size_t len;
 891         int reflags, replaced, rval;
 892         CHAR_T *p;
 893
 894         /* Set RE flags. */
 895         reflags = 0;
 896         if (LF_ISSET(SEARCH_EXTEND))
 897                 reflags |= REG_EXTENDED;
 898         if (LF_ISSET(SEARCH_IC))
 899                 reflags |= REG_ICASE;
 900         if (LF_ISSET(SEARCH_LITERAL))
 901                 reflags |= REG_NOSPEC;
 902         if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
 903                 if (O_ISSET(sp, O_EXTENDED))
 904                         reflags |= REG_EXTENDED;
 905                 if (O_ISSET(sp, O_IGNORECASE))
 906                         reflags |= REG_ICASE;
 907                 if (O_ISSET(sp, O_ICLOWER))
 908                         goto iclower;
 909         }
 910         if (LF_ISSET(SEARCH_ICL)) {
 911 iclower:        for (p = ptrn, len = plen; len > 0; ++p, --len)
 912                         if (isupper(*p))
 913                                 break;
 914                 if (len == 0)
 915                         reflags |= REG_ICASE;
 916         }
 917
 918         /* If we're replacing a saved value, clear the old one. */
 919         if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
 920                 regfree(&sp->re_c);
 921                 F_CLR(sp, SC_RE_SEARCH);
 922         }
 923         if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
 924                 regfree(&sp->subre_c);
 925                 F_CLR(sp, SC_RE_SUBST);
 926         }
 927
 928         /*
 929          * If we're saving the string, it's a pattern we haven't seen before,
 930          * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
 931          * later recompilation.   Free any previously saved value.
 932          */
 933         if (ptrnp != NULL) {
 934                 replaced = 0;
 935                 if (LF_ISSET(SEARCH_CSCOPE)) {
 936                         if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
 937                                 return (1);
 938                         /*
 939                          * XXX
 940                          * Currently, the match-any-<blank> expression used in
 941                          * re_cscope_conv() requires extended RE's.  This may
 942                          * not be right or safe.
 943                          */
 944                         reflags |= REG_EXTENDED;
 945                 } else if (LF_ISSET(SEARCH_TAG)) {
 946                         if (re_tag_conv(sp, &ptrn, &plen, &replaced))
 947                                 return (1);
 948                 } else if (!LF_ISSET(SEARCH_LITERAL))
 949                         if (re_conv(sp, &ptrn, &plen, &replaced))
 950                                 return (1);
 951
 952                 /* Discard previous pattern. */
 953                 if (*ptrnp != NULL) {
 954                         free(*ptrnp);
 955                         *ptrnp = NULL;
 956                 }
 957                 if (lenp != NULL)
 958                         *lenp = plen;
 959
 960                 /*
 961                  * Copy the string into allocated memory.
 962                  *
 963                  * XXX
 964                  * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
 965                  * for now.  There's just no other solution.
 966                  */
 967                 MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
 968                 if (*ptrnp != NULL) {
 969                         MEMCPYW(*ptrnp, ptrn, plen);
 970                         (*ptrnp)[plen] = '\0';
 971                 }
 972
 973                 /* Free up conversion-routine-allocated memory. */
 974                 if (replaced)
 975                         FREE_SPACEW(sp, ptrn, 0);
 976
 977                 if (*ptrnp == NULL)
 978                         return (1);
 979
 980                 ptrn = *ptrnp;
 981         }
 982
 983         /*
 984          * XXX
 985          * Regcomp isn't 8-bit clean, so we just lost if the pattern
 986          * contained a nul.  Bummer!
 987          */
 988         if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
 989                 if (LF_ISSET(SEARCH_MSG))
 990                         re_error(sp, rval, rep);
 991                 return (1);
 992         }
 993
 994         if (LF_ISSET(SEARCH_CSEARCH))
 995                 F_SET(sp, SC_RE_SEARCH);
 996         if (LF_ISSET(SEARCH_CSUBST))
 997                 F_SET(sp, SC_RE_SUBST);
 998
 999         return (0);
1000 }
1001
1002 /*
1003  * re_conv --
1004  *      Convert vi's regular expressions into something that the
1005  *      the POSIX 1003.2 RE functions can handle.
1006  *
1007  * There are three conversions we make to make vi's RE's (specifically
1008  * the global, search, and substitute patterns) work with POSIX RE's.
1009  *
1010  * 1: If O_MAGIC is not set, strip backslashes from the magic character
1011  *    set (.[*~) that have them, and add them to the ones that don't.
1012  * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1013  *    from the last substitute command's replacement string.  If O_MAGIC
1014  *    is set, it's the string "~".
1015  * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1016  *    new RE escapes.
1017  *
1018  * !!!/XXX
1019  * This doesn't exactly match the historic behavior of vi because we do
1020  * the ~ substitution before calling the RE engine, so magic characters
1021  * in the replacement string will be expanded by the RE engine, and they
1022  * weren't historically.  It's a bug.
1023  */
1024 static int
1025 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1026 {
1027         size_t blen, len, needlen;
1028         int magic;
1029         CHAR_T *bp, *p, *t;
1030
1031         /*
1032          * First pass through, we figure out how much space we'll need.
1033          * We do it in two passes, on the grounds that most of the time
1034          * the user is doing a search and won't have magic characters.
1035          * That way we can skip most of the memory allocation and copies.
1036          */
1037         magic = 0;
1038         for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1039                 switch (*p) {
1040                 case '\\':
1041                         if (len > 1) {
1042                                 --len;
1043                                 switch (*++p) {
1044                                 case '<':
1045                                         magic = 1;
1046                                         needlen += RE_WSTART_LEN + 1;
1047                                         break;
1048                                 case '>':
1049                                         magic = 1;
1050                                         needlen += RE_WSTOP_LEN + 1;
1051                                         break;
1052                                 case '~':
1053                                         if (!O_ISSET(sp, O_MAGIC)) {
1054                                                 magic = 1;
1055                                                 needlen += sp->repl_len;
1056                                         }
1057                                         break;
1058                                 case '.':
1059                                 case '[':
1060                                 case '*':
1061                                         if (!O_ISSET(sp, O_MAGIC)) {
1062                                                 magic = 1;
1063                                                 needlen += 1;
1064                                         }
1065                                         break;
1066                                 default:
1067                                         needlen += 2;
1068                                 }
1069                         } else
1070                                 needlen += 1;
1071                         break;
1072                 case '~':
1073                         if (O_ISSET(sp, O_MAGIC)) {
1074                                 magic = 1;
1075                                 needlen += sp->repl_len;
1076                         }
1077                         break;
1078                 case '.':
1079                 case '[':
1080                 case '*':
1081                         if (!O_ISSET(sp, O_MAGIC)) {
1082                                 magic = 1;
1083                                 needlen += 2;
1084                         }
1085                         break;
1086                 default:
1087                         needlen += 1;
1088                         break;
1089                 }
1090
1091         if (!magic) {
1092                 *replacedp = 0;
1093                 return (0);
1094         }
1095
1096         /* Get enough memory to hold the final pattern. */
1097         *replacedp = 1;
1098         GET_SPACE_RETW(sp, bp, blen, needlen);
1099
1100         for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1101                 switch (*p) {
1102                 case '\\':
1103                         if (len > 1) {
1104                                 --len;
1105                                 switch (*++p) {
1106                                 case '<':
1107                                         MEMCPY(t,
1108                                             RE_WSTART, RE_WSTART_LEN);
1109                                         t += RE_WSTART_LEN;
1110                                         break;
1111                                 case '>':
1112                                         MEMCPY(t,
1113                                             RE_WSTOP, RE_WSTOP_LEN);
1114                                         t += RE_WSTOP_LEN;
1115                                         break;
1116                                 case '~':
1117                                         if (O_ISSET(sp, O_MAGIC))
1118                                                 *t++ = '~';
1119                                         else {
1120                                                 MEMCPYW(t,
1121                                                     sp->repl, sp->repl_len);
1122                                                 t += sp->repl_len;
1123                                         }
1124                                         break;
1125                                 case '.':
1126                                 case '[':
1127                                 case '*':
1128                                         if (O_ISSET(sp, O_MAGIC))
1129                                                 *t++ = '\\';
1130                                         *t++ = *p;
1131                                         break;
1132                                 default:
1133                                         *t++ = '\\';
1134                                         *t++ = *p;
1135                                 }
1136                         } else
1137                                 *t++ = '\\';
1138                         break;
1139                 case '~':
1140                         if (O_ISSET(sp, O_MAGIC)) {
1141                                 MEMCPYW(t, sp->repl, sp->repl_len);
1142                                 t += sp->repl_len;
1143                         } else
1144                                 *t++ = '~';
1145                         break;
1146                 case '.':
1147                 case '[':
1148                 case '*':
1149                         if (!O_ISSET(sp, O_MAGIC))
1150                                 *t++ = '\\';
1151                         *t++ = *p;
1152                         break;
1153                 default:
1154                         *t++ = *p;
1155                         break;
1156                 }
1157
1158         *ptrnp = bp;
1159         *plenp = t - bp;
1160         return (0);
1161 }
1162
1163 /*
1164  * re_tag_conv --
1165  *      Convert a tags search path into something that the POSIX
1166  *      1003.2 RE functions can handle.
1167  */
1168 static int
1169 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1170 {
1171         size_t blen, len;
1172         int lastdollar;
1173         CHAR_T *bp, *p, *t;
1174
1175         len = *plenp;
1176
1177         /* Max memory usage is 2 times the length of the string. */
1178         *replacedp = 1;
1179         GET_SPACE_RETW(sp, bp, blen, len * 2);
1180
1181         p = *ptrnp;
1182         t = bp;
1183
1184         /* If the last character is a '/' or '?', we just strip it. */
1185         if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1186                 --len;
1187
1188         /* If the next-to-last or last character is a '$', it's magic. */
1189         if (len > 0 && p[len - 1] == '$') {
1190                 --len;
1191                 lastdollar = 1;
1192         } else
1193                 lastdollar = 0;
1194
1195         /* If the first character is a '/' or '?', we just strip it. */
1196         if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1197                 ++p;
1198                 --len;
1199         }
1200
1201         /* If the first or second character is a '^', it's magic. */
1202         if (p[0] == '^') {
1203                 *t++ = *p++;
1204                 --len;
1205         }
1206
1207         /*
1208          * Escape every other magic character we can find, meanwhile stripping
1209          * the backslashes ctags inserts when escaping the search delimiter
1210          * characters.
1211          */
1212         for (; len > 0; --len) {
1213                 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1214                         ++p;
1215                         --len;
1216                 } else if (strchr("^.[]$*", p[0]))
1217                         *t++ = '\\';
1218                 *t++ = *p++;
1219         }
1220         if (lastdollar)
1221                 *t++ = '$';
1222
1223         *ptrnp = bp;
1224         *plenp = t - bp;
1225         return (0);
1226 }
1227
1228 /*
1229  * re_cscope_conv --
1230  *       Convert a cscope search path into something that the POSIX
1231  *      1003.2 RE functions can handle.
1232  */
1233 static int
1234 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1235 {
1236         size_t blen, len, nspaces;
1237         CHAR_T *bp, *t;
1238         CHAR_T *p;
1239         CHAR_T *wp;
1240         size_t wlen;
1241
1242         /*
1243          * Each space in the source line printed by cscope represents an
1244          * arbitrary sequence of spaces, tabs, and comments.
1245          */
1246 #define CSCOPE_RE_SPACE         "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1247 #define CSCOPE_LEN      sizeof(CSCOPE_RE_SPACE) - 1
1248         CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1249         for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1250                 if (*p == ' ')
1251                         ++nspaces;
1252
1253         /*
1254          * Allocate plenty of space:
1255          *      the string, plus potential escaping characters;
1256          *      nspaces + 2 copies of CSCOPE_RE_SPACE;
1257          *      ^, $, nul terminator characters.
1258          */
1259         *replacedp = 1;
1260         len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1261         GET_SPACE_RETW(sp, bp, blen, len);
1262
1263         p = *ptrnp;
1264         t = bp;
1265
1266         *t++ = '^';
1267         MEMCPYW(t, wp, wlen);
1268         t += wlen;
1269
1270         for (len = *plenp; len > 0; ++p, --len)
1271                 if (*p == ' ') {
1272                         MEMCPYW(t, wp, wlen);
1273                         t += wlen;
1274                 } else {
1275                         if (strchr("\\^.[]$*+?()|{}", *p))
1276                                 *t++ = '\\';
1277                         *t++ = *p;
1278                 }
1279
1280         MEMCPYW(t, wp, wlen);
1281         t += wlen;
1282         *t++ = '$';
1283
1284         *ptrnp = bp;
1285         *plenp = t - bp;
1286         return (0);
1287 }
1288
1289 /*
1290  * re_error --
1291  *      Report a regular expression error.
1292  *
1293  * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1294  */
1295 void
1296 re_error(SCR *sp, int errcode, regex_t *preg)
1297 {
1298         size_t s;
1299         char *oe;
1300
1301         s = regerror(errcode, preg, "", 0);
1302         if ((oe = malloc(s)) == NULL)
1303                 msgq(sp, M_SYSERR, NULL);
1304         else {
1305                 (void)regerror(errcode, preg, oe, s);
1306                 msgq(sp, M_ERR, "RE error: %s", oe);
1307                 free(oe);
1308         }
1309 }
1310
1311 /*
1312  * re_sub --
1313  *      Do the substitution for a regular expression.
1314  */
1315 static int
1316 re_sub(SCR *sp, CHAR_T *ip, CHAR_T **lbp, size_t *lbclenp, size_t *lblenp, regmatch_t *match)
1317
1318                                         /* Input line. */
1319
1320
1321
1322 {
1323         enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1324         size_t lbclen, lblen;           /* Local copies. */
1325         size_t mlen;                    /* Match length. */
1326         size_t rpl;                     /* Remaining replacement length. */
1327         CHAR_T *rp;                     /* Replacement pointer. */
1328         int ch;
1329         int no;                         /* Match replacement offset. */
1330         CHAR_T *p, *t;                  /* Buffer pointers. */
1331         CHAR_T *lb;                     /* Local copies. */
1332
1333         lb = *lbp;                      /* Get local copies. */
1334         lbclen = *lbclenp;
1335         lblen = *lblenp;
1336
1337         /*
1338          * QUOTING NOTE:
1339          *
1340          * There are some special sequences that vi provides in the
1341          * replacement patterns.
1342          *       & string the RE matched (\& if nomagic set)
1343          *      \# n-th regular subexpression
1344          *      \E end \U, \L conversion
1345          *      \e end \U, \L conversion
1346          *      \l convert the next character to lower-case
1347          *      \L convert to lower-case, until \E, \e, or end of replacement
1348          *      \u convert the next character to upper-case
1349          *      \U convert to upper-case, until \E, \e, or end of replacement
1350          *
1351          * Otherwise, since this is the lowest level of replacement, discard
1352          * all escaping characters.  This (hopefully) matches historic practice.
1353          */
1354 #define OUTCH(ch, nltrans) {                                            \
1355         CHAR_T __ch = (ch);                                             \
1356         u_int __value = KEY_VAL(sp, __ch);                              \
1357         if (nltrans && (__value == K_CR || __value == K_NL)) {          \
1358                 NEEDNEWLINE(sp);                                        \
1359                 sp->newl[sp->newl_cnt++] = lbclen;                      \
1360         } else if (conv != C_NOTSET) {                                  \
1361                 switch (conv) {                                         \
1362                 case C_ONELOWER:                                        \
1363                         conv = C_NOTSET;                                \
1364                         /* FALLTHROUGH */                               \
1365                 case C_LOWER:                                           \
1366                         if (isupper(__ch))                              \
1367                                 __ch = tolower(__ch);                   \
1368                         break;                                          \
1369                 case C_ONEUPPER:                                        \
1370                         conv = C_NOTSET;                                \
1371                         /* FALLTHROUGH */                               \
1372                 case C_UPPER:                                           \
1373                         if (islower(__ch))                              \
1374                                 __ch = toupper(__ch);                   \
1375                         break;                                          \
1376                 default:                                                \
1377                         abort();                                        \
1378                 }                                                       \
1379         }                                                               \
1380         NEEDSP(sp, 1, p);                                               \
1381         *p++ = __ch;                                                    \
1382         ++lbclen;                                                       \
1383 }
1384         conv = C_NOTSET;
1385         for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1386                 switch (ch = *rp++) {
1387                 case '&':
1388                         if (O_ISSET(sp, O_MAGIC)) {
1389                                 no = 0;
1390                                 goto subzero;
1391                         }
1392                         break;
1393                 case '\\':
1394                         if (rpl == 0)
1395                                 break;
1396                         --rpl;
1397                         switch (ch = *rp) {
1398                         case '&':
1399                                 ++rp;
1400                                 if (!O_ISSET(sp, O_MAGIC)) {
1401                                         no = 0;
1402                                         goto subzero;
1403                                 }
1404                                 break;
1405                         case '0': case '1': case '2': case '3': case '4':
1406                         case '5': case '6': case '7': case '8': case '9':
1407                                 no = *rp++ - '0';
1408 subzero:                        if (match[no].rm_so == -1 ||
1409                                     match[no].rm_eo == -1)
1410                                         break;
1411                                 mlen = match[no].rm_eo - match[no].rm_so;
1412                                 for (t = ip + match[no].rm_so; mlen--; ++t)
1413                                         OUTCH(*t, 0);
1414                                 continue;
1415                         case 'e':
1416                         case 'E':
1417                                 ++rp;
1418                                 conv = C_NOTSET;
1419                                 continue;
1420                         case 'l':
1421                                 ++rp;
1422                                 conv = C_ONELOWER;
1423                                 continue;
1424                         case 'L':
1425                                 ++rp;
1426                                 conv = C_LOWER;
1427                                 continue;
1428                         case 'u':
1429                                 ++rp;
1430                                 conv = C_ONEUPPER;
1431                                 continue;
1432                         case 'U':
1433                                 ++rp;
1434                                 conv = C_UPPER;
1435                                 continue;
1436                         default:
1437                                 ++rp;
1438                                 break;
1439                         }
1440                 }
1441                 OUTCH(ch, 1);
1442         }
1443
1444         *lbp = lb;                      /* Update caller's information. */
1445         *lbclenp = lbclen;
1446         *lblenp = lblen;
1447         return (0);
1448 }