contrib/nvi/ex/ex_subst.c

   1 /*-
   2  * Copyright (c) 1992, 1993, 1994
   3  *      The Regents of the University of California.  All rights reserved.
   4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
   5  *      Keith Bostic.  All rights reserved.
   6  *
   7  * See the LICENSE file for redistribution information.
   8  */
   9
  10 #include "config.h"
  11
  12 #ifndef lint
  13 static const char sccsid[] = "@(#)ex_subst.c    10.37 (Berkeley) 9/15/96";
  14 #endif /* not lint */
  15
  16 #include <sys/types.h>
  17 #include <sys/queue.h>
  18 #include <sys/time.h>
  19
  20 #include <bitstring.h>
  21 #include <ctype.h>
  22 #include <errno.h>
  23 #include <limits.h>
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <unistd.h>
  28
  29 #include "../common/common.h"
  30 #include "../vi/vi.h"
  31
   32 #define SUB_FIRST       0x01            /* RE given with this command: s() rejects the 'r' flag. */
   33 #define SUB_MUSTSETR    0x02            /* Args began with an alphanumeric or backslash: s() requires the 'r' flag. */
   34
      /* Prototypes for this file's static functions. */
   35 static int re_conv __P((SCR *, char **, size_t *, int *));
   36 static int re_cscope_conv __P((SCR *, char **, size_t *, int *));
   37 static int re_sub __P((SCR *,
   38                 char *, char **, size_t *, size_t *, regmatch_t [10]));
   39 static int re_tag_conv __P((SCR *, char **, size_t *, int *));
   40 static int s __P((SCR *, EXCMD *, char *, regex_t *, u_int));
  41
  42 /*
  43  * ex_s --
  44  *      [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
  45  *
  46  *      Substitute on lines matching a pattern.
  47  *
  48  * PUBLIC: int ex_s __P((SCR *, EXCMD *));
  49  */
  50 int
  51 ex_s(sp, cmdp)
  52         SCR *sp;
  53         EXCMD *cmdp;
  54 {
  55         regex_t *re;
  56         size_t blen, len;
  57         u_int flags;
  58         int delim;
  59         char *bp, *ptrn, *rep, *p, *t;
  60
  61         /*
  62          * Skip leading white space.
  63          *
  64          * !!!
  65          * Historic vi allowed any non-alphanumeric to serve as the
  66          * substitution command delimiter.
  67          *
  68          * !!!
  69          * If the arguments are empty, it's the same as &, i.e. we
  70          * repeat the last substitution.
  71          */
  72         if (cmdp->argc == 0)
  73                 goto subagain;
  74         for (p = cmdp->argv[0]->bp,
  75             len = cmdp->argv[0]->len; len > 0; --len, ++p) {
  76                 if (!isblank(*p))
  77                         break;
  78         }
  79         if (len == 0)
  80 subagain:       return (ex_subagain(sp, cmdp));
  81
  82         delim = *p++;
  83         if (isalnum(delim) || delim == '\\')
  84                 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
  85
  86         /*
  87          * !!!
  88          * The full-blown substitute command reset the remembered
  89          * state of the 'c' and 'g' suffices.
  90          */
  91         sp->c_suffix = sp->g_suffix = 0;
  92
  93         /*
  94          * Get the pattern string, toss escaping characters.
  95          *
  96          * !!!
  97          * Historic vi accepted any of the following forms:
  98          *
  99          *      :s/abc/def/             change "abc" to "def"
 100          *      :s/abc/def              change "abc" to "def"
 101          *      :s/abc/                 delete "abc"
 102          *      :s/abc                  delete "abc"
 103          *
 104          * QUOTING NOTE:
 105          *
 106          * Only toss an escaping character if it escapes a delimiter.
 107          * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
 108          * would be nice to be more regular, i.e. for each layer of
 109          * escaping a single escaping character is removed, but that's
 110          * not how the historic vi worked.
 111          */
 112         for (ptrn = t = p;;) {
 113                 if (p[0] == '\0' || p[0] == delim) {
 114                         if (p[0] == delim)
 115                                 ++p;
 116                         /*
 117                          * !!!
 118                          * Nul terminate the pattern string -- it's passed
 119                          * to regcomp which doesn't understand anything else.
 120                          */
 121                         *t = '\0';
 122                         break;
 123                 }
 124                 if (p[0] == '\\')
 125                         if (p[1] == delim)
 126                                 ++p;
 127                         else if (p[1] == '\\')
 128                                 *t++ = *p++;
 129                 *t++ = *p++;
 130         }
 131
 132         /*
 133          * If the pattern string is empty, use the last RE (not just the
 134          * last substitution RE).
 135          */
 136         if (*ptrn == '\0') {
 137                 if (sp->re == NULL) {
 138                         ex_emsg(sp, NULL, EXM_NOPREVRE);
 139                         return (1);
 140                 }
 141
 142                 /* Re-compile the RE if necessary. */
 143                 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
 144                     sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
 145                         return (1);
 146                 flags = 0;
 147         } else {
 148                 /*
 149                  * !!!
 150                  * Compile the RE.  Historic practice is that substitutes set
 151                  * the search direction as well as both substitute and search
 152                  * RE's.  We compile the RE twice, as we don't want to bother
 153                  * ref counting the pattern string and (opaque) structure.
 154                  */
 155                 if (re_compile(sp, ptrn, t - ptrn,
 156                     &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
 157                         return (1);
 158                 if (re_compile(sp, ptrn, t - ptrn,
 159                     &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
 160                         return (1);
 161
 162                 flags = SUB_FIRST;
 163                 sp->searchdir = FORWARD;
 164         }
 165         re = &sp->re_c;
 166
 167         /*
 168          * Get the replacement string.
 169          *
 170          * The special character & (\& if O_MAGIC not set) matches the
 171          * entire RE.  No handling of & is required here, it's done by
 172          * re_sub().
 173          *
 174          * The special character ~ (\~ if O_MAGIC not set) inserts the
 175          * previous replacement string into this replacement string.
 176          * Count ~'s to figure out how much space we need.  We could
 177          * special case nonexistent last patterns or whether or not
 178          * O_MAGIC is set, but it's probably not worth the effort.
 179          *
 180          * QUOTING NOTE:
 181          *
 182          * Only toss an escaping character if it escapes a delimiter or
 183          * if O_MAGIC is set and it escapes a tilde.
 184          *
 185          * !!!
 186          * If the entire replacement pattern is "%", then use the last
 187          * replacement pattern.  This semantic was added to vi in System
 188          * V and then percolated elsewhere, presumably around the time
 189          * that it was added to their version of ed(1).
 190          */
 191         if (p[0] == '\0' || p[0] == delim) {
 192                 if (p[0] == delim)
 193                         ++p;
 194                 if (sp->repl != NULL)
 195                         free(sp->repl);
 196                 sp->repl = NULL;
 197                 sp->repl_len = 0;
 198         } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
 199                 p += p[1] == delim ? 2 : 1;
 200         else {
 201                 for (rep = p, len = 0;
 202                     p[0] != '\0' && p[0] != delim; ++p, ++len)
 203                         if (p[0] == '~')
 204                                 len += sp->repl_len;
 205                 GET_SPACE_RET(sp, bp, blen, len);
 206                 for (t = bp, len = 0, p = rep;;) {
 207                         if (p[0] == '\0' || p[0] == delim) {
 208                                 if (p[0] == delim)
 209                                         ++p;
 210                                 break;
 211                         }
 212                         if (p[0] == '\\') {
 213                                 if (p[1] == delim)
 214                                         ++p;
 215                                 else if (p[1] == '\\') {
 216                                         *t++ = *p++;
 217                                         ++len;
 218                                 } else if (p[1] == '~') {
 219                                         ++p;
 220                                         if (!O_ISSET(sp, O_MAGIC))
 221                                                 goto tilde;
 222                                 }
 223                         } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
 224 tilde:                          ++p;
 225                                 memcpy(t, sp->repl, sp->repl_len);
 226                                 t += sp->repl_len;
 227                                 len += sp->repl_len;
 228                                 continue;
 229                         }
 230                         *t++ = *p++;
 231                         ++len;
 232                 }
 233                 if ((sp->repl_len = len) != 0) {
 234                         if (sp->repl != NULL)
 235                                 free(sp->repl);
 236                         if ((sp->repl = malloc(len)) == NULL) {
 237                                 msgq(sp, M_SYSERR, NULL);
 238                                 FREE_SPACE(sp, bp, blen);
 239                                 return (1);
 240                         }
 241                         memcpy(sp->repl, bp, len);
 242                 }
 243                 FREE_SPACE(sp, bp, blen);
 244         }
 245         return (s(sp, cmdp, p, re, flags));
 246 }
 247
 248 /*
 249  * ex_subagain --
 250  *      [line [,line]] & [cgr] [count] [#lp]]
 251  *
 252  *      Substitute using the last substitute RE and replacement pattern.
 253  *
 254  * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
 255  */
 256 int
 257 ex_subagain(sp, cmdp)
 258         SCR *sp;
 259         EXCMD *cmdp;
 260 {
 261         if (sp->subre == NULL) {
 262                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 263                 return (1);
 264         }
 265         if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp,
 266             sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST))
 267                 return (1);
 268         return (s(sp,
 269             cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
 270 }
 271
 272 /*
 273  * ex_subtilde --
 274  *      [line [,line]] ~ [cgr] [count] [#lp]]
 275  *
 276  *      Substitute using the last RE and last substitute replacement pattern.
 277  *
 278  * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
 279  */
 280 int
 281 ex_subtilde(sp, cmdp)
 282         SCR *sp;
 283         EXCMD *cmdp;
 284 {
 285         if (sp->re == NULL) {
 286                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 287                 return (1);
 288         }
 289         if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
 290             sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
 291                 return (1);
 292         return (s(sp,
 293             cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
 294 }
 295
 296 /*
 297  * s --
 298  * Do the substitution.  This stuff is *really* tricky.  There are lots of
 299  * special cases, and general nastiness.  Don't mess with it unless you're
 300  * pretty confident.
 301  *
 302  * The nasty part of the substitution is what happens when the replacement
 303  * string contains newlines.  It's a bit tricky -- consider the information
 304  * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 305  * to build a set of newline offsets which we use to break the line up later,
 306  * when the replacement is done.  Don't change it unless you're *damned*
 307  * confident.
 308  */
 309 #define NEEDNEWLINE(sp) {                                               \
 310         if (sp->newl_len == sp->newl_cnt) {                             \
 311                 sp->newl_len += 25;                                     \
 312                 REALLOC(sp, sp->newl, size_t *,                         \
 313                     sp->newl_len * sizeof(size_t));                     \
 314                 if (sp->newl == NULL) {                                 \
 315                         sp->newl_len = 0;                               \
 316                         return (1);                                     \
 317                 }                                                       \
 318         }                                                               \
 319 }
 320
 321 #define BUILD(sp, l, len) {                                             \
 322         if (lbclen + (len) > lblen) {                                   \
 323                 lblen += MAX(lbclen + (len), 256);                      \
 324                 REALLOC(sp, lb, char *, lblen);                         \
 325                 if (lb == NULL) {                                       \
 326                         lbclen = 0;                                     \
 327                         return (1);                                     \
 328                 }                                                       \
 329         }                                                               \
 330         memcpy(lb + lbclen, l, len);                                    \
 331         lbclen += len;                                                  \
 332 }
 333
 334 #define NEEDSP(sp, len, pnt) {                                          \
 335         if (lbclen + (len) > lblen) {                                   \
 336                 lblen += MAX(lbclen + (len), 256);                      \
 337                 REALLOC(sp, lb, char *, lblen);                         \
 338                 if (lb == NULL) {                                       \
 339                         lbclen = 0;                                     \
 340                         return (1);                                     \
 341                 }                                                       \
 342                 pnt = lb + lbclen;                                      \
 343         }                                                               \
 344 }
 345
 346 static int
 347 s(sp, cmdp, s, re, flags)
 348         SCR *sp;
 349         EXCMD *cmdp;
 350         char *s;
 351         regex_t *re;
 352         u_int flags;
 353 {
 354         EVENT ev;
 355         MARK from, to;
 356         TEXTH tiq;
 357         recno_t elno, lno, slno;
 358         regmatch_t match[10];
 359         size_t blen, cnt, last, lbclen, lblen, len, llen;
 360         size_t offset, saved_offset, scno;
 361         int cflag, lflag, nflag, pflag, rflag;
 362         int didsub, do_eol_match, eflags, empty_ok, eval;
 363         int linechanged, matched, quit, rval;
 364         char *bp, *lb;
 365
 366         NEEDFILE(sp, cmdp);
 367
 368         slno = sp->lno;
 369         scno = sp->cno;
 370
 371         /*
 372          * !!!
 373          * Historically, the 'g' and 'c' suffices were always toggled as flags,
 374          * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
 375          * not set, they were initialized to 0 for all substitute commands.  If
 376          * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
 377          * specified substitute/replacement patterns (see ex_s()).
 378          */
 379         if (!O_ISSET(sp, O_EDCOMPATIBLE))
 380                 sp->c_suffix = sp->g_suffix = 0;
 381
 382         /*
 383          * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
 384          * it only displayed the last change.  I'd disallow them, but they are
 385          * useful in combination with the [v]global commands.  In the current
 386          * model the problem is combining them with the 'c' flag -- the screen
 387          * would have to flip back and forth between the confirm screen and the
 388          * ex print screen, which would be pretty awful.  We do display all
 389          * changes, though, for what that's worth.
 390          *
 391          * !!!
 392          * Historic vi was fairly strict about the order of "options", the
 393          * count, and "flags".  I'm somewhat fuzzy on the difference between
 394          * options and flags, anyway, so this is a simpler approach, and we
 395          * just take it them in whatever order the user gives them.  (The ex
 396          * usage statement doesn't reflect this.)
 397          */
 398         cflag = lflag = nflag = pflag = rflag = 0;
 399         if (s == NULL)
 400                 goto noargs;
 401         for (lno = OOBLNO; *s != '\0'; ++s)
 402                 switch (*s) {
 403                 case ' ':
 404                 case '\t':
 405                         continue;
 406                 case '+':
 407                         ++cmdp->flagoff;
 408                         break;
 409                 case '-':
 410                         --cmdp->flagoff;
 411                         break;
 412                 case '0': case '1': case '2': case '3': case '4':
 413                 case '5': case '6': case '7': case '8': case '9':
 414                         if (lno != OOBLNO)
 415                                 goto usage;
 416                         errno = 0;
 417                         lno = strtoul(s, &s, 10);
 418                         if (*s == '\0')         /* Loop increment correction. */
 419                                 --s;
 420                         if (errno == ERANGE) {
 421                                 if (lno == LONG_MAX)
 422                                         msgq(sp, M_ERR, "153|Count overflow");
 423                                 else if (lno == LONG_MIN)
 424                                         msgq(sp, M_ERR, "154|Count underflow");
 425                                 else
 426                                         msgq(sp, M_SYSERR, NULL);
 427                                 return (1);
 428                         }
 429                         /*
 430                          * In historic vi, the count was inclusive from the
 431                          * second address.
 432                          */
 433                         cmdp->addr1.lno = cmdp->addr2.lno;
 434                         cmdp->addr2.lno += lno - 1;
 435                         if (!db_exist(sp, cmdp->addr2.lno) &&
 436                             db_last(sp, &cmdp->addr2.lno))
 437                                 return (1);
 438                         break;
 439                 case '#':
 440                         nflag = 1;
 441                         break;
 442                 case 'c':
 443                         sp->c_suffix = !sp->c_suffix;
 444
 445                         /* Ex text structure initialization. */
 446                         if (F_ISSET(sp, SC_EX)) {
 447                                 memset(&tiq, 0, sizeof(TEXTH));
 448                                 CIRCLEQ_INIT(&tiq);
 449                         }
 450                         break;
 451                 case 'g':
 452                         sp->g_suffix = !sp->g_suffix;
 453                         break;
 454                 case 'l':
 455                         lflag = 1;
 456                         break;
 457                 case 'p':
 458                         pflag = 1;
 459                         break;
 460                 case 'r':
 461                         if (LF_ISSET(SUB_FIRST)) {
 462                                 msgq(sp, M_ERR,
 463                     "155|Regular expression specified; r flag meaningless");
 464                                 return (1);
 465                         }
 466                         if (!F_ISSET(sp, SC_RE_SEARCH)) {
 467                                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 468                                 return (1);
 469                         }
 470                         rflag = 1;
 471                         re = &sp->re_c;
 472                         break;
 473                 default:
 474                         goto usage;
 475                 }
 476
 477         if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) {
 478 usage:          ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
 479                 return (1);
 480         }
 481
 482 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
 483                 msgq(sp, M_ERR,
 484 "156|The #, l and p flags may not be combined with the c flag in vi mode");
 485                 return (1);
 486         }
 487
 488         /*
 489          * bp:          if interactive, line cache
 490          * blen:        if interactive, line cache length
 491          * lb:          build buffer pointer.
 492          * lbclen:      current length of built buffer.
 493          * lblen;       length of build buffer.
 494          */
 495         bp = lb = NULL;
 496         blen = lbclen = lblen = 0;
 497
 498         /* For each line... */
 499         for (matched = quit = 0, lno = cmdp->addr1.lno,
 500             elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
 501
 502                 /* Someone's unhappy, time to stop. */
 503                 if (INTERRUPTED(sp))
 504                         break;
 505
 506                 /* Get the line. */
 507                 if (db_get(sp, lno, DBG_FATAL, &s, &llen))
 508                         goto err;
 509
 510                 /*
 511                  * Make a local copy if doing confirmation -- when calling
 512                  * the confirm routine we're likely to lose the cached copy.
 513                  */
 514                 if (sp->c_suffix) {
 515                         if (bp == NULL) {
 516                                 GET_SPACE_RET(sp, bp, blen, llen);
 517                         } else
 518                                 ADD_SPACE_RET(sp, bp, blen, llen);
 519                         memcpy(bp, s, llen);
 520                         s = bp;
 521                 }
 522
 523                 /* Start searching from the beginning. */
 524                 offset = 0;
 525                 len = llen;
 526
 527                 /* Reset the build buffer offset. */
 528                 lbclen = 0;
 529
 530                 /* Reset empty match flag. */
 531                 empty_ok = 1;
 532
 533                 /*
 534                  * We don't want to have to do a setline if the line didn't
 535                  * change -- keep track of whether or not this line changed.
 536                  * If doing confirmations, don't want to keep setting the
 537                  * line if change is refused -- keep track of substitutions.
 538                  */
 539                 didsub = linechanged = 0;
 540
 541                 /* New line, do an EOL match. */
 542                 do_eol_match = 1;
 543
 544                 /* It's not nul terminated, but we pretend it is. */
 545                 eflags = REG_STARTEND;
 546
 547                 /*
 548                  * The search area is from s + offset to the EOL.
 549                  *
 550                  * Generally, match[0].rm_so is the offset of the start
 551                  * of the match from the start of the search, and offset
 552                  * is the offset of the start of the last search.
 553                  */
 554 nextmatch:      match[0].rm_so = 0;
 555                 match[0].rm_eo = len;
 556
 557                 /* Get the next match. */
 558                 eval = regexec(re, (char *)s + offset, 10, match, eflags);
 559
 560                 /*
 561                  * There wasn't a match or if there was an error, deal with
 562                  * it.  If there was a previous match in this line, resolve
 563                  * the changes into the database.  Otherwise, just move on.
 564                  */
 565                 if (eval == REG_NOMATCH)
 566                         goto endmatch;
 567                 if (eval != 0) {
 568                         re_error(sp, eval, re);
 569                         goto err;
 570                 }
 571                 matched = 1;
 572
 573                 /* Only the first search can match an anchored expression. */
 574                 eflags |= REG_NOTBOL;
 575
 576                 /*
 577                  * !!!
 578                  * It's possible to match 0-length strings -- for example, the
 579                  * command s;a*;X;, when matched against the string "aabb" will
 580                  * result in "XbXbX", i.e. the matches are "aa", the space
 581                  * between the b's and the space between the b's and the end of
 582                  * the string.  There is a similar space between the beginning
 583                  * of the string and the a's.  The rule that we use (because vi
 584                  * historically used it) is that any 0-length match, occurring
 585                  * immediately after a match, is ignored.  Otherwise, the above
 586                  * example would have resulted in "XXbXbX".  Another example is
 587                  * incorrectly using " *" to replace groups of spaces with one
 588                  * space.
 589                  *
 590                  * The way we do this is that if we just had a successful match,
 591                  * the starting offset does not skip characters, and the match
 592                  * is empty, ignore the match and move forward.  If there's no
 593                  * more characters in the string, we were attempting to match
 594                  * after the last character, so quit.
 595                  */
 596                 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
 597                         empty_ok = 1;
 598                         if (len == 0)
 599                                 goto endmatch;
 600                         BUILD(sp, s + offset, 1)
 601                         ++offset;
 602                         --len;
 603                         goto nextmatch;
 604                 }
 605
 606                 /* Confirm change. */
 607                 if (sp->c_suffix) {
 608                         /*
 609                          * Set the cursor position for confirmation.  Note,
 610                          * if we matched on a '$', the cursor may be past
 611                          * the end of line.
 612                          */
 613                         from.lno = to.lno = lno;
 614                         from.cno = match[0].rm_so + offset;
 615                         to.cno = match[0].rm_eo + offset;
 616                         /*
 617                          * Both ex and vi have to correct for a change before
 618                          * the first character in the line.
 619                          */
 620                         if (llen == 0)
 621                                 from.cno = to.cno = 0;
 622                         if (F_ISSET(sp, SC_VI)) {
 623                                 /*
 624                                  * Only vi has to correct for a change after
 625                                  * the last character in the line.
 626                                  *
 627                                  * XXX
 628                                  * It would be nice to change the vi code so
 629                                  * that we could display a cursor past EOL.
 630                                  */
 631                                 if (to.cno >= llen)
 632                                         to.cno = llen - 1;
 633                                 if (from.cno >= llen)
 634                                         from.cno = llen - 1;
 635
 636                                 sp->lno = from.lno;
 637                                 sp->cno = from.cno;
 638                                 if (vs_refresh(sp, 1))
 639                                         goto err;
 640
 641                                 vs_update(sp, msg_cat(sp,
 642                                     "169|Confirm change? [n]", NULL), NULL);
 643
 644                                 if (v_event_get(sp, &ev, 0, 0))
 645                                         goto err;
 646                                 switch (ev.e_event) {
 647                                 case E_CHARACTER:
 648                                         break;
 649                                 case E_EOF:
 650                                 case E_ERR:
 651                                 case E_INTERRUPT:
 652                                         goto lquit;
 653                                 default:
 654                                         v_event_err(sp, &ev);
 655                                         goto lquit;
 656                                 }
 657                         } else {
 658                                 if (ex_print(sp, cmdp, &from, &to, 0) ||
 659                                     ex_scprint(sp, &from, &to))
 660                                         goto lquit;
 661                                 if (ex_txt(sp, &tiq, 0, TXT_CR))
 662                                         goto err;
 663                                 ev.e_c = tiq.cqh_first->lb[0];
 664                         }
 665
 666                         switch (ev.e_c) {
 667                         case CH_YES:
 668                                 break;
 669                         default:
 670                         case CH_NO:
 671                                 didsub = 0;
 672                                 BUILD(sp, s +offset, match[0].rm_eo);
 673                                 goto skip;
 674                         case CH_QUIT:
 675                                 /* Set the quit/interrupted flags. */
 676 lquit:                          quit = 1;
 677                                 F_SET(sp->gp, G_INTERRUPTED);
 678
 679                                 /*
 680                                  * Resolve any changes, then return to (and
 681                                  * exit from) the main loop.
 682                                  */
 683                                 goto endmatch;
 684                         }
 685                 }
 686
 687                 /*
 688                  * Set the cursor to the last position changed, converting
 689                  * from 1-based to 0-based.
 690                  */
 691                 sp->lno = lno;
 692                 sp->cno = match[0].rm_so;
 693
 694                 /* Copy the bytes before the match into the build buffer. */
 695                 BUILD(sp, s + offset, match[0].rm_so);
 696
 697                 /* Substitute the matching bytes. */
 698                 didsub = 1;
 699                 if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
 700                         goto err;
 701
 702                 /* Set the change flag so we know this line was modified. */
 703                 linechanged = 1;
 704
 705                 /* Move past the matched bytes. */
 706 skip:           offset += match[0].rm_eo;
 707                 len -= match[0].rm_eo;
 708
 709                 /* A match cannot be followed by an empty pattern. */
 710                 empty_ok = 0;
 711
 712                 /*
 713                  * If doing a global change with confirmation, we have to
 714                  * update the screen.  The basic idea is to store the line
 715                  * so the screen update routines can find it, and restart.
 716                  */
 717                 if (didsub && sp->c_suffix && sp->g_suffix) {
 718                         /*
 719                          * The new search offset will be the end of the
 720                          * modified line.
 721                          */
 722                         saved_offset = lbclen;
 723
 724                         /* Copy the rest of the line. */
 725                         if (len)
 726                                 BUILD(sp, s + offset, len)
 727
 728                         /* Set the new offset. */
 729                         offset = saved_offset;
 730
 731                         /* Store inserted lines, adjusting the build buffer. */
 732                         last = 0;
 733                         if (sp->newl_cnt) {
 734                                 for (cnt = 0;
 735                                     cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
 736                                         if (db_insert(sp, lno,
 737                                             lb + last, sp->newl[cnt] - last))
 738                                                 goto err;
 739                                         last = sp->newl[cnt] + 1;
 740                                         ++sp->rptlines[L_ADDED];
 741                                 }
 742                                 lbclen -= last;
 743                                 offset -= last;
 744                                 sp->newl_cnt = 0;
 745                         }
 746
 747                         /* Store and retrieve the line. */
 748                         if (db_set(sp, lno, lb + last, lbclen))
 749                                 goto err;
 750                         if (db_get(sp, lno, DBG_FATAL, &s, &llen))
 751                                 goto err;
 752                         ADD_SPACE_RET(sp, bp, blen, llen)
 753                         memcpy(bp, s, llen);
 754                         s = bp;
 755                         len = llen - offset;
 756
 757                         /* Restart the build. */
 758                         lbclen = 0;
 759                         BUILD(sp, s, offset);
 760
 761                         /*
 762                          * If we haven't already done the after-the-string
 763                          * match, do one.  Set REG_NOTEOL so the '$' pattern
 764                          * only matches once.
 765                          */
 766                         if (!do_eol_match)
 767                                 goto endmatch;
 768                         if (offset == len) {
 769                                 do_eol_match = 0;
 770                                 eflags |= REG_NOTEOL;
 771                         }
 772                         goto nextmatch;
 773                 }
 774
 775                 /*
 776                  * If it's a global:
 777                  *
 778                  * If at the end of the string, do a test for the after
 779                  * the string match.  Set REG_NOTEOL so the '$' pattern
 780                  * only matches once.
 781                  */
 782                 if (sp->g_suffix && do_eol_match) {
 783                         if (len == 0) {
 784                                 do_eol_match = 0;
 785                                 eflags |= REG_NOTEOL;
 786                         }
 787                         goto nextmatch;
 788                 }
 789
 790 endmatch:       if (!linechanged)
 791                         continue;
 792
 793                 /* Copy any remaining bytes into the build buffer. */
 794                 if (len)
 795                         BUILD(sp, s + offset, len)
 796
 797                 /* Store inserted lines, adjusting the build buffer. */
 798                 last = 0;
 799                 if (sp->newl_cnt) {
 800                         for (cnt = 0;
 801                             cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
 802                                 if (db_insert(sp,
 803                                     lno, lb + last, sp->newl[cnt] - last))
 804                                         goto err;
 805                                 last = sp->newl[cnt] + 1;
 806                                 ++sp->rptlines[L_ADDED];
 807                         }
 808                         lbclen -= last;
 809                         sp->newl_cnt = 0;
 810                 }
 811
 812                 /* Store the changed line. */
 813                 if (db_set(sp, lno, lb + last, lbclen))
 814                         goto err;
 815
 816                 /* Update changed line counter. */
 817                 if (sp->rptlchange != lno) {
 818                         sp->rptlchange = lno;
 819                         ++sp->rptlines[L_CHANGED];
 820                 }
 821
 822                 /*
 823                  * !!!
 824                  * Display as necessary.  Historic practice is to only
 825                  * display the last line of a line split into multiple
 826                  * lines.
 827                  */
 828                 if (lflag || nflag || pflag) {
 829                         from.lno = to.lno = lno;
 830                         from.cno = to.cno = 0;
 831                         if (lflag)
 832                                 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
 833                         if (nflag)
 834                                 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
 835                         if (pflag)
 836                                 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
 837                 }
 838         }
 839
 840         /*
 841          * !!!
 842          * Historically, vi attempted to leave the cursor at the same place if
 843          * the substitution was done at the current cursor position.  Otherwise
 844          * it moved it to the first non-blank of the last line changed.  There
 845          * were some problems: for example, :s/$/foo/ with the cursor on the
 846          * last character of the line left the cursor on the last character, or
 847          * the & command with multiple occurrences of the matching string in the
 848          * line usually left the cursor in a fairly random position.
 849          *
 850          * We try to do the same thing, with the exception that if the user is
 851          * doing substitution with confirmation, we move to the last line about
 852          * which the user was consulted, as opposed to the last line that they
 853          * actually changed.  This prevents a screen flash if the user doesn't
 854          * change many of the possible lines.
 855          */
 856         if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
 857                 sp->cno = 0;
 858                 (void)nonblank(sp, sp->lno, &sp->cno);
 859         }
 860
 861         /*
 862          * If not in a global command, and nothing matched, say so.
 863          * Else, if none of the lines displayed, put something up.
 864          */
 865         rval = 0;
 866         if (!matched) {
 867                 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
 868                         msgq(sp, M_ERR, "157|No match found");
 869                         goto err;
 870                 }
 871         } else if (!lflag && !nflag && !pflag)
 872                 F_SET(cmdp, E_AUTOPRINT);
 873
 874         if (0) {
 875 err:            rval = 1;
 876         }
 877
 878         if (bp != NULL)
 879                 FREE_SPACE(sp, bp, blen);
 880         if (lb != NULL)
 881                 free(lb);
 882         return (rval);
 883 }
 884
 885 /*
 886  * re_compile --
 887  *      Compile the RE.
 888  *
 889  * PUBLIC: int re_compile __P((SCR *,
 890  * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int));
 891  */
 892 int
 893 re_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags)
 894         SCR *sp;
 895         char *ptrn, **ptrnp;
 896         size_t plen, *lenp;
 897         regex_t *rep;
 898         u_int flags;
 899 {
 900         size_t len;
 901         int reflags, replaced, rval;
 902         char *p;
 903
 904         /* Set RE flags. */
 905         reflags = 0;
 906         if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
 907                 if (O_ISSET(sp, O_EXTENDED))
 908                         reflags |= REG_EXTENDED;
 909                 if (O_ISSET(sp, O_IGNORECASE))
 910                         reflags |= REG_ICASE;
 911                 if (O_ISSET(sp, O_ICLOWER)) {
 912                         for (p = ptrn, len = plen; len > 0; ++p, --len)
 913                                 if (isupper(*p))
 914                                         break;
 915                         if (len == 0)
 916                                 reflags |= REG_ICASE;
 917                 }
 918         }
 919
 920         /* If we're replacing a saved value, clear the old one. */
 921         if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
 922                 regfree(&sp->re_c);
 923                 F_CLR(sp, SC_RE_SEARCH);
 924         }
 925         if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
 926                 regfree(&sp->subre_c);
 927                 F_CLR(sp, SC_RE_SUBST);
 928         }
 929
 930         /*
 931          * If we're saving the string, it's a pattern we haven't seen before,
 932          * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
 933          * later recompilation.   Free any previously saved value.
 934          */
 935         if (ptrnp != NULL) {
 936                 if (LF_ISSET(RE_C_CSCOPE)) {
 937                         if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
 938                                 return (1);
 939                         /*
 940                          * XXX
 941                          * Currently, the match-any-<blank> expression used in
 942                          * re_cscope_conv() requires extended RE's.  This may
 943                          * not be right or safe.
 944                          */
 945                         reflags |= REG_EXTENDED;
 946                 } else if (LF_ISSET(RE_C_TAG)) {
 947                         if (re_tag_conv(sp, &ptrn, &plen, &replaced))
 948                                 return (1);
 949                 } else
 950                         if (re_conv(sp, &ptrn, &plen, &replaced))
 951                                 return (1);
 952
 953                 /* Discard previous pattern. */
 954                 if (*ptrnp != NULL) {
 955                         free(*ptrnp);
 956                         *ptrnp = NULL;
 957                 }
 958                 if (lenp != NULL)
 959                         *lenp = plen;
 960
 961                 /*
 962                  * Copy the string into allocated memory.
 963                  *
 964                  * XXX
 965                  * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
 966                  * for now.  There's just no other solution.
 967                  */
 968                 MALLOC(sp, *ptrnp, char *, plen + 1);
 969                 if (*ptrnp != NULL) {
 970                         memcpy(*ptrnp, ptrn, plen);
 971                         (*ptrnp)[plen] = '\0';
 972                 }
 973
 974                 /* Free up conversion-routine-allocated memory. */
 975                 if (replaced)
 976                         FREE_SPACE(sp, ptrn, 0);
 977
 978                 if (*ptrnp == NULL)
 979                         return (1);
 980
 981                 ptrn = *ptrnp;
 982         }
 983
 984         /*
 985          * XXX
 986          * Regcomp isn't 8-bit clean, so we just lost if the pattern
 987          * contained a nul.  Bummer!
 988          */
 989         if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
 990                 if (!LF_ISSET(RE_C_SILENT))
 991                         re_error(sp, rval, rep);
 992                 return (1);
 993         }
 994
 995         if (LF_ISSET(RE_C_SEARCH))
 996                 F_SET(sp, SC_RE_SEARCH);
 997         if (LF_ISSET(RE_C_SUBST))
 998                 F_SET(sp, SC_RE_SUBST);
 999
1000         return (0);
1001 }
1002
1003 /*
1004  * re_conv --
1005  *      Convert vi's regular expressions into something that the
1006  *      the POSIX 1003.2 RE functions can handle.
1007  *
1008  * There are three conversions we make to make vi's RE's (specifically
1009  * the global, search, and substitute patterns) work with POSIX RE's.
1010  *
1011  * 1: If O_MAGIC is not set, strip backslashes from the magic character
1012  *    set (.[*~) that have them, and add them to the ones that don't.
1013  * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1014  *    from the last substitute command's replacement string.  If O_MAGIC
1015  *    is set, it's the string "~".
1016  * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1017  *    new RE escapes.
1018  *
1019  * !!!/XXX
1020  * This doesn't exactly match the historic behavior of vi because we do
1021  * the ~ substitution before calling the RE engine, so magic characters
1022  * in the replacement string will be expanded by the RE engine, and they
1023  * weren't historically.  It's a bug.
1024  */
1025 static int
1026 re_conv(sp, ptrnp, plenp, replacedp)
1027         SCR *sp;
1028         char **ptrnp;
1029         size_t *plenp;
1030         int *replacedp;
1031 {
1032         size_t blen, len, needlen;
1033         int magic;
1034         char *bp, *p, *t;
1035
1036         /*
1037          * First pass through, we figure out how much space we'll need.
1038          * We do it in two passes, on the grounds that most of the time
1039          * the user is doing a search and won't have magic characters.
1040          * That way we can skip most of the memory allocation and copies.
1041          */
1042         magic = 0;
1043         for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1044                 switch (*p) {
1045                 case '\\':
1046                         if (len > 1) {
1047                                 --len;
1048                                 switch (*++p) {
1049                                 case '<':
1050                                         magic = 1;
1051                                         needlen += sizeof(RE_WSTART);
1052                                         break;
1053                                 case '>':
1054                                         magic = 1;
1055                                         needlen += sizeof(RE_WSTOP);
1056                                         break;
1057                                 case '~':
1058                                         if (!O_ISSET(sp, O_MAGIC)) {
1059                                                 magic = 1;
1060                                                 needlen += sp->repl_len;
1061                                         }
1062                                         break;
1063                                 case '.':
1064                                 case '[':
1065                                 case '*':
1066                                         if (!O_ISSET(sp, O_MAGIC)) {
1067                                                 magic = 1;
1068                                                 needlen += 1;
1069                                         }
1070                                         break;
1071                                 default:
1072                                         needlen += 2;
1073                                 }
1074                         } else
1075                                 needlen += 1;
1076                         break;
1077                 case '~':
1078                         if (O_ISSET(sp, O_MAGIC)) {
1079                                 magic = 1;
1080                                 needlen += sp->repl_len;
1081                         }
1082                         break;
1083                 case '.':
1084                 case '[':
1085                 case '*':
1086                         if (!O_ISSET(sp, O_MAGIC)) {
1087                                 magic = 1;
1088                                 needlen += 2;
1089                         }
1090                         break;
1091                 default:
1092                         needlen += 1;
1093                         break;
1094                 }
1095
1096         if (!magic) {
1097                 *replacedp = 0;
1098                 return (0);
1099         }
1100
1101         /* Get enough memory to hold the final pattern. */
1102         *replacedp = 1;
1103         GET_SPACE_RET(sp, bp, blen, needlen);
1104
1105         for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1106                 switch (*p) {
1107                 case '\\':
1108                         if (len > 1) {
1109                                 --len;
1110                                 switch (*++p) {
1111                                 case '<':
1112                                         memcpy(t,
1113                                             RE_WSTART, sizeof(RE_WSTART) - 1);
1114                                         t += sizeof(RE_WSTART) - 1;
1115                                         break;
1116                                 case '>':
1117                                         memcpy(t,
1118                                             RE_WSTOP, sizeof(RE_WSTOP) - 1);
1119                                         t += sizeof(RE_WSTOP) - 1;
1120                                         break;
1121                                 case '~':
1122                                         if (O_ISSET(sp, O_MAGIC))
1123                                                 *t++ = '~';
1124                                         else {
1125                                                 memcpy(t,
1126                                                     sp->repl, sp->repl_len);
1127                                                 t += sp->repl_len;
1128                                         }
1129                                         break;
1130                                 case '.':
1131                                 case '[':
1132                                 case '*':
1133                                         if (O_ISSET(sp, O_MAGIC))
1134                                                 *t++ = '\\';
1135                                         *t++ = *p;
1136                                         break;
1137                                 default:
1138                                         *t++ = '\\';
1139                                         *t++ = *p;
1140                                 }
1141                         } else
1142                                 *t++ = '\\';
1143                         break;
1144                 case '~':
1145                         if (O_ISSET(sp, O_MAGIC)) {
1146                                 memcpy(t, sp->repl, sp->repl_len);
1147                                 t += sp->repl_len;
1148                         } else
1149                                 *t++ = '~';
1150                         break;
1151                 case '.':
1152                 case '[':
1153                 case '*':
1154                         if (!O_ISSET(sp, O_MAGIC))
1155                                 *t++ = '\\';
1156                         *t++ = *p;
1157                         break;
1158                 default:
1159                         *t++ = *p;
1160                         break;
1161                 }
1162
1163         *ptrnp = bp;
1164         *plenp = t - bp;
1165         return (0);
1166 }
1167
1168 /*
1169  * re_tag_conv --
1170  *      Convert a tags search path into something that the POSIX
1171  *      1003.2 RE functions can handle.
1172  */
1173 static int
1174 re_tag_conv(sp, ptrnp, plenp, replacedp)
1175         SCR *sp;
1176         char **ptrnp;
1177         size_t *plenp;
1178         int *replacedp;
1179 {
1180         size_t blen, len;
1181         int lastdollar;
1182         char *bp, *p, *t;
1183
1184         len = *plenp;
1185
1186         /* Max memory usage is 2 times the length of the string. */
1187         *replacedp = 1;
1188         GET_SPACE_RET(sp, bp, blen, len * 2);
1189
1190         p = *ptrnp;
1191         t = bp;
1192
1193         /* If the last character is a '/' or '?', we just strip it. */
1194         if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1195                 --len;
1196
1197         /* If the next-to-last or last character is a '$', it's magic. */
1198         if (len > 0 && p[len - 1] == '$') {
1199                 --len;
1200                 lastdollar = 1;
1201         } else
1202                 lastdollar = 0;
1203
1204         /* If the first character is a '/' or '?', we just strip it. */
1205         if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1206                 ++p;
1207                 --len;
1208         }
1209
1210         /* If the first or second character is a '^', it's magic. */
1211         if (p[0] == '^') {
1212                 *t++ = *p++;
1213                 --len;
1214         }
1215
1216         /*
1217          * Escape every other magic character we can find, meanwhile stripping
1218          * the backslashes ctags inserts when escaping the search delimiter
1219          * characters.
1220          */
1221         for (; len > 0; --len) {
1222                 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1223                         ++p;
1224                         --len;
1225                 } else if (strchr("^.[]$*", p[0]))
1226                         *t++ = '\\';
1227                 *t++ = *p++;
1228         }
1229         if (lastdollar)
1230                 *t++ = '$';
1231
1232         *ptrnp = bp;
1233         *plenp = t - bp;
1234         return (0);
1235 }
1236
1237 /*
1238  * re_cscope_conv --
1239  *       Convert a cscope search path into something that the POSIX
1240  *      1003.2 RE functions can handle.
1241  */
1242 static int
1243 re_cscope_conv(sp, ptrnp, plenp, replacedp)
1244         SCR *sp;
1245         char **ptrnp;
1246         size_t *plenp;
1247         int *replacedp;
1248 {
1249         size_t blen, len, nspaces;
1250         char *bp, *p, *t;
1251
1252         /*
1253          * Each space in the source line printed by cscope represents an
1254          * arbitrary sequence of spaces, tabs, and comments.
1255          */
1256 #define CSCOPE_RE_SPACE         "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1257         for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1258                 if (*p == ' ')
1259                         ++nspaces;
1260
1261         /*
1262          * Allocate plenty of space:
1263          *      the string, plus potential escaping characters;
1264          *      nspaces + 2 copies of CSCOPE_RE_SPACE;
1265          *      ^, $, nul terminator characters.
1266          */
1267         *replacedp = 1;
1268         len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1269         GET_SPACE_RET(sp, bp, blen, len);
1270
1271         p = *ptrnp;
1272         t = bp;
1273
1274         *t++ = '^';
1275         memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
1276         t += sizeof(CSCOPE_RE_SPACE) - 1;
1277
1278         for (len = *plenp; len > 0; ++p, --len)
1279                 if (*p == ' ') {
1280                         memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
1281                         t += sizeof(CSCOPE_RE_SPACE) - 1;
1282                 } else {
1283                         if (strchr("\\^.[]$*+?()|{}", *p))
1284                                 *t++ = '\\';
1285                         *t++ = *p;
1286                 }
1287
1288         memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
1289         t += sizeof(CSCOPE_RE_SPACE) - 1;
1290         *t++ = '$';
1291
1292         *ptrnp = bp;
1293         *plenp = t - bp;
1294         return (0);
1295 }
1296
1297 /*
1298  * re_error --
1299  *      Report a regular expression error.
1300  *
1301  * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1302  */
1303 void
1304 re_error(sp, errcode, preg)
1305         SCR *sp;
1306         int errcode;
1307         regex_t *preg;
1308 {
1309         size_t s;
1310         char *oe;
1311
1312         s = regerror(errcode, preg, "", 0);
1313         if ((oe = malloc(s)) == NULL)
1314                 msgq(sp, M_SYSERR, NULL);
1315         else {
1316                 (void)regerror(errcode, preg, oe, s);
1317                 msgq(sp, M_ERR, "RE error: %s", oe);
1318                 free(oe);
1319         }
1320 }
1321
1322 /*
1323  * re_sub --
1324  *      Do the substitution for a regular expression.
      *
      *      Expands the replacement text sp->repl (sp->repl_len bytes) for
      *      one match and appends the result to the build buffer *lbp,
      *      starting at offset *lbclenp.  The offsets in match[] are applied
      *      relative to ip.  On the normal path the (possibly changed)
      *      buffer pointer and lengths are written back through lbp,
      *      lbclenp and lblenp, and 0 is returned.
      *
      *      NOTE(review): NEEDSP and NEEDNEWLINE are defined outside this
      *      function; they presumably grow the buffers and return non-zero
      *      from re_sub on allocation failure -- confirm against their
      *      definitions.
1325  */
1326 static int
1327 re_sub(sp, ip, lbp, lbclenp, lblenp, match)
1328         SCR *sp;
1329         char *ip;                       /* Input line. */
1330         char **lbp;                     /* Build buffer. */
1331         size_t *lbclenp, *lblenp;       /* Bytes used in *lbp; presumably its allocated size -- confirm. */
1332         regmatch_t match[10];           /* Whole match [0] and subexpressions [1-9]. */
1333 {
1334         enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1335         size_t lbclen, lblen;           /* Local copies. */
1336         size_t mlen;                    /* Match length. */
1337         size_t rpl;                     /* Remaining replacement length. */
1338         char *rp;                       /* Replacement pointer. */
1339         int ch;
1340         int no;                         /* Match replacement offset. */
1341         char *p, *t;                    /* Buffer pointers. */
1342         char *lb;                       /* Local copies. */
1343
1344         lb = *lbp;                      /* Get local copies. */
1345         lbclen = *lbclenp;
1346         lblen = *lblenp;
1347
1348         /*
1349          * QUOTING NOTE:
1350          *
1351          * There are some special sequences that vi provides in the
1352          * replacement patterns.
1353          *       & string the RE matched (\& if nomagic set)
1354          *      \# n-th regular subexpression
1355          *      \E end \U, \L conversion
1356          *      \e end \U, \L conversion
1357          *      \l convert the next character to lower-case
1358          *      \L convert to lower-case, until \E, \e, or end of replacement
1359          *      \u convert the next character to upper-case
1360          *      \U convert to upper-case, until \E, \e, or end of replacement
1361          *
1362          * Otherwise, since this is the lowest level of replacement, discard
1363          * all escaping characters.  This (hopefully) matches historic practice.
1364          */
             /*
              * OUTCH appends one byte at p and bumps lbclen.  If nltrans is set
              * and the byte's key value is K_CR or K_NL, its offset in the build
              * buffer is first recorded in sp->newl[]; the byte itself is still
              * stored.  Otherwise any pending case conversion is applied, and
              * the one-shot conversions (C_ONELOWER, C_ONEUPPER) reset conv to
              * C_NOTSET once they have been used.
              */
1365 #define OUTCH(ch, nltrans) {                                            \
1366         CHAR_T __ch = (ch);                                             \
1367         u_int __value = KEY_VAL(sp, __ch);                              \
1368         if (nltrans && (__value == K_CR || __value == K_NL)) {          \
1369                 NEEDNEWLINE(sp);                                        \
1370                 sp->newl[sp->newl_cnt++] = lbclen;                      \
1371         } else if (conv != C_NOTSET) {                                  \
1372                 switch (conv) {                                         \
1373                 case C_ONELOWER:                                        \
1374                         conv = C_NOTSET;                                \
1375                         /* FALLTHROUGH */                               \
1376                 case C_LOWER:                                           \
1377                         if (isupper(__ch))                              \
1378                                 __ch = tolower(__ch);                   \
1379                         break;                                          \
1380                 case C_ONEUPPER:                                        \
1381                         conv = C_NOTSET;                                \
1382                         /* FALLTHROUGH */                               \
1383                 case C_UPPER:                                           \
1384                         if (islower(__ch))                              \
1385                                 __ch = toupper(__ch);                   \
1386                         break;                                          \
1387                 default:                                                \
1388                         abort();                                        \
1389                 }                                                       \
1390         }                                                               \
1391         NEEDSP(sp, 1, p);                                               \
1392         *p++ = __ch;                                                    \
1393         ++lbclen;                                                       \
1394 }
1395         conv = C_NOTSET;
             /* Walk the replacement text; rpl counts the bytes not yet read. */
1396         for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1397                 switch (ch = *rp++) {
1398                 case '&':
                             /* A bare '&' is the whole match only if O_MAGIC is set. */
1399                         if (O_ISSET(sp, O_MAGIC)) {
1400                                 no = 0;
1401                                 goto subzero;
1402                         }
1403                         break;
1404                 case '\\':
                             /* A trailing backslash is output as itself. */
1405                         if (rpl == 0)
1406                                 break;
                             /*
                              * Account for the escaped byte now; every case below
                              * advances rp past it.
                              */
1407                         --rpl;
1408                         switch (ch = *rp) {
1409                         case '&':
1410                                 ++rp;
                                     /* "\&" is the whole match only if O_MAGIC is not set. */
1411                                 if (!O_ISSET(sp, O_MAGIC)) {
1412                                         no = 0;
1413                                         goto subzero;
1414                                 }
1415                                 break;
1416                         case '0': case '1': case '2': case '3': case '4':
1417                         case '5': case '6': case '7': case '8': case '9':
1418                                 no = *rp++ - '0';
                                     /*
                                      * Also entered by goto from both '&' cases, with
                                      * no == 0.  If the (sub)match is unset, break out
                                      * so that ch itself is output below.
                                      */
1419 subzero:                        if (match[no].rm_so == -1 ||
1420                                     match[no].rm_eo == -1)
1421                                         break;
1422                                 mlen = match[no].rm_eo - match[no].rm_so;
                                     /*
                                      * Matched text is copied with nltrans 0, so no
                                      * line breaks are recorded for it; case
                                      * conversion still applies.
                                      */
1423                                 for (t = ip + match[no].rm_so; mlen--; ++t)
1424                                         OUTCH(*t, 0);
1425                                 continue;
1426                         case 'e':
1427                         case 'E':
1428                                 ++rp;
1429                                 conv = C_NOTSET;
1430                                 continue;
1431                         case 'l':
1432                                 ++rp;
1433                                 conv = C_ONELOWER;
1434                                 continue;
1435                         case 'L':
1436                                 ++rp;
1437                                 conv = C_LOWER;
1438                                 continue;
1439                         case 'u':
1440                                 ++rp;
1441                                 conv = C_ONEUPPER;
1442                                 continue;
1443                         case 'U':
1444                                 ++rp;
1445                                 conv = C_UPPER;
1446                                 continue;
1447                         default:
                                     /* Any other escaped byte is output without its backslash. */
1448                                 ++rp;
1449                                 break;
1450                         }
1451                 }
                     /* Literal byte from the replacement text; a CR/NL here is recorded in sp->newl[]. */
1452                 OUTCH(ch, 1);
1453         }
1454
1455         *lbp = lb;                      /* Update caller's information. */
1456         *lbclenp = lbclen;
1457         *lblenp = lblen;
1458         return (0);
1459 }