ex/ex_subst.c

   1 /*-
   2  * Copyright (c) 1992, 1993, 1994
   3  *      The Regents of the University of California.  All rights reserved.
   4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
   5  *      Keith Bostic.  All rights reserved.
   6  *
   7  * See the LICENSE file for redistribution information.
   8  */
   9
  10 #include "config.h"
  11
  12 #ifndef lint
  13 static const char sccsid[] = "$Id: ex_subst.c,v 10.46 2000/09/02 13:14:15 skimo Exp $ (Berkeley) $Date: 2000/09/02 13:14:15 $";
  14 #endif /* not lint */
  15
  16 #include <sys/types.h>
  17 #include <sys/queue.h>
  18 #include <sys/time.h>
  19
  20 #include <bitstring.h>
  21 #include <ctype.h>
  22 #include <errno.h>
  23 #include <limits.h>
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <unistd.h>
  28
  29 #include "../common/common.h"
  30 #include "../vi/vi.h"
  31
  32 #define SUB_FIRST       0x01            /* The 'r' flag isn't reasonable. */
  33 #define SUB_MUSTSETR    0x02            /* The 'r' flag is required. */
  34
  35 static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
  36 static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
  37 static int re_sub __P((SCR *,
  38                 CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
  39 static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
  40 static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));
  41
  42 /*
  43  * ex_s --
  44  *      [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
  45  *
  46  *      Substitute on lines matching a pattern.
  47  *
  48  * PUBLIC: int ex_s __P((SCR *, EXCMD *));
  49  */
  50 int
  51 ex_s(sp, cmdp)
  52         SCR *sp;
  53         EXCMD *cmdp;
  54 {
  55         regex_t *re;
  56         size_t blen, len;
  57         u_int flags;
  58         int delim;
  59         CHAR_T *bp, *p, *ptrn, *rep, *t;
  60
  61         /*
  62          * Skip leading white space.
  63          *
  64          * !!!
  65          * Historic vi allowed any non-alphanumeric to serve as the
  66          * substitution command delimiter.
  67          *
  68          * !!!
  69          * If the arguments are empty, it's the same as &, i.e. we
  70          * repeat the last substitution.
  71          */
  72         if (cmdp->argc == 0)
  73                 goto subagain;
  74         for (p = cmdp->argv[0]->bp,
  75             len = cmdp->argv[0]->len; len > 0; --len, ++p) {
  76                 if (!isblank(*p))
  77                         break;
  78         }
  79         if (len == 0)
  80 subagain:       return (ex_subagain(sp, cmdp));
  81
  82         delim = *p++;
  83         if (isalnum(delim) || delim == '\\')
  84                 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
  85
  86         /*
  87          * !!!
  88          * The full-blown substitute command reset the remembered
  89          * state of the 'c' and 'g' suffices.
  90          */
  91         sp->c_suffix = sp->g_suffix = 0;
  92
  93         /*
  94          * Get the pattern string, toss escaping characters.
  95          *
  96          * !!!
  97          * Historic vi accepted any of the following forms:
  98          *
  99          *      :s/abc/def/             change "abc" to "def"
 100          *      :s/abc/def              change "abc" to "def"
 101          *      :s/abc/                 delete "abc"
 102          *      :s/abc                  delete "abc"
 103          *
 104          * QUOTING NOTE:
 105          *
 106          * Only toss an escaping character if it escapes a delimiter.
 107          * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
 108          * would be nice to be more regular, i.e. for each layer of
 109          * escaping a single escaping character is removed, but that's
 110          * not how the historic vi worked.
 111          */
 112         for (ptrn = t = p;;) {
 113                 if (p[0] == '\0' || p[0] == delim) {
 114                         if (p[0] == delim)
 115                                 ++p;
 116                         /*
 117                          * !!!
 118                          * Nul terminate the pattern string -- it's passed
 119                          * to regcomp which doesn't understand anything else.
 120                          */
 121                         *t = '\0';
 122                         break;
 123                 }
 124                 if (p[0] == '\\')
 125                         if (p[1] == delim)
 126                                 ++p;
 127                         else if (p[1] == '\\')
 128                                 *t++ = *p++;
 129                 *t++ = *p++;
 130         }
 131
 132         /*
 133          * If the pattern string is empty, use the last RE (not just the
 134          * last substitution RE).
 135          */
 136         if (*ptrn == '\0') {
 137                 if (sp->re == NULL) {
 138                         ex_emsg(sp, NULL, EXM_NOPREVRE);
 139                         return (1);
 140                 }
 141
 142                 /* Re-compile the RE if necessary. */
 143                 if (!F_ISSET(sp, SC_RE_SEARCH) &&
 144                     re_compile(sp, sp->re, sp->re_len,
 145                     NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
 146                         return (1);
 147                 flags = 0;
 148         } else {
 149                 /*
 150                  * !!!
 151                  * Compile the RE.  Historic practice is that substitutes set
 152                  * the search direction as well as both substitute and search
 153                  * RE's.  We compile the RE twice, as we don't want to bother
 154                  * ref counting the pattern string and (opaque) structure.
 155                  */
 156                 if (re_compile(sp, ptrn, t - ptrn, &sp->re,
 157                     &sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
 158                         return (1);
 159                 if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
 160                     &sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
 161                         return (1);
 162
 163                 flags = SUB_FIRST;
 164                 sp->searchdir = FORWARD;
 165         }
 166         re = &sp->re_c;
 167
 168         /*
 169          * Get the replacement string.
 170          *
 171          * The special character & (\& if O_MAGIC not set) matches the
 172          * entire RE.  No handling of & is required here, it's done by
 173          * re_sub().
 174          *
 175          * The special character ~ (\~ if O_MAGIC not set) inserts the
 176          * previous replacement string into this replacement string.
 177          * Count ~'s to figure out how much space we need.  We could
 178          * special case nonexistent last patterns or whether or not
 179          * O_MAGIC is set, but it's probably not worth the effort.
 180          *
 181          * QUOTING NOTE:
 182          *
 183          * Only toss an escaping character if it escapes a delimiter or
 184          * if O_MAGIC is set and it escapes a tilde.
 185          *
 186          * !!!
 187          * If the entire replacement pattern is "%", then use the last
 188          * replacement pattern.  This semantic was added to vi in System
 189          * V and then percolated elsewhere, presumably around the time
 190          * that it was added to their version of ed(1).
 191          */
 192         if (p[0] == '\0' || p[0] == delim) {
 193                 if (p[0] == delim)
 194                         ++p;
 195                 if (sp->repl != NULL)
 196                         free(sp->repl);
 197                 sp->repl = NULL;
 198                 sp->repl_len = 0;
 199         } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
 200                 p += p[1] == delim ? 2 : 1;
 201         else {
 202                 for (rep = p, len = 0;
 203                     p[0] != '\0' && p[0] != delim; ++p, ++len)
 204                         if (p[0] == '~')
 205                                 len += sp->repl_len;
 206                 GET_SPACE_RETW(sp, bp, blen, len);
 207                 for (t = bp, len = 0, p = rep;;) {
 208                         if (p[0] == '\0' || p[0] == delim) {
 209                                 if (p[0] == delim)
 210                                         ++p;
 211                                 break;
 212                         }
 213                         if (p[0] == '\\') {
 214                                 if (p[1] == delim)
 215                                         ++p;
 216                                 else if (p[1] == '\\') {
 217                                         *t++ = *p++;
 218                                         ++len;
 219                                 } else if (p[1] == '~') {
 220                                         ++p;
 221                                         if (!O_ISSET(sp, O_MAGIC))
 222                                                 goto tilde;
 223                                 }
 224                         } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
 225 tilde:                          ++p;
 226                                 MEMCPYW(t, sp->repl, sp->repl_len);
 227                                 t += sp->repl_len;
 228                                 len += sp->repl_len;
 229                                 continue;
 230                         }
 231                         *t++ = *p++;
 232                         ++len;
 233                 }
 234                 if ((sp->repl_len = len) != 0) {
 235                         if (sp->repl != NULL)
 236                                 free(sp->repl);
 237                         if ((sp->repl = malloc(len)) == NULL) {
 238                                 msgq(sp, M_SYSERR, NULL);
 239                                 FREE_SPACEW(sp, bp, blen);
 240                                 return (1);
 241                         }
 242                         MEMCPYW(sp->repl, bp, len);
 243                 }
 244                 FREE_SPACEW(sp, bp, blen);
 245         }
 246         return (s(sp, cmdp, p, re, flags));
 247 }
 248
 249 /*
 250  * ex_subagain --
 251  *      [line [,line]] & [cgr] [count] [#lp]]
 252  *
 253  *      Substitute using the last substitute RE and replacement pattern.
 254  *
 255  * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
 256  */
 257 int
 258 ex_subagain(sp, cmdp)
 259         SCR *sp;
 260         EXCMD *cmdp;
 261 {
 262         if (sp->subre == NULL) {
 263                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 264                 return (1);
 265         }
 266         if (!F_ISSET(sp, SC_RE_SUBST) &&
 267             re_compile(sp, sp->subre, sp->subre_len,
 268             NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
 269                 return (1);
 270         return (s(sp,
 271             cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
 272 }
 273
 274 /*
 275  * ex_subtilde --
 276  *      [line [,line]] ~ [cgr] [count] [#lp]]
 277  *
 278  *      Substitute using the last RE and last substitute replacement pattern.
 279  *
 280  * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
 281  */
 282 int
 283 ex_subtilde(sp, cmdp)
 284         SCR *sp;
 285         EXCMD *cmdp;
 286 {
 287         if (sp->re == NULL) {
 288                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 289                 return (1);
 290         }
 291         if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
 292             sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
 293                 return (1);
 294         return (s(sp,
 295             cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
 296 }
 297
 298 /*
 299  * s --
 300  * Do the substitution.  This stuff is *really* tricky.  There are lots of
 301  * special cases, and general nastiness.  Don't mess with it unless you're
 302  * pretty confident.
 303  *
 304  * The nasty part of the substitution is what happens when the replacement
 305  * string contains newlines.  It's a bit tricky -- consider the information
 306  * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 307  * to build a set of newline offsets which we use to break the line up later,
 308  * when the replacement is done.  Don't change it unless you're *damned*
 309  * confident.
 310  */
 311 #define NEEDNEWLINE(sp) {                                               \
 312         if (sp->newl_len == sp->newl_cnt) {                             \
 313                 sp->newl_len += 25;                                     \
 314                 REALLOC(sp, sp->newl, size_t *,                         \
 315                     sp->newl_len * sizeof(size_t));                     \
 316                 if (sp->newl == NULL) {                                 \
 317                         sp->newl_len = 0;                               \
 318                         return (1);                                     \
 319                 }                                                       \
 320         }                                                               \
 321 }
 322
 323 #define BUILD(sp, l, len) {                                             \
 324         if (lbclen + (len) > lblen) {                                   \
 325                 lblen += MAX(lbclen + (len), 256);                      \
 326                 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));      \
 327                 if (lb == NULL) {                                       \
 328                         lbclen = 0;                                     \
 329                         return (1);                                     \
 330                 }                                                       \
 331         }                                                               \
 332         MEMCPYW(lb + lbclen, l, len);                                   \
 333         lbclen += len;                                                  \
 334 }
 335
 336 #define NEEDSP(sp, len, pnt) {                                          \
 337         if (lbclen + (len) > lblen) {                                   \
 338                 lblen += MAX(lbclen + (len), 256);                      \
 339                 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));      \
 340                 if (lb == NULL) {                                       \
 341                         lbclen = 0;                                     \
 342                         return (1);                                     \
 343                 }                                                       \
 344                 pnt = lb + lbclen;                                      \
 345         }                                                               \
 346 }
 347
 348 static int
 349 s(sp, cmdp, s, re, flags)
 350         SCR *sp;
 351         EXCMD *cmdp;
 352         CHAR_T *s;
 353         regex_t *re;
 354         u_int flags;
 355 {
 356         EVENT ev;
 357         MARK from, to;
 358         TEXTH tiq;
 359         db_recno_t elno, lno, slno;
 360         u_long ul;
 361         regmatch_t match[10];
 362         size_t blen, cnt, last, lbclen, lblen, len, llen;
 363         size_t offset, saved_offset, scno;
 364         int cflag, lflag, nflag, pflag, rflag;
 365         int didsub, do_eol_match, eflags, empty_ok, eval;
 366         int linechanged, matched, quit, rval;
 367         CHAR_T *p, *lb, *bp;
 368         enum nresult nret;
 369
 370         NEEDFILE(sp, cmdp);
 371
 372         slno = sp->lno;
 373         scno = sp->cno;
 374
 375         /*
 376          * !!!
 377          * Historically, the 'g' and 'c' suffices were always toggled as flags,
 378          * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
 379          * not set, they were initialized to 0 for all substitute commands.  If
 380          * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
 381          * specified substitute/replacement patterns (see ex_s()).
 382          */
 383         if (!O_ISSET(sp, O_EDCOMPATIBLE))
 384                 sp->c_suffix = sp->g_suffix = 0;
 385
 386         /*
 387          * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
 388          * it only displayed the last change.  I'd disallow them, but they are
 389          * useful in combination with the [v]global commands.  In the current
 390          * model the problem is combining them with the 'c' flag -- the screen
 391          * would have to flip back and forth between the confirm screen and the
 392          * ex print screen, which would be pretty awful.  We do display all
 393          * changes, though, for what that's worth.
 394          *
 395          * !!!
 396          * Historic vi was fairly strict about the order of "options", the
 397          * count, and "flags".  I'm somewhat fuzzy on the difference between
 398          * options and flags, anyway, so this is a simpler approach, and we
  399          * just take them in whatever order the user gives them.  (The ex
 400          * usage statement doesn't reflect this.)
 401          */
 402         cflag = lflag = nflag = pflag = rflag = 0;
 403         if (s == NULL)
 404                 goto noargs;
 405         for (lno = OOBLNO; *s != '\0'; ++s)
 406                 switch (*s) {
 407                 case ' ':
 408                 case '\t':
 409                         continue;
 410                 case '+':
 411                         ++cmdp->flagoff;
 412                         break;
 413                 case '-':
 414                         --cmdp->flagoff;
 415                         break;
 416                 case '0': case '1': case '2': case '3': case '4':
 417                 case '5': case '6': case '7': case '8': case '9':
 418                         if (lno != OOBLNO)
 419                                 goto usage;
 420                         errno = 0;
 421                         nret = nget_uslong(sp, &ul, s, &s, 10);
 422                         lno = ul;
 423                         if (*s == '\0')         /* Loop increment correction. */
 424                                 --s;
 425                         if (nret != NUM_OK) {
 426                                 if (nret == NUM_OVER)
 427                                         msgq(sp, M_ERR, "153|Count overflow");
 428                                 else if (nret == NUM_UNDER)
 429                                         msgq(sp, M_ERR, "154|Count underflow");
 430                                 else
 431                                         msgq(sp, M_SYSERR, NULL);
 432                                 return (1);
 433                         }
 434                         /*
 435                          * In historic vi, the count was inclusive from the
 436                          * second address.
 437                          */
 438                         cmdp->addr1.lno = cmdp->addr2.lno;
 439                         cmdp->addr2.lno += lno - 1;
 440                         if (!db_exist(sp, cmdp->addr2.lno) &&
 441                             db_last(sp, &cmdp->addr2.lno))
 442                                 return (1);
 443                         break;
 444                 case '#':
 445                         nflag = 1;
 446                         break;
 447                 case 'c':
 448                         sp->c_suffix = !sp->c_suffix;
 449
 450                         /* Ex text structure initialization. */
 451                         if (F_ISSET(sp, SC_EX)) {
 452                                 memset(&tiq, 0, sizeof(TEXTH));
 453                                 CIRCLEQ_INIT(&tiq);
 454                         }
 455                         break;
 456                 case 'g':
 457                         sp->g_suffix = !sp->g_suffix;
 458                         break;
 459                 case 'l':
 460                         lflag = 1;
 461                         break;
 462                 case 'p':
 463                         pflag = 1;
 464                         break;
 465                 case 'r':
 466                         if (LF_ISSET(SUB_FIRST)) {
 467                                 msgq(sp, M_ERR,
 468                     "155|Regular expression specified; r flag meaningless");
 469                                 return (1);
 470                         }
 471                         if (!F_ISSET(sp, SC_RE_SEARCH)) {
 472                                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 473                                 return (1);
 474                         }
 475                         rflag = 1;
 476                         re = &sp->re_c;
 477                         break;
 478                 default:
 479                         goto usage;
 480                 }
 481
 482         if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) {
 483 usage:          ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
 484                 return (1);
 485         }
 486
 487 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
 488                 msgq(sp, M_ERR,
 489 "156|The #, l and p flags may not be combined with the c flag in vi mode");
 490                 return (1);
 491         }
 492
 493         /*
 494          * bp:          if interactive, line cache
 495          * blen:        if interactive, line cache length
 496          * lb:          build buffer pointer.
 497          * lbclen:      current length of built buffer.
 498          * lblen;       length of build buffer.
 499          */
 500         bp = lb = NULL;
 501         blen = lbclen = lblen = 0;
 502
 503         /* For each line... */
 504         lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
 505         for (matched = quit = 0,
 506             elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
 507
 508                 /* Someone's unhappy, time to stop. */
 509                 if (INTERRUPTED(sp))
 510                         break;
 511
 512                 /* Get the line. */
 513                 if (db_get(sp, lno, DBG_FATAL, &s, &llen))
 514                         goto err;
 515
 516                 /*
 517                  * Make a local copy if doing confirmation -- when calling
 518                  * the confirm routine we're likely to lose the cached copy.
 519                  */
 520                 if (sp->c_suffix) {
 521                         if (bp == NULL) {
 522                                 GET_SPACE_RETW(sp, bp, blen, llen);
 523                         } else
 524                                 ADD_SPACE_RETW(sp, bp, blen, llen);
 525                         MEMCPYW(bp, s, llen);
 526                         s = bp;
 527                 }
 528
 529                 /* Start searching from the beginning. */
 530                 offset = 0;
 531                 len = llen;
 532
 533                 /* Reset the build buffer offset. */
 534                 lbclen = 0;
 535
 536                 /* Reset empty match flag. */
 537                 empty_ok = 1;
 538
 539                 /*
 540                  * We don't want to have to do a setline if the line didn't
 541                  * change -- keep track of whether or not this line changed.
 542                  * If doing confirmations, don't want to keep setting the
 543                  * line if change is refused -- keep track of substitutions.
 544                  */
 545                 didsub = linechanged = 0;
 546
 547                 /* New line, do an EOL match. */
 548                 do_eol_match = 1;
 549
 550                 /* It's not nul terminated, but we pretend it is. */
 551                 eflags = REG_STARTEND;
 552
 553                 /*
 554                  * The search area is from s + offset to the EOL.
 555                  *
 556                  * Generally, match[0].rm_so is the offset of the start
 557                  * of the match from the start of the search, and offset
 558                  * is the offset of the start of the last search.
 559                  */
 560 nextmatch:      match[0].rm_so = 0;
 561                 match[0].rm_eo = len;
 562
 563                 /* Get the next match. */
 564                 eval = regexec(re, s + offset, 10, match, eflags);
 565
 566                 /*
 567                  * There wasn't a match or if there was an error, deal with
 568                  * it.  If there was a previous match in this line, resolve
 569                  * the changes into the database.  Otherwise, just move on.
 570                  */
 571                 if (eval == REG_NOMATCH)
 572                         goto endmatch;
 573                 if (eval != 0) {
 574                         re_error(sp, eval, re);
 575                         goto err;
 576                 }
 577                 matched = 1;
 578
 579                 /* Only the first search can match an anchored expression. */
 580                 eflags |= REG_NOTBOL;
 581
 582                 /*
 583                  * !!!
 584                  * It's possible to match 0-length strings -- for example, the
 585                  * command s;a*;X;, when matched against the string "aabb" will
 586                  * result in "XbXbX", i.e. the matches are "aa", the space
 587                  * between the b's and the space between the b's and the end of
 588                  * the string.  There is a similar space between the beginning
 589                  * of the string and the a's.  The rule that we use (because vi
 590                  * historically used it) is that any 0-length match, occurring
 591                  * immediately after a match, is ignored.  Otherwise, the above
 592                  * example would have resulted in "XXbXbX".  Another example is
 593                  * incorrectly using " *" to replace groups of spaces with one
 594                  * space.
 595                  *
 596                  * The way we do this is that if we just had a successful match,
 597                  * the starting offset does not skip characters, and the match
 598                  * is empty, ignore the match and move forward.  If there's no
 599                  * more characters in the string, we were attempting to match
 600                  * after the last character, so quit.
 601                  */
 602                 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
 603                         empty_ok = 1;
 604                         if (len == 0)
 605                                 goto endmatch;
 606                         BUILD(sp, s + offset, 1)
 607                         ++offset;
 608                         --len;
 609                         goto nextmatch;
 610                 }
 611
 612                 /* Confirm change. */
 613                 if (sp->c_suffix) {
 614                         /*
 615                          * Set the cursor position for confirmation.  Note,
 616                          * if we matched on a '$', the cursor may be past
 617                          * the end of line.
 618                          */
 619                         from.lno = to.lno = lno;
 620                         from.cno = match[0].rm_so + offset;
 621                         to.cno = match[0].rm_eo + offset;
 622                         /*
 623                          * Both ex and vi have to correct for a change before
 624                          * the first character in the line.
 625                          */
 626                         if (llen == 0)
 627                                 from.cno = to.cno = 0;
 628                         if (F_ISSET(sp, SC_VI)) {
 629                                 /*
 630                                  * Only vi has to correct for a change after
 631                                  * the last character in the line.
 632                                  *
 633                                  * XXX
 634                                  * It would be nice to change the vi code so
 635                                  * that we could display a cursor past EOL.
 636                                  */
 637                                 if (to.cno >= llen)
 638                                         to.cno = llen - 1;
 639                                 if (from.cno >= llen)
 640                                         from.cno = llen - 1;
 641
 642                                 sp->lno = from.lno;
 643                                 sp->cno = from.cno;
 644                                 if (vs_refresh(sp, 1))
 645                                         goto err;
 646
 647                                 vs_update(sp, msg_cat(sp,
 648                                     "169|Confirm change? [n]", NULL), NULL);
 649
 650                                 if (v_event_get(sp, &ev, 0, 0))
 651                                         goto err;
 652                                 switch (ev.e_event) {
 653                                 case E_CHARACTER:
 654                                         break;
 655                                 case E_EOF:
 656                                 case E_ERR:
 657                                 case E_INTERRUPT:
 658                                         goto lquit;
 659                                 default:
 660                                         v_event_err(sp, &ev);
 661                                         goto lquit;
 662                                 }
 663                         } else {
 664                                 if (ex_print(sp, cmdp, &from, &to, 0) ||
 665                                     ex_scprint(sp, &from, &to))
 666                                         goto lquit;
 667                                 if (ex_txt(sp, &tiq, 0, TXT_CR))
 668                                         goto err;
 669                                 ev.e_c = tiq.cqh_first->lb[0];
 670                         }
 671
 672                         switch (ev.e_c) {
 673                         case CH_YES:
 674                                 break;
 675                         default:
 676                         case CH_NO:
 677                                 didsub = 0;
 678                                 BUILD(sp, s +offset, match[0].rm_eo);
 679                                 goto skip;
 680                         case CH_QUIT:
 681                                 /* Set the quit/interrupted flags. */
 682 lquit:                          quit = 1;
 683                                 F_SET(sp->gp, G_INTERRUPTED);
 684
 685                                 /*
 686                                  * Resolve any changes, then return to (and
 687                                  * exit from) the main loop.
 688                                  */
 689                                 goto endmatch;
 690                         }
 691                 }
 692
 693                 /*
 694                  * Set the cursor to the last position changed, converting
 695                  * from 1-based to 0-based.
 696                  */
 697                 sp->lno = lno;
 698                 sp->cno = match[0].rm_so;
 699
 700                 /* Copy the bytes before the match into the build buffer. */
 701                 BUILD(sp, s + offset, match[0].rm_so);
 702
 703                 /* Substitute the matching bytes. */
 704                 didsub = 1;
 705                 if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
 706                         goto err;
 707
 708                 /* Set the change flag so we know this line was modified. */
 709                 linechanged = 1;
 710
 711                 /* Move past the matched bytes. */
 712 skip:           offset += match[0].rm_eo;
 713                 len -= match[0].rm_eo;
 714
 715                 /* A match cannot be followed by an empty pattern. */
 716                 empty_ok = 0;
 717
 718                 /*
 719                  * If doing a global change with confirmation, we have to
 720                  * update the screen.  The basic idea is to store the line
 721                  * so the screen update routines can find it, and restart.
 722                  */
 723                 if (didsub && sp->c_suffix && sp->g_suffix) {
 724                         /*
 725                          * The new search offset will be the end of the
 726                          * modified line.
 727                          */
 728                         saved_offset = lbclen;
 729
 730                         /* Copy the rest of the line. */
 731                         if (len)
 732                                 BUILD(sp, s + offset, len)
 733
 734                         /* Set the new offset. */
 735                         offset = saved_offset;
 736
 737                         /* Store inserted lines, adjusting the build buffer. */
 738                         last = 0;
 739                         if (sp->newl_cnt) {
 740                                 for (cnt = 0;
 741                                     cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
 742                                         if (db_insert(sp, lno,
 743                                             lb + last, sp->newl[cnt] - last))
 744                                                 goto err;
 745                                         last = sp->newl[cnt] + 1;
 746                                         ++sp->rptlines[L_ADDED];
 747                                 }
 748                                 lbclen -= last;
 749                                 offset -= last;
 750                                 sp->newl_cnt = 0;
 751                         }
 752
 753                         /* Store and retrieve the line. */
 754                         if (db_set(sp, lno, lb + last, lbclen))
 755                                 goto err;
 756                         if (db_get(sp, lno, DBG_FATAL, &s, &llen))
 757                                 goto err;
 758                         ADD_SPACE_RETW(sp, bp, blen, llen)
 759                         MEMCPYW(bp, s, llen);
 760                         s = bp;
 761                         len = llen - offset;
 762
 763                         /* Restart the build. */
 764                         lbclen = 0;
 765                         BUILD(sp, s, offset);
 766
 767                         /*
 768                          * If we haven't already done the after-the-string
 769                          * match, do one.  Set REG_NOTEOL so the '$' pattern
 770                          * only matches once.
 771                          */
 772                         if (!do_eol_match)
 773                                 goto endmatch;
 774                         if (offset == len) {
 775                                 do_eol_match = 0;
 776                                 eflags |= REG_NOTEOL;
 777                         }
 778                         goto nextmatch;
 779                 }
 780
 781                 /*
 782                  * If it's a global:
 783                  *
 784                  * If at the end of the string, do a test for the after
 785                  * the string match.  Set REG_NOTEOL so the '$' pattern
 786                  * only matches once.
 787                  */
 788                 if (sp->g_suffix && do_eol_match) {
 789                         if (len == 0) {
 790                                 do_eol_match = 0;
 791                                 eflags |= REG_NOTEOL;
 792                         }
 793                         goto nextmatch;
 794                 }
 795
 796 endmatch:       if (!linechanged)
 797                         continue;
 798
 799                 /* Copy any remaining bytes into the build buffer. */
 800                 if (len)
 801                         BUILD(sp, s + offset, len)
 802
 803                 /* Store inserted lines, adjusting the build buffer. */
 804                 last = 0;
 805                 if (sp->newl_cnt) {
 806                         for (cnt = 0;
 807                             cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
 808                                 if (db_insert(sp,
 809                                     lno, lb + last, sp->newl[cnt] - last))
 810                                         goto err;
 811                                 last = sp->newl[cnt] + 1;
 812                                 ++sp->rptlines[L_ADDED];
 813                         }
 814                         lbclen -= last;
 815                         sp->newl_cnt = 0;
 816                 }
 817
 818                 /* Store the changed line. */
 819                 if (db_set(sp, lno, lb + last, lbclen))
 820                         goto err;
 821
 822                 /* Update changed line counter. */
 823                 if (sp->rptlchange != lno) {
 824                         sp->rptlchange = lno;
 825                         ++sp->rptlines[L_CHANGED];
 826                 }
 827
 828                 /*
 829                  * !!!
 830                  * Display as necessary.  Historic practice is to only
 831                  * display the last line of a line split into multiple
 832                  * lines.
 833                  */
 834                 if (lflag || nflag || pflag) {
 835                         from.lno = to.lno = lno;
 836                         from.cno = to.cno = 0;
 837                         if (lflag)
 838                                 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
 839                         if (nflag)
 840                                 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
 841                         if (pflag)
 842                                 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
 843                 }
 844         }
 845
 846         /*
 847          * !!!
 848          * Historically, vi attempted to leave the cursor at the same place if
 849          * the substitution was done at the current cursor position.  Otherwise
 850          * it moved it to the first non-blank of the last line changed.  There
 851          * were some problems: for example, :s/$/foo/ with the cursor on the
 852          * last character of the line left the cursor on the last character, or
 853          * the & command with multiple occurrences of the matching string in the
 854          * line usually left the cursor in a fairly random position.
 855          *
 856          * We try to do the same thing, with the exception that if the user is
 857          * doing substitution with confirmation, we move to the last line about
 858          * which the user was consulted, as opposed to the last line that they
 859          * actually changed.  This prevents a screen flash if the user doesn't
 860          * change many of the possible lines.
 861          */
 862         if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
 863                 sp->cno = 0;
 864                 (void)nonblank(sp, sp->lno, &sp->cno);
 865         }
 866
 867         /*
 868          * If not in a global command, and nothing matched, say so.
 869          * Else, if none of the lines displayed, put something up.
 870          */
 871         rval = 0;
 872         if (!matched) {
 873                 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
 874                         msgq(sp, M_ERR, "157|No match found");
 875                         goto err;
 876                 }
 877         } else if (!lflag && !nflag && !pflag)
 878                 F_SET(cmdp, E_AUTOPRINT);
 879
 880         if (0) {
 881 err:            rval = 1;
 882         }
 883
 884         if (bp != NULL)
 885                 FREE_SPACEW(sp, bp, blen);
 886         if (lb != NULL)
 887                 free(lb);
 888         return (rval);
 889 }
 890
 891 /*
 892  * re_compile --
 893  *      Compile the RE; if ptrnp != NULL, convert and save the pattern first.
 894  *      Returns 0 on success, 1 on failure.
 895  * PUBLIC: int re_compile __P((SCR *,
 896  * PUBLIC:     CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
 897  */
 898 int
 899 re_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags)
 900         SCR *sp;
 901         CHAR_T *ptrn, **ptrnp;          /* Pattern; where a copy is saved. */
 902         size_t plen, *lenp;             /* Its length; the copy's length. */
 903         regex_t *rep;                   /* Receives the compiled RE. */
 904         u_int flags;            /* SEARCH_* flags (presumably read by LF_ISSET). */
 905 {
 906         size_t len;
 907         int reflags, replaced, rval;
 908         CHAR_T *p;
 909
 910         /* Set RE flags: the caller's SEARCH_* requests, then the edit options. */
 911         reflags = 0;
 912         if (LF_ISSET(SEARCH_EXTEND))
 913                 reflags |= REG_EXTENDED;
 914         if (LF_ISSET(SEARCH_IC))
 915                 reflags |= REG_ICASE;
 916         if (LF_ISSET(SEARCH_LITERAL))
 917                 reflags |= REG_NOSPEC;
 918         if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
 919                 if (O_ISSET(sp, O_EXTENDED))
 920                         reflags |= REG_EXTENDED;
 921                 if (O_ISSET(sp, O_IGNORECASE))
 922                         reflags |= REG_ICASE;
 923                 if (O_ISSET(sp, O_ICLOWER))
 924                         goto iclower;           /* Same test as SEARCH_ICL. */
 925         }
 926         if (LF_ISSET(SEARCH_ICL)) {
 927 iclower:        for (p = ptrn, len = plen; len > 0; ++p, --len)
 928                         if (isupper(*p))
 929                                 break;
 930                 if (len == 0)                   /* No upper-case characters. */
 931                         reflags |= REG_ICASE;
 932         }
 933
 934         /* If we're replacing a saved value, clear the old one. */
 935         if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
 936                 regfree(&sp->re_c);
 937                 F_CLR(sp, SC_RE_SEARCH);
 938         }
 939         if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
 940                 regfree(&sp->subre_c);
 941                 F_CLR(sp, SC_RE_SUBST);
 942         }
 943
 944         /*
 945          * If we're saving the string, it's a pattern we haven't seen before,
 946          * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
 947          * later recompilation.  Free any previously saved value.
 948          */
 949         if (ptrnp != NULL) {
 950                 replaced = 0;
 951                 if (LF_ISSET(SEARCH_CSCOPE)) {
 952                         if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
 953                                 return (1);
 954                         /*
 955                          * XXX
 956                          * Currently, the match-any-<blank> expression used in
 957                          * re_cscope_conv() requires extended RE's.  This may
 958                          * not be right or safe.
 959                          */
 960                         reflags |= REG_EXTENDED;
 961                 } else if (LF_ISSET(SEARCH_TAG)) {
 962                         if (re_tag_conv(sp, &ptrn, &plen, &replaced))
 963                                 return (1);
 964                 } else if (!LF_ISSET(SEARCH_LITERAL))
 965                         if (re_conv(sp, &ptrn, &plen, &replaced))
 966                                 return (1);
 967
 968                 /* Discard previous pattern. */
 969                 if (*ptrnp != NULL) {
 970                         free(*ptrnp);
 971                         *ptrnp = NULL;
 972                 }
 973                 if (lenp != NULL)
 974                         *lenp = plen;           /* Length after conversion. */
 975
 976                 /*
 977                  * Copy the string into allocated memory.
 978                  *
 979                  * XXX
 980                  * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
 981                  * for now.  There's just no other solution.
 982                  */
 983                 MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
 984                 if (*ptrnp != NULL) {
 985                         MEMCPYW(*ptrnp, ptrn, plen);
 986                         (*ptrnp)[plen] = '\0';
 987                 }
 988
 989                 /* Free up conversion-routine-allocated memory. */
 990                 if (replaced)
 991                         FREE_SPACEW(sp, ptrn, 0);
 992
 993                 if (*ptrnp == NULL)             /* The copy wasn't allocated. */
 994                         return (1);
 995
 996                 ptrn = *ptrnp;          /* Compile the nul-terminated copy. */
 997         }
 998
 999         /*
1000          * XXX
1001          * Regcomp isn't 8-bit clean, so we just lost if the pattern
1002          * contained a nul.  Bummer!
1003          */
1004         if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
1005                 if (LF_ISSET(SEARCH_MSG))       /* Report only on request. */
1006                         re_error(sp, rval, rep);
1007                 return (1);
1008         }
1009
1010         if (LF_ISSET(SEARCH_CSEARCH))
1011                 F_SET(sp, SC_RE_SEARCH);
1012         if (LF_ISSET(SEARCH_CSUBST))
1013                 F_SET(sp, SC_RE_SUBST);
1014
1015         return (0);
1016 }
1017
1018 /*
1019  * re_conv --
1020  *      Convert vi's regular expressions into something that the
1021  *      POSIX 1003.2 RE functions can handle.
1022  *
1023  * There are three conversions we make to make vi's RE's (specifically
1024  * the global, search, and substitute patterns) work with POSIX RE's.
1025  *
1026  * 1: If O_MAGIC is not set, strip backslashes from the magic character
1027  *    set (.[*~) that have them, and add them to the ones that don't.
1028  * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1029  *    from the last substitute command's replacement string.  If O_MAGIC
1030  *    is set, it's the string "~".
1031  * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1032  *    new RE escapes.
1033  *
1034  * !!!/XXX
1035  * This doesn't exactly match the historic behavior of vi because we do
1036  * the ~ substitution before calling the RE engine, so magic characters
1037  * in the replacement string will be expanded by the RE engine, and they
1038  * weren't historically.  It's a bug.
1039  */
1040 static int
1041 re_conv(sp, ptrnp, plenp, replacedp)
1042         SCR *sp;
1043         CHAR_T **ptrnp;                 /* In: vi pattern.  Out: converted one. */
1044         size_t *plenp;                  /* In/out: pattern length. */
1045         int *replacedp;                 /* Out: 1 if *ptrnp is now our buffer. */
1046 {
1047         size_t blen, len, needlen;
1048         int magic;
1049         CHAR_T *bp, *p, *t;
1050
1051         /*
1052          * First pass: decide whether anything needs converting and count
1053          * the space the expansions need.  Most of the time the user is
1054          * doing a search without magic characters, so two passes let us
1055          * skip the allocation and copy.  The count is only a lower bound.
1056          */
1057         magic = 0;
1058         for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1059                 switch (*p) {
1060                 case '\\':
1061                         if (len > 1) {
1062                                 --len;
1063                                 switch (*++p) {
1064                                 case '<':
1065                                         magic = 1;
1066                                         needlen += RE_WSTART_LEN;
1067                                         break;
1068                                 case '>':
1069                                         magic = 1;
1070                                         needlen += RE_WSTOP_LEN;
1071                                         break;
1072                                 case '~':
1073                                         if (!O_ISSET(sp, O_MAGIC)) {
1074                                                 magic = 1;
1075                                                 needlen += sp->repl_len;
1076                                         }
1077                                         break;
1078                                 case '.':
1079                                 case '[':
1080                                 case '*':
1081                                         if (!O_ISSET(sp, O_MAGIC)) {
1082                                                 magic = 1;
1083                                                 needlen += 1;
1084                                         }
1085                                         break;
1086                                 default:
1087                                         needlen += 2;
1088                                 }
1089                         } else
1090                                 needlen += 1;
1091                         break;
1092                 case '~':
1093                         if (O_ISSET(sp, O_MAGIC)) {
1094                                 magic = 1;
1095                                 needlen += sp->repl_len;
1096                         }
1097                         break;
1098                 case '.':
1099                 case '[':
1100                 case '*':
1101                         if (!O_ISSET(sp, O_MAGIC)) {
1102                                 magic = 1;
1103                                 needlen += 2;
1104                         }
1105                         break;
1106                 default:
1107                         needlen += 1;
1108                         break;
1109                 }
1110
1111         if (!magic) {
1112                 *replacedp = 0;
1113                 return (0);                     /* Nothing to convert. */
1114         }
1115
1116         /* needlen misses some pass-through characters; *plenp covers them. */
1117         *replacedp = 1;
1118         GET_SPACE_RETW(sp, bp, blen, needlen + *plenp);
1119
1120         for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1121                 switch (*p) {
1122                 case '\\':
1123                         if (len > 1) {
1124                                 --len;
1125                                 switch (*++p) {
1126                                 case '<':
1127                                         MEMCPY(t,
1128                                             RE_WSTART, RE_WSTART_LEN - 1);
1129                                         t += RE_WSTART_LEN - 1;
1130                                         break;
1131                                 case '>':
1132                                         MEMCPY(t,
1133                                             RE_WSTOP, RE_WSTOP_LEN - 1);
1134                                         t += RE_WSTOP_LEN - 1;
1135                                         break;
1136                                 case '~':
1137                                         if (O_ISSET(sp, O_MAGIC))
1138                                                 *t++ = '~';
1139                                         else {
1140                                                 MEMCPYW(t,
1141                                                     sp->repl, sp->repl_len);
1142                                                 t += sp->repl_len;
1143                                         }
1144                                         break;
1145                                 case '.':
1146                                 case '[':
1147                                 case '*':
1148                                         if (O_ISSET(sp, O_MAGIC))
1149                                                 *t++ = '\\';
1150                                         *t++ = *p;
1151                                         break;
1152                                 default:
1153                                         *t++ = '\\';
1154                                         *t++ = *p;
1155                                 }
1156                         } else
1157                                 *t++ = '\\';
1158                         break;
1159                 case '~':
1160                         if (O_ISSET(sp, O_MAGIC)) {
1161                                 MEMCPYW(t, sp->repl, sp->repl_len);
1162                                 t += sp->repl_len;
1163                         } else
1164                                 *t++ = '~';
1165                         break;
1166                 case '.':
1167                 case '[':
1168                 case '*':
1169                         if (!O_ISSET(sp, O_MAGIC))
1170                                 *t++ = '\\';
1171                         *t++ = *p;
1172                         break;
1173                 default:
1174                         *t++ = *p;
1175                         break;
1176                 }
1177
1178         *ptrnp = bp;                    /* Hand back the converted pattern. */
1179         *plenp = t - bp;
1180         return (0);
1181 }
1182
1183 /*
1184  * re_tag_conv --
1185  *      Convert a tags search pattern into something that the POSIX
1186  *      1003.2 RE functions can handle.
1187  */
1188 static int
1189 re_tag_conv(sp, ptrnp, plenp, replacedp)
1190         SCR *sp;
1191         CHAR_T **ptrnp;                 /* In: tag pattern.  Out: converted one. */
1192         size_t *plenp;                  /* In/out: pattern length. */
1193         int *replacedp;                 /* Out: set to 1, *ptrnp is replaced. */
1194 {
1195         size_t blen, len;
1196         int lastdollar;
1197         CHAR_T *bp, *p, *t;
1198
1199         len = *plenp;
1200
1201         /* Max memory usage is 2 times the length of the string. */
1202         *replacedp = 1;
1203         GET_SPACE_RETW(sp, bp, blen, len * 2);
1204
1205         p = *ptrnp;
1206         t = bp;
1207
1208         /* If the last character is a '/' or '?', we just strip it. */
1209         if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1210                 --len;
1211
1212         /* If the next-to-last or last character is a '$', it's magic. */
1213         if (len > 0 && p[len - 1] == '$') {
1214                 --len;
1215                 lastdollar = 1;
1216         } else
1217                 lastdollar = 0;
1218
1219         /* If the first character is a '/' or '?', we just strip it. */
1220         if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1221                 ++p;
1222                 --len;
1223         }
1224
1225         /* If the first or second character is a '^', it's magic. */
1226         if (len > 0 && p[0] == '^') {   /* len == 0: nothing left to test. */
1227                 *t++ = *p++;
1228                 --len;
1229         }
1230
1231         /*
1232          * Escape the other magic characters, and strip the backslashes ctags
1233          * puts before the search delimiters.  The last character has no
1234          * successor to test; looking at p[1] there would overrun the pattern.
1235          */
1236         for (; len > 0; --len) {
1237                 if (len > 1 && p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1238                         ++p;
1239                         --len;
1240                 } else if (strchr("^.[]$*", p[0]))
1241                         *t++ = '\\';
1242                 *t++ = *p++;
1243         }
1244         if (lastdollar)
1245                 *t++ = '$';
1246
1247         *ptrnp = bp;                    /* Hand back the converted pattern. */
1248         *plenp = t - bp;
1249         return (0);
1250 }
1251
1252 /*
1253  * re_cscope_conv --
1254  *      Convert a cscope search pattern into something that the POSIX
1255  *      1003.2 RE functions can handle.
1256  */
1257 static int
1258 re_cscope_conv(sp, ptrnp, plenp, replacedp)
1259         SCR *sp;
1260         CHAR_T **ptrnp;                 /* In: cscope line.  Out: anchored RE. */
1261         size_t *plenp;                  /* In/out: pattern length. */
1262         int *replacedp;                 /* Out: set to 1, *ptrnp is replaced. */
1263 {
1264         size_t blen, len, nspaces;
1265         CHAR_T *bp, *t;
1266         CHAR_T *p;
1267         CHAR_T *wp;
1268         size_t wlen;
1269
1270         /*
1271          * Each space in the source line printed by cscope represents an
1272          * arbitrary sequence of spaces, tabs, and comments.
1273          */
1274 #define CSCOPE_RE_SPACE         "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1275 #define CSCOPE_LEN      (sizeof(CSCOPE_RE_SPACE) - 1)   /* Without the nul. */
1276         CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1277         for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1278                 if (*p == ' ')
1279                         ++nspaces;
1280
1281         /*
1282          * Allocate plenty of space:
1283          *      the string, plus potential escaping characters;
1284          *      nspaces + 2 copies of CSCOPE_RE_SPACE;
1285          *      ^, $, nul terminator characters.
1286          */
1287         *replacedp = 1;
1288         len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1289         GET_SPACE_RETW(sp, bp, blen, len);
1290
1291         p = *ptrnp;
1292         t = bp;
1293
1294         *t++ = '^';                     /* Anchor, then leading white space. */
1295         MEMCPYW(t, wp, wlen);
1296         t += wlen;
1297
1298         for (len = *plenp; len > 0; ++p, --len)
1299                 if (*p == ' ') {
1300                         MEMCPYW(t, wp, wlen);
1301                         t += wlen;
1302                 } else {
1303                         if (strchr("\\^.[]$*+?()|{}", *p))
1304                                 *t++ = '\\';
1305                         *t++ = *p;
1306                 }
1307
1308         MEMCPYW(t, wp, wlen);           /* Trailing white space, then anchor. */
1309         t += wlen;
1310         *t++ = '$';
1311
1312         *ptrnp = bp;
1313         *plenp = t - bp;
1314         return (0);
1315 }
1316
1317 /*
1318  * re_error --
1319  *      Turn a regex error code into text and display it with msgq.
1320  *
1321  * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1322  */
1323 void
1324 re_error(sp, errcode, preg)
1325         SCR *sp;
1326         int errcode;
1327         regex_t *preg;
1328 {
1329         char *text;
1330         size_t need;
1331
1332         need = regerror(errcode, preg, "", 0);  /* Size of the message. */
1333         if ((text = malloc(need)) == NULL) {
1334                 msgq(sp, M_SYSERR, NULL);
1335                 return;
1336         }
1337         (void)regerror(errcode, preg, text, need);
1338         msgq(sp, M_ERR, "RE error: %s", text);
1339         free(text);
1340 }
1341
1342 /*
1343  * re_sub --
1344  *      Append the replacement string, expanded for one match, to the buffer.
1345  */
1346 static int
1347 re_sub(sp, ip, lbp, lbclenp, lblenp, match)
1348         SCR *sp;
1349         CHAR_T *ip;                     /* Input line. */
1350         CHAR_T **lbp;                   /* Build buffer. */
1351         size_t *lbclenp, *lblenp;       /* Its used length; its size (presumably). */
1352         regmatch_t match[10];           /* Match offsets into ip. */
1353 {
1354         enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
1355         size_t lbclen, lblen;           /* Local copies. */
1356         size_t mlen;                    /* Match length. */
1357         size_t rpl;                     /* Remaining replacement length. */
1358         CHAR_T *rp;                     /* Replacement pointer. */
1359         int ch;
1360         int no;                         /* Match replacement offset. */
1361         CHAR_T *p, *t;                  /* Buffer pointers. */
1362         CHAR_T *lb;                     /* Local copies. */
1363
1364         lb = *lbp;                      /* Get local copies. */
1365         lbclen = *lbclenp;
1366         lblen = *lblenp;
1367
1368         /*
1369          * QUOTING NOTE:
1370          *
1371          * There are some special sequences that vi provides in the
1372          * replacement patterns.
1373          *       & string the RE matched (\& if nomagic set)
1374          *      \# n-th regular subexpression
1375          *      \E end \U, \L conversion
1376          *      \e end \U, \L conversion
1377          *      \l convert the next character to lower-case
1378          *      \L convert to lower-case, until \E, \e, or end of replacement
1379          *      \u convert the next character to upper-case
1380          *      \U convert to upper-case, until \E, \e, or end of replacement
1381          *
1382          * Otherwise, since this is the lowest level of replacement, discard
1383          * all escaping characters.  This (hopefully) matches historic practice.
1384          */
1385 #define OUTCH(ch, nltrans) {                                            \
1386         CHAR_T __ch = (ch);                                             \
1387         u_int __value = KEY_VAL(sp, __ch);                              \
1388         if (nltrans && (__value == K_CR || __value == K_NL)) {          \
1389                 NEEDNEWLINE(sp);                                        \
1390                 sp->newl[sp->newl_cnt++] = lbclen;                      \
1391         } else if (conv != C_NOTSET) {                                  \
1392                 switch (conv) {                                         \
1393                 case C_ONELOWER:                                        \
1394                         conv = C_NOTSET;                                \
1395                         /* FALLTHROUGH */                               \
1396                 case C_LOWER:                                           \
1397                         if (isupper(__ch))                              \
1398                                 __ch = tolower(__ch);                   \
1399                         break;                                          \
1400                 case C_ONEUPPER:                                        \
1401                         conv = C_NOTSET;                                \
1402                         /* FALLTHROUGH */                               \
1403                 case C_UPPER:                                           \
1404                         if (islower(__ch))                              \
1405                                 __ch = toupper(__ch);                   \
1406                         break;                                          \
1407                 default:                                                \
1408                         abort();                                        \
1409                 }                                                       \
1410         }                                                               \
1411         NEEDSP(sp, 1, p);                                               \
1412         *p++ = __ch;                                                    \
1413         ++lbclen;                                                       \
1414 }
1415         conv = C_NOTSET;
1416         for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1417                 switch (ch = *rp++) {
1418                 case '&':
1419                         if (O_ISSET(sp, O_MAGIC)) {
1420                                 no = 0;
1421                                 goto subzero;
1422                         }
1423                         break;                  /* nomagic: a literal '&'. */
1424                 case '\\':
1425                         if (rpl == 0)           /* Trailing backslash: emit it. */
1426                                 break;
1427                         --rpl;
1428                         switch (ch = *rp) {
1429                         case '&':
1430                                 ++rp;
1431                                 if (!O_ISSET(sp, O_MAGIC)) {
1432                                         no = 0;
1433                                         goto subzero;
1434                                 }
1435                                 break;          /* magic: a literal '&'. */
1436                         case '0': case '1': case '2': case '3': case '4':
1437                         case '5': case '6': case '7': case '8': case '9':
1438                                 no = *rp++ - '0';
1439 subzero:                        if (match[no].rm_so == -1 ||
1440                                     match[no].rm_eo == -1)
1441                                         break;  /* No such match: emit ch. */
1442                                 mlen = match[no].rm_eo - match[no].rm_so;
1443                                 for (t = ip + match[no].rm_so; mlen--; ++t)
1444                                         OUTCH(*t, 0);   /* No newline translation. */
1445                                 continue;
1446                         case 'e':
1447                         case 'E':
1448                                 ++rp;
1449                                 conv = C_NOTSET;
1450                                 continue;
1451                         case 'l':
1452                                 ++rp;
1453                                 conv = C_ONELOWER;
1454                                 continue;
1455                         case 'L':
1456                                 ++rp;
1457                                 conv = C_LOWER;
1458                                 continue;
1459                         case 'u':
1460                                 ++rp;
1461                                 conv = C_ONEUPPER;
1462                                 continue;
1463                         case 'U':
1464                                 ++rp;
1465                                 conv = C_UPPER;
1466                                 continue;
1467                         default:
1468                                 ++rp;           /* Drop the backslash, keep ch. */
1469                                 break;
1470                         }
1471                 }
1472                 OUTCH(ch, 1);
1473         }
1474
1475         *lbp = lb;                      /* Update caller's information. */
1476         *lbclenp = lbclen;
1477         *lblenp = lblen;
1478         return (0);
1479 }