Src/subst.c

   1 /*
   2  * subst.c - various substitutions
   3  *
   4  * This file is part of zsh, the Z shell.
   5  *
   6  * Copyright (c) 1992-1997 Paul Falstad
   7  * All rights reserved.
   8  *
   9  * Permission is hereby granted, without written agreement and without
  10  * license or royalty fees, to use, copy, modify, and distribute this
  11  * software and to distribute modified versions of this software for any
  12  * purpose, provided that the above copyright notice and the following
  13  * two paragraphs appear in all copies of this software.
  14  *
  15  * In no event shall Paul Falstad or the Zsh Development Group be liable
  16  * to any party for direct, indirect, special, incidental, or consequential
  17  * damages arising out of the use of this software and its documentation,
  18  * even if Paul Falstad and the Zsh Development Group have been advised of
  19  * the possibility of such damage.
  20  *
  21  * Paul Falstad and the Zsh Development Group specifically disclaim any
  22  * warranties, including, but not limited to, the implied warranties of
  23  * merchantability and fitness for a particular purpose.  The software
  24  * provided hereunder is on an "as is" basis, and Paul Falstad and the
  25  * Zsh Development Group have no obligation to provide maintenance,
  26  * support, updates, enhancements, or modifications.
  27  *
  28  */
  29
  30 #include "zsh.mdh"
  31 #include "subst.pro"
   32 /* Flag for list->list.flags: stringsubst() sets it on $( (String form) or a Tick; multsub() tests it. */
   33 #define LF_ARRAY        1
   34 /* A string made up of the single token Nularg, followed by the terminating NUL. */
   35 /**/
   36 char nulstring[] = {Nularg, '\0'};
  37
  38 /* Do substitutions before fork. These are:
  39  *  - Process substitution: <(...), >(...), =(...)
  40  *  - Parameter substitution
  41  *  - Command substitution
  42  * Followed by
  43  *  - Quote removal
  44  *  - Brace expansion
  45  *  - Tilde and equals substitution
  46  *
  47  * PF_* flags are defined in zsh.h
  48  */
  49
  50 /**/
  51 mod_export void
  52 prefork(LinkList list, int flags)
  53 {
  54     LinkNode node, stop = 0;
  55     int keep = 0, asssub = (flags & PF_TYPESET) && isset(KSHTYPESET);
  56
  57     queue_signals();
  58     for (node = firstnode(list); node; incnode(node)) {
  59         if (isset(SHFILEEXPANSION)) {
  60             /*
  61              * Here and below we avoid taking the address
  62              * of a void * and then pretending it's a char **
  63              * instead of a void ** by a little inefficiency.
  64              * This could be avoided with some extra linked list
  65              * machinery, but that would need quite a lot of work
  66              * to ensure consistency.  What we really need is
  67              * templates...
  68              */
  69             char *cptr = (char *)getdata(node);
  70             filesub(&cptr, flags & (PF_TYPESET|PF_ASSIGN));
  71             /*
  72              * The assignment is so simple it's not worth
  73              * testing if cptr changed...
  74              */
  75             setdata(node, cptr);
  76         }
  77         if (!(node = stringsubst(list, node, flags & PF_SINGLE, asssub))) {
  78             unqueue_signals();
  79             return;
  80         }
  81     }
  82     for (node = firstnode(list); node; incnode(node)) {
  83         if (node == stop)
  84             keep = 0;
  85         if (*(char *)getdata(node)) {
  86             remnulargs(getdata(node));
  87             if (unset(IGNOREBRACES) && !(flags & PF_SINGLE)) {
  88                 if (!keep)
  89                     stop = nextnode(node);
  90                 while (hasbraces(getdata(node))) {
  91                     keep = 1;
  92                     xpandbraces(list, &node);
  93                 }
  94             }
  95             if (unset(SHFILEEXPANSION)) {
  96                 char *cptr = (char *)getdata(node);
  97                 filesub(&cptr, flags & (PF_TYPESET|PF_ASSIGN));
  98                 setdata(node, cptr);
  99             }
 100         } else if (!(flags & PF_SINGLE) && !keep)
 101             uremnode(list, node);
 102         if (errflag) {
 103             unqueue_signals();
 104             return;
 105         }
 106     }
 107     unqueue_signals();
 108 }
 109
 110 /*
 111  * Perform $'...' quoting.  The arguments are
 112  *   strstart   The start of the string
 113  *   pstrdpos   Initially, *pstrdpos is the position where the $ of the $'
 114  *              occurs.  It will be updated to the next character after the
 115  *              last ' of the $'...'.
 116  * The return value is the entire allocated string from strstart on the heap.
 117  * Note the original string may be modified in the process.
 118  */
 119 /**/
 120 static char *
 121 stringsubstquote(char *strstart, char **pstrdpos)
 122 {
 123     int len;
 124     char *strdpos = *pstrdpos, *strsub, *strret;
 125
 126     strsub = getkeystring(strdpos+2, &len,
 127                           GETKEYS_DOLLARS_QUOTE, NULL);
 128     len += 2;                   /* measured from strdpos */
 129
 130     if (strstart != strdpos) {
 131         *strdpos = '\0';
 132         if (strdpos[len])
 133             strret = zhtricat(strstart, strsub, strdpos + len);
 134         else
 135             strret = dyncat(strstart, strsub);
 136     } else if (strdpos[len])
 137         strret = dyncat(strsub, strdpos + len);
 138     else
 139         strret = strsub;
 140
 141     *pstrdpos = strret + (strdpos - strstart) + strlen(strsub);
 142
 143     return strret;
 144 }
  145 /*
       * Perform the in-string substitutions on the word held by node, in two
       * passes over its text:
       *  - process substitution: <(...) and >(...) anywhere in the word,
       *    =(...) only at its very start;
       *  - then, driven by the String/Qstring and Tick/Qtick tokens: $[...] and
       *    $((...)) arithmetic, $'...' quoting, parameter substitution through
       *    paramsubst(), and $(...) / `...` command substitution.
       * The node's data is replaced by the substituted string.  When a command
       * substitution yields several words the extra ones are inserted into list
       * and processing carries on with the node holding the last of them.
       *   ssub    handed to paramsubst(); with qt it is also handed to
       *           getoutput() and it gates shtokenize() of command output
       *   asssub  if set, an = (or Equals token) found after the first
       *           character switches ssub on for the rest of the word
       * Returns the current node, or NULL on error (errflag set, or one of the
       * errors reported below).
       */
  146 /**/
  147 static LinkNode
  148 stringsubst(LinkList list, LinkNode node, int ssub, int asssub)
  149 {
  150     int qt;
  151     char *str3 = (char *)getdata(node);
  152     char *str  = str3, c;
  153
          /*
           * Pass 1: process substitution.  str3 is the start of the word and
           * str the scanning position; Equals only counts when str == str3.
           */
  154     while (!errflag && (c = *str)) {
  155         if (((c = *str) == Inang || c == OutangProc ||
  156              (str == str3 && c == Equals))
  157             && str[1] == Inpar) {
  158             char *subst, *rest, *snew, *sptr;
  159             int str3len = str - str3, sublen, restlen;
  160
  161             if (c == Inang || c == OutangProc)
  162                 subst = getproc(str, &rest);    /* <(...) or >(...) */
  163             else
  164                 subst = getoutputfile(str, &rest);      /* =(...) */
  165             if (!subst)
  166                 subst = "";
  167
                  /*
                   * Rebuild the word as: the str3len bytes before the
                   * substitution, the substituted text, then rest as set by
                   * getproc()/getoutputfile().  Scanning resumes just after
                   * the substituted text.
                   */
  168             sublen = strlen(subst);
  169             restlen = strlen(rest);
  170             sptr = snew = hcalloc(str3len + sublen + restlen + 1);
  171             if (str3len) {
  172                 memcpy(sptr, str3, str3len);
  173                 sptr += str3len;
  174             }
  175             if (sublen) {
  176                 memcpy(sptr, subst, sublen);
  177                 sptr += sublen;
  178             }
  179             if (restlen)
  180                 memcpy(sptr, rest, restlen);
  181             sptr[restlen] = '\0';
  182             str3 = snew;
  183             str = snew + str3len + sublen;
  184             setdata(node, str3);
  185         } else
  186             str++;
  187     }
  188     str = str3;
  189
          /*
           * Pass 2: substitutions introduced by String/Qstring and Tick/Qtick.
           * qt is set for the Q variants (Qstring, Qtick).
           */
  190     while (!errflag && (c = *str)) {
  191         if ((qt = c == Qstring) || c == String) {
  192             if ((c = str[1]) == Inpar) {
                      /*
                       * $( : command substitution or $((...)); both are sorted
                       * out at comsub, entered with str on the Inpar.  The
                       * String (not Qstring) form marks the list LF_ARRAY.
                       */
  193                 if (!qt)
  194                     list->list.flags |= LF_ARRAY;
  195                 str++;
  196                 goto comsub;
  197             } else if (c == Inbrack) {
  198                 /* $[...] */
  199                 char *str2 = str;
  200                 str2++;
  201                 if (skipparens(Inbrack, Outbrack, &str2)) {
  202                     zerr("closing bracket missing");
  203                     return NULL;
  204                 }
                      /*
                       * Cut the word at the $ and NUL out the character before
                       * str2 (presumably the closing bracket).  arithsubst()
                       * is given &str3, and its return value is where
                       * scanning continues.
                       */
  205                 str2[-1] = *str = '\0';
  206                 str = arithsubst(str + 2, &str3, str2);
  207                 setdata(node, (void *) str3);
  208                 continue;
  209             } else if (c == Snull) {
                      /* $'...': stringsubstquote() rebuilds str3 and moves str on */
  210                 str3 = stringsubstquote(str3, &str);
  211                 setdata(node, (void *) str3);
  212                 continue;
  213             } else {
                      /*
                       * Anything else after the $ goes to paramsubst(), which
                       * is given &str and returns the node to continue with.
                       */
  214                 node = paramsubst(list, node, &str, qt, ssub);
  215                 if (errflag || !node)
  216                     return NULL;
  217                 str3 = (char *)getdata(node);
  218                 continue;
  219             }
  220         } else if ((qt = c == Qtick) || (c == Tick ? (list->list.flags |= LF_ARRAY) : 0))
  221           comsub: {
                  /*
                   * Reached for Qtick/Tick (a Tick also sets LF_ARRAY on the
                   * list) or through the goto above with c == Inpar.  str2
                   * remembers the opening token of the substitution.
                   */
  222             LinkList pl;
  223             char *s, *str2 = str;
  224             char endchar;
  225             int l1, l2;
  226
  227             if (c == Inpar) {
  228                 endchar = Outpar;
                      /* str[-1] is the $ token: end the preceding text there. */
  229                 str[-1] = '\0';
  230 #ifdef DEBUG
  231                 if (skipparens(Inpar, Outpar, &str))
  232                     dputs("BUG: parse error in command substitution");
  233 #else
  234                 skipparens(Inpar, Outpar, &str);
  235 #endif
                      /*
                       * Step back one character; presumably skipparens() left
                       * str just past the closing Outpar.
                       */
  236                 str--;
  237             } else {
  238                 endchar = c;
                      /* NUL out the opening tick, then look for its partner. */
  239                 *str = '\0';
  240
  241                 while (*++str != endchar)
  242                     DPUTS(!*str, "BUG: parse error in command substitution");
  243             }
                  /* NUL out the closing token; str is now the rest of the word. */
  244             *str++ = '\0';
  245             if (endchar == Outpar && str2[1] == '(' && str[-2] == ')') {
  246                 /* Math substitution of the form $((...)) */
  247                 str[-2] = '\0';
  248                 str = arithsubst(str2 + 2, &str3, str);
  249                 setdata(node, (void *) str3);
  250                 continue;
  251             }
  252
  253             /* It is a command substitution, which will be parsed again   *
  254              * by the lexer, so we untokenize it first, but we cannot use *
  255              * untokenize() since in the case of `...` some Bnulls should *
  256              * be left unchanged.  Note that the lexer doesn't tokenize   *
  257              * the body of a command substitution so if there are some    *
  258              * tokens here they are from a ${(e)~...} substitution.       */
  259             for (str = str2; (c = *++str); )
  260                 if (itok(c) && c != Nularg &&
  261                     !(endchar != Outpar && c == Bnull &&
  262                       (str[1] == '$' || str[1] == '\\' || str[1] == '`' ||
  263                        (qt && str[1] == '"'))))
  264                     *str = ztokens[c - Pound];
                  /* The loop stopped on the NUL written above; step past it. */
  265             str++;
                  /*
                   * getoutput() is given the command text and returns a list
                   * (presumably the words of the output); NULL is treated as
                   * a parse error.
                   */
  266             if (!(pl = getoutput(str2 + 1, qt || ssub))) {
  267                 zerr("parse error in command substitution");
  268                 return NULL;
  269             }
                  /* For $(...) move str2 back onto the position of the $. */
  270             if (endchar == Outpar)
  271                 str2--;
                  /*
                   * No first word: splice the substitution out by moving the
                   * rest of the word down over it and rescan from there.
                   * NOTE(review): source and destination of this strcpy() lie
                   * in the same buffer and can overlap - confirm this is safe.
                   */
  272             if (!(s = (char *) ugetnode(pl))) {
  273                 str = strcpy(str2, str);
  274                 continue;
  275             }
  276             if (!qt && ssub && isset(GLOBSUBST))
  277                 shtokenize(s);
                  /* l1: length of the text before the substitution; l2: of s. */
  278             l1 = str2 - str3;
  279             l2 = strlen(s);
                  /*
                   * More words left in pl: the first word is appended to the
                   * leading text and stays in this node, pl is inserted into
                   * list at node, and the last word of pl becomes the current
                   * node, with no leading text (l1 = 0).
                   */
  280             if (nonempty(pl)) {
  281                 LinkNode n = lastnode(pl);
  282                 str2 = (char *) hcalloc(l1 + l2 + 1);
  283                 strcpy(str2, str3);
  284                 strcpy(str2 + l1, s);
  285                 setdata(node, str2);
  286                 insertlinklist(pl, node, list);
  287                 s = (char *) getdata(node = n);
  288                 l1 = 0;
  289                 l2 = strlen(s);
  290             }
                  /*
                   * Assemble leading text (if any) + s + rest of the word.
                   * strcpy() returns its destination, so scanning resumes at
                   * the start of the copied rest.
                   */
  291             str2 = (char *) hcalloc(l1 + l2 + strlen(str) + 1);
  292             if (l1)
  293                 strcpy(str2, str3);
  294             strcpy(str2 + l1, s);
  295             str = strcpy(str2 + l1 + l2, str);
  296             str3 = str2;
  297             setdata(node, str3);
  298             continue;
  299         } else if (asssub && ((c == '=') || c == Equals) && str != str3) {
  300             /*
  301              * We are in a normal argument which looks like an assignment
  302              * and is to be treated like one, with no word splitting.
  303              */
  304             ssub = 1;
  305         }
  306         str++;
  307     }
  308     return errflag ? NULL : node;
  309 }
 310
 311 /*
 312  * Simplified version of the prefork/singsub processing where
 313  * we only do substitutions appropriate to quoting.  Currently
 314  * this means only the expansions in $'....'.  This is used
 315  * for the end tag for here documents.  As we are not doing
 316  * `...` expansions, we just use those for quoting.  However,
 317  * they stay in the text.  This is weird, but that's not
 318  * my fault.
 319  *
 320  * The remnulargs() makes this consistent with the other forms
 321  * of substitution, indicating that quotes have been fully
 322  * processed.
 323  *
 324  * The fully processed string is returned.
 325  */
 326
 327 /**/
 328 char *
 329 quotesubst(char *str)
 330 {
 331     char *s = str;
 332
 333     while (*s) {
 334         if (*s == String && s[1] == Snull) {
 335             str = stringsubstquote(str, &s);
 336         } else {
 337             s++;
 338         }
 339     }
 340     remnulargs(str);
 341     return str;
 342 }
 343
 344 /**/
 345 mod_export void
 346 globlist(LinkList list, int nountok)
 347 {
 348     LinkNode node, next;
 349
 350     badcshglob = 0;
 351     for (node = firstnode(list); !errflag && node; node = next) {
 352         next = nextnode(node);
 353         zglob(list, node, nountok);
 354     }
 355     if (badcshglob == 1)
 356         zerr("no match");
 357 }
 358
 359 /* perform substitution on a single word */
 360
 361 /**/
 362 mod_export void
 363 singsub(char **s)
 364 {
 365     local_list1(foo);
 366
 367     init_list1(foo, *s);
 368
 369     prefork(&foo, PF_SINGLE);
 370     if (errflag)
 371         return;
 372     *s = (char *) ugetnode(&foo);
 373     DPUTS(nonempty(&foo), "BUG: singsub() produced more than one word!");
 374 }
 375
  376 /* Perform substitution on a single word, *s. Unlike with singsub(), the
  377  * result can be more than one word. If split is non-zero, the string is
  378  * first word-split using IFS, but only for non-quoted "whitespace" (as
  379  * indicated by Dnull, Snull, Tick, Bnull, Inpar, and Outpar).
  380  *
  381  * If arg "a" was non-NULL and we got an array as a result of the parsing,
  382  * the strings are stored in *a (even for a 1-element array) and *isarr is
  383  * set to 1.  Otherwise, *isarr is set to 0, and the result is put into *s,
  384  * with any necessary joining of multiple elements using sep (which can be
  385  * NULL to use IFS).  The return value is true iff the expansion resulted
  386  * in an empty list.
       *
       * In the array case the value actually stored in *isarr is
       * SCANPM_MATCHMANY.  If errflag is set after prefork(), *isarr is
       * cleared (when isarr is non-NULL), *s is not updated and 0 is returned.
       *
       * NOTE(review): the array branch writes *isarr without a NULL check,
       * unlike every other path; callers passing a non-NULL a presumably
       * always pass a non-NULL isarr too - confirm.
       */
  387
  388 /**/
  389 static int
  390 multsub(char **s, int split, char ***a, int *isarr, char *sep)
  391 {
  392     int l;
  393     char **r, **p, *x = *s;
  394     local_list1(foo);
  395
          /*
           * Skip leading separators, as recognised by iwsep().  A Meta byte
           * is decoded as the following byte XORed with 32, and such a pair
           * is stepped over as two bytes.
           */
  396     if (split) {
  397         /*
  398          * This doesn't handle multibyte characters, but we're
  399          * looking for whitespace separators which must be ASCII.
  400          */
  401         for ( ; *x; x += l) {
  402             char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
  403             l++;
  404             if (!iwsep(STOUC(c)))
  405                 break;
  406         }
  407     }
  408
          /* The list starts out holding just x, the word without that prefix. */
  409     init_list1(foo, x);
  410
  411     if (split) {
              /*
               * n is the node of the word currently being scanned.  inq is
               * toggled by the quote tokens, inp counts parentheses; rawc
               * is only set (i.e. not -1) when the character is a token.
               */
  412         LinkNode n = firstnode(&foo);
  413         int inq = 0, inp = 0;
  414         MB_METACHARINIT();
  415         for ( ; *x; x += l) {
  416             int rawc = -1;
  417             convchar_t c;
  418             if (itok(STOUC(*x))) {
  419                 /* token, can't be separator, must be single byte */
  420                 rawc = *x;
  421                 l = 1;
  422             } else {
  423                 l = MB_METACHARLENCONV(x, &c);
  424                 if (!inq && !inp && WC_ZISTYPE(c, ISEP)) {
                          /*
                           * A separator outside quotes and parentheses:
                           * terminate the current word here, skip the rest
                           * of the run of separators and start a new node at
                           * the next token or non-separator.  If the string
                           * ends first, the outer loop is left as well.
                           */
  425                     *x = '\0';
  426                     for (x += l; *x; x += l) {
  427                         if (itok(STOUC(*x))) {
  428                             /* as above */
  429                             rawc = *x;
  430                             l = 1;
  431                             break;
  432                         }
  433                         l = MB_METACHARLENCONV(x, &c);
  434                         if (!WC_ZISTYPE(c, ISEP))
  435                             break;
  436                     }
  437                     if (!*x)
  438                         break;
  439                     insertlinknode(&foo, n, (void *)x), incnode(n);
  440                 }
  441             }
  442             switch (rawc) {
  443             case Dnull:  /* " */
  444             case Snull:  /* ' */
  445             case Tick:   /* ` (note: no Qtick!) */
  446                 /* These always occur in unnested pairs. */
  447                 inq = !inq;
  448                 break;
  449             case Inpar:  /* ( */
  450                 inp++;
  451                 break;
  452             case Outpar: /* ) */
  453                 inp--;
  454                 break;
  455             case Bnull:  /* \ */
  456             case Bnullkeep:
  457                 /* The parser verified the following char's existence. */
                      /* Step over the token; l becomes the length of the next char. */
  458                 x += l;
  459                 l = MB_METACHARLEN(x);
  460                 break;
  461             }
  462         }
  463     }
  464
          /* Run the ordinary substitutions over the word(s), without PF_ flags. */
  465     prefork(&foo, 0);
  466     if (errflag) {
  467         if (isarr)
  468             *isarr = 0;
  469         return 0;
  470     }
  471
          /*
           * More than one word, or a list flagged LF_ARRAY while the caller
           * can accept an array: gather the words in a NULL-terminated array.
           */
  472     if ((l = countlinknodes(&foo)) > 1 || (foo.list.flags & LF_ARRAY && a)) {
  473         p = r = hcalloc((l + 1) * sizeof(char*));
  474         while (nonempty(&foo))
  475             *p++ = (char *)ugetnode(&foo);
  476         *p = NULL;
  477         /* We need a way to figure out if a one-item result was a scalar
  478          * or a single-item array.  The parser will have set LF_ARRAY
  479          * in the latter case, allowing us to return it as an array to
  480          * our caller (if they provided for that result). */
  481         if (a && (l > 1 || foo.list.flags & LF_ARRAY)) {
  482             *a = r;
  483             *isarr = SCANPM_MATCHMANY;
  484             return 0;
  485         }
              /* No array wanted: join the words into one string using sep. */
  486         *s = sepjoin(r, sep, 1);
  487         if (isarr)
  488             *isarr = 0;
  489         return 0;
  490     }
          /* At most one word: hand it back, or an empty string if there is none. */
  491     if (l)
  492         *s = (char *) ugetnode(&foo);
  493     else
  494         *s = dupstring("");
  495     if (isarr)
  496         *isarr = 0;
  497     return !l;
  498 }
 499
 500 /*
 501  * ~, = subs: assign & PF_TYPESET => typeset or magic equals
 502  *            assign & PF_ASSIGN => normal assignment
 503  */
 504
 505 /**/
 506 mod_export void
 507 filesub(char **namptr, int assign)
 508 {
 509     char *eql = NULL, *sub = NULL, *str, *ptr;
 510     int len;
 511
 512     filesubstr(namptr, assign);
 513
 514     if (!assign)
 515         return;
 516
 517     if (assign & PF_TYPESET) {
 518         if ((*namptr)[1] && (eql = sub = strchr(*namptr + 1, Equals))) {
 519             str = sub + 1;
 520             if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) {
 521                 sub[1] = '\0';
 522                 *namptr = dyncat(*namptr, str);
 523             }
 524         } else
 525             return;
 526     }
 527
 528     ptr = *namptr;
 529     while ((sub = strchr(ptr, ':'))) {
 530         str = sub + 1;
 531         len = sub - *namptr;
 532         if (sub > eql &&
 533             (sub[1] == Tilde || sub[1] == Equals) &&
 534             filesubstr(&str, assign)) {
 535             sub[1] = '\0';
 536             *namptr = dyncat(*namptr, str);
 537         }
 538         ptr = *namptr + len + 1;
 539     }
 540 }
  541 /*
       * Try to expand a ~ or = expression at the very start of *namptr.
       * If an expansion is made the new string is stored in *namptr and 1 is
       * returned; otherwise 0 is returned.  The forms handled are:
       *   ~  ~+  ~-     home (or "" if unset), pwd, oldpwd (pwd if unset)
       *   ~[name]       through the shell function zsh_directory_name, if
       *                 that is defined and a closing Outbrack is found
       *   ~N ~+N ~-N    looked up with dstackent()
       *   ~name         looked up with getnameddir()
       *   =cmd          looked up with findcmd(); only if EQUALS is set
       * A non-zero assign makes ':' end the expression, in addition to the
       * characters listed in isend()/isend2().  When NOMATCH is set, zerr()
       * is called for a failed ~[name], ~name or =cmd lookup.
       */
  542 /**/
  543 mod_export int
  544 filesubstr(char **namptr, int assign)
  545 {
      /* isend(): what ends a ~ expression; isend2(): the same minus '/', for =cmd */
  546 #define isend(c) ( !(c) || (c)=='/' || (c)==Inpar || (assign && (c)==':') )
  547 #define isend2(c) ( !(c) || (c)==Inpar || (assign && (c)==':') )
  548     char *str = *namptr;
  549
  550     if (*str == Tilde && str[1] != '=' && str[1] != Equals) {
  551         Shfunc dirfunc;
  552         char *ptr, *tmp, *res, *ptr2;
  553         int val;
  554
              /*
               * Parse a number after the ~ up front; ptr is left after it
               * (presumably strtol()-like).  Used by the ~N branch below.
               */
  555         val = zstrtol(str + 1, &ptr, 10);
  556         if (isend(str[1])) {   /* ~ */
  557             *namptr = dyncat(home ? home : "", str + 1);
  558             return 1;
  559         } else if (str[1] == '+' && isend(str[2])) {   /* ~+ */
  560             *namptr = dyncat(pwd, str + 2);
  561             return 1;
  562         } else if (str[1] == '-' && isend(str[2])) {   /* ~- */
  563             *namptr = dyncat((tmp = oldpwd) ? tmp : pwd, str + 2);
  564             return 1;
  565         } else if (str[1] == Inbrack &&
  566                    (dirfunc = getshfunc("zsh_directory_name")) &&
  567                    (ptr2 = strchr(str+2, Outbrack))) {
                  /*
                   * ~[name]: the text between the brackets is copied,
                   * untokenized and passed to the function with "n"; the
                   * first element of the result, if any, replaces ~[name].
                   */
  568             char **arr;
  569             untokenize(tmp = dupstrpfx(str+2, ptr2 - (str+2)));
  570             remnulargs(tmp);
  571             arr = subst_string_by_func(dirfunc, "n", tmp);
  572             res = arr ? *arr : NULL;
  573             if (res) {
  574                 *namptr = dyncat(res, ptr2+1);
  575                 return 1;
  576             }
  577             if (isset(NOMATCH))
  578                 zerr("no directory expansion: ~[%s]", tmp);
  579             return 0;
  580         } else if (!inblank(str[1]) && isend(*ptr) &&
  581                    (!idigit(str[1]) || (ptr - str < 4))) {
                  /*
                   * ~N, ~+N, ~-N: the parsed number must run up to the end of
                   * the expression.  If it starts with a digit right after
                   * the ~ it may be at most two characters long.
                   * dstackent() is given the character after the ~ and the
                   * magnitude of the number.
                   */
  582             char *ds;
  583
  584             if (val < 0)
  585                 val = -val;
  586             ds = dstackent(str[1], val);
  587             if (!ds)
  588                 return 0;
  589             *namptr = dyncat(ds, ptr);
  590             return 1;
  591         } else if ((ptr = itype_end(str+1, IUSER, 0)) != str+1) {   /* ~foo */
  592             char *hom, save;
  593
                  /*
                   * ptr is the end of the name as found by itype_end().  The
                   * name is NUL-terminated in place for the lookup (and for
                   * the error message) and the saved character is put back
                   * afterwards on both paths.
                   */
  594             save = *ptr;
  595             if (!isend(save))
  596                 return 0;
  597             *ptr = 0;
  598             if (!(hom = getnameddir(++str))) {
  599                 if (isset(NOMATCH))
  600                     zerr("no such user or named directory: %s", str);
  601                 *ptr = save;
  602                 return 0;
  603             }
  604             *ptr = save;
  605             *namptr = dyncat(hom, ptr);
  606             return 1;
  607         }
  608     } else if (*str == Equals && isset(EQUALS) && str[1]) {   /* =foo */
  609         char *pp, *cnam, *cmdstr, *str1 = str+1;
  610
              /* The command name runs up to the first isend2() character. */
  611         for (pp = str1; !isend2(*pp); pp++)
  612             ;
  613         cmdstr = dupstrpfx(str1, pp-str1);
  614         untokenize(cmdstr);
  615         remnulargs(cmdstr);
  616         if (!(cnam = findcmd(cmdstr, 1))) {
  617             if (isset(NOMATCH))
  618                 zerr("%s not found", cmdstr);
  619             return 0;
  620         }
              /* Result: what findcmd() returned, plus whatever followed the name. */
  621         *namptr = dupstring(cnam);
  622         if (*pp)
  623             *namptr = dyncat(*namptr, pp);
  624         return 1;
  625     }
  626     return 0;
  627 #undef isend
  628 #undef isend2
  629 }
 630
 631 /**/
 632 static char *
 633 strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub,
 634           int copied)
 635 {
 636     char *dest;
 637     int pl = pe - pb;
 638
 639     if (!pl && (!s || !*s)) {
 640         *d = dest = (copied ? src : dupstring(src));
 641         if (glbsub)
 642             shtokenize(dest);
 643     } else {
 644         *d = dest = hcalloc(pl + l + (s ? strlen(s) : 0) + 1);
 645         strncpy(dest, pb, pl);
 646         dest += pl;
 647         strcpy(dest, src);
 648         if (glbsub)
 649             shtokenize(dest);
 650         dest += l;
 651         if (s)
 652             strcpy(dest, s);
 653     }
 654     return dest;
 655 }
 656
 657 /*
 658  * Pad the string str, returning a result from the heap (or str itself,
 659  * if it didn't need padding).  If str is too large, it will be truncated.
 660  * Calculations are in terms of width if MULTIBYTE is in effect and
 661  * multi_width is non-zero, else characters.
 662  *
 663  * prenum and postnum are the width to which the string needs padding
 664  * on the left and right.
 665  *
 666  * preone and postone are string to insert once only before and after
 667  * str.  They will be truncated on the left or right, respectively,
 668  * if necessary to fit the width.  Either or both may be NULL in which
 669  * case they will not be used.
 670  *
 671  * premul and postmul are the padding strings to be repeated before
 672  * on the left (if prenum is non-zero) and right (if postnum is non-zero).  If
 673  * NULL the first character of IFS (typically but not necessarily a space)
 674  * will be used.
 675  */
 676
 677 static char *
 678 dopadding(char *str, int prenum, int postnum, char *preone, char *postone,
 679           char *premul, char *postmul
 680 #ifdef MULTIBYTE_SUPPORT
 681           , int multi_width
 682 #endif
 683     )
 684 {
 685 #ifdef MULTIBYTE_SUPPORT
 686 #define WCPADWIDTH(cchar)       (multi_width ? WCWIDTH(cchar) : 1)
 687 #else
 688 #define WCPADWIDTH(cchar)       (1)
 689 #endif
 690
 691     char *def, *ret, *t, *r;
 692     int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc, cl;
 693     convchar_t cchar;
 694
 695     MB_METACHARINIT();
 696     if (*ifs)
 697         def = dupstrpfx(ifs, MB_METACHARLEN(ifs));
 698     else
 699         def = "";
 700     if (preone && !*preone)
 701         preone = def;
 702     if (postone && !*postone)
 703         postone = def;
 704     if (!premul || !*premul)
 705         premul = def;
 706     if (!postmul || !*postmul)
 707         postmul = def;
 708
 709     ls = MB_METASTRLEN2(str, multi_width);
 710     lpreone = preone ? MB_METASTRLEN2(preone, multi_width) : 0;
 711     lpostone = postone ? MB_METASTRLEN2(postone, multi_width) : 0;
 712     lpremul = MB_METASTRLEN2(premul, multi_width);
 713     lpostmul = MB_METASTRLEN2(postmul, multi_width);
 714
 715     if (prenum + postnum == ls)
 716         return str;
 717
 718     /*
 719      * Try to be careful with allocated lengths.  The following
 720      * is a maximum, in case we need the entire repeated string
 721      * for each repetition.  We probably don't, but in case the user
 722      * has given us something pathological which doesn't convert
 723      * easily into a width we'd better be safe.
 724      */
 725     lr = strlen(str) + strlen(premul) * prenum + strlen(postmul) * postnum;
 726     /*
 727      * Same logic for preone and postone, except those may be NULL.
 728      */
 729     if (preone)
 730         lr += strlen(preone);
 731     if (postone)
 732         lr += strlen(postone);
 733     r = ret = (char *)zhalloc(lr + 1);
 734
 735     if (prenum) {
 736         /*
 737          * Pad on the left.
 738          */
 739         if (postnum) {
 740             /*
 741              * Pad on both right and left.
 742              * The strategy is to divide the string into two halves.
 743              * The first half is dealt with by the left hand padding
 744              * code, the second by the right hand.
 745              */
 746             ls2 = ls / 2;
 747
 748             /* The width left to pad for the first half. */
 749             f = prenum - ls2;
 750             if (f <= 0) {
 751                 /* First half doesn't fit.  Skip the first -f width. */
 752                 f = -f;
 753                 MB_METACHARINIT();
 754                 while (f > 0) {
 755                     str += MB_METACHARLENCONV(str, &cchar);
 756                     f -= WCPADWIDTH(cchar);
 757                 }
 758                 /* Now finish the first half. */
 759                 for (c = prenum; c > 0; ) {
 760                     cl = MB_METACHARLENCONV(str, &cchar);
 761                     while (cl--)
 762                         *r++ = *str++;
 763                     c -= WCPADWIDTH(cchar);
 764                 }
 765             } else {
 766                 if (f <= lpreone) {
 767                     if (preone) {
 768                         /*
 769                          * The unrepeated string doesn't fit.
 770                          */
 771                         MB_METACHARINIT();
 772                         /* The width we need to skip */
 773                         f = lpreone - f;
 774                         /* So skip. */
 775                         for (t = preone; f > 0; ) {
 776                             t += MB_METACHARLENCONV(t, &cchar);
 777                             f -= WCPADWIDTH(cchar);
 778                         }
 779                         /* Then copy the entire remainder. */
 780                         while (*t)
 781                             *r++ = *t++;
 782                     }
 783                 } else {
 784                     f -= lpreone;
 785                     if (lpremul) {
 786                         if ((m = f % lpremul)) {
 787                             /*
 788                              * Left over fraction of repeated string.
 789                              */
 790                             MB_METACHARINIT();
 791                             /* Skip this much. */
 792                             m = lpremul - m;
 793                             for (t = premul; m > 0; ) {
 794                                 t += MB_METACHARLENCONV(t, &cchar);
 795                                 m -= WCPADWIDTH(cchar);
 796                             }
 797                             /* Output the rest. */
 798                             while (*t)
 799                                 *r++ = *t++;
 800                         }
 801                         for (cc = f / lpremul; cc--;) {
 802                             /* Repeat the repeated string */
 803                             MB_METACHARINIT();
 804                             for (c = lpremul, t = premul; c > 0; ) {
 805                                 cl = MB_METACHARLENCONV(t, &cchar);
 806                                 while (cl--)
 807                                     *r++ = *t++;
 808                                 c -= WCPADWIDTH(cchar);
 809                             }
 810                         }
 811                     }
 812                     if (preone) {
 813                         /* Output the full unrepeated string */
 814                         while (*preone)
 815                             *r++ = *preone++;
 816                     }
 817                 }
 818                 /* Output the first half width of the original string. */
 819                 for (c = ls2; c > 0; ) {
 820                     cl = MB_METACHARLENCONV(str, &cchar);
 821                     c -= WCPADWIDTH(cchar);
 822                     while (cl--)
 823                         *r++ = *str++;
 824                 }
 825             }
 826             /* Other half.  In case the string had an odd length... */
 827             ls2 = ls - ls2;
 828             /* Width that needs padding... */
 829             f = postnum - ls2;
 830             if (f <= 0) {
 831                 /* ...is negative, truncate original string */
 832                 MB_METACHARINIT();
 833                 for (c = postnum; c > 0; ) {
 834                     cl = MB_METACHARLENCONV(str, &cchar);
 835                     c -= WCPADWIDTH(cchar);
 836                     while (cl--)
 837                         *r++ = *str++;
 838                 }
 839             } else {
 840                 /* Rest of original string fits, output it complete */
 841                 while (*str)
 842                     *r++ = *str++;
 843                 if (f <= lpostone) {
 844                     if (postone) {
 845                         /* Can't fit unrepeated string, truncate it */
 846                         for (c = f; c > 0; ) {
 847                             cl = MB_METACHARLENCONV(postone, &cchar);
 848                             c -= WCPADWIDTH(cchar);
 849                             while (cl--)
 850                                 *r++ = *postone++;
 851                         }
 852                     }
 853                 } else {
 854                     if (postone) {
 855                         f -= lpostone;
 856                         /* Output entire unrepeated string */
 857                         while (*postone)
 858                             *r++ = *postone++;
 859                     }
 860                     if (lpostmul) {
 861                         for (cc = f / lpostmul; cc--;) {
 862                             /* Begin the beguine */
 863                             for (t = postmul; *t; )
 864                                 *r++ = *t++;
 865                         }
 866                         if ((m = f % lpostmul)) {
 867                             /* Fill leftovers with chunk of repeated string */
 868                             MB_METACHARINIT();
 869                             while (m > 0) {
 870                                 cl = MB_METACHARLENCONV(postmul, &cchar);
 871                                 m -= WCPADWIDTH(cchar);
 872                                 while (cl--)
 873                                     *r++ = *postmul++;
 874                             }
 875                         }
 876                     }
 877                 }
 878             }
 879         } else {
 880             /*
 881              * Pad only on the left.
 882              */
 883             f = prenum - ls;
 884             if (f <= 0) {
 885                 /*
 886                  * Original string is at least as wide as padding.
 887                  * Truncate original string to width.
 888                  * Truncate on left, so skip the characters we
 889                  * don't need.
 890                  */
 891                 f = -f;
 892                 MB_METACHARINIT();
 893                 while (f > 0) {
 894                     str += MB_METACHARLENCONV(str, &cchar);
 895                     f -= WCPADWIDTH(cchar);
 896                 }
 897                 /* Copy the rest of the original string */
 898                 for (c = prenum; c > 0; ) {
 899                     cl = MB_METACHARLENCONV(str, &cchar);
 900                     while (cl--)
 901                         *r++ = *str++;
 902                     c -= WCPADWIDTH(cchar);
 903                 }
 904             } else {
 905                 /*
 906                  * We can fit the entire string...
 907                  */
 908                 if (f <= lpreone) {
 909                     if (preone) {
 910                         /*
 911                          * ...with some fraction of the unrepeated string.
 912                          */
 913                         /* We need this width of characters. */
 914                         c = f;
 915                         /*
 916                          * We therefore need to skip this width of
 917                          * characters.
 918                          */
 919                         f = lpreone - f;
 920                         MB_METACHARINIT();
 921                         for (t = preone; f > 0; ) {
 922                             t += MB_METACHARLENCONV(t, &cchar);
 923                             f -= WCPADWIDTH(cchar);
 924                         }
 925                         /* Copy the rest of preone */
 926                         while (*t)
 927                             *r++ = *t++;
 928                     }
 929                 } else {
 930                     /*
 931                      * We can fit the whole of preone, needing this width
 932                      * first
 933                      */
 934                     f -= lpreone;
 935                     if (lpremul) {
 936                         if ((m = f % lpremul)) {
 937                             /*
 938                              * Some fraction of the repeated string needed.
 939                              */
 940                             /* Need this much... */
 941                             c = m;
 942                             /* ...skipping this much first. */
 943                             m = lpremul - m;
 944                             MB_METACHARINIT();
 945                             for (t = premul; m > 0; ) {
 946                                 t += MB_METACHARLENCONV(t, &cchar);
 947                                 m -= WCPADWIDTH(cchar);
 948                             }
 949                             /* Now the rest of the repeated string. */
 950                             while (c > 0) {
 951                                 cl = MB_METACHARLENCONV(t, &cchar);
 952                                 while (cl--)
 953                                     *r++ = *t++;
 954                                 c -= WCPADWIDTH(cchar);
 955                             }
 956                         }
 957                         for (cc = f / lpremul; cc--;) {
 958                             /*
 959                              * Repeat the repeated string.
 960                              */
 961                             MB_METACHARINIT();
 962                             for (c = lpremul, t = premul; c > 0; ) {
 963                                 cl = MB_METACHARLENCONV(t, &cchar);
 964                                 while (cl--)
 965                                     *r++ = *t++;
 966                                 c -= WCPADWIDTH(cchar);
 967                             }
 968                         }
 969                     }
 970                     if (preone) {
 971                         /*
 972                          * Now the entire unrepeated string.  Don't
 973                          * count the width, just dump it.  This is
 974                          * significant if there are special characters
 975                          * in this string.  It's sort of a historical
 976                          * accident that this worked, but there's nothing
 977                          * to stop us just dumping the thing out and assuming
 978                          * the user knows what they're doing.
 979                          */
 980                         while (*preone)
 981                             *r++ = *preone++;
 982                     }
 983                 }
 984                 /* Now the string being padded */
 985                 while (*str)
 986                     *r++ = *str++;
 987             }
 988         }
 989     } else if (postnum) {
 990         /*
 991          * Pad on the right.
 992          */
 993         f = postnum - ls;
 994         MB_METACHARINIT();
 995         if (f <= 0) {
 996             /*
 997              * Original string is at least as wide as padding.
 998              * Truncate original string to width.
 999              */
1000             for (c = postnum; c > 0; ) {
1001                 cl = MB_METACHARLENCONV(str, &cchar);
1002                 while (cl--)
1003                     *r++ = *str++;
1004                 c -= WCPADWIDTH(cchar);
1005             }
1006         } else {
1007             /*
1008              * There's some space to fill.  First copy the original
1009              * string, counting the width.  Make sure we copy the
1010              * entire string.
1011              */
1012             for (c = ls; *str; ) {
1013                 cl = MB_METACHARLENCONV(str, &cchar);
1014                 while (cl--)
1015                     *r++ = *str++;
1016                 c -= WCPADWIDTH(cchar);
1017             }
1018             MB_METACHARINIT();
1019             if (f <= lpostone) {
1020                 if (postone) {
1021                     /*
1022                      * Not enough or only just enough space to fit
1023                      * the unrepeated string.  Truncate as necessary.
1024                      */
1025                     for (c = f; c > 0; ) {
1026                         cl = MB_METACHARLENCONV(postone, &cchar);
1027                         while (cl--)
1028                             *r++ = *postone++;
1029                         c -= WCPADWIDTH(cchar);
1030                     }
1031                 }
1032             } else {
1033                 if (postone) {
1034                     f -= lpostone;
1035                     /* Copy the entire unrepeated string */
1036                     for (c = lpostone; *postone; ) {
1037                         cl = MB_METACHARLENCONV(postone, &cchar);
1038                         while (cl--)
1039                             *r++ = *postone++;
1040                         c -= WCPADWIDTH(cchar);
1041                     }
1042                 }
1043                 if (lpostmul) {
1044                     /* Repeat the repeated string */
1045                     for (cc = f / lpostmul; cc--;) {
1046                         MB_METACHARINIT();
1047                         for (c = lpostmul, t = postmul; *t; ) {
1048                             cl = MB_METACHARLENCONV(t, &cchar);
1049                             while (cl--)
1050                                 *r++ = *t++;
1051                             c -= WCPADWIDTH(cchar);
1052                         }
1053                     }
1054                     /*
1055                      * See if there's any fraction of the repeated
1056                      * string needed to fill up the remaining space.
1057                      */
1058                     if ((m = f % lpostmul)) {
1059                         MB_METACHARINIT();
1060                         while (m > 0) {
1061                             cl = MB_METACHARLENCONV(postmul, &cchar);
1062                             while (cl--)
1063                                 *r++ = *postmul++;
1064                             m -= WCPADWIDTH(cchar);
1065                         }
1066                     }
1067                 }
1068             }
1069         }
1070     }
1071     *r = '\0';
1072
1073     return ret;
1074 }
1075
1076
1077 /*
1078  * Look for a delimited portion of a string.  The first (possibly
1079  * multibyte) character at s is the delimiter.  Various forms
1080  * of brackets are treated separately, as documented.
1081  *
1082  * Returns a pointer to the final delimiter.  Sets *len to the
1083  * length of the final delimiter; a NULL causes *len to be set
1084  * to zero since we shouldn't advance past it.  (The string is
1085  * tokenized, so a NULL is a real end of string.)
1086  */
1087
1088 /**/
1089 char *
1090 get_strarg(char *s, int *lenp)
1091 {
1092     convchar_t del;
1093     int len;
1094     char tok = 0;
1095
1096     MB_METACHARINIT();
1097     len = MB_METACHARLENCONV(s, &del);
1098     if (!len) {
1099         *lenp = 0;
1100         return s;
1101     }
1102
1103 #ifdef MULTIBYTE_SUPPORT
1104     if (del == WEOF)
1105         del = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
1106 #endif
1107     s += len;
1108     switch (del) {
1109     case ZWC('('):
1110         del = ZWC(')');
1111         break;
1112     case '[':
1113         del = ZWC(']');
1114         break;
1115     case '{':
1116         del = ZWC('}');
1117         break;
1118     case '<':
1119         del = ZWC('>');
1120         break;
1121     case Inpar:
1122         tok = Outpar;
1123         break;
1124     case Inang:
1125         tok = Outang;
1126         break;
1127     case Inbrace:
1128         tok = Outbrace;
1129         break;
1130     case Inbrack:
1131         tok = Outbrack;
1132         break;
1133     }
1134
1135     if (tok) {
1136         /*
1137          * Looking for a matching token; we want the literal byte,
1138          * not a decoded multibyte character, so search specially.
1139          */
1140         while (*s && *s != tok)
1141             s++;
1142     } else {
1143         convchar_t del2;
1144         len = 0;
1145         while (*s) {
1146             len = MB_METACHARLENCONV(s, &del2);
1147 #ifdef MULTIBYTE_SUPPORT
1148             if (del2 == WEOF)
1149                 del2 = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
1150 #endif
1151             if (del == del2)
1152                 break;
1153             s += len;
1154         }
1155     }
1156
1157     *lenp = len;
1158     return s;
1159 }
1160
1161 /*
1162  * Get an integer argument; update *s to the end of the
1163  * final delimiter.  *delmatchp is set to the length of the
1164  * matched delimiter if we have matching, delimiters and there was no error in
1165  * the evaluation, else 0.
1166  */
1167
1168 /**/
1169 static int
1170 get_intarg(char **s, int *delmatchp)
1171 {
1172     int arglen;
1173     char *t = get_strarg(*s, &arglen);
1174     char *p, sav;
1175     zlong ret;
1176
1177     *delmatchp = 0;
1178     if (!*t)
1179         return -1;
1180     sav = *t;
1181     *t = '\0';
1182     p = dupstring(*s + arglen);
1183     *s = t + arglen;
1184     *t = sav;
1185     if (parsestr(p))
1186         return -1;
1187     singsub(&p);
1188     if (errflag)
1189         return -1;
1190     ret = mathevali(p);
1191     if (errflag)
1192         return -1;
1193     if (ret < 0)
1194         ret = -ret;
1195     *delmatchp = arglen;
1196     return ret < 0 ? -ret : ret;
1197 }
1198
1199 /* Parsing for the (e) flag. */
1200
1201 static int
1202 subst_parse_str(char **sp, int single, int err)
1203 {
1204     char *s;
1205
1206     *sp = s = dupstring(*sp);
1207
1208     if (!(err ? parsestr(s) : parsestrnoerr(s))) {
1209         if (!single) {
1210             int qt = 0;
1211
1212             for (; *s; s++)
1213                 if (!qt) {
1214                     if (*s == Qstring)
1215                         *s = String;
1216                     else if (*s == Qtick)
1217                         *s = Tick;
1218                 } else if (*s == Dnull)
1219                     qt = !qt;
1220         }
1221         return 0;
1222     }
1223     return 1;
1224 }
1225
1226 /* Evaluation for (#) flag */
1227
1228 static char *
1229 substevalchar(char *ptr)
1230 {
1231     zlong ires = mathevali(ptr);
1232     int len = 0;
1233
1234     if (errflag)
1235         return NULL;
1236 #ifdef MULTIBYTE_SUPPORT
1237     if (isset(MULTIBYTE) && ires > 127) {
1238         /* '\\' + 'U' + 8 bytes of character + '\0' */
1239         char buf[11];
1240
1241         /* inefficient: should separate out \U handling from getkeystring */
1242         sprintf(buf, "\\U%.8x", (unsigned int)ires & 0xFFFFFFFFu);
1243         ptr = getkeystring(buf, &len, GETKEYS_BINDKEY, NULL);
1244     }
1245     if (len == 0)
1246 #endif
1247     {
1248         ptr = zhalloc(2);
1249         len = 1;
1250         sprintf(ptr, "%c", (int)ires);
1251     }
1252     return metafy(ptr, len, META_USEHEAP);
1253 }
1254
1255 /*
1256  * Helper function for arguments to parameter flags which
1257  * handles the (p) and (~) flags as escapes and tok_arg respectively.
1258  */
1259
1260 static char *
1261 untok_and_escape(char *s, int escapes, int tok_arg)
1262 {
1263     int klen;
1264     char *dst;
1265
1266     untokenize(dst = dupstring(s));
1267     if (escapes) {
1268         dst = getkeystring(dst, &klen, GETKEYS_SEP, NULL);
1269         dst = metafy(dst, klen, META_HREALLOC);
1270     }
1271     if (tok_arg)
1272         shtokenize(dst);
1273     return dst;
1274 }
1275
1276 /* parameter substitution */
1277
/*
 * isstring(ch): true if ch is a literal `$' or, once converted to
 * char, one of the tokens String or Qstring.
 * isbrack(ch): true if ch is a literal `[' or, once converted to
 * char, the token Inbrack.
 * Both macros may evaluate their argument more than once.
 */
#define isstring(ch) ((ch) == '$' || (char)(ch) == String || (char)(ch) == Qstring)
#define isbrack(ch)  ((ch) == '[' || (char)(ch) == Inbrack)
1280
1281 /*
1282  * Given a linked list l with node n, perform parameter substitution
1283  * starting from *str.  Return the node with the substitution performed
1284  * or NULL if it failed.
1285  *
1286  * If qt is true, the `$' was quoted.  TODO: why can't we just look
1287  * to see if the first character was String or Qstring?
1288  *
1289  * If ssub is true, we are being called via singsubst(), which means
1290  * the result will be a single word.  TODO: can we generate the
1291  * single word at the end?  TODO: if not, or maybe in any case,
1292  * can we pass down the ssub flag from prefork with the other flags
1293  * instead of pushing it into different arguments?  (How exactly
1294  * to qt and ssub differ?  Are both necessary, if so is there some
1295  * better way of separating the two?)
1296  */
1297
1298 /**/
1299 static LinkNode
1300 paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
1301 {
1302     char *aptr = *str, c, cc;
1303     char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n);
1304     int colf;                   /* != 0 means we found a colon after the name */
1305     /*
1306      * There are far too many flags.  They need to be grouped
1307      * together into some structure which ties them to where they
1308      * came from.
1309      *
1310      * Some flags have an obscure relationship to their effect which
1311      * depends on incrementing them to particular values in particular
1312      * ways.
1313      */
1314     /*
1315      * Whether the value is an array (in aval) or not (in val).  There's
1316      * a movement from storing the value in the stuff read from the
1317      * parameter (the value v) to storing them in val and aval.
1318      * However, sometimes you find v reappearing temporarily.
1319      *
1320      * The values -1 and 2 are special to isarr.  The value -1 is used
1321      * to force us to keep an empty array.  It's tested in the YUK chunk
1322      * (I mean the one explicitly marked as such).  The value 2
1323      * indicates an array has come from splitting a scalar.  We use
1324      * that to override the usual rule that in double quotes we don't
1325      * remove empty elements (so "${(s.:):-foo::bar}" produces two
1326      * words).  This seems to me to be quite the wrong thing to do,
1327      * but it looks like code may be relying on it.  So we require (@)
1328      * as well before we keep the empty fields (look for assignments
1329      * like "isarr = nojoin ? 1 : 2").
1330      */
1331     int isarr = 0;
1332     /*
1333      * This is just the setting of the option except we need to
1334      * take account of ^ and ^^.
1335      */
1336     int plan9 = isset(RCEXPANDPARAM);
1337     /*
1338      * Likewise, but with ~ and ~~.  Also, we turn it off later
1339      * on if qt is passed down.
1340      */
1341     int globsubst = isset(GLOBSUBST);
1342     /*
1343      * Indicates ${(#)...}.
1344      */
1345     int evalchar = 0;
1346     /*
1347      * Indicates ${#pm}, massaged by whichlen which is set by
1348      * the (c), (w), and (W) flags to indicate how we take the length.
1349      */
1350     int getlen = 0;
1351     int whichlen = 0;
1352     /*
1353      * Indicates ${+pm}: a simple boolean for once.
1354      */
1355     int chkset = 0;
1356     /*
1357      * Indicates we have tried to get a value in v but that was
1358      * unset.  I don't quite understand why (v == NULL) isn't
1359      * good enough, but there are places where we seem to need
1360      * to second guess whether a value is a real value or not.
1361      */
1362     int vunset = 0;
1363     /*
1364      * Indicates (t) flag, i.e. print out types.  The code for
1365      * this actually isn't too horrifically inbred compared with
1366      * that for (P).
1367      */
1368     int wantt = 0;
1369     /*
1370      * Indicates splitting a string into an array.  There aren't
1371      * actually that many special cases for this --- which may
1372      * be why it doesn't work properly; we split in some cases
1373      * where we shouldn't, in particular on the multsubs for
1374      * handling embedded values for ${...=...} and the like.
1375      */
1376     int spbreak = isset(SHWORDSPLIT) && !ssub && !qt;
1377     /* Scalar and array value, see isarr above */
1378     char *val = NULL, **aval = NULL;
1379     /*
1380      * vbuf and v are both used to retrieve parameter values; this
1381      * is a kludge, we pass down vbuf and it may or may not return v.
1382      */
1383     struct value vbuf;
1384     Value v = NULL;
1385     /*
1386      * This expressive name refers to the set of flags which
1387      * is applied to matching for #, %, / and their doubled variants:
1388      * (M), (R), (B), (E), (N), (S).
1389      */
1390     int flags = 0;
1391     /* Value from (I) flag, used for ditto. */
1392     int flnum = 0;
1393     /*
1394      * sortit is to be passed to strmetasort().
1395      * indord is the (a) flag, which for consistency doesn't get
1396      * combined into sortit.
1397      */
1398     int sortit = SORTIT_ANYOLDHOW, indord = 0;
1399     /* (u): straightforward. */
1400     int unique = 0;
1401     /* combination of (L), (U) and (C) flags. */
1402     int casmod = CASMOD_NONE;
1403     /*
1404      * quotemod says we are doing either (q) (positive), (Q) (negative)
1405      * or not (0).  quotetype counts the q's for the first case.
1406      * quoteerr is simply (X) but gets passed around a lot because the
1407      * combination (eX) needs it.
1408      */
1409     int quotemod = 0, quotetype = QT_NONE, quoteerr = 0;
1410     /*
1411      * (V) flag: fairly straightforward, except that as with so
1412      * many flags it's not easy to decide where to put it in the order.
1413      */
1414     int visiblemod = 0;
1415     /*
1416      * The (z) flag, nothing to do with SH_WORD_SPLIT which is tied to
1417      * spbreak, see above; fairly straightforward in use but c.f.
1418      * the comment for visiblemod.
1419      */
1420     int shsplit = 0;
1421     /*
1422      * The separator from (j) and (s) respectively, or (F) and (f)
1423      * respectively (hardwired to "\n" in that case).  Slightly
1424      * confusingly also used for ${#pm}, though that's at least
1425      * documented in the manual
1426      */
1427     char *sep = NULL, *spsep = NULL;
1428     /*
1429      * Padding strings.  The left and right padding strings which
1430      * are repeated, then the ones which only occur once, for
1431      * the (l) and (r) flags.
1432      */
1433     char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL;
1434     /* Replacement string for /orig/repl and //orig/repl */
1435     char *replstr = NULL;
1436     /* The numbers for (l) and (r) */
1437     zlong prenum = 0, postnum = 0;
1438 #ifdef MULTIBYTE_SUPPORT
1439     /* The (m) flag: use width of multibyte characters */
1440     int multi_width = 0;
1441 #endif
1442     /*
1443      * Whether the value has been copied.  Optimisation:  if we
1444      * are modifying an expression, we only need to copy it the
1445      * first time, and if we don't modify it we can just use the
1446      * value from the parameter or input.
1447      */
1448     int copied = 0;
1449     /*
1450      * The (A) flag for array assignment, with consequences for
1451      * splitting and joining; (AA) gives arrasg == 2 for associative
1452      * arrays.
1453      */
1454     int arrasg = 0;
1455     /*
1456      * The (e) flag.  As we need to do extra work not quite
1457      * at the end, the effect of this is kludged in in several places.
1458      */
1459     int eval = 0;
1460     /*
1461      * The (P) flag.  This interacts a bit obscurely with whether
1462      * or not we are dealing with a sub expression (subexp).
1463      */
1464     int aspar = 0;
1465     /*
1466      * The (%) flag, c.f. visiblemod again.
1467      */
1468     int presc = 0;
1469     /*
1470      * The (@) flag; interacts obscurely with qt and isarr.
1471      * This is one of the things that decides whether multsub
1472      * will produce an array, but in an extremely indirect fashion.
1473      */
1474     int nojoin = 0;
1475     /*
1476      * != 0 means ${...}, otherwise $...  What works without braces
1477      * is largely a historical artefact (everything works with braces,
1478      * I sincerely hope).
1479      */
1480     char inbrace = 0;
1481     /*
1482      * Use for the (k) flag.  Goes down into the parameter code,
1483      * sometimes.
1484      */
1485     char hkeys = 0;
1486     /*
1487      * Used for the (v) flag, ditto.  Not quite sure why they're
1488      * separate, but the tradition seems to be that things only
1489      * get combined when that makes the result more obscure rather
1490      * than less.
1491      */
1492     char hvals = 0;
1493     /*
1494      * Whether we had to evaluate a subexpression, i.e. an
1495      * internal ${...} or $(...) or plain $pm.  We almost don't
1496      * need to remember this (which would be neater), but the (P)
1497      * flag means the subexp and !subexp code is obscurely combined,
1498      * and the argument passing to fetchvalue has another kludge.
1499      */
1500     int subexp;
1501
1502     *s++ = '\0';
1503     /*
1504      * Nothing to do unless the character following the $ is
1505      * something we recognise.
1506      *
1507      * Shouldn't this be a table or something?  We test for all
1508      * these later on, too.
1509      */
1510     c = *s;
1511     if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound &&
1512         c != '-' && c != '!' && c != '$' && c != String && c != Qstring &&
1513         c != '?' && c != Quest &&
1514         c != '*' && c != Star && c != '@' && c != '{' &&
1515         c != Inbrace && c != '=' && c != Equals && c != Hat &&
1516         c != '^' && c != '~' && c != Tilde && c != '+') {
1517         s[-1] = '$';
1518         *str = s;
1519         return n;
1520     }
1521     DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()");
1522     /*
1523      * Extra processing if there is an opening brace: mostly
1524      * flags in parentheses, but also one ksh hack.
1525      */
1526     if (c == Inbrace) {
1527         inbrace = 1;
1528         s++;
1529         /*
1530          * In ksh emulation a leading `!' is a special flag working
1531          * sort of like our (k).
1532          * TODO: this is one of very few cases tied directly to
1533          * the emulation mode rather than an option.  Since ksh
1534          * doesn't have parameter flags it might be neater to
1535          * handle this with the ^, =, ~ stuff, below.
1536          */
1537         if ((c = *s) == '!' && s[1] != Outbrace && EMULATION(EMULATE_KSH)) {
1538             hkeys = SCANPM_WANTKEYS;
1539             s++;
1540         } else if (c == '(' || c == Inpar) {
1541             char *t, sav;
1542             int tt = 0;
1543             zlong num;
1544             /*
1545              * The (p) flag is only remembered within
1546              * this block.  It says we do print-style handling
1547              * on the values for flags, but only on those.
1548              */
1549             int escapes = 0;
1550             /*
1551              * '~' in parentheses caused tokenization of string arg:
1552              * similar to (p).
1553              */
1554             int tok_arg = 0;
1555
1556             for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) {
1557                 int arglen;     /* length of modifier argument */
1558                 int dellen;     /* length of matched delimiter, 0 if not */
1559                 char *del0;     /* pointer to initial delimiter */
1560
1561                 switch (c) {
1562                 case ')':
1563                 case Outpar:
1564                     break;
1565                 case '~':
1566                 case Tilde:
1567                     tok_arg = !tok_arg;
1568                     break;
1569                 case 'A':
1570                     ++arrasg;
1571                     break;
1572                 case '@':
1573                     nojoin = 1;
1574                     break;
1575                 case 'M':
1576                     flags |= SUB_MATCH;
1577                     break;
1578                 case 'R':
1579                     flags |= SUB_REST;
1580                     break;
1581                 case 'B':
1582                     flags |= SUB_BIND;
1583                     break;
1584                 case 'E':
1585                     flags |= SUB_EIND;
1586                     break;
1587                 case 'N':
1588                     flags |= SUB_LEN;
1589                     break;
1590                 case 'S':
1591                     flags |= SUB_SUBSTR;
1592                     break;
1593                 case 'I':
1594                     s++;
1595                     flnum = get_intarg(&s, &dellen);
1596                     if (flnum < 0)
1597                         goto flagerr;
1598                     s--;
1599                     break;
1600
1601                 case 'L':
1602                     casmod = CASMOD_LOWER;
1603                     break;
1604                 case 'U':
1605                     casmod = CASMOD_UPPER;
1606                     break;
1607                 case 'C':
1608                     casmod = CASMOD_CAPS;
1609                     break;
1610
1611                 case 'o':
1612                     if (!sortit)
1613                         sortit |= SORTIT_SOMEHOW; /* sort, no modifiers */
1614                     break;
1615                 case 'O':
1616                     sortit |= SORTIT_BACKWARDS;
1617                     break;
1618                 case 'i':
1619                     sortit |= SORTIT_IGNORING_CASE;
1620                     break;
1621                 case 'n':
1622                     sortit |= SORTIT_NUMERICALLY;
1623                     break;
1624                 case 'a':
1625                     sortit |= SORTIT_SOMEHOW;
1626                     indord = 1;
1627                     break;
1628
1629                 case 'V':
1630                     visiblemod++;
1631                     break;
1632
1633                 case 'q':
1634                     quotemod++, quotetype++;
1635                     break;
1636                 case 'Q':
1637                     quotemod--;
1638                     break;
1639                 case 'X':
1640                     quoteerr = 1;
1641                     break;
1642
1643                 case 'e':
1644                     eval = 1;
1645                     break;
1646                 case 'P':
1647                     aspar = 1;
1648                     break;
1649
1650                 case 'c':
1651                     whichlen = 1;
1652                     break;
1653                 case 'w':
1654                     whichlen = 2;
1655                     break;
1656                 case 'W':
1657                     whichlen = 3;
1658                     break;
1659
1660                 case 'f':
1661                     spsep = "\n";
1662                     break;
1663                 case 'F':
1664                     sep = "\n";
1665                     break;
1666
1667                 case '0':
1668                     spsep = zhalloc(3);
1669                     spsep[0] = Meta;
1670                     spsep[1] = '\0' ^ 32;
1671                     spsep[2] = '\0';
1672                     break;
1673
1674                 case 's':
1675                     tt = 1;
1676                 /* fall through */
1677                 case 'j':
1678                     t = get_strarg(++s, &arglen);
1679                     if (*t) {
1680                         sav = *t;
1681                         *t = '\0';
1682                         if (tt)
1683                             spsep = untok_and_escape(s + arglen,
1684                                                      escapes, tok_arg);
1685                         else
1686                             sep = untok_and_escape(s + arglen,
1687                                                    escapes, tok_arg);
1688                         *t = sav;
1689                         s = t + arglen - 1;
1690                     } else
1691                         goto flagerr;
1692                     break;
1693
1694                 case 'l':
1695                     tt = 1;
1696                 /* fall through */
1697                 case 'r':
1698                     s++;
1699                     /* delimiter position */
1700                     del0 = s;
1701                     num = get_intarg(&s, &dellen);
1702                     if (num < 0)
1703                         goto flagerr;
1704                     if (tt)
1705                         prenum = num;
1706                     else
1707                         postnum = num;
1708                     /* must have same delimiter if more arguments */
1709                     if (!dellen || memcmp(del0, s, dellen)) {
1710                         /* decrement since loop will increment */
1711                         s--;
1712                         break;
1713                     }
1714                     t = get_strarg(s, &arglen);
1715                     if (!*t)
1716                         goto flagerr;
1717                     sav = *t;
1718                     *t = '\0';
1719                     if (tt)
1720                         premul = untok_and_escape(s + arglen, escapes,
1721                                                   tok_arg);
1722                     else
1723                         postmul = untok_and_escape(s + arglen, escapes,
1724                                                    tok_arg);
1725                     *t = sav;
1726                     sav = *s;
1727                     s = t + arglen;
1728                     /* again, continue only if another start delimiter */
1729                     if (memcmp(del0, s, dellen)) {
1730                         /* decrement since loop will increment */
1731                         s--;
1732                         break;
1733                     }
1734                     t = get_strarg(s, &arglen);
1735                     if (!*t)
1736                         goto flagerr;
1737                     sav = *t;
1738                     *t = '\0';
1739                     if (tt)
1740                         preone = untok_and_escape(s + arglen,
1741                                                   escapes, tok_arg);
1742                     else
1743                         postone = untok_and_escape(s + arglen,
1744                                                    escapes, tok_arg);
1745                     *t = sav;
1746                     /* -1 since loop will increment */
1747                     s = t + arglen - 1;
1748                     break;
1749
1750                 case 'm':
1751 #ifdef MULTIBYTE_SUPPORT
1752                     multi_width = 1;
1753 #endif
1754                     break;
1755
1756                 case 'p':
1757                     escapes = 1;
1758                     break;
1759
1760                 case 'k':
1761                     hkeys = SCANPM_WANTKEYS;
1762                     break;
1763                 case 'v':
1764                     hvals = SCANPM_WANTVALS;
1765                     break;
1766
1767                 case 't':
1768                     wantt = 1;
1769                     break;
1770
1771                 case '%':
1772                     presc++;
1773                     break;
1774
1775                 case 'z':
1776                     shsplit = 1;
1777                     break;
1778
1779                 case 'u':
1780                     unique = 1;
1781                     break;
1782
1783                 case '#':
1784                 case Pound:
1785                     evalchar = 1;
1786                     break;
1787
1788                 default:
1789                   flagerr:
1790                     zerr("error in flags");
1791                     return NULL;
1792                 }
1793             }
1794             s++;
1795         }
1796     }
1797
1798     /*
1799      * premul, postmul specify the padding character to be used
1800      * multiple times with the (l) and (r) flags respectively.
1801      */
1802     if (!premul)
1803         premul = " ";
1804     if (!postmul)
1805         postmul = " ";
1806
1807     /*
1808      * Look for special unparenthesised flags.
1809      * TODO: could make these able to appear inside parentheses, too,
1810      * i.e. ${(^)...} etc.
1811      */
1812     for (;;) {
1813         if ((c = *s) == '^' || c == Hat) {
1814             /* RC_EXPAND_PARAM on or off (doubled) */
1815             if ((c = *++s) == '^' || c == Hat) {
1816                 plan9 = 0;
1817                 s++;
1818             } else
1819                 plan9 = 1;
1820         } else if ((c = *s) == '=' || c == Equals) {
1821             /* SH_WORD_SPLIT on or off (doubled). spbreak = 2 means force */
1822             if ((c = *++s) == '=' || c == Equals) {
1823                 spbreak = 0;
1824                 s++;
1825             } else
1826                 spbreak = 2;
1827         } else if ((c == '#' || c == Pound) &&
1828                    (itype_end(s+1, IIDENT, 0) != s + 1
1829                     || (cc = s[1]) == '*' || cc == Star || cc == '@'
1830                     || cc == '-' || (cc == ':' && s[2] == '-')
1831                     || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) {
1832             getlen = 1 + whichlen, s++;
1833             /*
1834              * Return the length of the parameter.
1835              * getlen can be more than 1 to indicate characters (2),
1836              * words ignoring multiple delimiters (3), words taking
1837              * account of multiple delimiters (4).  The delimiter is in
1838              * spsep; NULL means $IFS.
1839              */
1840         } else if (c == '~' || c == Tilde) {
1841             /* GLOB_SUBST on or off (doubled) */
1842             if ((c = *++s) == '~' || c == Tilde) {
1843                 globsubst = 0;
1844                 s++;
1845             } else
1846                 globsubst = 1;
1847         } else if (c == '+') {
1848             /*
1849              * Return whether indicated parameter is set.
1850              * Try to handle this when parameter is named
1851              * by (P) (second part of test).
1852              */
1853             if (itype_end(s+1, IIDENT, 0) != s+1 || (aspar && isstring(s[1]) &&
1854                                  (s[2] == Inbrace || s[2] == Inpar)))
1855                 chkset = 1, s++;
1856             else if (!inbrace) {
1857                 /* Special case for `$+' on its own --- leave unmodified */
1858                 *aptr = '$';
1859                 *str = aptr + 1;
1860                 return n;
1861             } else {
1862                 zerr("bad substitution");
1863                 return NULL;
1864             }
1865         } else if (inbrace && inull(*s)) {
1866             /*
1867              * Handles things like ${(f)"$(<file)"} by skipping
1868              * the double quotes.  We don't need to know what was
1869              * actually there; the presence of a String or Qstring
1870              * is good enough.
1871              */
1872             s++;
1873         } else
1874             break;
1875     }
1876     /* Don't activate special pattern characters if inside quotes */
1877     globsubst = globsubst && !qt;
1878
1879     /*
1880      * At this point, we usually expect a parameter name.
1881      * However, there may be a nested ${...} or $(...).
1882      * These say that the parameter itself is somewhere inside,
1883      * or that there isn't a parameter and we will get the values
1884      * from a command substitution itself.  In either case,
1885      * the current instance of paramsubst() doesn't fetch a value,
1886      * it just operates on what gets passed up.
1887      * (The first ought to have been {...}, reserving ${...}
1888      * for substituting a value at that point, but it's too late now.)
1889      */
1890     idbeg = s;
1891     if ((subexp = (inbrace && s[-1] && isstring(*s) &&
1892                    (s[1] == Inbrace || s[1] == Inpar)))) {
1893         int sav;
1894         int quoted = *s == Qstring;
1895
1896         val = s++;
1897         skipparens(*s, *s == Inpar ? Outpar : Outbrace, &s);
1898         sav = *s;
1899         *s = 0;
1900         /*
1901          * This handles arrays.  TODO: this is not the most obscure call to
1902          * multsub() (see below) but even so it would be nicer to pass down
1903          * and back the arrayness more rationally.  In that case, we should
1904          * remove the aspar test and extract a value from an array, if
1905          * necessary, when we handle (P) lower down.
1906          */
1907         if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL) && quoted) {
1908             /* Empty quoted string --- treat as null string, not elided */
1909             isarr = -1;
1910             aval = (char **) hcalloc(sizeof(char *));
1911             aspar = 0;
1912         } else if (aspar)
1913             idbeg = val;
1914         *s = sav;
1915         /*
1916          * This tests for the second double quote in an expression
1917          * like ${(f)"$(<file)"}, compare above.
1918          */
1919         while (inull(*s))
1920             s++;
1921         v = (Value) NULL;
1922     } else if (aspar) {
1923         /*
1924          * No subexpression, but in any case the value is going
1925          * to give us the name of a parameter on which we do
1926          * our remaining processing.  In other words, this
1927          * makes ${(P)param} work like ${(P)${param}}.  (Probably
1928          * better looked at, this is the basic code for ${(P)param}
1929          * and it's been kludged into the subexp code because no
1930          * opportunity for a kludge has been neglected.)
1931          */
1932         if ((v = fetchvalue(&vbuf, &s, 1, (qt ? SCANPM_DQUOTED : 0)))) {
1933             val = idbeg = getstrvalue(v);
1934             subexp = 1;
1935         } else
1936             vunset = 1;
1937     }
1938     /*
1939      * We need to retrieve a value either if we haven't already
1940      * got it from a subexpression, or if the processing so
1941      * far has just yielded us a parameter name to be processed
1942      * with (P).
1943      */
1944     if (!subexp || aspar) {
1945         char *ov = val;
1946
1947         /*
1948          * Second argument: decide whether to use the subexpression or
1949          *   the string next on the line as the parameter name.
1950          * Third argument:  decide how to process brackets
1951          *   1 means full processing
1952          *   -1 appears to mean something along the lines of
1953          *     only handle single digits and don't handle brackets.
1954          *     I *think* (but it's really only a guess) that this
1955          *     is used by the test below the wantt handling, so
1956          *     that in certain cases we handle brackets there.
1957          *   0 would apparently mean something like we know we
1958          *     should have the name of a scalar and we get cross
1959          *     if there's anything present which disagrees with that
1960          * but you will search fetchvalue() in vain for comments on this.
1961          * Fourth argument gives flags to do with keys, values, quoting,
1962          * assigning depending on context and parameter flags.
1963          *
1964          * This is the last mention of subexp, so presumably this
1965          * is why the code above makes sure subexp is set if aspar (the
1966          * (P) flag) is set.  I *think* what's going on here is the
1967          * second argument is for both input and output: with
1968          * subexp, we only want the input effect, whereas normally
1969          * we let fetchvalue set the main string pointer s to
1970          * the end of the bit it's fetched.
1971          */
1972         if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
1973                              (wantt ? -1 :
1974                               ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
1975                              hkeys|hvals|
1976                              (arrasg ? SCANPM_ASSIGNING : 0)|
1977                              (qt ? SCANPM_DQUOTED : 0))) ||
1978             (v->pm && (v->pm->node.flags & PM_UNSET)) ||
1979             (v->flags & VALFLAG_EMPTY))
1980             vunset = 1;
1981
1982         if (wantt) {
1983             /*
1984              * Handle the (t) flag: value now becomes the type
1985              * information for the parameter.
1986              */
1987             if (v && v->pm && !(v->pm->node.flags & PM_UNSET)) {
1988                 int f = v->pm->node.flags;
1989
1990                 switch (PM_TYPE(f)) {
1991                 case PM_SCALAR:  val = "scalar"; break;
1992                 case PM_ARRAY:   val = "array"; break;
1993                 case PM_INTEGER: val = "integer"; break;
1994                 case PM_EFLOAT:
1995                 case PM_FFLOAT:  val = "float"; break;
1996                 case PM_HASHED:  val = "association"; break;
1997                 }
1998                 val = dupstring(val);
1999                 if (v->pm->level)
2000                     val = dyncat(val, "-local");
2001                 if (f & PM_LEFT)
2002                     val = dyncat(val, "-left");
2003                 if (f & PM_RIGHT_B)
2004                     val = dyncat(val, "-right_blanks");
2005                 if (f & PM_RIGHT_Z)
2006                     val = dyncat(val, "-right_zeros");
2007                 if (f & PM_LOWER)
2008                     val = dyncat(val, "-lower");
2009                 if (f & PM_UPPER)
2010                     val = dyncat(val, "-upper");
2011                 if (f & PM_READONLY)
2012                     val = dyncat(val, "-readonly");
2013                 if (f & PM_TAGGED)
2014                     val = dyncat(val, "-tag");
2015                 if (f & PM_EXPORTED)
2016                     val = dyncat(val, "-export");
2017                 if (f & PM_UNIQUE)
2018                     val = dyncat(val, "-unique");
2019                 if (f & PM_HIDE)
2020                     val = dyncat(val, "-hide");
2021                 if (f & PM_HIDE)
2022                     val = dyncat(val, "-hideval");
2023                 if (f & PM_SPECIAL)
2024                     val = dyncat(val, "-special");
2025                 vunset = 0;
2026             } else
2027                 val = dupstring("");
2028
2029             v = NULL;
2030             isarr = 0;
2031         }
2032     }
2033     /*
2034      * We get in here two ways; either we need to convert v into
2035      * the local value system, or we need to get rid of brackets
2036      * even if there isn't a v.
2037      */
2038     while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) {
2039         if (!v) {
2040             /*
2041              * Index applied to non-existent parameter; we may or may
2042              * not have a value to index, however.  Create a temporary
2043              * empty parameter as a trick, and index on that.  This
2044              * usually happens the second time around the loop when
2045              * we've used up the original parameter value and want to
2046              * apply a subscript to what's left.  However, it's also
2047              * possible it's got something to do with some of that murky
2048              * passing of -1's as the third argument to fetchvalue() to
2049              * inhibit bracket parsing at that stage.
2050              */
2051             Param pm;
2052             char *os = s;
2053
2054             if (!isbrack(*s))
2055                 break;
2056             if (vunset) {
2057                 val = dupstring("");
2058                 isarr = 0;
2059             }
2060             pm = createparam(nulstring, isarr ? PM_ARRAY : PM_SCALAR);
2061             DPUTS(!pm, "BUG: parameter not created");
2062             if (isarr)
2063                 pm->u.arr = aval;
2064             else
2065                 pm->u.str = val;
2066             v = (Value) hcalloc(sizeof *v);
2067             v->isarr = isarr;
2068             v->pm = pm;
2069             v->end = -1;
2070             if (getindex(&s, v, qt ? SCANPM_DQUOTED : 0) || s == os)
2071                 break;
2072         }
2073         /*
2074          * This is where we extract a value (we know now we have
2075          * one) into the local parameters for a scalar (val) or
2076          * array (aval) value.  TODO: move val and aval into
2077          * a structure with a discriminator.  Hope we can make
2078          * more things array values at this point and dearrayify later.
2079          * v->isarr tells us whether the stuff from down below looks
2080          * like an array.
2081          *
2082          * I think we get to discard the existing value of isarr
2083          * here because it's already been taken account of, either
2084          * in the subexp stuff or immediately above.
2085          */
2086         if ((isarr = v->isarr)) {
2087             /*
2088              * No way to get here with v->flags & VALFLAG_INV, so
2089              * getvaluearr() is called by getarrvalue(); needn't test
2090              * PM_HASHED.
2091              */
2092             if (v->isarr == SCANPM_WANTINDEX) {
2093                 isarr = v->isarr = 0;
2094                 val = dupstring(v->pm->node.nam);
2095             } else
2096                 aval = getarrvalue(v);
2097         } else {
2098             /* Value retrieved from parameter/subexpression is scalar */
2099             if (v->pm->node.flags & PM_ARRAY) {
2100                 /*
2101                  * Although the value is a scalar, the parameter
2102                  * itself is an array.  Presumably this is due to
2103                  * being quoted, or doing single substitution or something,
2104                  * TODO: we're about to do some definitely stringy
2105                  * stuff, so something like this bit is probably
2106                  * necessary.  However, I'd like to leave any
2107                  * necessary joining of arrays until this point
2108                  * to avoid the multsub() horror.
2109                  */
2110                 int tmplen = arrlen(v->pm->gsu.a->getfn(v->pm));
2111
2112                 if (v->start < 0)
2113                     v->start += tmplen + ((v->flags & VALFLAG_INV) ? 1 : 0);
2114                 if (!(v->flags & VALFLAG_INV) &&
2115                     (v->start >= tmplen || v->start < 0))
2116                     vunset = 1;
2117             }
2118             if (!vunset) {
2119                 /*
2120                  * There really is a value.  Padding and case
2121                  * transformations used to be handled here, but
2122                  * are now handled in getstrvalue() for greater
2123                  * consistency.  However, we get unexpected effects
2124                  * if we allow them to be applied on every call, so
2125                  * set the flag that allows them to be substituted.
2126                  */
2127                 v->flags |= VALFLAG_SUBST;
2128                 val = getstrvalue(v);
2129             }
2130         }
2131         /*
2132          * Finished with the original parameter and its indices;
2133          * carry on looping to see if we need to do more indexing.
2134          * This means we finally get rid of v in favour of val and
2135          * aval.  We could do with somehow encapsulating the bit
2136          * where we need v.
2137          */
2138         v = NULL;
2139         if (!inbrace)
2140             break;
2141     }
2142     /*
2143      * We're now past the name or subexpression; the only things
2144      * which can happen now are a closing brace, one of the standard
2145      * parameter postmodifiers, or a history-style colon-modifier.
2146      *
2147      * Again, this duplicates tests for characters we're about to
2148      * examine properly later on.
2149      */
2150     if (inbrace &&
2151         (c = *s) != '-' && c != '+' && c != ':' && c != '%'  && c != '/' &&
2152         c != '=' && c != Equals &&
2153         c != '#' && c != Pound &&
2154         c != '?' && c != Quest &&
2155         c != '}' && c != Outbrace) {
2156         zerr("bad substitution");
2157         return NULL;
2158     }
2159     /*
2160      * Join arrays up if we're in quotes and there isn't some
2161      * override such as (@).
2162      * TODO: hmm, if we're called as part of some recursive
2163      * substitution do we want to delay this until we get back to
2164      * the top level?  Or is if there's a qt (i.e. this parameter
2165      * substitution is in quotes) always good enough?  Potentially
2166      * we may be OK by now --- all potential `@'s and subexpressions
2167      * have been handled, including any [@] index which comes up
2168      * by virtue of v->isarr being set to SCANPM_ISVAR_AT which
2169      * is now in isarr.
2170      *
2171      * However, if we are replacing multsub() with something that
2172      * doesn't mangle arrays, we may need to delay this step until after
2173      * the foo:- or foo:= or whatever that causes that.  Note the value
2174      * (string or array) at this point is irrelevant if we are going to
2175      * be doing that.  This would mean // and stuff get applied
2176      * arraywise even if quoted.  That's probably wrong, so maybe
2177      * this just stays.
2178      *
2179      * We do a separate stage of dearrayification in the YUK chunk,
2180      * I think mostly because of the way we make array or scalar
2181      * values appear to the caller.
2182      */
2183     if (isarr) {
2184         if (nojoin)
2185             isarr = -1;
2186         if (qt && !getlen && isarr > 0) {
2187             val = sepjoin(aval, sep, 1);
2188             isarr = 0;
2189         }
2190     }
2191
2192     idend = s;
2193     if (inbrace) {
2194         /*
2195          * This is to match a closing double quote in case
2196          * we didn't have a subexpression, e.g. ${"foo"}.
2197          * This form is pointless, but logically it ought to work.
2198          */
2199         while (inull(*s))
2200             s++;
2201     }
2202     /*
2203      * We don't yet know whether a `:' introduces a history-style
2204      * colon modifier or qualifies something like ${...:=...}.
2205      * But if we remember the colon here it's easy to check later.
2206      */
2207     if ((colf = *s == ':'))
2208         s++;
2209
2210
2211     /* fstr is to be the text following the substitution.  If we have *
2212      * braces, we look for it here, else we infer it later on.        */
2213     fstr = s;
2214     if (inbrace) {
2215         int bct;
2216         for (bct = 1; (c = *fstr); fstr++) {
2217             if (c == Inbrace)
2218                 bct++;
2219             else if (c == Outbrace && !--bct)
2220                 break;
2221         }
2222
2223         if (bct) {
2224         noclosebrace:
2225             zerr("closing brace expected");
2226             return NULL;
2227         }
2228         if (c)
2229             *fstr++ = '\0';
2230     }
2231
2232     /* Check for ${..?..} or ${..=..} or one of those. *
2233      * Only works if the name is in braces.            */
2234
2235     if (inbrace && ((c = *s) == '-' ||
2236                     c == '+' ||
2237                     c == ':' || /* i.e. a doubled colon */
2238                     c == '=' || c == Equals ||
2239                     c == '%' ||
2240                     c == '#' || c == Pound ||
2241                     c == '?' || c == Quest ||
2242                     c == '/')) {
2243
2244         /*
2245          * Default index is 1 if no (I) or (I) gave zero.   But
2246          * why don't we set the default explicitly at the start
2247          * and massage any passed index where we set flnum anyway?
2248          */
2249         if (!flnum)
2250             flnum++;
2251         if (c == '%')
2252             flags |= SUB_END;
2253
2254         /* Check for ${..%%..} or ${..##..} */
2255         if ((c == '%' || c == '#' || c == Pound) && c == s[1]) {
2256             s++;
2257             /* we have %%, not %, or ##, not # */
2258             flags |= SUB_LONG;
2259         }
2260         s++;
2261         if (s[-1] == '/') {
2262             char *ptr;
2263             /*
2264              * previous flags are irrelevant, except for (S) which
2265              * indicates shortest substring; else look for longest.
2266              */
2267             flags = (flags & SUB_SUBSTR) ? 0 : SUB_LONG;
2268             if ((c = *s) == '/') {
2269                 /* doubled, so replace all occurrences */
2270                 flags |= SUB_GLOBAL;
2271                 c = *++s;
2272             }
2273             /* Check for anchored substitution */
2274             if (c == '#' || c == Pound) {
2275                 /*
2276                  * anchor at head: this is the `normal' case in
2277                  * getmatch and we only require the flag if SUB_END
2278                  * is also present.
2279                  */
2280                 flags |= SUB_START;
2281                 s++;
2282             }
2283             if (*s == '%') {
2284                 /* anchor at tail */
2285                 flags |= SUB_END;
2286                 s++;
2287             }
2288             if (!(flags & (SUB_START|SUB_END))) {
2289                 /* No anchor, so substring */
2290                 flags |= SUB_SUBSTR;
2291             }
2292             /*
2293              * Find the / marking the end of the search pattern.
2294              * If there isn't one, we're just going to delete that,
2295              * i.e. replace it with an empty string.
2296              *
2297              * We used to use double backslashes to quote slashes,
2298              * but actually that was buggy and using a single backslash
2299              * is easier and more obvious.
2300              */
2301             for (ptr = s; (c = *ptr) && c != '/'; ptr++)
2302             {
2303                 if ((c == Bnull || c == Bnullkeep || c == '\\') && ptr[1])
2304                 {
2305                     if (ptr[1] == '/')
2306                         chuck(ptr);
2307                     else
2308                         ptr++;
2309                 }
2310             }
2311             replstr = (*ptr && ptr[1]) ? ptr+1 : "";
2312             *ptr = '\0';
2313         }
2314
2315         /* See if this was ${...:-...}, ${...:=...}, etc. */
2316         if (colf)
2317             flags |= SUB_ALL;
2318         /*
2319          * With no special flags, i.e. just a # or % or whatever,
2320          * the matched portion is removed and we keep the rest.
2321          * We also want the rest when we're doing a substitution.
2322          */
2323         if (!(flags & (SUB_MATCH|SUB_REST|SUB_BIND|SUB_EIND|SUB_LEN)))
2324             flags |= SUB_REST;
2325
2326         if (colf && !vunset)
2327             vunset = (isarr) ? !*aval : !*val || (*val == Nularg && !val[1]);
2328
2329         switch (s[-1]) {
2330         case '+':
2331             if (vunset) {
2332                 val = dupstring("");
2333                 copied = 1;
2334                 isarr = 0;
2335                 break;
2336             }
2337             vunset = 1;
2338         /* Fall Through! */
2339         case '-':
2340             if (vunset) {
2341                 int ws = opts[SHWORDSPLIT];
2342                 val = dupstring(s);
2343                 /* If word-splitting is enabled, we ask multsub() to split
2344                  * the substituted string at unquoted whitespace.  Then, we
2345                  * turn off spbreak so that no further splitting occurs.
2346                  * This allows a construct such as ${1+"$@"} to correctly
2347                  * keep its array splits, and weird constructs such as
2348                  * ${str+"one two" "3 2 1" foo "$str"} to only be split
2349                  * at the unquoted spaces. */
2350                 opts[SHWORDSPLIT] = spbreak;
2351                 multsub(&val, spbreak && !aspar, (aspar ? NULL : &aval), &isarr, NULL);
2352                 opts[SHWORDSPLIT] = ws;
2353                 copied = 1;
2354                 spbreak = 0;
2355             }
2356             break;
2357         case ':':
2358             /* this must be `::=', unconditional assignment */
2359             if (*s != '=' && *s != Equals)
2360                 goto noclosebrace;
2361             vunset = 1;
2362             s++;
2363             /* Fall through */
2364         case '=':
2365         case Equals:
2366             if (vunset) {
2367                 int ws = opts[SHWORDSPLIT];
2368                 char sav = *idend;
2369                 int l;
2370
2371                 *idend = '\0';
2372                 val = dupstring(s);
2373                 if (spsep || !arrasg) {
2374                     opts[SHWORDSPLIT] = 0;
2375                     multsub(&val, 0, NULL, &isarr, NULL);
2376                 } else {
2377                     opts[SHWORDSPLIT] = spbreak;
2378                     multsub(&val, spbreak, &aval, &isarr, NULL);
2379                     spbreak = 0;
2380                 }
2381                 opts[SHWORDSPLIT] = ws;
2382                 if (arrasg) {
2383                     /* This is an array assignment. */
2384                     char *arr[2], **t, **a, **p;
2385                     if (spsep || spbreak) {
2386                         aval = sepsplit(val, spsep, 0, 1);
2387                         isarr = nojoin ? 1 : 2;
2388                         l = arrlen(aval);
2389                         if (l && !*(aval[l-1]))
2390                             l--;
2391                         if (l && !**aval)
2392                             l--, t = aval + 1;
2393                         else
2394                             t = aval;
2395                     } else if (!isarr) {
2396                         if (!*val && arrasg > 1) {
2397                             arr[0] = NULL;
2398                             l = 0;
2399                         } else {
2400                             arr[0] = val;
2401                             arr[1] = NULL;
2402                             l = 1;
2403                         }
2404                         t = aval = arr;
2405                     } else
2406                         l = arrlen(aval), t = aval;
2407                     p = a = zalloc(sizeof(char *) * (l + 1));
2408                     while (l--) {
2409                         untokenize(*t);
2410                         *p++ = ztrdup(*t++);
2411                     }
2412                     *p++ = NULL;
2413                     if (arrasg > 1) {
2414                         Param pm = sethparam(idbeg, a);
2415                         if (pm)
2416                             aval = paramvalarr(pm->gsu.h->getfn(pm), hkeys|hvals);
2417                     } else
2418                         setaparam(idbeg, a);
2419                 } else {
2420                     untokenize(val);
2421                     setsparam(idbeg, ztrdup(val));
2422                 }
2423                 *idend = sav;
2424                 copied = 1;
2425                 if (isarr) {
2426                   if (nojoin)
2427                     isarr = -1;
2428                   if (qt && !getlen && isarr > 0 && !spsep && spbreak < 2) {
2429                     val = sepjoin(aval, sep, 1);
2430                     isarr = 0;
2431                   }
2432                   sep = spsep = NULL;
2433                   spbreak = 0;
2434                 }
2435             }
2436             break;
2437         case '?':
2438         case Quest:
2439             if (vunset) {
2440                 *idend = '\0';
2441                 zerr("%s: %s", idbeg, *s ? s : "parameter not set");
2442                 if (!interact) {
2443                     if (mypid == getpid()) {
2444                         /*
2445                          * paranoia: don't check for jobs, but there shouldn't
2446                          * be any if not interactive.
2447                          */
2448                         stopmsg = 1;
2449                         zexit(1, 0);
2450                     } else
2451                         _exit(1);
2452                 }
2453                 return NULL;
2454             }
2455             break;
2456         case '%':
2457         case '#':
2458         case Pound:
2459         case '/':
2460             /* This once was executed only `if (qt) ...'. But with that
2461              * patterns in a expansion resulting from a ${(e)...} aren't
2462              * tokenized even though this function thinks they are (it thinks
2463              * they are because parse_subst_str() turns Qstring tokens
2464              * into String tokens and for unquoted parameter expansions the
2465              * lexer normally does tokenize patterns inside parameter
2466              * expansions). */
2467             {
2468                 int one = noerrs, oef = errflag, haserr;
2469
2470                 if (!quoteerr)
2471                     noerrs = 1;
2472                 haserr = parse_subst_string(s);
2473                 noerrs = one;
2474                 if (!quoteerr) {
2475                     errflag = oef;
2476                     if (haserr)
2477                         shtokenize(s);
2478                 } else if (haserr || errflag) {
2479                     zerr("parse error in ${...%c...} substitution", s[-1]);
2480                     return NULL;
2481                 }
2482             }
2483             {
2484 #if 0
2485                 /*
2486                  * This allows # and % to be at the start of
2487                  * a parameter in the substitution, which is
2488                  * a bit nasty, and can be done (although
2489                  * less efficiently) with anchors.
2490                  */
2491
2492                 char t = s[-1];
2493
2494                 singsub(&s);
2495
2496                 if (t == '/' && (flags & SUB_SUBSTR)) {
2497                     if ((c = *s) == '#' || c == '%') {
2498                         flags &= ~SUB_SUBSTR;
2499                         if (c == '%')
2500                             flags |= SUB_END;
2501                         s++;
2502                     } else if (c == '\\') {
2503                         s++;
2504                     }
2505                 }
2506 #else
2507                 singsub(&s);
2508 #endif
2509             }
2510
2511             /*
2512              * Either loop over an array doing replacements or
2513              * do the replacement on a string.
2514              *
2515              * We need an untokenized value for matching.
2516              */
2517             if (!vunset && isarr) {
2518                 char **ap;
2519                 if (!copied) {
2520                     aval = arrdup(aval);
2521                     copied = 1;
2522                 }
2523                 for (ap = aval; *ap; ap++) {
2524                     untokenize(*ap);
2525                 }
2526                 getmatcharr(&aval, s, flags, flnum, replstr);
2527             } else {
2528                 if (vunset)
2529                     val = dupstring("");
2530                 if (!copied) {
2531                     val = dupstring(val);
2532                     copied = 1;
2533                     untokenize(val);
2534                 }
2535                 getmatch(&val, s, flags, flnum, replstr);
2536             }
2537             break;
2538         }
2539     } else {                    /* no ${...=...} or anything, but possible modifiers. */
2540         /*
2541          * Handle ${+...}.  TODO: strange, why do we handle this only
2542          * if there isn't a trailing modifier?  Why don't we do this
2543          * e.g. when we handle the ${(t)...} flag?
2544          */
2545         if (chkset) {
2546             val = dupstring(vunset ? "0" : "1");
2547             isarr = 0;
2548         } else if (vunset) {
2549             if (unset(UNSET)) {
2550                 *idend = '\0';
2551                 zerr("%s: parameter not set", idbeg);
2552                 return NULL;
2553             }
2554             val = dupstring("");
2555         }
2556         if (colf) {
2557             /*
2558              * History style colon modifiers.  May need to apply
2559              * on multiple elements of an array.
2560              */
2561             s--;
2562             if (unset(KSHARRAYS) || inbrace) {
2563                 if (!isarr)
2564                     modify(&val, &s);
2565                 else {
2566                     char *ss;
2567                     char **ap = aval;
2568                     char **pp = aval = (char **) hcalloc(sizeof(char *) *
2569                                                          (arrlen(aval) + 1));
2570
2571                     while ((*pp = *ap++)) {
2572                         ss = s;
2573                         modify(pp++, &ss);
2574                     }
2575                     if (pp == aval) {
2576                         char *t = "";
2577                         ss = s;
2578                         modify(&t, &ss);
2579                     }
2580                     s = ss;
2581                 }
2582                 copied = 1;
2583                 if (inbrace && *s) {
2584                     if (*s == ':' && !imeta(s[1]))
2585                         zerr("unrecognized modifier `%c'", s[1]);
2586                     else
2587                         zerr("unrecognized modifier");
2588                     return NULL;
2589                 }
2590             }
2591         }
2592         if (!inbrace)
2593             fstr = s;
2594     }
2595     if (errflag)
2596         return NULL;
2597     if (evalchar) {
2598         int one = noerrs, oef = errflag, haserr = 0;
2599
2600         if (!quoteerr)
2601             noerrs = 1;
2602         /*
2603          * Evaluate the value numerically and output the result as
2604          * a character.
2605          */
2606         if (isarr) {
2607             char **aval2, **avptr, **av2ptr;
2608
2609             aval2 = (char **)zhalloc((arrlen(aval)+1)*sizeof(char *));
2610
2611             for (avptr = aval, av2ptr = aval2; *avptr; avptr++, av2ptr++)
2612             {
2613                 /* When noerrs = 1, the only error is out-of-memory */
2614                 if (!(*av2ptr = substevalchar(*avptr))) {
2615                     haserr = 1;
2616                     break;
2617                 }
2618             }
2619             *av2ptr = NULL;
2620             aval = aval2;
2621         } else {
2622             /* When noerrs = 1, the only error is out-of-memory */
2623             if (!(val = substevalchar(val)))
2624                 haserr = 1;
2625         }
2626         noerrs = one;
2627         if (!quoteerr)
2628             errflag = oef;
2629         if (haserr || errflag)
2630             return NULL;
2631     }
2632     /*
2633      * This handles taking a length with ${#foo} and variations.
2634      * TODO: again. one might naively have thought this had the
2635      * same sort of effect as the ${(t)...} flag and the ${+...}
2636      * test, although in this case we do need the value rather
2637      * than the parameter, so maybe it's a bit different.
2638      */
2639     if (getlen) {
2640         long len = 0;
2641         char buf[14];
2642
2643         if (isarr) {
2644             char **ctr;
2645             int sl = sep ? MB_METASTRLEN(sep) : 1;
2646
2647             if (getlen == 1)
2648                 for (ctr = aval; *ctr; ctr++, len++);
2649             else if (getlen == 2) {
2650                 if (*aval)
2651                     for (len = -sl, ctr = aval;
2652                          len += sl + MB_METASTRLEN2(*ctr, multi_width),
2653                              *++ctr;);
2654             }
2655             else
2656                 for (ctr = aval;
2657                      *ctr;
2658                      len += wordcount(*ctr, spsep, getlen > 3), ctr++);
2659         } else {
2660             if (getlen < 3)
2661                 len = MB_METASTRLEN2(val, multi_width);
2662             else
2663                 len = wordcount(val, spsep, getlen > 3);
2664         }
2665
2666         sprintf(buf, "%ld", len);
2667         val = dupstring(buf);
2668         isarr = 0;
2669     }
2670     /* At this point we make sure that our arrayness has affected the
2671      * arrayness of the linked list.  Then, we can turn our value into
2672      * a scalar for convenience sake without affecting the arrayness
2673      * of the resulting value. */
2674     if (isarr)
2675         l->list.flags |= LF_ARRAY;
2676     else
2677         l->list.flags &= ~LF_ARRAY;
2678     if (isarr > 0 && !plan9 && (!aval || !aval[0])) {
2679         val = dupstring("");
2680         isarr = 0;
2681     } else if (isarr && aval && aval[0] && !aval[1]) {
2682         /* treat a one-element array as a scalar for purposes of   *
2683          * concatenation with surrounding text (some${param}thing) *
2684          * and rc_expand_param handling.  Note: LF_ARRAY (above)   *
2685          * propagates the true array type from nested expansions.  */
2686         val = aval[0];
2687         isarr = 0;
2688     }
2689     /* This is where we may join arrays together, e.g. (j:,:) sets "sep", and
2690      * (afterward) may split the joined value (e.g. (s:-:) sets "spsep").  One
2691      * exception is that ${name:-word} and ${name:+word} will have already
2692      * done any requested splitting of the word value with quoting preserved.
2693      * "ssub" is true when we are called from singsub (via prefork):
2694      * it means that we must join arrays and should not split words. */
2695     if (ssub || spbreak || spsep || sep) {
2696         if (isarr) {
2697             val = sepjoin(aval, sep, 1);
2698             isarr = 0;
2699         }
2700         if (!ssub && (spbreak || spsep)) {
2701             aval = sepsplit(val, spsep, 0, 1);
2702             if (!aval || !aval[0])
2703                 val = dupstring("");
2704             else if (!aval[1])
2705                 val = aval[0];
2706             else
2707                 isarr = nojoin ? 1 : 2;
2708         }
2709         if (isarr)
2710             l->list.flags |= LF_ARRAY;
2711         else
2712             l->list.flags &= ~LF_ARRAY;
2713     }
2714     /*
2715      * Perform case modifications.
2716      */
2717     if (casmod != CASMOD_NONE) {
2718         copied = 1;             /* string is always modified by copy */
2719         if (isarr) {
2720             char **ap, **ap2;
2721
2722             ap = aval;
2723             ap2 = aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1));
2724
2725             while (*ap)
2726                 *ap2++ = casemodify(*ap++, casmod);
2727             *ap2++ = NULL;
2728         } else {
2729             val = casemodify(val, casmod);
2730         }
2731     }
2732     /*
2733      * Perform prompt-style modifications.
2734      */
2735     if (presc) {
2736         int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG];
2737         int opp = opts[PROMPTPERCENT];
2738
2739         if (presc < 2) {
2740             opts[PROMPTPERCENT] = 1;
2741             opts[PROMPTSUBST] = opts[PROMPTBANG] = 0;
2742         }
2743         /*
2744          * TODO:  It would be really quite nice to abstract the
2745          * isarr and !isarr code into a function which gets
2746          * passed a pointer to a function with the effect of
2747          * the promptexpand bit.  Then we could use this for
2748          * a lot of stuff and bury val/aval/isarr inside a structure
2749          * which gets passed to it.
2750          */
2751         if (isarr) {
2752             char **ap;
2753
2754             if (!copied)
2755                 aval = arrdup(aval), copied = 1;
2756             ap = aval;
2757             for (; *ap; ap++) {
2758                 char *tmps;
2759                 untokenize(*ap);
2760                 tmps = promptexpand(*ap, 0, NULL, NULL, NULL);
2761                 *ap = dupstring(tmps);
2762                 free(tmps);
2763             }
2764         } else {
2765             char *tmps;
2766             if (!copied)
2767                 val = dupstring(val), copied = 1;
2768             untokenize(val);
2769             tmps = promptexpand(val, 0, NULL, NULL, NULL);
2770             val = dupstring(tmps);
2771             free(tmps);
2772         }
2773         opts[PROMPTSUBST] = ops;
2774         opts[PROMPTBANG] = opb;
2775         opts[PROMPTPERCENT] = opp;
2776     }
2777     /*
2778      * One of the possible set of quotes to apply, depending on
2779      * the repetitions of the (q) flag.
2780      */
2781     if (quotemod) {
2782         if (quotetype > QT_DOLLARS)
2783             quotetype = QT_DOLLARS;
2784         if (isarr) {
2785             char **ap;
2786
2787             if (!copied)
2788                 aval = arrdup(aval), copied = 1;
2789             ap = aval;
2790
2791             if (quotemod > 0) {
2792                 if (quotetype > QT_BACKSLASH) {
2793                     int sl;
2794                     char *tmp;
2795
2796                     for (; *ap; ap++) {
2797                         int pre = quotetype != QT_DOLLARS ? 1 : 2;
2798                         tmp = quotestring(*ap, NULL, quotetype);
2799                         sl = strlen(tmp);
2800                         *ap = (char *) zhalloc(pre + sl + 2);
2801                         strcpy((*ap) + pre, tmp);
2802                         ap[0][pre - 1] = ap[0][pre + sl] =
2803                             (quotetype != QT_DOUBLE ? '\'' : '"');
2804                         ap[0][pre + sl + 1] = '\0';
2805                         if (quotetype == QT_DOLLARS)
2806                           ap[0][0] = '$';
2807                     }
2808                 } else
2809                     for (; *ap; ap++)
2810                         *ap = quotestring(*ap, NULL, QT_BACKSLASH);
2811             } else {
2812                 int one = noerrs, oef = errflag, haserr = 0;
2813
2814                 if (!quoteerr)
2815                     noerrs = 1;
2816                 for (; *ap; ap++) {
2817                     haserr |= parse_subst_string(*ap);
2818                     remnulargs(*ap);
2819                     untokenize(*ap);
2820                 }
2821                 noerrs = one;
2822                 if (!quoteerr)
2823                     errflag = oef;
2824                 else if (haserr || errflag) {
2825                     zerr("parse error in parameter value");
2826                     return NULL;
2827                 }
2828             }
2829         } else {
2830             if (!copied)
2831                 val = dupstring(val), copied = 1;
2832             if (quotemod > 0) {
2833                 if (quotetype > QT_BACKSLASH) {
2834                     int pre = quotetype != QT_DOLLARS ? 1 : 2;
2835                     int sl;
2836                     char *tmp;
2837                     tmp = quotestring(val, NULL, quotetype);
2838                     sl = strlen(tmp);
2839                     val = (char *) zhalloc(pre + sl + 2);
2840                     strcpy(val + pre, tmp);
2841                     val[pre - 1] = val[pre + sl] =
2842                         (quotetype != QT_DOUBLE ? '\'' : '"');
2843                     val[pre + sl + 1] = '\0';
2844                     if (quotetype == QT_DOLLARS)
2845                       val[0] = '$';
2846                 } else
2847                     val = quotestring(val, NULL, QT_BACKSLASH);
2848             } else {
2849                 int one = noerrs, oef = errflag, haserr;
2850
2851                 if (!quoteerr)
2852                     noerrs = 1;
2853                 haserr = parse_subst_string(val);
2854                 noerrs = one;
2855                 if (!quoteerr)
2856                     errflag = oef;
2857                 else if (haserr || errflag) {
2858                     zerr("parse error in parameter value");
2859                     return NULL;
2860                 }
2861                 remnulargs(val);
2862                 untokenize(val);
2863             }
2864         }
2865     }
2866     /*
2867      * Transform special characters in the string to make them
2868      * printable.
2869      */
2870     if (visiblemod) {
2871         if (isarr) {
2872             char **ap;
2873             if (!copied)
2874                 aval = arrdup(aval), copied = 1;
2875             for (ap = aval; *ap; ap++)
2876                 *ap = nicedupstring(*ap);
2877         } else {
2878             if (!copied)
2879                 val = dupstring(val), copied = 1;
2880             val = nicedupstring(val);
2881         }
2882     }
2883     /*
2884      * Nothing particularly to do with SH_WORD_SPLIT --- this
2885      * performs lexical splitting on a string as specified by
2886      * the (z) flag.
2887      */
2888     if (shsplit) {
2889         LinkList list = NULL;
2890
2891         if (isarr) {
2892             char **ap;
2893             for (ap = aval; *ap; ap++)
2894                 list = bufferwords(list, *ap, NULL);
2895             isarr = 0;
2896         } else
2897             list = bufferwords(NULL, val, NULL);
2898
2899         if (!list || !firstnode(list))
2900             val = dupstring("");
2901         else if (!nextnode(firstnode(list)))
2902             val = getdata(firstnode(list));
2903         else {
2904             aval = hlinklist2array(list, 0);
2905             isarr = nojoin ? 1 : 2;
2906             l->list.flags |= LF_ARRAY;
2907         }
2908         copied = 1;
2909     }
2910     /*
2911      * TODO: hmm.  At this point we have to be on our toes about
2912      * whether we're putting stuff into a line or not, i.e.
2913      * we don't want to do this from a recursive call.
2914      * Rather than passing back flags in a non-trivial way, maybe
2915      * we could decide on the basis of flags passed down to us.
2916      *
2917      * This is the ideal place to do any last-minute conversion from
2918      * array to strings.  However, given all the transformations we've
2919      * already done, probably if it's going to be done it will already
2920      * have been.  (I'd really like to keep everything in aval or
2921      * equivalent and only locally decide if we need to treat it
2922      * as a scalar.)
2923      */
2924     if (isarr) {
2925         char *x;
2926         char *y;
2927         int xlen;
2928         int i;
2929         LinkNode on = n;
2930
2931         /* Handle the (u) flag; we need this before the next test */
2932         if (unique) {
2933             if(!copied)
2934                 aval = arrdup(aval);
2935
2936             i = arrlen(aval);
2937             if (i > 1)
2938                 zhuniqarray(aval);
2939         }
2940         if ((!aval[0] || !aval[1]) && !plan9) {
2941             /*
2942              * Empty array or single element.  Currently you only
2943              * get a single element array at this point from the
2944              * unique expansion above. but we can potentially
2945              * have other reasons.
2946              *
2947              * The following test removes the markers
2948              * from surrounding double quotes, but I don't know why
2949              * that's necessary.
2950              */
2951             int vallen;
2952             if (aptr > (char *) getdata(n) &&
2953                 aptr[-1] == Dnull && *fstr == Dnull)
2954                 *--aptr = '\0', fstr++;
2955             vallen = aval[0] ? strlen(aval[0]) : 0;
2956             y = (char *) hcalloc((aptr - ostr) + vallen + strlen(fstr) + 1);
2957             strcpy(y, ostr);
2958             *str = y + (aptr - ostr);
2959             if (vallen)
2960             {
2961                 strcpy(*str, aval[0]);
2962                 *str += vallen;
2963             }
2964             strcpy(*str, fstr);
2965             setdata(n, y);
2966             return n;
2967         }
2968         /* Handle (o) and (O) and their variants */
2969         if (sortit != SORTIT_ANYOLDHOW) {
2970             if (!copied)
2971                 aval = arrdup(aval);
2972             if (indord) {
2973                 if (sortit & SORTIT_BACKWARDS) {
2974                     char *copy;
2975                     char **end = aval + arrlen(aval) - 1, **start = aval;
2976
2977                     /* reverse the array */
2978                     while (start < end) {
2979                         copy = *end;
2980                         *end-- = *start;
2981                         *start++ = copy;
2982                     }
2983                 }
2984             } else {
2985                 /*
2986                  * HERE: we tested if the last element of the array
2987                  * was not a NULL string.  Why the last element?
2988                  * Why didn't we expect NULL strings to work?
2989                  * Was it just a clumsy way of testing whether there
2990                  * was enough in the array to sort?
2991                  */
2992                 strmetasort(aval, sortit, NULL);
2993             }
2994         }
2995         if (plan9) {
2996             /* Handle RC_EXPAND_PARAM */
2997             LinkNode tn;
2998             local_list1(tl);
2999
3000             *--fstr = Marker;
3001             init_list1(tl, fstr);
3002             if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, 0))
3003                 return NULL;
3004             *str = aptr;
3005             tn = firstnode(&tl);
3006             while ((x = *aval++)) {
3007                 if (prenum || postnum)
3008                     x = dopadding(x, prenum, postnum, preone, postone,
3009                                   premul, postmul
3010 #ifdef MULTIBYTE_SUPPORT
3011                                   , multi_width
3012 #endif
3013                         );
3014                 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3015                     return NULL;
3016                 xlen = strlen(x);
3017                 for (tn = firstnode(&tl);
3018                      tn && *(y = (char *) getdata(tn)) == Marker;
3019                      incnode(tn)) {
3020                     strcatsub(&y, ostr, aptr, x, xlen, y + 1, globsubst,
3021                               copied);
3022                     if (qt && !*y && isarr != 2)
3023                         y = dupstring(nulstring);
3024                     if (plan9)
3025                         setdata(n, (void *) y), plan9 = 0;
3026                     else
3027                         insertlinknode(l, n, (void *) y), incnode(n);
3028                 }
3029             }
3030             for (; tn; incnode(tn)) {
3031                 y = (char *) getdata(tn);
3032                 if (*y == Marker)
3033                     continue;
3034                 if (qt && !*y && isarr != 2)
3035                     y = dupstring(nulstring);
3036                 if (plan9)
3037                     setdata(n, (void *) y), plan9 = 0;
3038                 else
3039                     insertlinknode(l, n, (void *) y), incnode(n);
3040             }
3041             if (plan9) {
3042                 uremnode(l, n);
3043                 return n;
3044             }
3045         } else {
3046             /*
3047              * Not RC_EXPAND_PARAM: simply join the first and
3048              * last values.
3049              * TODO: how about removing the restriction that
3050              * aval[1] is non-NULL to promote consistency?, or
3051              * simply changing the test so that we drop into
3052              * the scalar branch, instead of tricking isarr?
3053              */
3054             x = aval[0];
3055             if (prenum || postnum)
3056                 x = dopadding(x, prenum, postnum, preone, postone,
3057                               premul, postmul
3058 #ifdef MULTIBYTE_SUPPORT
3059                               , multi_width
3060 #endif
3061                     );
3062             if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3063                 return NULL;
3064             xlen = strlen(x);
3065             strcatsub(&y, ostr, aptr, x, xlen, NULL, globsubst, copied);
3066             if (qt && !*y && isarr != 2)
3067                 y = dupstring(nulstring);
3068             setdata(n, (void *) y);
3069
3070             i = 1;
3071             /* aval[1] is non-null here */
3072             while (aval[i + 1]) {
3073                 x = aval[i++];
3074                 if (prenum || postnum)
3075                     x = dopadding(x, prenum, postnum, preone, postone,
3076                                   premul, postmul
3077 #ifdef MULTIBYTE_SUPPORT
3078                                   , multi_width
3079 #endif
3080                         );
3081                 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3082                     return NULL;
3083                 if (qt && !*x && isarr != 2)
3084                     y = dupstring(nulstring);
3085                 else {
3086                     y = dupstring(x);
3087                     if (globsubst)
3088                         shtokenize(y);
3089                 }
3090                 insertlinknode(l, n, (void *) y), incnode(n);
3091             }
3092
3093             x = aval[i];
3094             if (prenum || postnum)
3095                 x = dopadding(x, prenum, postnum, preone, postone,
3096                               premul, postmul
3097 #ifdef MULTIBYTE_SUPPORT
3098                               , multi_width
3099 #endif
3100                     );
3101             if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3102                 return NULL;
3103             xlen = strlen(x);
3104             *str = strcatsub(&y, aptr, aptr, x, xlen, fstr, globsubst, copied);
3105             if (qt && !*y && isarr != 2)
3106                 y = dupstring(nulstring);
3107             insertlinknode(l, n, (void *) y), incnode(n);
3108         }
3109         if (eval)
3110             n = on;
3111     } else {
3112         /*
3113          * Scalar value.  Handle last minute transformations
3114          * such as left- or right-padding and the (e) flag to
3115          * re-evaluate the result.
3116          */
3117         int xlen;
3118         char *x;
3119         char *y;
3120
3121         x = val;
3122         if (prenum || postnum)
3123             x = dopadding(x, prenum, postnum, preone, postone,
3124                           premul, postmul
3125 #ifdef MULTIBYTE_SUPPORT
3126                           , multi_width
3127 #endif
3128                 );
3129         if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3130             return NULL;
3131         xlen = strlen(x);
3132         *str = strcatsub(&y, ostr, aptr, x, xlen, fstr, globsubst, copied);
3133         if (qt && !*y)
3134             y = dupstring(nulstring);
3135         setdata(n, (void *) y);
3136     }
3137     if (eval)
3138         *str = (char *) getdata(n);
3139
3140     return n;
3141 }
3142
3143 /*
3144  * Arithmetic substitution: `a' is the string to be evaluated, `bptr'
3145  * points to the beginning of the string containing it.  The tail of
3146  * the string is given by `rest'. *bptr is modified with the substituted
3147  * string. The function returns a pointer to the tail in the substituted
3148  * string.
3149  */
3150
3151 /**/
3152 static char *
3153 arithsubst(char *a, char **bptr, char *rest)
3154 {
3155     char *s = *bptr, *t;
3156     char buf[BDIGBUFSIZE], *b = buf;
3157     mnumber v;
3158
3159     singsub(&a);
3160     v = matheval(a);
3161     if ((v.type & MN_FLOAT) && !outputradix)
3162         b = convfloat(v.u.d, 0, 0, NULL);
3163     else {
3164         if (v.type & MN_FLOAT)
3165             v.u.l = (zlong) v.u.d;
3166         convbase(buf, v.u.l, outputradix);
3167     }
3168     t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) +
3169                                  strlen(rest) + 1);
3170     t--;
3171     while ((*++t = *s++));
3172     t--;
3173     while ((*++t = *b++));
3174     strcat(t, rest);
3175     return t;
3176 }
3177
3178 /**/
3179 void
3180 modify(char **str, char **ptr)
3181 {
3182     char *ptr1, *ptr2, *ptr3, *lptr, c, *test, *sep, *t, *tt, tc, *e;
3183     char *copy, *all, *tmp, sav, sav1, *ptr1end;
3184     int gbal, wall, rec, al, nl, charlen, dellen;
3185     convchar_t del;
3186
3187     test = NULL;
3188
3189     if (**ptr == ':')
3190         *str = dupstring(*str);
3191
3192     while (**ptr == ':') {
3193         lptr = *ptr;
3194         (*ptr)++;
3195         wall = gbal = 0;
3196         rec = 1;
3197         c = '\0';
3198         sep = NULL;
3199
3200         for (; !c && **ptr;) {
3201             switch (**ptr) {
3202             case 'a':
3203             case 'A':
3204             case 'h':
3205             case 'r':
3206             case 'e':
3207             case 't':
3208             case 'l':
3209             case 'u':
3210             case 'q':
3211             case 'Q':
3212                 c = **ptr;
3213                 break;
3214
3215             case 's':
3216                 c = **ptr;
3217                 (*ptr)++;
3218                 ptr1 = *ptr;
3219                 MB_METACHARINIT();
3220                 charlen = MB_METACHARLENCONV(ptr1, &del);
3221 #ifdef MULTIBYTE_SUPPORT
3222                 if (del == WEOF)
3223                     del = (wint_t)((*ptr1 == Meta) ? ptr1[1] ^ 32 : *ptr1);
3224 #endif
3225                 ptr1 += charlen;
3226                 for (ptr2 = ptr1, charlen = 0; *ptr2; ptr2 += charlen) {
3227                     convchar_t del2;
3228                     charlen = MB_METACHARLENCONV(ptr2, &del2);
3229 #ifdef MULTIBYTE_SUPPORT
3230                     if (del2 == WEOF)
3231                         del2 = (wint_t)((*ptr2 == Meta) ?
3232                                         ptr2[1] ^ 32 : *ptr2);
3233 #endif
3234                     if (del2 == del)
3235                         break;
3236                 }
3237                 if (!*ptr2) {
3238                     zerr("bad substitution");
3239                     return;
3240                 }
3241                 ptr1end = ptr2;
3242                 ptr2 += charlen;
3243                 sav1 = *ptr1end;
3244                 *ptr1end = '\0';
3245                 for (ptr3 = ptr2, charlen = 0; *ptr3; ptr3 += charlen) {
3246                     convchar_t del3;
3247                     charlen = MB_METACHARLENCONV(ptr3, &del3);
3248 #ifdef MULTIBYTE_SUPPORT
3249                     if (del3 == WEOF)
3250                         del3 = (wint_t)((*ptr3 == Meta) ?
3251                                         ptr3[1] ^ 32 : *ptr3);
3252 #endif
3253                     if (del3 == del)
3254                         break;
3255                 }
3256                 sav = *ptr3;
3257                 *ptr3 = '\0';
3258                 if (*ptr1) {
3259                     zsfree(hsubl);
3260                     hsubl = ztrdup(ptr1);
3261                 }
3262                 if (!hsubl) {
3263                     zerr("no previous substitution");
3264                     return;
3265                 }
3266                 zsfree(hsubr);
3267                 for (tt = hsubl; *tt; tt++)
3268                     if (inull(*tt) && *tt != Bnullkeep)
3269                         chuck(tt--);
3270                 if (!isset(HISTSUBSTPATTERN))
3271                     untokenize(hsubl);
3272                 for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
3273                     if (inull(*tt) && *tt != Bnullkeep)
3274                         chuck(tt--);
3275                 *ptr1end = sav1;
3276                 *ptr3 = sav;
3277                 *ptr = ptr3 - 1;
3278                 if (*ptr3) {
3279                     /* Final terminator is optional. */
3280                     *ptr += charlen;
3281                 }
3282                 break;
3283
3284             case '&':
3285                 c = 's';
3286                 break;
3287
3288             case 'g':
3289                 (*ptr)++;
3290                 gbal = 1;
3291                 break;
3292
3293             case 'w':
3294                 wall = 1;
3295                 (*ptr)++;
3296                 break;
3297             case 'W':
3298                 wall = 1;
3299                 (*ptr)++;
3300                 ptr1 = get_strarg(ptr2 = *ptr, &charlen);
3301                 if ((sav = *ptr1))
3302                     *ptr1 = '\0';
3303                 sep = dupstring(ptr2 + charlen);
3304                 if (sav)
3305                     *ptr1 = sav;
3306                 *ptr = ptr1 + charlen;
3307                 c = '\0';
3308                 break;
3309
3310             case 'f':
3311                 rec = -1;
3312                 (*ptr)++;
3313                 break;
3314             case 'F':
3315                 (*ptr)++;
3316                 rec = get_intarg(ptr, &dellen);
3317                 break;
3318             default:
3319                 *ptr = lptr;
3320                 return;
3321             }
3322         }
3323         (*ptr)++;
3324         if (!c) {
3325             *ptr = lptr;
3326             return;
3327         }
3328         if (rec < 0)
3329             test = dupstring(*str);
3330
3331         while (rec--) {
3332             if (wall) {
3333                 al = 0;
3334                 all = NULL;
3335                 for (t = e = *str; (tt = findword(&e, sep));) {
3336                     tc = *e;
3337                     *e = '\0';
3338                     if (c != 'l' && c != 'u')
3339                         copy = dupstring(tt);
3340                     *e = tc;
3341                     switch (c) {
3342                     case 'a':
3343                         chabspath(&copy);
3344                         break;
3345                     case 'A':
3346                         chrealpath(&copy);
3347                         break;
3348                     case 'h':
3349                         remtpath(&copy);
3350                         break;
3351                     case 'r':
3352                         remtext(&copy);
3353                         break;
3354                     case 'e':
3355                         rembutext(&copy);
3356                         break;
3357                     case 't':
3358                         remlpaths(&copy);
3359                         break;
3360                     case 'l':
3361                         copy = casemodify(tt, CASMOD_LOWER);
3362                         break;
3363                     case 'u':
3364                         copy = casemodify(tt, CASMOD_UPPER);
3365                         break;
3366                     case 's':
3367                         if (hsubl && hsubr)
3368                             subst(&copy, hsubl, hsubr, gbal);
3369                         break;
3370                     case 'q':
3371                         copy = quotestring(copy, NULL, QT_BACKSLASH);
3372                         break;
3373                     case 'Q':
3374                         {
3375                             int one = noerrs, oef = errflag;
3376
3377                             noerrs = 1;
3378                             parse_subst_string(copy);
3379                             noerrs = one;
3380                             errflag = oef;
3381                             remnulargs(copy);
3382                             untokenize(copy);
3383                         }
3384                         break;
3385                     }
3386                     tc = *tt;
3387                     *tt = '\0';
3388                     nl = al + strlen(t) + strlen(copy);
3389                     ptr1 = tmp = (char *)zhalloc(nl + 1);
3390                     if (all)
3391                         for (ptr2 = all; *ptr2;)
3392                             *ptr1++ = *ptr2++;
3393                     for (ptr2 = t; *ptr2;)
3394                         *ptr1++ = *ptr2++;
3395                     *tt = tc;
3396                     for (ptr2 = copy; *ptr2;)
3397                         *ptr1++ = *ptr2++;
3398                     *ptr1 = '\0';
3399                     al = nl;
3400                     all = tmp;
3401                     t = e;
3402                 }
3403                 *str = all;
3404
3405             } else {
3406                 switch (c) {
3407                 case 'a':
3408                     chabspath(str);
3409                     break;
3410                 case 'A':
3411                     chrealpath(str);
3412                     break;
3413                 case 'h':
3414                     remtpath(str);
3415                     break;
3416                 case 'r':
3417                     remtext(str);
3418                     break;
3419                 case 'e':
3420                     rembutext(str);
3421                     break;
3422                 case 't':
3423                     remlpaths(str);
3424                     break;
3425                 case 'l':
3426                     *str = casemodify(*str, CASMOD_LOWER);
3427                     break;
3428                 case 'u':
3429                     *str = casemodify(*str, CASMOD_UPPER);
3430                     break;
3431                 case 's':
3432                     if (hsubl && hsubr)
3433                         subst(str, hsubl, hsubr, gbal);
3434                     break;
3435                 case 'q':
3436                     *str = quotestring(*str, NULL, QT_BACKSLASH);
3437                     break;
3438                 case 'Q':
3439                     {
3440                         int one = noerrs, oef = errflag;
3441
3442                         noerrs = 1;
3443                         parse_subst_string(*str);
3444                         noerrs = one;
3445                         errflag = oef;
3446                         remnulargs(*str);
3447                         untokenize(*str);
3448                     }
3449                     break;
3450                 }
3451             }
3452             if (rec < 0) {
3453                 if (!strcmp(test, *str))
3454                     rec = 0;
3455                 else
3456                     test = dupstring(*str);
3457             }
3458         }
3459     }
3460 }
3461
3462 /* get a directory stack entry */
3463
3464 /**/
3465 static char *
3466 dstackent(char ch, int val)
3467 {
3468     int backwards;
3469     LinkNode end=(LinkNode)dirstack, n;
3470
3471     backwards = ch == (isset(PUSHDMINUS) ? '+' : '-');
3472     if(!backwards && !val--)
3473         return pwd;
3474     if (backwards)
3475         for (n=lastnode(dirstack); n != end && val; val--, n=prevnode(n));
3476     else
3477         for (end=NULL, n=firstnode(dirstack); n && val; val--, n=nextnode(n));
3478     if (n == end) {
3479         if (backwards && !val)
3480             return pwd;
3481         if (isset(NOMATCH))
3482             zerr("not enough directory stack entries.");
3483         return NULL;
3484     }
3485     return (char *)getdata(n);
3486 }