26763: fix problem on failed cd -s to relative path
[zsh.git] / Src / subst.c
blob5033dd492148a9c4696ff593404f699c79e5bcfa
1 /*
2 * subst.c - various substitutions
4 * This file is part of zsh, the Z shell.
6 * Copyright (c) 1992-1997 Paul Falstad
7 * All rights reserved.
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
15 * In no event shall Paul Falstad or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Paul Falstad and the Zsh Development Group have been advised of
19 * the possibility of such damage.
21 * Paul Falstad and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose. The software
24 * provided hereunder is on an "as is" basis, and Paul Falstad and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
30 #include "zsh.mdh"
31 #include "subst.pro"
33 #define LF_ARRAY 1
35 /**/
36 char nulstring[] = {Nularg, '\0'};
/*
 * LF_ARRAY (defined above) is a bit or'ed into list->list.flags by
 * stringsubst() when it meets an unquoted $(...) or `...`; multsub()
 * tests it to decide whether a one-word result is returned as an array.
 * nulstring is a string consisting of the single token Nularg.
 */
38 /* Do substitutions before fork. These are:
39 * - Process substitution: <(...), >(...), =(...)
40 * - Parameter substitution
41 * - Command substitution
42 * Followed by
43 * - Quote removal
44 * - Brace expansion
45 * - Tilde and equals substitution
47 * PF_* flags are defined in zsh.h
50 /**/
51 mod_export void
52 prefork(LinkList list, int flags)
54 LinkNode node, stop = 0;
55 int keep = 0, asssub = (flags & PF_TYPESET) && isset(KSHTYPESET);
57 queue_signals();
58 for (node = firstnode(list); node; incnode(node)) {
59 if (isset(SHFILEEXPANSION)) {
61 * Here and below we avoid taking the address
62 * of a void * and then pretending it's a char **
63 * instead of a void ** by a little inefficiency.
64 * This could be avoided with some extra linked list
65 * machinery, but that would need quite a lot of work
66 * to ensure consistency. What we really need is
67 * templates...
69 char *cptr = (char *)getdata(node);
70 filesub(&cptr, flags & (PF_TYPESET|PF_ASSIGN));
72 * The assignment is so simple it's not worth
73 * testing if cptr changed...
75 setdata(node, cptr);
77 if (!(node = stringsubst(list, node, flags & PF_SINGLE, asssub))) {
78 unqueue_signals();
79 return;
82 for (node = firstnode(list); node; incnode(node)) {
83 if (node == stop)
84 keep = 0;
85 if (*(char *)getdata(node)) {
86 remnulargs(getdata(node));
87 if (unset(IGNOREBRACES) && !(flags & PF_SINGLE)) {
88 if (!keep)
89 stop = nextnode(node);
90 while (hasbraces(getdata(node))) {
91 keep = 1;
92 xpandbraces(list, &node);
95 if (unset(SHFILEEXPANSION)) {
96 char *cptr = (char *)getdata(node);
97 filesub(&cptr, flags & (PF_TYPESET|PF_ASSIGN));
98 setdata(node, cptr);
100 } else if (!(flags & PF_SINGLE) && !keep)
101 uremnode(list, node);
102 if (errflag) {
103 unqueue_signals();
104 return;
107 unqueue_signals();
111 * Perform $'...' quoting. The arguments are
112 * strstart The start of the string
113 * pstrdpos Initially, *pstrdpos is the position where the $ of the $'
114 * occurs. It will be updated to the next character after the
115 * last ' of the $'...'.
116 * The return value is the entire allocated string from strstart on the heap.
117 * Note the original string may be modified in the process.
119 /**/
120 static char *
121 stringsubstquote(char *strstart, char **pstrdpos)
123 int len;
124 char *strdpos = *pstrdpos, *strsub, *strret;
126 strsub = getkeystring(strdpos+2, &len,
127 GETKEYS_DOLLARS_QUOTE, NULL);
128 len += 2; /* measured from strdpos */
130 if (strstart != strdpos) {
131 *strdpos = '\0';
132 if (strdpos[len])
133 strret = zhtricat(strstart, strsub, strdpos + len);
134 else
135 strret = dyncat(strstart, strsub);
136 } else if (strdpos[len])
137 strret = dyncat(strsub, strdpos + len);
138 else
139 strret = strsub;
141 *pstrdpos = strret + (strdpos - strstart) + strlen(strsub);
143 return strret;
146 /**/
147 static LinkNode
148 stringsubst(LinkList list, LinkNode node, int ssub, int asssub)
150 int qt;
151 char *str3 = (char *)getdata(node);
152 char *str = str3, c;
154 while (!errflag && (c = *str)) {
155 if (((c = *str) == Inang || c == OutangProc ||
156 (str == str3 && c == Equals))
157 && str[1] == Inpar) {
158 char *subst, *rest, *snew, *sptr;
159 int str3len = str - str3, sublen, restlen;
161 if (c == Inang || c == OutangProc)
162 subst = getproc(str, &rest); /* <(...) or >(...) */
163 else
164 subst = getoutputfile(str, &rest); /* =(...) */
165 if (!subst)
166 subst = "";
168 sublen = strlen(subst);
169 restlen = strlen(rest);
170 sptr = snew = hcalloc(str3len + sublen + restlen + 1);
171 if (str3len) {
172 memcpy(sptr, str3, str3len);
173 sptr += str3len;
175 if (sublen) {
176 memcpy(sptr, subst, sublen);
177 sptr += sublen;
179 if (restlen)
180 memcpy(sptr, rest, restlen);
181 sptr[restlen] = '\0';
182 str3 = snew;
183 str = snew + str3len + sublen;
184 setdata(node, str3);
185 } else
186 str++;
188 str = str3;
190 while (!errflag && (c = *str)) {
191 if ((qt = c == Qstring) || c == String) {
192 if ((c = str[1]) == Inpar) {
193 if (!qt)
194 list->list.flags |= LF_ARRAY;
195 str++;
196 goto comsub;
197 } else if (c == Inbrack) {
198 /* $[...] */
199 char *str2 = str;
200 str2++;
201 if (skipparens(Inbrack, Outbrack, &str2)) {
202 zerr("closing bracket missing");
203 return NULL;
205 str2[-1] = *str = '\0';
206 str = arithsubst(str + 2, &str3, str2);
207 setdata(node, (void *) str3);
208 continue;
209 } else if (c == Snull) {
210 str3 = stringsubstquote(str3, &str);
211 setdata(node, (void *) str3);
212 continue;
213 } else {
214 node = paramsubst(list, node, &str, qt, ssub);
215 if (errflag || !node)
216 return NULL;
217 str3 = (char *)getdata(node);
218 continue;
220 } else if ((qt = c == Qtick) || (c == Tick ? (list->list.flags |= LF_ARRAY) : 0))
221 comsub: {
222 LinkList pl;
223 char *s, *str2 = str;
224 char endchar;
225 int l1, l2;
227 if (c == Inpar) {
228 endchar = Outpar;
229 str[-1] = '\0';
230 #ifdef DEBUG
231 if (skipparens(Inpar, Outpar, &str))
232 dputs("BUG: parse error in command substitution");
233 #else
234 skipparens(Inpar, Outpar, &str);
235 #endif
236 str--;
237 } else {
238 endchar = c;
239 *str = '\0';
241 while (*++str != endchar)
242 DPUTS(!*str, "BUG: parse error in command substitution");
244 *str++ = '\0';
245 if (endchar == Outpar && str2[1] == '(' && str[-2] == ')') {
246 /* Math substitution of the form $((...)) */
247 str[-2] = '\0';
248 str = arithsubst(str2 + 2, &str3, str);
249 setdata(node, (void *) str3);
250 continue;
253 /* It is a command substitution, which will be parsed again *
254 * by the lexer, so we untokenize it first, but we cannot use *
255 * untokenize() since in the case of `...` some Bnulls should *
256 * be left unchanged. Note that the lexer doesn't tokenize *
257 * the body of a command substitution so if there are some *
258 * tokens here they are from a ${(e)~...} substitution. */
259 for (str = str2; (c = *++str); )
260 if (itok(c) && c != Nularg &&
261 !(endchar != Outpar && c == Bnull &&
262 (str[1] == '$' || str[1] == '\\' || str[1] == '`' ||
263 (qt && str[1] == '"'))))
264 *str = ztokens[c - Pound];
265 str++;
266 if (!(pl = getoutput(str2 + 1, qt || ssub))) {
267 zerr("parse error in command substitution");
268 return NULL;
270 if (endchar == Outpar)
271 str2--;
272 if (!(s = (char *) ugetnode(pl))) {
273 str = strcpy(str2, str);
274 continue;
276 if (!qt && ssub && isset(GLOBSUBST))
277 shtokenize(s);
278 l1 = str2 - str3;
279 l2 = strlen(s);
280 if (nonempty(pl)) {
281 LinkNode n = lastnode(pl);
282 str2 = (char *) hcalloc(l1 + l2 + 1);
283 strcpy(str2, str3);
284 strcpy(str2 + l1, s);
285 setdata(node, str2);
286 insertlinklist(pl, node, list);
287 s = (char *) getdata(node = n);
288 l1 = 0;
289 l2 = strlen(s);
291 str2 = (char *) hcalloc(l1 + l2 + strlen(str) + 1);
292 if (l1)
293 strcpy(str2, str3);
294 strcpy(str2 + l1, s);
295 str = strcpy(str2 + l1 + l2, str);
296 str3 = str2;
297 setdata(node, str3);
298 continue;
299 } else if (asssub && ((c == '=') || c == Equals) && str != str3) {
301 * We are in a normal argument which looks like an assignment
302 * and is to be treated like one, with no word splitting.
304 ssub = 1;
306 str++;
308 return errflag ? NULL : node;
312 * Simplified version of the prefork/singsub processing where
313 * we only do substitutions appropriate to quoting. Currently
314 * this means only the expansions in $'....'. This is used
315 * for the end tag for here documents. As we are not doing
316 * `...` expansions, we just use those for quoting. However,
317 * they stay in the text. This is weird, but that's not
318 * my fault.
320 * The remnulargs() makes this consistent with the other forms
321 * of substitution, indicating that quotes have been fully
322 * processed.
324 * The fully processed string is returned.
327 /**/
328 char *
329 quotesubst(char *str)
331 char *s = str;
333 while (*s) {
334 if (*s == String && s[1] == Snull) {
335 str = stringsubstquote(str, &s);
336 } else {
337 s++;
340 remnulargs(str);
341 return str;
344 /**/
345 mod_export void
346 globlist(LinkList list, int nountok)
348 LinkNode node, next;
350 badcshglob = 0;
351 for (node = firstnode(list); !errflag && node; node = next) {
352 next = nextnode(node);
353 zglob(list, node, nountok);
355 if (badcshglob == 1)
356 zerr("no match");
359 /* perform substitution on a single word */
361 /**/
362 mod_export void
363 singsub(char **s)
365 local_list1(foo);
367 init_list1(foo, *s);
369 prefork(&foo, PF_SINGLE);
370 if (errflag)
371 return;
372 *s = (char *) ugetnode(&foo);
373 DPUTS(nonempty(&foo), "BUG: singsub() produced more than one word!");
376 /* Perform substitution on a single word, *s. Unlike with singsub(), the
377 * result can be more than one word. If split is non-zero, the string is
378 * first word-split using IFS, but only for non-quoted "whitespace" (as
379 * indicated by Dnull, Snull, Tick, Bnull, Inpar, and Outpar).
381 * If arg "a" was non-NULL and we got an array as a result of the parsing,
382 * the strings are stored in *a (even for a 1-element array) and *isarr is
383 * set to 1. Otherwise, *isarr is set to 0, and the result is put into *s,
384 * with any necessary joining of multiple elements using sep (which can be
385 * NULL to use IFS). The return value is true iff the expansion resulted
386 * in an empty list. */
388 /**/
389 static int
390 multsub(char **s, int split, char ***a, int *isarr, char *sep)
392 int l;
393 char **r, **p, *x = *s;
394 local_list1(foo);
396 if (split) {
398 * This doesn't handle multibyte characters, but we're
399 * looking for whitespace separators which must be ASCII.
401 for ( ; *x; x += l) {
402 char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
403 l++;
404 if (!iwsep(STOUC(c)))
405 break;
409 init_list1(foo, x);
411 if (split) {
412 LinkNode n = firstnode(&foo);
413 int inq = 0, inp = 0;
414 MB_METACHARINIT();
415 for ( ; *x; x += l) {
416 int rawc = -1;
417 convchar_t c;
418 if (itok(STOUC(*x))) {
419 /* token, can't be separator, must be single byte */
420 rawc = *x;
421 l = 1;
422 } else {
423 l = MB_METACHARLENCONV(x, &c);
424 if (!inq && !inp && WC_ZISTYPE(c, ISEP)) {
425 *x = '\0';
426 for (x += l; *x; x += l) {
427 if (itok(STOUC(*x))) {
428 /* as above */
429 rawc = *x;
430 l = 1;
431 break;
433 l = MB_METACHARLENCONV(x, &c);
434 if (!WC_ZISTYPE(c, ISEP))
435 break;
437 if (!*x)
438 break;
439 insertlinknode(&foo, n, (void *)x), incnode(n);
442 switch (rawc) {
443 case Dnull: /* " */
444 case Snull: /* ' */
445 case Tick: /* ` (note: no Qtick!) */
446 /* These always occur in unnested pairs. */
447 inq = !inq;
448 break;
449 case Inpar: /* ( */
450 inp++;
451 break;
452 case Outpar: /* ) */
453 inp--;
454 break;
455 case Bnull: /* \ */
456 case Bnullkeep:
457 /* The parser verified the following char's existence. */
458 x += l;
459 l = MB_METACHARLEN(x);
460 break;
465 prefork(&foo, 0);
466 if (errflag) {
467 if (isarr)
468 *isarr = 0;
469 return 0;
472 if ((l = countlinknodes(&foo)) > 1 || (foo.list.flags & LF_ARRAY && a)) {
473 p = r = hcalloc((l + 1) * sizeof(char*));
474 while (nonempty(&foo))
475 *p++ = (char *)ugetnode(&foo);
476 *p = NULL;
477 /* We need a way to figure out if a one-item result was a scalar
478 * or a single-item array. The parser will have set LF_ARRAY
479 * in the latter case, allowing us to return it as an array to
480 * our caller (if they provided for that result). */
481 if (a && (l > 1 || foo.list.flags & LF_ARRAY)) {
482 *a = r;
483 *isarr = SCANPM_MATCHMANY;
484 return 0;
486 *s = sepjoin(r, sep, 1);
487 if (isarr)
488 *isarr = 0;
489 return 0;
491 if (l)
492 *s = (char *) ugetnode(&foo);
493 else
494 *s = dupstring("");
495 if (isarr)
496 *isarr = 0;
497 return !l;
501 * ~, = subs: assign & PF_TYPESET => typeset or magic equals
502 * assign & PF_ASSIGN => normal assignment
505 /**/
506 mod_export void
507 filesub(char **namptr, int assign)
509 char *eql = NULL, *sub = NULL, *str, *ptr;
510 int len;
512 filesubstr(namptr, assign);
514 if (!assign)
515 return;
517 if (assign & PF_TYPESET) {
518 if ((*namptr)[1] && (eql = sub = strchr(*namptr + 1, Equals))) {
519 str = sub + 1;
520 if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) {
521 sub[1] = '\0';
522 *namptr = dyncat(*namptr, str);
524 } else
525 return;
528 ptr = *namptr;
529 while ((sub = strchr(ptr, ':'))) {
530 str = sub + 1;
531 len = sub - *namptr;
532 if (sub > eql &&
533 (sub[1] == Tilde || sub[1] == Equals) &&
534 filesubstr(&str, assign)) {
535 sub[1] = '\0';
536 *namptr = dyncat(*namptr, str);
538 ptr = *namptr + len + 1;
542 /**/
543 mod_export int
544 filesubstr(char **namptr, int assign)
546 #define isend(c) ( !(c) || (c)=='/' || (c)==Inpar || (assign && (c)==':') )
547 #define isend2(c) ( !(c) || (c)==Inpar || (assign && (c)==':') )
548 char *str = *namptr;
550 if (*str == Tilde && str[1] != '=' && str[1] != Equals) {
551 Shfunc dirfunc;
552 char *ptr, *tmp, *res, *ptr2;
553 int val;
555 val = zstrtol(str + 1, &ptr, 10);
556 if (isend(str[1])) { /* ~ */
557 *namptr = dyncat(home ? home : "", str + 1);
558 return 1;
559 } else if (str[1] == '+' && isend(str[2])) { /* ~+ */
560 *namptr = dyncat(pwd, str + 2);
561 return 1;
562 } else if (str[1] == '-' && isend(str[2])) { /* ~- */
563 *namptr = dyncat((tmp = oldpwd) ? tmp : pwd, str + 2);
564 return 1;
565 } else if (str[1] == Inbrack &&
566 (dirfunc = getshfunc("zsh_directory_name")) &&
567 (ptr2 = strchr(str+2, Outbrack))) {
568 char **arr;
569 untokenize(tmp = dupstrpfx(str+2, ptr2 - (str+2)));
570 remnulargs(tmp);
571 arr = subst_string_by_func(dirfunc, "n", tmp);
572 res = arr ? *arr : NULL;
573 if (res) {
574 *namptr = dyncat(res, ptr2+1);
575 return 1;
577 if (isset(NOMATCH))
578 zerr("no directory expansion: ~[%s]", tmp);
579 return 0;
580 } else if (!inblank(str[1]) && isend(*ptr) &&
581 (!idigit(str[1]) || (ptr - str < 4))) {
582 char *ds;
584 if (val < 0)
585 val = -val;
586 ds = dstackent(str[1], val);
587 if (!ds)
588 return 0;
589 *namptr = dyncat(ds, ptr);
590 return 1;
591 } else if ((ptr = itype_end(str+1, IUSER, 0)) != str+1) { /* ~foo */
592 char *hom, save;
594 save = *ptr;
595 if (!isend(save))
596 return 0;
597 *ptr = 0;
598 if (!(hom = getnameddir(++str))) {
599 if (isset(NOMATCH))
600 zerr("no such user or named directory: %s", str);
601 *ptr = save;
602 return 0;
604 *ptr = save;
605 *namptr = dyncat(hom, ptr);
606 return 1;
608 } else if (*str == Equals && isset(EQUALS) && str[1]) { /* =foo */
609 char *pp, *cnam, *cmdstr, *str1 = str+1;
611 for (pp = str1; !isend2(*pp); pp++)
613 cmdstr = dupstrpfx(str1, pp-str1);
614 untokenize(cmdstr);
615 remnulargs(cmdstr);
616 if (!(cnam = findcmd(cmdstr, 1))) {
617 if (isset(NOMATCH))
618 zerr("%s not found", cmdstr);
619 return 0;
621 *namptr = dupstring(cnam);
622 if (*pp)
623 *namptr = dyncat(*namptr, pp);
624 return 1;
626 return 0;
627 #undef isend
628 #undef isend2
/*
 * Build the string <pb..pe> + src + s and store its start in *d.
 * l is the length of src; s may be NULL.  If glbsub is set, the copy
 * of src within the result is passed to shtokenize().
 *
 * If there is neither a prefix nor a non-empty suffix, no new buffer
 * is built: src itself (if copied is set) or a duplicate of it is
 * used, and the return value points to its start.  Otherwise the
 * return value points just after the copy of src in the new buffer.
 */

/**/
static char *
strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub,
          int copied)
{
    int prefixlen = pe - pb;
    int have_suffix = s && *s;
    char *out;

    if (!prefixlen && !have_suffix) {
        out = copied ? src : dupstring(src);
        *d = out;
        if (glbsub)
            shtokenize(out);
        return out;
    }

    out = hcalloc(prefixlen + l + (s ? strlen(s) : 0) + 1);
    *d = out;
    strncpy(out, pb, prefixlen);
    out += prefixlen;
    strcpy(out, src);
    if (glbsub)
        shtokenize(out);
    out += l;
    if (s)
        strcpy(out, s);
    return out;
}
658 * Pad the string str, returning a result from the heap (or str itself,
659 * if it didn't need padding). If str is too large, it will be truncated.
660 * Calculations are in terms of width if MULTIBYTE is in effect and
661 * multi_width is non-zero, else characters.
663 * prenum and postnum are the width to which the string needs padding
664 * on the left and right.
666 * preone and postone are string to insert once only before and after
667 * str. They will be truncated on the left or right, respectively,
668 * if necessary to fit the width. Either or both may be NULL in which
669 * case they will not be used.
671 * premul and postmul are the padding strings to be repeated before
672 * on the left (if prenum is non-zero) and right (if postnum is non-zero). If
673 * NULL the first character of IFS (typically but not necessarily a space)
674 * will be used.
/*
 * dopadding: pad or truncate str to the requested widths.
 *
 * Returns str itself when prenum + postnum equals the measured length
 * of str; otherwise returns a buffer obtained from zhalloc().  The
 * three main cases below are: padding on both sides (the string is
 * split into two halves), padding on the left only, and padding on
 * the right only.  Note that preone, postone and postmul are advanced
 * as they are copied, and that str is advanced throughout.
 */
677 static char *
678 dopadding(char *str, int prenum, int postnum, char *preone, char *postone,
679 char *premul, char *postmul
680 #ifdef MULTIBYTE_SUPPORT
681 , int multi_width
682 #endif
685 #ifdef MULTIBYTE_SUPPORT
686 #define WCPADWIDTH(cchar) (multi_width ? WCWIDTH(cchar) : 1)
687 #else
688 #define WCPADWIDTH(cchar) (1)
689 #endif
691 char *def, *ret, *t, *r;
692 int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc, cl;
693 convchar_t cchar;
695 MB_METACHARINIT();
/* def: default padding, the first character of ifs or "" if ifs is empty */
696 if (*ifs)
697 def = dupstrpfx(ifs, MB_METACHARLEN(ifs));
698 else
699 def = "";
/* empty (but non-NULL) once-only strings and missing or empty repeat strings use def */
700 if (preone && !*preone)
701 preone = def;
702 if (postone && !*postone)
703 postone = def;
704 if (!premul || !*premul)
705 premul = def;
706 if (!postmul || !*postmul)
707 postmul = def;
/* measured lengths of the string and of each padding string */
709 ls = MB_METASTRLEN2(str, multi_width);
710 lpreone = preone ? MB_METASTRLEN2(preone, multi_width) : 0;
711 lpostone = postone ? MB_METASTRLEN2(postone, multi_width) : 0;
712 lpremul = MB_METASTRLEN2(premul, multi_width);
713 lpostmul = MB_METASTRLEN2(postmul, multi_width);
/* already exactly the requested total width: nothing to do */
715 if (prenum + postnum == ls)
716 return str;
719 * Try to be careful with allocated lengths. The following
720 * is a maximum, in case we need the entire repeated string
721 * for each repetition. We probably don't, but in case the user
722 * has given us something pathological which doesn't convert
723 * easily into a width we'd better be safe.
725 lr = strlen(str) + strlen(premul) * prenum + strlen(postmul) * postnum;
727 * Same logic for preone and postone, except those may be NULL.
729 if (preone)
730 lr += strlen(preone);
731 if (postone)
732 lr += strlen(postone);
/* r is the write cursor into the result, ret its start */
733 r = ret = (char *)zhalloc(lr + 1);
735 if (prenum) {
737 * Pad on the left.
739 if (postnum) {
741 * Pad on both right and left.
742 * The strategy is to divide the string into two halves.
743 * The first half is dealt with by the left hand padding
744 * code, the second by the right hand.
746 ls2 = ls / 2;
748 /* The width left to pad for the first half. */
749 f = prenum - ls2;
750 if (f <= 0) {
751 /* First half doesn't fit. Skip the first -f width. */
752 f = -f;
753 MB_METACHARINIT();
754 while (f > 0) {
755 str += MB_METACHARLENCONV(str, &cchar);
756 f -= WCPADWIDTH(cchar);
758 /* Now finish the first half. */
759 for (c = prenum; c > 0; ) {
760 cl = MB_METACHARLENCONV(str, &cchar);
761 while (cl--)
762 *r++ = *str++;
763 c -= WCPADWIDTH(cchar);
765 } else {
766 if (f <= lpreone) {
767 if (preone) {
769 * The unrepeated string doesn't fit.
771 MB_METACHARINIT();
772 /* The width we need to skip */
773 f = lpreone - f;
774 /* So skip. */
775 for (t = preone; f > 0; ) {
776 t += MB_METACHARLENCONV(t, &cchar);
777 f -= WCPADWIDTH(cchar);
779 /* Then copy the entire remainder. */
780 while (*t)
781 *r++ = *t++;
783 } else {
784 f -= lpreone;
785 if (lpremul) {
786 if ((m = f % lpremul)) {
788 * Left over fraction of repeated string.
790 MB_METACHARINIT();
791 /* Skip this much. */
792 m = lpremul - m;
793 for (t = premul; m > 0; ) {
794 t += MB_METACHARLENCONV(t, &cchar);
795 m -= WCPADWIDTH(cchar);
797 /* Output the rest. */
798 while (*t)
799 *r++ = *t++;
801 for (cc = f / lpremul; cc--;) {
802 /* Repeat the repeated string */
803 MB_METACHARINIT();
804 for (c = lpremul, t = premul; c > 0; ) {
805 cl = MB_METACHARLENCONV(t, &cchar);
806 while (cl--)
807 *r++ = *t++;
808 c -= WCPADWIDTH(cchar);
812 if (preone) {
813 /* Output the full unrepeated string */
814 while (*preone)
815 *r++ = *preone++;
818 /* Output the first half width of the original string. */
819 for (c = ls2; c > 0; ) {
820 cl = MB_METACHARLENCONV(str, &cchar);
821 c -= WCPADWIDTH(cchar);
822 while (cl--)
823 *r++ = *str++;
826 /* Other half. In case the string had an odd length... */
827 ls2 = ls - ls2;
828 /* Width that needs padding... */
829 f = postnum - ls2;
830 if (f <= 0) {
831 /* ...is negative, truncate original string */
832 MB_METACHARINIT();
833 for (c = postnum; c > 0; ) {
834 cl = MB_METACHARLENCONV(str, &cchar);
835 c -= WCPADWIDTH(cchar);
836 while (cl--)
837 *r++ = *str++;
839 } else {
840 /* Rest of original string fits, output it complete */
841 while (*str)
842 *r++ = *str++;
843 if (f <= lpostone) {
844 if (postone) {
845 /* Can't fit unrepeated string, truncate it */
846 for (c = f; c > 0; ) {
847 cl = MB_METACHARLENCONV(postone, &cchar);
848 c -= WCPADWIDTH(cchar);
849 while (cl--)
850 *r++ = *postone++;
853 } else {
854 if (postone) {
855 f -= lpostone;
856 /* Output entire unrepeated string */
857 while (*postone)
858 *r++ = *postone++;
860 if (lpostmul) {
861 for (cc = f / lpostmul; cc--;) {
862 /* Begin the beguine */
863 for (t = postmul; *t; )
864 *r++ = *t++;
866 if ((m = f % lpostmul)) {
867 /* Fill leftovers with chunk of repeated string */
868 MB_METACHARINIT();
869 while (m > 0) {
870 cl = MB_METACHARLENCONV(postmul, &cchar);
871 m -= WCPADWIDTH(cchar);
872 while (cl--)
873 *r++ = *postmul++;
879 } else {
881 * Pad only on the left.
883 f = prenum - ls;
884 if (f <= 0) {
886 * Original string is at least as wide as padding.
887 * Truncate original string to width.
888 * Truncate on left, so skip the characters we
889 * don't need.
891 f = -f;
892 MB_METACHARINIT();
893 while (f > 0) {
894 str += MB_METACHARLENCONV(str, &cchar);
895 f -= WCPADWIDTH(cchar);
897 /* Copy the rest of the original string */
898 for (c = prenum; c > 0; ) {
899 cl = MB_METACHARLENCONV(str, &cchar);
900 while (cl--)
901 *r++ = *str++;
902 c -= WCPADWIDTH(cchar);
904 } else {
906 * We can fit the entire string...
908 if (f <= lpreone) {
909 if (preone) {
911 * ...with some fraction of the unrepeated string.
913 /* We need this width of characters. */
914 c = f;
916 * We therefore need to skip this width of
917 * characters.
919 f = lpreone - f;
920 MB_METACHARINIT();
921 for (t = preone; f > 0; ) {
922 t += MB_METACHARLENCONV(t, &cchar);
923 f -= WCPADWIDTH(cchar);
925 /* Copy the rest of preone */
926 while (*t)
927 *r++ = *t++;
929 } else {
931 * We can fit the whole of preone, needing this width
932 * first
934 f -= lpreone;
935 if (lpremul) {
936 if ((m = f % lpremul)) {
938 * Some fraction of the repeated string needed.
940 /* Need this much... */
941 c = m;
942 /* ...skipping this much first. */
943 m = lpremul - m;
944 MB_METACHARINIT();
945 for (t = premul; m > 0; ) {
946 t += MB_METACHARLENCONV(t, &cchar);
947 m -= WCPADWIDTH(cchar);
949 /* Now the rest of the repeated string. */
950 while (c > 0) {
951 cl = MB_METACHARLENCONV(t, &cchar);
952 while (cl--)
953 *r++ = *t++;
954 c -= WCPADWIDTH(cchar);
957 for (cc = f / lpremul; cc--;) {
959 * Repeat the repeated string.
961 MB_METACHARINIT();
962 for (c = lpremul, t = premul; c > 0; ) {
963 cl = MB_METACHARLENCONV(t, &cchar);
964 while (cl--)
965 *r++ = *t++;
966 c -= WCPADWIDTH(cchar);
970 if (preone) {
972 * Now the entire unrepeated string. Don't
973 * count the width, just dump it. This is
974 * significant if there are special characters
975 * in this string. It's sort of a historical
976 * accident that this worked, but there's nothing
977 * to stop us just dumping the thing out and assuming
978 * the user knows what they're doing.
980 while (*preone)
981 *r++ = *preone++;
984 /* Now the string being padded */
985 while (*str)
986 *r++ = *str++;
989 } else if (postnum) {
991 * Pad on the right.
993 f = postnum - ls;
994 MB_METACHARINIT();
995 if (f <= 0) {
997 * Original string is at least as wide as padding.
998 * Truncate original string to width.
1000 for (c = postnum; c > 0; ) {
1001 cl = MB_METACHARLENCONV(str, &cchar);
1002 while (cl--)
1003 *r++ = *str++;
1004 c -= WCPADWIDTH(cchar);
1006 } else {
1008 * There's some space to fill. First copy the original
1009 * string, counting the width. Make sure we copy the
1010 * entire string.
1012 for (c = ls; *str; ) {
1013 cl = MB_METACHARLENCONV(str, &cchar);
1014 while (cl--)
1015 *r++ = *str++;
1016 c -= WCPADWIDTH(cchar);
1018 MB_METACHARINIT();
1019 if (f <= lpostone) {
1020 if (postone) {
1022 * Not enough or only just enough space to fit
1023 * the unrepeated string. Truncate as necessary.
1025 for (c = f; c > 0; ) {
1026 cl = MB_METACHARLENCONV(postone, &cchar);
1027 while (cl--)
1028 *r++ = *postone++;
1029 c -= WCPADWIDTH(cchar);
1032 } else {
1033 if (postone) {
1034 f -= lpostone;
1035 /* Copy the entire unrepeated string */
1036 for (c = lpostone; *postone; ) {
1037 cl = MB_METACHARLENCONV(postone, &cchar);
1038 while (cl--)
1039 *r++ = *postone++;
1040 c -= WCPADWIDTH(cchar);
1043 if (lpostmul) {
1044 /* Repeat the repeated string */
1045 for (cc = f / lpostmul; cc--;) {
1046 MB_METACHARINIT();
1047 for (c = lpostmul, t = postmul; *t; ) {
1048 cl = MB_METACHARLENCONV(t, &cchar);
1049 while (cl--)
1050 *r++ = *t++;
1051 c -= WCPADWIDTH(cchar);
1055 * See if there's any fraction of the repeated
1056 * string needed to fill up the remaining space.
1058 if ((m = f % lpostmul)) {
1059 MB_METACHARINIT();
1060 while (m > 0) {
1061 cl = MB_METACHARLENCONV(postmul, &cchar);
1062 while (cl--)
1063 *r++ = *postmul++;
1064 m -= WCPADWIDTH(cchar);
/* terminate the result; ret still points at its start */
1071 *r = '\0';
1073 return ret;
1078 * Look for a delimited portion of a string. The first (possibly
1079 * multibyte) character at s is the delimiter. Various forms
1080 * of brackets are treated separately, as documented.
1082 * Returns a pointer to the final delimiter. Sets *len to the
1083 * length of the final delimiter; a NULL causes *len to be set
1084 * to zero since we shouldn't advance past it. (The string is
1085 * tokenized, so a NULL is a real end of string.)
1088 /**/
1089 char *
1090 get_strarg(char *s, int *lenp)
1092 convchar_t del;
1093 int len;
1094 char tok = 0;
1096 MB_METACHARINIT();
1097 len = MB_METACHARLENCONV(s, &del);
1098 if (!len) {
1099 *lenp = 0;
1100 return s;
1103 #ifdef MULTIBYTE_SUPPORT
1104 if (del == WEOF)
1105 del = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
1106 #endif
1107 s += len;
1108 switch (del) {
1109 case ZWC('('):
1110 del = ZWC(')');
1111 break;
1112 case '[':
1113 del = ZWC(']');
1114 break;
1115 case '{':
1116 del = ZWC('}');
1117 break;
1118 case '<':
1119 del = ZWC('>');
1120 break;
1121 case Inpar:
1122 tok = Outpar;
1123 break;
1124 case Inang:
1125 tok = Outang;
1126 break;
1127 case Inbrace:
1128 tok = Outbrace;
1129 break;
1130 case Inbrack:
1131 tok = Outbrack;
1132 break;
1135 if (tok) {
1137 * Looking for a matching token; we want the literal byte,
1138 * not a decoded multibyte character, so search specially.
1140 while (*s && *s != tok)
1141 s++;
1142 } else {
1143 convchar_t del2;
1144 len = 0;
1145 while (*s) {
1146 len = MB_METACHARLENCONV(s, &del2);
1147 #ifdef MULTIBYTE_SUPPORT
1148 if (del2 == WEOF)
1149 del2 = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
1150 #endif
1151 if (del == del2)
1152 break;
1153 s += len;
1157 *lenp = len;
1158 return s;
1162 * Get an integer argument; update *s to the end of the
1163 * final delimiter. *delmatchp is set to the length of the
1164 * matched delimiter if we have matching, delimiters and there was no error in
1165 * the evaluation, else 0.
1168 /**/
1169 static int
1170 get_intarg(char **s, int *delmatchp)
1172 int arglen;
1173 char *t = get_strarg(*s, &arglen);
1174 char *p, sav;
1175 zlong ret;
1177 *delmatchp = 0;
1178 if (!*t)
1179 return -1;
1180 sav = *t;
1181 *t = '\0';
1182 p = dupstring(*s + arglen);
1183 *s = t + arglen;
1184 *t = sav;
1185 if (parsestr(p))
1186 return -1;
1187 singsub(&p);
1188 if (errflag)
1189 return -1;
1190 ret = mathevali(p);
1191 if (errflag)
1192 return -1;
1193 if (ret < 0)
1194 ret = -ret;
1195 *delmatchp = arglen;
1196 return ret < 0 ? -ret : ret;
1199 /* Parsing for the (e) flag. */
1201 static int
1202 subst_parse_str(char **sp, int single, int err)
1204 char *s;
1206 *sp = s = dupstring(*sp);
1208 if (!(err ? parsestr(s) : parsestrnoerr(s))) {
1209 if (!single) {
1210 int qt = 0;
1212 for (; *s; s++)
1213 if (!qt) {
1214 if (*s == Qstring)
1215 *s = String;
1216 else if (*s == Qtick)
1217 *s = Tick;
1218 } else if (*s == Dnull)
1219 qt = !qt;
1221 return 0;
1223 return 1;
1226 /* Evaluation for (#) flag */
1228 static char *
1229 substevalchar(char *ptr)
1231 zlong ires = mathevali(ptr);
1232 int len = 0;
1234 if (errflag)
1235 return NULL;
1236 #ifdef MULTIBYTE_SUPPORT
1237 if (isset(MULTIBYTE) && ires > 127) {
1238 /* '\\' + 'U' + 8 bytes of character + '\0' */
1239 char buf[11];
1241 /* inefficient: should separate out \U handling from getkeystring */
1242 sprintf(buf, "\\U%.8x", (unsigned int)ires & 0xFFFFFFFFu);
1243 ptr = getkeystring(buf, &len, GETKEYS_BINDKEY, NULL);
1245 if (len == 0)
1246 #endif
1248 ptr = zhalloc(2);
1249 len = 1;
1250 sprintf(ptr, "%c", (int)ires);
1252 return metafy(ptr, len, META_USEHEAP);
1256 * Helper function for arguments to parameter flags which
1257 * handles the (p) and (~) flags as escapes and tok_arg respectively.
1260 static char *
1261 untok_and_escape(char *s, int escapes, int tok_arg)
1263 int klen;
1264 char *dst;
1266 untokenize(dst = dupstring(s));
1267 if (escapes) {
1268 dst = getkeystring(dst, &klen, GETKEYS_SEP, NULL);
1269 dst = metafy(dst, klen, META_HREALLOC);
1271 if (tok_arg)
1272 shtokenize(dst);
1273 return dst;
1276 /* parameter substitution */
/* True if c is a `$': the literal character or the String or Qstring token. */
1278 #define isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring)
/* True if c is a `[': the literal character or the Inbrack token. */
1279 #define isbrack(c) ((c) == '[' || (char)(c) == Inbrack)
1282 * Given a linked list l with node n, perform parameter substitution
1283 * starting from *str. Return the node with the substitution performed
1284 * or NULL if it failed.
1286 * If qt is true, the `$' was quoted. TODO: why can't we just look
1287 * to see if the first character was String or Qstring?
1289 * If ssub is true, we are being called via singsub(), which means
1290 * the result will be a single word. TODO: can we generate the
1291 * single word at the end? TODO: if not, or maybe in any case,
1292 * can we pass down the ssub flag from prefork with the other flags
1293 * instead of pushing it into different arguments? (How exactly
1294 * do qt and ssub differ? Are both necessary, if so is there some
1295 * better way of separating the two?)
1298 /**/
1299 static LinkNode
1300 paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
1302 char *aptr = *str, c, cc;
1303 char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n);
1304 int colf; /* != 0 means we found a colon after the name */
1306 * There are far too many flags. They need to be grouped
1307 * together into some structure which ties them to where they
1308 * came from.
1310 * Some flags have an obscure relationship to their effect which
1311 * depends on incrementing them to particular values in particular
1312 * ways.
1315 * Whether the value is an array (in aval) or not (in val). There's
1316 * a movement from storing the value in the stuff read from the
1317 * parameter (the value v) to storing them in val and aval.
1318 * However, sometimes you find v reappearing temporarily.
1320 * The values -1 and 2 are special to isarr. The value -1 is used
1321 * to force us to keep an empty array. It's tested in the YUK chunk
1322 * (I mean the one explicitly marked as such). The value 2
1323 * indicates an array has come from splitting a scalar. We use
1324 * that to override the usual rule that in double quotes we don't
1325 * remove empty elements (so "${(s.:):-foo::bar}" produces two
1326 * words). This seems to me to be quite the wrong thing to do,
1327 * but it looks like code may be relying on it. So we require (@)
1328 * as well before we keep the empty fields (look for assignments
1329 * like "isarr = nojoin ? 1 : 2").
1331 int isarr = 0;
1333 * This is just the setting of the option except we need to
1334 * take account of ^ and ^^.
1336 int plan9 = isset(RCEXPANDPARAM);
1338 * Likewise, but with ~ and ~~. Also, we turn it off later
1339 * on if qt is passed down.
1341 int globsubst = isset(GLOBSUBST);
1343 * Indicates ${(#)...}.
1345 int evalchar = 0;
1347 * Indicates ${#pm}, massaged by whichlen which is set by
1348 * the (c), (w), and (W) flags to indicate how we take the length.
1350 int getlen = 0;
1351 int whichlen = 0;
1353 * Indicates ${+pm}: a simple boolean for once.
1355 int chkset = 0;
1357 * Indicates we have tried to get a value in v but that was
1358 * unset. I don't quite understand why (v == NULL) isn't
1359 * good enough, but there are places where we seem to need
1360 * to second guess whether a value is a real value or not.
1362 int vunset = 0;
1364 * Indicates (t) flag, i.e. print out types. The code for
1365 * this actually isn't too horrifically inbred compared with
1366 * that for (P).
1368 int wantt = 0;
1370 * Indicates splitting a string into an array. There aren't
1371 * actually that many special cases for this --- which may
1372 * be why it doesn't work properly; we split in some cases
1373 * where we shouldn't, in particular on the multsubs for
1374 * handling embedded values for ${...=...} and the like.
1376 int spbreak = isset(SHWORDSPLIT) && !ssub && !qt;
1377 /* Scalar and array value, see isarr above */
1378 char *val = NULL, **aval = NULL;
1380 * vbuf and v are both used to retrieve parameter values; this
1381 * is a kludge, we pass down vbuf and it may or may not return v.
1383 struct value vbuf;
1384 Value v = NULL;
1386 * This expressive name refers to the set of flags which
1387 * is applied to matching for #, %, / and their doubled variants:
1388 * (M), (R), (B), (E), (N), (S).
1390 int flags = 0;
1391 /* Value from (I) flag, used for ditto. */
1392 int flnum = 0;
1394 * sortit is to be passed to strmetasort().
1395 * indord is the (a) flag, which for consistency doesn't get
1396 * combined into sortit.
1398 int sortit = SORTIT_ANYOLDHOW, indord = 0;
1399 /* (u): straightforward. */
1400 int unique = 0;
1401 /* combination of (L), (U) and (C) flags. */
1402 int casmod = CASMOD_NONE;
1404 * quotemod says we are doing either (q) (positive), (Q) (negative)
1405 * or not (0). quotetype counts the q's for the first case.
1406 * quoteerr is simply (X) but gets passed around a lot because the
1407 * combination (eX) needs it.
1409 int quotemod = 0, quotetype = QT_NONE, quoteerr = 0;
1411 * (V) flag: fairly straightforward, except that as with so
1412 * many flags it's not easy to decide where to put it in the order.
1414 int visiblemod = 0;
1416 * The (z) flag, nothing to do with SH_WORD_SPLIT which is tied to
1417 * spbreak, see above; fairly straightforward in use but c.f.
1418 * the comment for visiblemod.
1420 int shsplit = 0;
1422 * The separator from (j) and (s) respectively, or (F) and (f)
1423 * respectively (hardwired to "\n" in that case). Slightly
1424 * confusingly also used for ${#pm}, though that's at least
1425 * documented in the manual
1427 char *sep = NULL, *spsep = NULL;
1429 * Padding strings. The left and right padding strings which
1430 * are repeated, then the ones which only occur once, for
1431 * the (l) and (r) flags.
1433 char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL;
1434 /* Replacement string for /orig/repl and //orig/repl */
1435 char *replstr = NULL;
1436 /* The numbers for (l) and (r) */
1437 zlong prenum = 0, postnum = 0;
1438 #ifdef MULTIBYTE_SUPPORT
1439 /* The (m) flag: use width of multibyte characters */
1440 int multi_width = 0;
1441 #endif
1443 * Whether the value has been copied. Optimisation: if we
1444 * are modifying an expression, we only need to copy it the
1445 * first time, and if we don't modify it we can just use the
1446 * value from the parameter or input.
1448 int copied = 0;
1450 * The (A) flag for array assignment, with consequences for
1451 * splitting and joining; (AA) gives arrasg == 2 for associative
1452 * arrays.
1454 int arrasg = 0;
1456 * The (e) flag. As we need to do extra work not quite
1457 * at the end, the effect of this is kludged in in several places.
1459 int eval = 0;
1461 * The (P) flag. This interacts a bit obscurely with whether
1462 * or not we are dealing with a sub expression (subexp).
1464 int aspar = 0;
1466 * The (%) flag, c.f. visiblemod again.
1468 int presc = 0;
1470 * The (@) flag; interacts obscurely with qt and isarr.
1471 * This is one of the things that decides whether multsub
1472 * will produce an array, but in an extremely indirect fashion.
1474 int nojoin = 0;
1476 * != 0 means ${...}, otherwise $... What works without braces
1477 * is largely a historical artefact (everything works with braces,
1478 * I sincerely hope).
1480 char inbrace = 0;
1482 * Use for the (k) flag. Goes down into the parameter code,
1483 * sometimes.
1485 char hkeys = 0;
1487 * Used for the (v) flag, ditto. Not quite sure why they're
1488 * separate, but the tradition seems to be that things only
1489 * get combined when that makes the result more obscure rather
1490 * than less.
1492 char hvals = 0;
1494 * Whether we had to evaluate a subexpression, i.e. an
1495 * internal ${...} or $(...) or plain $pm. We almost don't
1496 * need to remember this (which would be neater), but the (P)
1497 * flag means the subexp and !subexp code is obscurely combined,
1498 * and the argument passing to fetchvalue has another kludge.
1500 int subexp;
1502 *s++ = '\0';
1504 * Nothing to do unless the character following the $ is
1505 * something we recognise.
1507 * Shouldn't this be a table or something? We test for all
1508 * these later on, too.
1510 c = *s;
1511 if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound &&
1512 c != '-' && c != '!' && c != '$' && c != String && c != Qstring &&
1513 c != '?' && c != Quest &&
1514 c != '*' && c != Star && c != '@' && c != '{' &&
1515 c != Inbrace && c != '=' && c != Equals && c != Hat &&
1516 c != '^' && c != '~' && c != Tilde && c != '+') {
1517 s[-1] = '$';
1518 *str = s;
1519 return n;
1521 DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()");
1523 * Extra processing if there is an opening brace: mostly
1524 * flags in parentheses, but also one ksh hack.
1526 if (c == Inbrace) {
1527 inbrace = 1;
1528 s++;
1530 * In ksh emulation a leading `!' is a special flag working
1531 * sort of like our (k).
1532 * TODO: this is one of very few cases tied directly to
1533 * the emulation mode rather than an option. Since ksh
1534 * doesn't have parameter flags it might be neater to
1535 * handle this with the ^, =, ~ stuff, below.
1537 if ((c = *s) == '!' && s[1] != Outbrace && EMULATION(EMULATE_KSH)) {
1538 hkeys = SCANPM_WANTKEYS;
1539 s++;
1540 } else if (c == '(' || c == Inpar) {
1541 char *t, sav;
1542 int tt = 0;
1543 zlong num;
1545 * The (p) flag is only remembered within
1546 * this block. It says we do print-style handling
1547 * on the values for flags, but only on those.
1549 int escapes = 0;
1551 * '~' in parentheses caused tokenization of string arg:
1552 * similar to (p).
1554 int tok_arg = 0;
1556 for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) {
1557 int arglen; /* length of modifier argument */
1558 int dellen; /* length of matched delimiter, 0 if not */
1559 char *del0; /* pointer to initial delimiter */
1561 switch (c) {
1562 case ')':
1563 case Outpar:
1564 break;
1565 case '~':
1566 case Tilde:
1567 tok_arg = !tok_arg;
1568 break;
1569 case 'A':
1570 ++arrasg;
1571 break;
1572 case '@':
1573 nojoin = 1;
1574 break;
1575 case 'M':
1576 flags |= SUB_MATCH;
1577 break;
1578 case 'R':
1579 flags |= SUB_REST;
1580 break;
1581 case 'B':
1582 flags |= SUB_BIND;
1583 break;
1584 case 'E':
1585 flags |= SUB_EIND;
1586 break;
1587 case 'N':
1588 flags |= SUB_LEN;
1589 break;
1590 case 'S':
1591 flags |= SUB_SUBSTR;
1592 break;
1593 case 'I':
1594 s++;
1595 flnum = get_intarg(&s, &dellen);
1596 if (flnum < 0)
1597 goto flagerr;
1598 s--;
1599 break;
1601 case 'L':
1602 casmod = CASMOD_LOWER;
1603 break;
1604 case 'U':
1605 casmod = CASMOD_UPPER;
1606 break;
1607 case 'C':
1608 casmod = CASMOD_CAPS;
1609 break;
1611 case 'o':
1612 if (!sortit)
1613 sortit |= SORTIT_SOMEHOW; /* sort, no modifiers */
1614 break;
1615 case 'O':
1616 sortit |= SORTIT_BACKWARDS;
1617 break;
1618 case 'i':
1619 sortit |= SORTIT_IGNORING_CASE;
1620 break;
1621 case 'n':
1622 sortit |= SORTIT_NUMERICALLY;
1623 break;
1624 case 'a':
1625 sortit |= SORTIT_SOMEHOW;
1626 indord = 1;
1627 break;
1629 case 'V':
1630 visiblemod++;
1631 break;
1633 case 'q':
1634 quotemod++, quotetype++;
1635 break;
1636 case 'Q':
1637 quotemod--;
1638 break;
1639 case 'X':
1640 quoteerr = 1;
1641 break;
1643 case 'e':
1644 eval = 1;
1645 break;
1646 case 'P':
1647 aspar = 1;
1648 break;
1650 case 'c':
1651 whichlen = 1;
1652 break;
1653 case 'w':
1654 whichlen = 2;
1655 break;
1656 case 'W':
1657 whichlen = 3;
1658 break;
1660 case 'f':
1661 spsep = "\n";
1662 break;
1663 case 'F':
1664 sep = "\n";
1665 break;
1667 case '0':
1668 spsep = zhalloc(3);
1669 spsep[0] = Meta;
1670 spsep[1] = '\0' ^ 32;
1671 spsep[2] = '\0';
1672 break;
1674 case 's':
1675 tt = 1;
1676 /* fall through */
1677 case 'j':
1678 t = get_strarg(++s, &arglen);
1679 if (*t) {
1680 sav = *t;
1681 *t = '\0';
1682 if (tt)
1683 spsep = untok_and_escape(s + arglen,
1684 escapes, tok_arg);
1685 else
1686 sep = untok_and_escape(s + arglen,
1687 escapes, tok_arg);
1688 *t = sav;
1689 s = t + arglen - 1;
1690 } else
1691 goto flagerr;
1692 break;
1694 case 'l':
1695 tt = 1;
1696 /* fall through */
1697 case 'r':
1698 s++;
1699 /* delimiter position */
1700 del0 = s;
1701 num = get_intarg(&s, &dellen);
1702 if (num < 0)
1703 goto flagerr;
1704 if (tt)
1705 prenum = num;
1706 else
1707 postnum = num;
1708 /* must have same delimiter if more arguments */
1709 if (!dellen || memcmp(del0, s, dellen)) {
1710 /* decrement since loop will increment */
1711 s--;
1712 break;
1714 t = get_strarg(s, &arglen);
1715 if (!*t)
1716 goto flagerr;
1717 sav = *t;
1718 *t = '\0';
1719 if (tt)
1720 premul = untok_and_escape(s + arglen, escapes,
1721 tok_arg);
1722 else
1723 postmul = untok_and_escape(s + arglen, escapes,
1724 tok_arg);
1725 *t = sav;
1726 sav = *s;
1727 s = t + arglen;
1728 /* again, continue only if another start delimiter */
1729 if (memcmp(del0, s, dellen)) {
1730 /* decrement since loop will increment */
1731 s--;
1732 break;
1734 t = get_strarg(s, &arglen);
1735 if (!*t)
1736 goto flagerr;
1737 sav = *t;
1738 *t = '\0';
1739 if (tt)
1740 preone = untok_and_escape(s + arglen,
1741 escapes, tok_arg);
1742 else
1743 postone = untok_and_escape(s + arglen,
1744 escapes, tok_arg);
1745 *t = sav;
1746 /* -1 since loop will increment */
1747 s = t + arglen - 1;
1748 break;
1750 case 'm':
1751 #ifdef MULTIBYTE_SUPPORT
1752 multi_width = 1;
1753 #endif
1754 break;
1756 case 'p':
1757 escapes = 1;
1758 break;
1760 case 'k':
1761 hkeys = SCANPM_WANTKEYS;
1762 break;
1763 case 'v':
1764 hvals = SCANPM_WANTVALS;
1765 break;
1767 case 't':
1768 wantt = 1;
1769 break;
1771 case '%':
1772 presc++;
1773 break;
1775 case 'z':
1776 shsplit = 1;
1777 break;
1779 case 'u':
1780 unique = 1;
1781 break;
1783 case '#':
1784 case Pound:
1785 evalchar = 1;
1786 break;
1788 default:
1789 flagerr:
1790 zerr("error in flags");
1791 return NULL;
1794 s++;
1799 * premul, postmul specify the padding character to be used
1800 * multiple times with the (l) and (r) flags respectively.
1802 if (!premul)
1803 premul = " ";
1804 if (!postmul)
1805 postmul = " ";
1808 * Look for special unparenthesised flags.
1809 * TODO: could make these able to appear inside parentheses, too,
1810 * i.e. ${(^)...} etc.
1812 for (;;) {
1813 if ((c = *s) == '^' || c == Hat) {
1814 /* RC_EXPAND_PARAM on or off (doubled )*/
1815 if ((c = *++s) == '^' || c == Hat) {
1816 plan9 = 0;
1817 s++;
1818 } else
1819 plan9 = 1;
1820 } else if ((c = *s) == '=' || c == Equals) {
1821 /* SH_WORD_SPLIT on or off (doubled). spbreak = 2 means force */
1822 if ((c = *++s) == '=' || c == Equals) {
1823 spbreak = 0;
1824 s++;
1825 } else
1826 spbreak = 2;
1827 } else if ((c == '#' || c == Pound) &&
1828 (itype_end(s+1, IIDENT, 0) != s + 1
1829 || (cc = s[1]) == '*' || cc == Star || cc == '@'
1830 || cc == '-' || (cc == ':' && s[2] == '-')
1831 || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) {
1832 getlen = 1 + whichlen, s++;
1834 * Return the length of the parameter.
1835 * getlen can be more than 1 to indicate characters (2),
1836 * words ignoring multiple delimiters (3), words taking
1837 * account of multiple delimiters (4). The delimiter is in
1838 * spsep, NULL means $IFS.
1840 } else if (c == '~' || c == Tilde) {
1841 /* GLOB_SUBST on or off (doubled) */
1842 if ((c = *++s) == '~' || c == Tilde) {
1843 globsubst = 0;
1844 s++;
1845 } else
1846 globsubst = 1;
1847 } else if (c == '+') {
1849 * Return whether indicated parameter is set.
1850 * Try to handle this when parameter is named
1851 * by (P) (second part of test).
1853 if (itype_end(s+1, IIDENT, 0) != s+1 || (aspar && isstring(s[1]) &&
1854 (s[2] == Inbrace || s[2] == Inpar)))
1855 chkset = 1, s++;
1856 else if (!inbrace) {
1857 /* Special case for `$+' on its own --- leave unmodified */
1858 *aptr = '$';
1859 *str = aptr + 1;
1860 return n;
1861 } else {
1862 zerr("bad substitution");
1863 return NULL;
1865 } else if (inbrace && inull(*s)) {
1867 * Handles things like ${(f)"$(<file)"} by skipping
1868 * the double quotes. We don't need to know what was
1869 * actually there; the presence of a String or Qstring
1870 * is good enough.
1872 s++;
1873 } else
1874 break;
1876 /* Don't activate special pattern characters if inside quotes */
1877 globsubst = globsubst && !qt;
1880 * At this point, we usually expect a parameter name.
1881 * However, there may be a nested ${...} or $(...).
1882 * These say that the parameter itself is somewhere inside,
1883 * or that there isn't a parameter and we will get the values
1884 * from a command substitution itself. In either case,
1885 * the current instance of paramsubst() doesn't fetch a value,
1886 * it just operates on what gets passed up.
1887 * (The first ought to have been {...}, reserving ${...}
1888 * for substituting a value at that point, but it's too late now.)
1890 idbeg = s;
1891 if ((subexp = (inbrace && s[-1] && isstring(*s) &&
1892 (s[1] == Inbrace || s[1] == Inpar)))) {
1893 int sav;
1894 int quoted = *s == Qstring;
1896 val = s++;
1897 skipparens(*s, *s == Inpar ? Outpar : Outbrace, &s);
1898 sav = *s;
1899 *s = 0;
1901 * This handles arrays. TODO: this is not the most obscure call to
1902 * multsub() (see below) but even so it would be nicer to pass down
1903 * and back the arrayness more rationally. In that case, we should
1904 * remove the aspar test and extract a value from an array, if
1905 * necessary, when we handle (P) lower down.
1907 if (multsub(&val, 0, (aspar ? NULL : &aval), &isarr, NULL) && quoted) {
1908 /* Empty quoted string --- treat as null string, not elided */
1909 isarr = -1;
1910 aval = (char **) hcalloc(sizeof(char *));
1911 aspar = 0;
1912 } else if (aspar)
1913 idbeg = val;
1914 *s = sav;
1916 * This tests for the second double quote in an expression
1917 * like ${(f)"$(<file)"}, compare above.
1919 while (inull(*s))
1920 s++;
1921 v = (Value) NULL;
1922 } else if (aspar) {
1924 * No subexpression, but in any case the value is going
1925 * to give us the name of a parameter on which we do
1926 * our remaining processing. In other words, this
1927 * makes ${(P)param} work like ${(P)${param}}. (Probably
1928 * better looked at, this is the basic code for ${(P)param}
1929 * and it's been kludged into the subexp code because no
1930 * opportunity for a kludge has been neglected.)
1932 if ((v = fetchvalue(&vbuf, &s, 1, (qt ? SCANPM_DQUOTED : 0)))) {
1933 val = idbeg = getstrvalue(v);
1934 subexp = 1;
1935 } else
1936 vunset = 1;
1939 * We need to retrieve a value either if we haven't already
1940 * got it from a subexpression, or if the processing so
1941 * far has just yielded us a parameter name to be processed
1942 * with (P).
1944 if (!subexp || aspar) {
1945 char *ov = val;
1948 * Second argument: decide whether to use the subexpression or
1949 * the string next on the line as the parameter name.
1950 * Third argument: decide how processing for brackets
1951 * 1 means full processing
1952 * -1 appears to mean something along the lines of
1953 * only handle single digits and don't handle brackets.
1954 * I *think* (but it's really only a guess) that this
1955 * is used by the test below the wantt handling, so
1956 * that in certain cases we handle brackets there.
1957 * 0 would apparently mean something like we know we
1958 * should have the name of a scalar and we get cross
1959 * if there's anything present which disagrees with that
1960 * but you will search fetchvalue() in vain for comments on this.
1961 * Fourth argument gives flags to do with keys, values, quoting,
1962 * assigning depending on context and parameter flags.
1964 * This is the last mention of subexp, so presumably this
1965 * is what the code which makes sure subexp is set if aspar (the
1966 * (P) flag) is set. I *think* what's going on here is the
1967 * second argument is for both input and output: with
1968 * subexp, we only want the input effect, whereas normally
1969 * we let fetchvalue set the main string pointer s to
1970 * the end of the bit it's fetched.
1972 if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
1973 (wantt ? -1 :
1974 ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
1975 hkeys|hvals|
1976 (arrasg ? SCANPM_ASSIGNING : 0)|
1977 (qt ? SCANPM_DQUOTED : 0))) ||
1978 (v->pm && (v->pm->node.flags & PM_UNSET)) ||
1979 (v->flags & VALFLAG_EMPTY))
1980 vunset = 1;
1982 if (wantt) {
1984 * Handle the (t) flag: value now becomes the type
1985 * information for the parameter.
1987 if (v && v->pm && !(v->pm->node.flags & PM_UNSET)) {
1988 int f = v->pm->node.flags;
1990 switch (PM_TYPE(f)) {
1991 case PM_SCALAR: val = "scalar"; break;
1992 case PM_ARRAY: val = "array"; break;
1993 case PM_INTEGER: val = "integer"; break;
1994 case PM_EFLOAT:
1995 case PM_FFLOAT: val = "float"; break;
1996 case PM_HASHED: val = "association"; break;
1998 val = dupstring(val);
1999 if (v->pm->level)
2000 val = dyncat(val, "-local");
2001 if (f & PM_LEFT)
2002 val = dyncat(val, "-left");
2003 if (f & PM_RIGHT_B)
2004 val = dyncat(val, "-right_blanks");
2005 if (f & PM_RIGHT_Z)
2006 val = dyncat(val, "-right_zeros");
2007 if (f & PM_LOWER)
2008 val = dyncat(val, "-lower");
2009 if (f & PM_UPPER)
2010 val = dyncat(val, "-upper");
2011 if (f & PM_READONLY)
2012 val = dyncat(val, "-readonly");
2013 if (f & PM_TAGGED)
2014 val = dyncat(val, "-tag");
2015 if (f & PM_EXPORTED)
2016 val = dyncat(val, "-export");
2017 if (f & PM_UNIQUE)
2018 val = dyncat(val, "-unique");
2019 if (f & PM_HIDE)
2020 val = dyncat(val, "-hide");
2021 if (f & PM_HIDE)
2022 val = dyncat(val, "-hideval");
2023 if (f & PM_SPECIAL)
2024 val = dyncat(val, "-special");
2025 vunset = 0;
2026 } else
2027 val = dupstring("");
2029 v = NULL;
2030 isarr = 0;
2034 * We get in here two ways; either we need to convert v into
2035 * the local value system, or we need to get rid of brackets
2036 * even if there isn't a v.
2038 while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) {
2039 if (!v) {
2041 * Index applied to non-existent parameter; we may or may
2042 * not have a value to index, however. Create a temporary
2043 * empty parameter as a trick, and index on that. This
2044 * usually happens the second time around the loop when
2045 * we've used up the original parameter value and want to
2046 * apply a subscript to what's left. However, it's also
2047 * possible it's got something to do with some of that murky
2048 * passing of -1's as the third argument to fetchvalue() to
2049 * inhibit bracket parsing at that stage.
2051 Param pm;
2052 char *os = s;
2054 if (!isbrack(*s))
2055 break;
2056 if (vunset) {
2057 val = dupstring("");
2058 isarr = 0;
2060 pm = createparam(nulstring, isarr ? PM_ARRAY : PM_SCALAR);
2061 DPUTS(!pm, "BUG: parameter not created");
2062 if (isarr)
2063 pm->u.arr = aval;
2064 else
2065 pm->u.str = val;
2066 v = (Value) hcalloc(sizeof *v);
2067 v->isarr = isarr;
2068 v->pm = pm;
2069 v->end = -1;
2070 if (getindex(&s, v, qt ? SCANPM_DQUOTED : 0) || s == os)
2071 break;
2074 * This is where we extract a value (we know now we have
2075 * one) into the local parameters for a scalar (val) or
2076 * array (aval) value. TODO: move val and aval into
2077 * a structure with a discriminator. Hope we can make
2078 * more things array values at this point and dearrayify later.
2079 * v->isarr tells us whether the stuff from down below looks
2080 * like an array.
2082 * I think we get to discard the existing value of isarr
2083 * here because it's already been taken account of, either
2084 * in the subexp stuff or immediately above.
2086 if ((isarr = v->isarr)) {
2088 * No way to get here with v->flags & VALFLAG_INV, so
2089 * getvaluearr() is called by getarrvalue(); needn't test
2090 * PM_HASHED.
2092 if (v->isarr == SCANPM_WANTINDEX) {
2093 isarr = v->isarr = 0;
2094 val = dupstring(v->pm->node.nam);
2095 } else
2096 aval = getarrvalue(v);
2097 } else {
2098 /* Value retrieved from parameter/subexpression is scalar */
2099 if (v->pm->node.flags & PM_ARRAY) {
2101 * Although the value is a scalar, the parameter
2102 * itself is an array. Presumably this is due to
2103 * being quoted, or doing single substitution or something,
2104 * TODO: we're about to do some definitely stringy
2105 * stuff, so something like this bit is probably
2106 * necessary. However, I'd like to leave any
2107 * necessary joining of arrays until this point
2108 * to avoid the multsub() horror.
2110 int tmplen = arrlen(v->pm->gsu.a->getfn(v->pm));
2112 if (v->start < 0)
2113 v->start += tmplen + ((v->flags & VALFLAG_INV) ? 1 : 0);
2114 if (!(v->flags & VALFLAG_INV) &&
2115 (v->start >= tmplen || v->start < 0))
2116 vunset = 1;
2118 if (!vunset) {
2120 * There really is a value. Padding and case
2121 * transformations used to be handled here, but
2122 * are now handled in getstrvalue() for greater
2123 * consistency. However, we get unexpected effects
2124 * if we allow them to be applied on every call, so
2125 * set the flag that allows them to be substituted.
2127 v->flags |= VALFLAG_SUBST;
2128 val = getstrvalue(v);
2132 * Finished with the original parameter and its indices;
2133 * carry on looping to see if we need to do more indexing.
2134 * This means we finally get rid of v in favour of val and
2135 * aval. We could do with somehow encapsulating the bit
2136 * where we need v.
2138 v = NULL;
2139 if (!inbrace)
2140 break;
2143 * We're now past the name or subexpression; the only things
2144 * which can happen now are a closing brace, one of the standard
2145 * parameter postmodifiers, or a history-style colon-modifier.
2147 * Again, this duplicates tests for characters we're about to
2148 * examine properly later on.
2150 if (inbrace &&
2151 (c = *s) != '-' && c != '+' && c != ':' && c != '%' && c != '/' &&
2152 c != '=' && c != Equals &&
2153 c != '#' && c != Pound &&
2154 c != '?' && c != Quest &&
2155 c != '}' && c != Outbrace) {
2156 zerr("bad substitution");
2157 return NULL;
2160 * Join arrays up if we're in quotes and there isn't some
2161 * override such as (@).
2162 * TODO: hmm, if we're called as part of some recursive
2163 * substitution do we want to delay this until we get back to
2164 * the top level? Or is if there's a qt (i.e. this parameter
2165 * substitution is in quotes) always good enough? Potentially
2166 * we may be OK by now --- all potential `@'s and subexpressions
2167 * have been handled, including any [@] index which comes up
2168 * by virtue of v->isarr being set to SCANPM_ISVAR_AT which
2169 * is now in isarr.
2171 * However, if we are replacing multsub() with something that
2172 * doesn't mangle arrays, we may need to delay this step until after
2173 * the foo:- or foo:= or whatever that causes that. Note the value
2174 * (string or array) at this point is irrelevant if we are going to
2175 * be doing that. This would mean // and stuff get applied
2176 * arraywise even if quoted. That's probably wrong, so maybe
2177 * this just stays.
2179 * We do a separate stage of dearrayification in the YUK chunk,
2180 * I think mostly because of the way we make array or scalar
2181 * values appear to the caller.
2183 if (isarr) {
2184 if (nojoin)
2185 isarr = -1;
2186 if (qt && !getlen && isarr > 0) {
2187 val = sepjoin(aval, sep, 1);
2188 isarr = 0;
2192 idend = s;
2193 if (inbrace) {
2195 * This is to match a closing double quote in case
2196 * we didn't have a subexpression, e.g. ${"foo"}.
2197 * This form is pointless, but logically it ought to work.
2199 while (inull(*s))
2200 s++;
2203 * We don't yet know whether a `:' introduces a history-style
2204 * colon modifier or qualifies something like ${...:=...}.
2205 * But if we remember the colon here it's easy to check later.
2207 if ((colf = *s == ':'))
2208 s++;
2211 /* fstr is to be the text following the substitution. If we have *
2212 * braces, we look for it here, else we infer it later on. */
2213 fstr = s;
2214 if (inbrace) {
2215 int bct;
2216 for (bct = 1; (c = *fstr); fstr++) {
2217 if (c == Inbrace)
2218 bct++;
2219 else if (c == Outbrace && !--bct)
2220 break;
2223 if (bct) {
2224 noclosebrace:
2225 zerr("closing brace expected");
2226 return NULL;
2228 if (c)
2229 *fstr++ = '\0';
2232 /* Check for ${..?..} or ${..=..} or one of those. *
2233 * Only works if the name is in braces. */
2235 if (inbrace && ((c = *s) == '-' ||
2236 c == '+' ||
2237 c == ':' || /* i.e. a doubled colon */
2238 c == '=' || c == Equals ||
2239 c == '%' ||
2240 c == '#' || c == Pound ||
2241 c == '?' || c == Quest ||
2242 c == '/')) {
2245 * Default index is 1 if no (I) or (I) gave zero. But
2246 * why don't we set the default explicitly at the start
2247 * and massage any passed index where we set flnum anyway?
2249 if (!flnum)
2250 flnum++;
2251 if (c == '%')
2252 flags |= SUB_END;
2254 /* Check for ${..%%..} or ${..##..} */
2255 if ((c == '%' || c == '#' || c == Pound) && c == s[1]) {
2256 s++;
2257 /* we have %%, not %, or ##, not # */
2258 flags |= SUB_LONG;
2260 s++;
2261 if (s[-1] == '/') {
2262 char *ptr;
2264 * previous flags are irrelevant, except for (S) which
2265 * indicates shortest substring; else look for longest.
2267 flags = (flags & SUB_SUBSTR) ? 0 : SUB_LONG;
2268 if ((c = *s) == '/') {
2269 /* doubled, so replace all occurrences */
2270 flags |= SUB_GLOBAL;
2271 c = *++s;
2273 /* Check for anchored substitution */
2274 if (c == '#' || c == Pound) {
2276 * anchor at head: this is the `normal' case in
2277 * getmatch and we only require the flag if SUB_END
2278 * is also present.
2280 flags |= SUB_START;
2281 s++;
2283 if (*s == '%') {
2284 /* anchor at tail */
2285 flags |= SUB_END;
2286 s++;
2288 if (!(flags & (SUB_START|SUB_END))) {
2289 /* No anchor, so substring */
2290 flags |= SUB_SUBSTR;
2293 * Find the / marking the end of the search pattern.
2294 * If there isn't one, we're just going to delete that,
2295 * i.e. replace it with an empty string.
2297 * We used to use double backslashes to quote slashes,
2298 * but actually that was buggy and using a single backslash
2299 * is easier and more obvious.
2301 for (ptr = s; (c = *ptr) && c != '/'; ptr++)
2303 if ((c == Bnull || c == Bnullkeep || c == '\\') && ptr[1])
2305 if (ptr[1] == '/')
2306 chuck(ptr);
2307 else
2308 ptr++;
2311 replstr = (*ptr && ptr[1]) ? ptr+1 : "";
2312 *ptr = '\0';
2315 /* See if this was ${...:-...}, ${...:=...}, etc. */
2316 if (colf)
2317 flags |= SUB_ALL;
2319 * With no special flags, i.e. just a # or % or whatever,
2320 * the matched portion is removed and we keep the rest.
2321 * We also want the rest when we're doing a substitution.
2323 if (!(flags & (SUB_MATCH|SUB_REST|SUB_BIND|SUB_EIND|SUB_LEN)))
2324 flags |= SUB_REST;
2326 if (colf && !vunset)
2327 vunset = (isarr) ? !*aval : !*val || (*val == Nularg && !val[1]);
2329 switch (s[-1]) {
2330 case '+':
2331 if (vunset) {
2332 val = dupstring("");
2333 copied = 1;
2334 isarr = 0;
2335 break;
2337 vunset = 1;
2338 /* Fall Through! */
2339 case '-':
2340 if (vunset) {
2341 int ws = opts[SHWORDSPLIT];
2342 val = dupstring(s);
2343 /* If word-splitting is enabled, we ask multsub() to split
2344 * the substituted string at unquoted whitespace. Then, we
2345 * turn off spbreak so that no further splitting occurs.
2346 * This allows a construct such as ${1+"$@"} to correctly
2347 * keep its array splits, and weird constructs such as
2348 * ${str+"one two" "3 2 1" foo "$str"} to only be split
2349 * at the unquoted spaces. */
2350 opts[SHWORDSPLIT] = spbreak;
2351 multsub(&val, spbreak && !aspar, (aspar ? NULL : &aval), &isarr, NULL);
2352 opts[SHWORDSPLIT] = ws;
2353 copied = 1;
2354 spbreak = 0;
2356 break;
2357 case ':':
2358 /* this must be `::=', unconditional assignment */
2359 if (*s != '=' && *s != Equals)
2360 goto noclosebrace;
2361 vunset = 1;
2362 s++;
2363 /* Fall through */
2364 case '=':
2365 case Equals:
2366 if (vunset) {
2367 int ws = opts[SHWORDSPLIT];
2368 char sav = *idend;
2369 int l;
2371 *idend = '\0';
2372 val = dupstring(s);
2373 if (spsep || !arrasg) {
2374 opts[SHWORDSPLIT] = 0;
2375 multsub(&val, 0, NULL, &isarr, NULL);
2376 } else {
2377 opts[SHWORDSPLIT] = spbreak;
2378 multsub(&val, spbreak, &aval, &isarr, NULL);
2379 spbreak = 0;
2381 opts[SHWORDSPLIT] = ws;
2382 if (arrasg) {
2383 /* This is an array assignment. */
2384 char *arr[2], **t, **a, **p;
2385 if (spsep || spbreak) {
2386 aval = sepsplit(val, spsep, 0, 1);
2387 isarr = nojoin ? 1 : 2;
2388 l = arrlen(aval);
2389 if (l && !*(aval[l-1]))
2390 l--;
2391 if (l && !**aval)
2392 l--, t = aval + 1;
2393 else
2394 t = aval;
2395 } else if (!isarr) {
2396 if (!*val && arrasg > 1) {
2397 arr[0] = NULL;
2398 l = 0;
2399 } else {
2400 arr[0] = val;
2401 arr[1] = NULL;
2402 l = 1;
2404 t = aval = arr;
2405 } else
2406 l = arrlen(aval), t = aval;
2407 p = a = zalloc(sizeof(char *) * (l + 1));
2408 while (l--) {
2409 untokenize(*t);
2410 *p++ = ztrdup(*t++);
2412 *p++ = NULL;
2413 if (arrasg > 1) {
2414 Param pm = sethparam(idbeg, a);
2415 if (pm)
2416 aval = paramvalarr(pm->gsu.h->getfn(pm), hkeys|hvals);
2417 } else
2418 setaparam(idbeg, a);
2419 } else {
2420 untokenize(val);
2421 setsparam(idbeg, ztrdup(val));
2423 *idend = sav;
2424 copied = 1;
2425 if (isarr) {
2426 if (nojoin)
2427 isarr = -1;
2428 if (qt && !getlen && isarr > 0 && !spsep && spbreak < 2) {
2429 val = sepjoin(aval, sep, 1);
2430 isarr = 0;
2432 sep = spsep = NULL;
2433 spbreak = 0;
2436 break;
2437 case '?':
2438 case Quest:
2439 if (vunset) {
2440 *idend = '\0';
2441 zerr("%s: %s", idbeg, *s ? s : "parameter not set");
2442 if (!interact) {
2443 if (mypid == getpid()) {
2445 * paranoia: don't check for jobs, but there shouldn't
2446 * be any if not interactive.
2448 stopmsg = 1;
2449 zexit(1, 0);
2450 } else
2451 _exit(1);
2453 return NULL;
2455 break;
2456 case '%':
2457 case '#':
2458 case Pound:
2459 case '/':
2460 /* This once was executed only `if (qt) ...'. But with that
2461 * patterns in an expansion resulting from a ${(e)...} aren't
2462 * tokenized even though this function thinks they are (it thinks
2463 * they are because parse_subst_str() turns Qstring tokens
2464 * into String tokens and for unquoted parameter expansions the
2465 * lexer normally does tokenize patterns inside parameter
2466 * expansions). */
2468 int one = noerrs, oef = errflag, haserr;
2470 if (!quoteerr)
2471 noerrs = 1;
2472 haserr = parse_subst_string(s);
2473 noerrs = one;
2474 if (!quoteerr) {
2475 errflag = oef;
2476 if (haserr)
2477 shtokenize(s);
2478 } else if (haserr || errflag) {
2479 zerr("parse error in ${...%c...} substitution", s[-1]);
2480 return NULL;
2484 #if 0
2486 * This allows # and % to be at the start of
2487 * a parameter in the substitution, which is
2488 * a bit nasty, and can be done (although
2489 * less efficiently) with anchors.
2492 char t = s[-1];
2494 singsub(&s);
2496 if (t == '/' && (flags & SUB_SUBSTR)) {
2497 if ((c = *s) == '#' || c == '%') {
2498 flags &= ~SUB_SUBSTR;
2499 if (c == '%')
2500 flags |= SUB_END;
2501 s++;
2502 } else if (c == '\\') {
2503 s++;
2506 #else
2507 singsub(&s);
2508 #endif
2512 * Either loop over an array doing replacements or
2513 * do the replacement on a string.
2515 * We need an untokenized value for matching.
2517 if (!vunset && isarr) {
2518 char **ap;
2519 if (!copied) {
2520 aval = arrdup(aval);
2521 copied = 1;
2523 for (ap = aval; *ap; ap++) {
2524 untokenize(*ap);
2526 getmatcharr(&aval, s, flags, flnum, replstr);
2527 } else {
2528 if (vunset)
2529 val = dupstring("");
2530 if (!copied) {
2531 val = dupstring(val);
2532 copied = 1;
2533 untokenize(val);
2535 getmatch(&val, s, flags, flnum, replstr);
2537 break;
2539 } else { /* no ${...=...} or anything, but possible modifiers. */
2541 * Handle ${+...}. TODO: strange, why do we handle this only
2542 * if there isn't a trailing modifier? Why don't we do this
2543 * e.g. when we handle the ${(t)...} flag?
2545 if (chkset) {
2546 val = dupstring(vunset ? "0" : "1");
2547 isarr = 0;
2548 } else if (vunset) {
2549 if (unset(UNSET)) {
2550 *idend = '\0';
2551 zerr("%s: parameter not set", idbeg);
2552 return NULL;
2554 val = dupstring("");
2556 if (colf) {
2558 * History style colon modifiers. May need to apply
2559 * on multiple elements of an array.
2561 s--;
2562 if (unset(KSHARRAYS) || inbrace) {
2563 if (!isarr)
2564 modify(&val, &s);
2565 else {
2566 char *ss;
2567 char **ap = aval;
2568 char **pp = aval = (char **) hcalloc(sizeof(char *) *
2569 (arrlen(aval) + 1));
2571 while ((*pp = *ap++)) {
2572 ss = s;
2573 modify(pp++, &ss);
2575 if (pp == aval) {
2576 char *t = "";
2577 ss = s;
2578 modify(&t, &ss);
2580 s = ss;
2582 copied = 1;
2583 if (inbrace && *s) {
2584 if (*s == ':' && !imeta(s[1]))
2585 zerr("unrecognized modifier `%c'", s[1]);
2586 else
2587 zerr("unrecognized modifier");
2588 return NULL;
2592 if (!inbrace)
2593 fstr = s;
2595 if (errflag)
2596 return NULL;
2597 if (evalchar) {
2598 int one = noerrs, oef = errflag, haserr = 0;
2600 if (!quoteerr)
2601 noerrs = 1;
2603 * Evaluate the value numerically and output the result as
2604 * a character.
2606 if (isarr) {
2607 char **aval2, **avptr, **av2ptr;
2609 aval2 = (char **)zhalloc((arrlen(aval)+1)*sizeof(char *));
2611 for (avptr = aval, av2ptr = aval2; *avptr; avptr++, av2ptr++)
2613 /* When noerrs = 1, the only error is out-of-memory */
2614 if (!(*av2ptr = substevalchar(*avptr))) {
2615 haserr = 1;
2616 break;
2619 *av2ptr = NULL;
2620 aval = aval2;
2621 } else {
2622 /* When noerrs = 1, the only error is out-of-memory */
2623 if (!(val = substevalchar(val)))
2624 haserr = 1;
2626 noerrs = one;
2627 if (!quoteerr)
2628 errflag = oef;
2629 if (haserr || errflag)
2630 return NULL;
2633 * This handles taking a length with ${#foo} and variations.
2634 * TODO: again. one might naively have thought this had the
2635 * same sort of effect as the ${(t)...} flag and the ${+...}
2636 * test, although in this case we do need the value rather
2637 * than the parameter, so maybe it's a bit different.
2639 if (getlen) {
2640 long len = 0;
2641 char buf[14];
2643 if (isarr) {
2644 char **ctr;
2645 int sl = sep ? MB_METASTRLEN(sep) : 1;
2647 if (getlen == 1)
2648 for (ctr = aval; *ctr; ctr++, len++);
2649 else if (getlen == 2) {
2650 if (*aval)
2651 for (len = -sl, ctr = aval;
2652 len += sl + MB_METASTRLEN2(*ctr, multi_width),
2653 *++ctr;);
2655 else
2656 for (ctr = aval;
2657 *ctr;
2658 len += wordcount(*ctr, spsep, getlen > 3), ctr++);
2659 } else {
2660 if (getlen < 3)
2661 len = MB_METASTRLEN2(val, multi_width);
2662 else
2663 len = wordcount(val, spsep, getlen > 3);
2666 sprintf(buf, "%ld", len);
2667 val = dupstring(buf);
2668 isarr = 0;
2670 /* At this point we make sure that our arrayness has affected the
2671 * arrayness of the linked list. Then, we can turn our value into
2672 * a scalar for convenience sake without affecting the arrayness
2673 * of the resulting value. */
2674 if (isarr)
2675 l->list.flags |= LF_ARRAY;
2676 else
2677 l->list.flags &= ~LF_ARRAY;
2678 if (isarr > 0 && !plan9 && (!aval || !aval[0])) {
2679 val = dupstring("");
2680 isarr = 0;
2681 } else if (isarr && aval && aval[0] && !aval[1]) {
2682 /* treat a one-element array as a scalar for purposes of *
2683 * concatenation with surrounding text (some${param}thing) *
2684 * and rc_expand_param handling. Note: LF_ARRAY (above) *
2685 * propagates the true array type from nested expansions. */
2686 val = aval[0];
2687 isarr = 0;
2689 /* This is where we may join arrays together, e.g. (j:,:) sets "sep", and
2690 * (afterward) may split the joined value (e.g. (s:-:) sets "spsep"). One
2691 * exception is that ${name:-word} and ${name:+word} will have already
2692 * done any requested splitting of the word value with quoting preserved.
2693 * "ssub" is true when we are called from singsub (via prefork):
2694 * it means that we must join arrays and should not split words. */
2695 if (ssub || spbreak || spsep || sep) {
2696 if (isarr) {
2697 val = sepjoin(aval, sep, 1);
2698 isarr = 0;
2700 if (!ssub && (spbreak || spsep)) {
2701 aval = sepsplit(val, spsep, 0, 1);
2702 if (!aval || !aval[0])
2703 val = dupstring("");
2704 else if (!aval[1])
2705 val = aval[0];
2706 else
2707 isarr = nojoin ? 1 : 2;
2709 if (isarr)
2710 l->list.flags |= LF_ARRAY;
2711 else
2712 l->list.flags &= ~LF_ARRAY;
2715 * Perform case modifications.
2717 if (casmod != CASMOD_NONE) {
2718 copied = 1; /* string is always modified by copy */
2719 if (isarr) {
2720 char **ap, **ap2;
2722 ap = aval;
2723 ap2 = aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1));
2725 while (*ap)
2726 *ap2++ = casemodify(*ap++, casmod);
2727 *ap2++ = NULL;
2728 } else {
2729 val = casemodify(val, casmod);
2733 * Perform prompt-style modifications.
2735 if (presc) {
2736 int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG];
2737 int opp = opts[PROMPTPERCENT];
2739 if (presc < 2) {
2740 opts[PROMPTPERCENT] = 1;
2741 opts[PROMPTSUBST] = opts[PROMPTBANG] = 0;
2744 * TODO: It would be really quite nice to abstract the
2745 * isarr and !isarr code into a function which gets
2746 * passed a pointer to a function with the effect of
2747 * the promptexpand bit. Then we could use this for
2748 * a lot of stuff and bury val/aval/isarr inside a structure
2749 * which gets passed to it.
2751 if (isarr) {
2752 char **ap;
2754 if (!copied)
2755 aval = arrdup(aval), copied = 1;
2756 ap = aval;
2757 for (; *ap; ap++) {
2758 char *tmps;
2759 untokenize(*ap);
2760 tmps = promptexpand(*ap, 0, NULL, NULL, NULL);
2761 *ap = dupstring(tmps);
2762 free(tmps);
2764 } else {
2765 char *tmps;
2766 if (!copied)
2767 val = dupstring(val), copied = 1;
2768 untokenize(val);
2769 tmps = promptexpand(val, 0, NULL, NULL, NULL);
2770 val = dupstring(tmps);
2771 free(tmps);
2773 opts[PROMPTSUBST] = ops;
2774 opts[PROMPTBANG] = opb;
2775 opts[PROMPTPERCENT] = opp;
2778 * One of the possible set of quotes to apply, depending on
2779 * the repetitions of the (q) flag.
2781 if (quotemod) {
2782 if (quotetype > QT_DOLLARS)
2783 quotetype = QT_DOLLARS;
2784 if (isarr) {
2785 char **ap;
2787 if (!copied)
2788 aval = arrdup(aval), copied = 1;
2789 ap = aval;
2791 if (quotemod > 0) {
2792 if (quotetype > QT_BACKSLASH) {
2793 int sl;
2794 char *tmp;
2796 for (; *ap; ap++) {
2797 int pre = quotetype != QT_DOLLARS ? 1 : 2;
2798 tmp = quotestring(*ap, NULL, quotetype);
2799 sl = strlen(tmp);
2800 *ap = (char *) zhalloc(pre + sl + 2);
2801 strcpy((*ap) + pre, tmp);
2802 ap[0][pre - 1] = ap[0][pre + sl] =
2803 (quotetype != QT_DOUBLE ? '\'' : '"');
2804 ap[0][pre + sl + 1] = '\0';
2805 if (quotetype == QT_DOLLARS)
2806 ap[0][0] = '$';
2808 } else
2809 for (; *ap; ap++)
2810 *ap = quotestring(*ap, NULL, QT_BACKSLASH);
2811 } else {
2812 int one = noerrs, oef = errflag, haserr = 0;
2814 if (!quoteerr)
2815 noerrs = 1;
2816 for (; *ap; ap++) {
2817 haserr |= parse_subst_string(*ap);
2818 remnulargs(*ap);
2819 untokenize(*ap);
2821 noerrs = one;
2822 if (!quoteerr)
2823 errflag = oef;
2824 else if (haserr || errflag) {
2825 zerr("parse error in parameter value");
2826 return NULL;
2829 } else {
2830 if (!copied)
2831 val = dupstring(val), copied = 1;
2832 if (quotemod > 0) {
2833 if (quotetype > QT_BACKSLASH) {
2834 int pre = quotetype != QT_DOLLARS ? 1 : 2;
2835 int sl;
2836 char *tmp;
2837 tmp = quotestring(val, NULL, quotetype);
2838 sl = strlen(tmp);
2839 val = (char *) zhalloc(pre + sl + 2);
2840 strcpy(val + pre, tmp);
2841 val[pre - 1] = val[pre + sl] =
2842 (quotetype != QT_DOUBLE ? '\'' : '"');
2843 val[pre + sl + 1] = '\0';
2844 if (quotetype == QT_DOLLARS)
2845 val[0] = '$';
2846 } else
2847 val = quotestring(val, NULL, QT_BACKSLASH);
2848 } else {
2849 int one = noerrs, oef = errflag, haserr;
2851 if (!quoteerr)
2852 noerrs = 1;
2853 haserr = parse_subst_string(val);
2854 noerrs = one;
2855 if (!quoteerr)
2856 errflag = oef;
2857 else if (haserr || errflag) {
2858 zerr("parse error in parameter value");
2859 return NULL;
2861 remnulargs(val);
2862 untokenize(val);
2867 * Transform special characters in the string to make them
2868 * printable.
2870 if (visiblemod) {
2871 if (isarr) {
2872 char **ap;
2873 if (!copied)
2874 aval = arrdup(aval), copied = 1;
2875 for (ap = aval; *ap; ap++)
2876 *ap = nicedupstring(*ap);
2877 } else {
2878 if (!copied)
2879 val = dupstring(val), copied = 1;
2880 val = nicedupstring(val);
2884 * Nothing particularly to do with SH_WORD_SPLIT --- this
2885 * performs lexical splitting on a string as specified by
2886 * the (z) flag.
2888 if (shsplit) {
2889 LinkList list = NULL;
2891 if (isarr) {
2892 char **ap;
2893 for (ap = aval; *ap; ap++)
2894 list = bufferwords(list, *ap, NULL);
2895 isarr = 0;
2896 } else
2897 list = bufferwords(NULL, val, NULL);
2899 if (!list || !firstnode(list))
2900 val = dupstring("");
2901 else if (!nextnode(firstnode(list)))
2902 val = getdata(firstnode(list));
2903 else {
2904 aval = hlinklist2array(list, 0);
2905 isarr = nojoin ? 1 : 2;
2906 l->list.flags |= LF_ARRAY;
2908 copied = 1;
2911 * TODO: hmm. At this point we have to be on our toes about
2912 * whether we're putting stuff into a line or not, i.e.
2913 * we don't want to do this from a recursive call.
2914 * Rather than passing back flags in a non-trivial way, maybe
2915 * we could decide on the basis of flags passed down to us.
2917 * This is the ideal place to do any last-minute conversion from
2918 * array to strings. However, given all the transformations we've
2919 * already done, probably if it's going to be done it will already
2920 * have been. (I'd really like to keep everything in aval or
2921 * equivalent and only locally decide if we need to treat it
2922 * as a scalar.)
2924 if (isarr) {
2925 char *x;
2926 char *y;
2927 int xlen;
2928 int i;
2929 LinkNode on = n;
2931 /* Handle the (u) flag; we need this before the next test */
2932 if (unique) {
2933 if(!copied)
2934 aval = arrdup(aval);
2936 i = arrlen(aval);
2937 if (i > 1)
2938 zhuniqarray(aval);
2940 if ((!aval[0] || !aval[1]) && !plan9) {
2942 * Empty array or single element. Currently you only
2943 * get a single element array at this point from the
2944 * unique expansion above. but we can potentially
2945 * have other reasons.
2947 * The following test removes the markers
2948 * from surrounding double quotes, but I don't know why
2949 * that's necessary.
2951 int vallen;
2952 if (aptr > (char *) getdata(n) &&
2953 aptr[-1] == Dnull && *fstr == Dnull)
2954 *--aptr = '\0', fstr++;
2955 vallen = aval[0] ? strlen(aval[0]) : 0;
2956 y = (char *) hcalloc((aptr - ostr) + vallen + strlen(fstr) + 1);
2957 strcpy(y, ostr);
2958 *str = y + (aptr - ostr);
2959 if (vallen)
2961 strcpy(*str, aval[0]);
2962 *str += vallen;
2964 strcpy(*str, fstr);
2965 setdata(n, y);
2966 return n;
2968 /* Handle (o) and (O) and their variants */
2969 if (sortit != SORTIT_ANYOLDHOW) {
2970 if (!copied)
2971 aval = arrdup(aval);
2972 if (indord) {
2973 if (sortit & SORTIT_BACKWARDS) {
2974 char *copy;
2975 char **end = aval + arrlen(aval) - 1, **start = aval;
2977 /* reverse the array */
2978 while (start < end) {
2979 copy = *end;
2980 *end-- = *start;
2981 *start++ = copy;
2984 } else {
2986 * HERE: we tested if the last element of the array
2987 * was not a NULL string. Why the last element?
2988 * Why didn't we expect NULL strings to work?
2989 * Was it just a clumsy way of testing whether there
2990 * was enough in the array to sort?
2992 strmetasort(aval, sortit, NULL);
2995 if (plan9) {
2996 /* Handle RC_EXPAND_PARAM */
2997 LinkNode tn;
2998 local_list1(tl);
3000 *--fstr = Marker;
3001 init_list1(tl, fstr);
3002 if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, 0))
3003 return NULL;
3004 *str = aptr;
3005 tn = firstnode(&tl);
3006 while ((x = *aval++)) {
3007 if (prenum || postnum)
3008 x = dopadding(x, prenum, postnum, preone, postone,
3009 premul, postmul
3010 #ifdef MULTIBYTE_SUPPORT
3011 , multi_width
3012 #endif
3014 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3015 return NULL;
3016 xlen = strlen(x);
3017 for (tn = firstnode(&tl);
3018 tn && *(y = (char *) getdata(tn)) == Marker;
3019 incnode(tn)) {
3020 strcatsub(&y, ostr, aptr, x, xlen, y + 1, globsubst,
3021 copied);
3022 if (qt && !*y && isarr != 2)
3023 y = dupstring(nulstring);
3024 if (plan9)
3025 setdata(n, (void *) y), plan9 = 0;
3026 else
3027 insertlinknode(l, n, (void *) y), incnode(n);
3030 for (; tn; incnode(tn)) {
3031 y = (char *) getdata(tn);
3032 if (*y == Marker)
3033 continue;
3034 if (qt && !*y && isarr != 2)
3035 y = dupstring(nulstring);
3036 if (plan9)
3037 setdata(n, (void *) y), plan9 = 0;
3038 else
3039 insertlinknode(l, n, (void *) y), incnode(n);
3041 if (plan9) {
3042 uremnode(l, n);
3043 return n;
3045 } else {
3047 * Not RC_EXPAND_PARAM: simply join the first and
3048 * last values.
3049 * TODO: how about removing the restriction that
3050 * aval[1] is non-NULL to promote consistency?, or
3051 * simply changing the test so that we drop into
3052 * the scalar branch, instead of tricking isarr?
3054 x = aval[0];
3055 if (prenum || postnum)
3056 x = dopadding(x, prenum, postnum, preone, postone,
3057 premul, postmul
3058 #ifdef MULTIBYTE_SUPPORT
3059 , multi_width
3060 #endif
3062 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3063 return NULL;
3064 xlen = strlen(x);
3065 strcatsub(&y, ostr, aptr, x, xlen, NULL, globsubst, copied);
3066 if (qt && !*y && isarr != 2)
3067 y = dupstring(nulstring);
3068 setdata(n, (void *) y);
3070 i = 1;
3071 /* aval[1] is non-null here */
3072 while (aval[i + 1]) {
3073 x = aval[i++];
3074 if (prenum || postnum)
3075 x = dopadding(x, prenum, postnum, preone, postone,
3076 premul, postmul
3077 #ifdef MULTIBYTE_SUPPORT
3078 , multi_width
3079 #endif
3081 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3082 return NULL;
3083 if (qt && !*x && isarr != 2)
3084 y = dupstring(nulstring);
3085 else {
3086 y = dupstring(x);
3087 if (globsubst)
3088 shtokenize(y);
3090 insertlinknode(l, n, (void *) y), incnode(n);
3093 x = aval[i];
3094 if (prenum || postnum)
3095 x = dopadding(x, prenum, postnum, preone, postone,
3096 premul, postmul
3097 #ifdef MULTIBYTE_SUPPORT
3098 , multi_width
3099 #endif
3101 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3102 return NULL;
3103 xlen = strlen(x);
3104 *str = strcatsub(&y, aptr, aptr, x, xlen, fstr, globsubst, copied);
3105 if (qt && !*y && isarr != 2)
3106 y = dupstring(nulstring);
3107 insertlinknode(l, n, (void *) y), incnode(n);
3109 if (eval)
3110 n = on;
3111 } else {
3113 * Scalar value. Handle last minute transformations
3114 * such as left- or right-padding and the (e) flag to
3115 * re-evaluate the result.
3117 int xlen;
3118 char *x;
3119 char *y;
3121 x = val;
3122 if (prenum || postnum)
3123 x = dopadding(x, prenum, postnum, preone, postone,
3124 premul, postmul
3125 #ifdef MULTIBYTE_SUPPORT
3126 , multi_width
3127 #endif
3129 if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
3130 return NULL;
3131 xlen = strlen(x);
3132 *str = strcatsub(&y, ostr, aptr, x, xlen, fstr, globsubst, copied);
3133 if (qt && !*y)
3134 y = dupstring(nulstring);
3135 setdata(n, (void *) y);
3137 if (eval)
3138 *str = (char *) getdata(n);
3140 return n;
3144 * Arithmetic substitution: `a' is the string to be evaluated, `bptr'
3145 * points to the beginning of the string containing it. The tail of
3146 * the string is given by `rest'. *bptr is modified with the substituted
3147 * string. The function returns a pointer to the tail in the substituted
3148 * string.
3151 /**/
3152 static char *
3153 arithsubst(char *a, char **bptr, char *rest)
3155 char *s = *bptr, *t;
3156 char buf[BDIGBUFSIZE], *b = buf;
3157 mnumber v;
3159 singsub(&a);
3160 v = matheval(a);
3161 if ((v.type & MN_FLOAT) && !outputradix)
3162 b = convfloat(v.u.d, 0, 0, NULL);
3163 else {
3164 if (v.type & MN_FLOAT)
3165 v.u.l = (zlong) v.u.d;
3166 convbase(buf, v.u.l, outputradix);
3168 t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) +
3169 strlen(rest) + 1);
3170 t--;
3171 while ((*++t = *s++));
3172 t--;
3173 while ((*++t = *b++));
3174 strcat(t, rest);
3175 return t;
3178 /**/
3179 void
3180 modify(char **str, char **ptr)
3182 char *ptr1, *ptr2, *ptr3, *lptr, c, *test, *sep, *t, *tt, tc, *e;
3183 char *copy, *all, *tmp, sav, sav1, *ptr1end;
3184 int gbal, wall, rec, al, nl, charlen, dellen;
3185 convchar_t del;
/*
 * Apply the run of `:'-introduced modifiers that begins at *ptr to the
 * string *str.  *str is replaced by the modified string and *ptr is
 * advanced over each modifier that was applied.  If a colon is followed
 * by something that is not a complete modifier, *ptr is reset to that
 * colon and the function returns.  A malformed `s' modifier is reported
 * with zerr().
 */
3187 test = NULL;
/* Duplicate the value before any modifier gets to work on it. */
3189 if (**ptr == ':')
3190 *str = dupstring(*str);
3192 while (**ptr == ':') {
/* Remember where this modifier starts so that *ptr can be rewound. */
3193 lptr = *ptr;
3194 (*ptr)++;
/* Per-modifier defaults: no g/w options, apply once, no letter yet. */
3195 wall = gbal = 0;
3196 rec = 1;
3197 c = '\0';
3198 sep = NULL;
/* Consume option characters until one of them sets the modifier letter c. */
3200 for (; !c && **ptr;) {
3201 switch (**ptr) {
/* Modifier letters that take no argument. */
3202 case 'a':
3203 case 'A':
3204 case 'h':
3205 case 'r':
3206 case 'e':
3207 case 't':
3208 case 'l':
3209 case 'u':
3210 case 'q':
3211 case 'Q':
3212 c = **ptr;
3213 break;
/* s: the character after the `s' delimits the two strings. */
3215 case 's':
3216 c = **ptr;
3217 (*ptr)++;
3218 ptr1 = *ptr;
3219 MB_METACHARINIT();
3220 charlen = MB_METACHARLENCONV(ptr1, &del);
3221 #ifdef MULTIBYTE_SUPPORT
3222 if (del == WEOF)
3223 del = (wint_t)((*ptr1 == Meta) ? ptr1[1] ^ 32 : *ptr1);
3224 #endif
3225 ptr1 += charlen;
/* Scan for the delimiter that ends the left-hand string. */
3226 for (ptr2 = ptr1, charlen = 0; *ptr2; ptr2 += charlen) {
3227 convchar_t del2;
3228 charlen = MB_METACHARLENCONV(ptr2, &del2);
3229 #ifdef MULTIBYTE_SUPPORT
3230 if (del2 == WEOF)
3231 del2 = (wint_t)((*ptr2 == Meta) ?
3232 ptr2[1] ^ 32 : *ptr2);
3233 #endif
3234 if (del2 == del)
3235 break;
3237 if (!*ptr2) {
3238 zerr("bad substitution");
3239 return;
3241 ptr1end = ptr2;
3242 ptr2 += charlen;
/* Temporarily terminate the left-hand string in place. */
3243 sav1 = *ptr1end;
3244 *ptr1end = '\0';
/* Scan for the optional delimiter that ends the right-hand string. */
3245 for (ptr3 = ptr2, charlen = 0; *ptr3; ptr3 += charlen) {
3246 convchar_t del3;
3247 charlen = MB_METACHARLENCONV(ptr3, &del3);
3248 #ifdef MULTIBYTE_SUPPORT
3249 if (del3 == WEOF)
3250 del3 = (wint_t)((*ptr3 == Meta) ?
3251 ptr3[1] ^ 32 : *ptr3);
3252 #endif
3253 if (del3 == del)
3254 break;
3256 sav = *ptr3;
3257 *ptr3 = '\0';
/* An empty left-hand string leaves the stored hsubl untouched. */
3258 if (*ptr1) {
3259 zsfree(hsubl);
3260 hsubl = ztrdup(ptr1);
3262 if (!hsubl) {
3263 zerr("no previous substitution");
3264 return;
3266 zsfree(hsubr);
/* Drop (via chuck()) every character inull() accepts, except Bnullkeep. */
3267 for (tt = hsubl; *tt; tt++)
3268 if (inull(*tt) && *tt != Bnullkeep)
3269 chuck(tt--);
3270 if (!isset(HISTSUBSTPATTERN))
3271 untokenize(hsubl);
3272 for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
3273 if (inull(*tt) && *tt != Bnullkeep)
3274 chuck(tt--);
/* Put back the characters overwritten with NULs above. */
3275 *ptr1end = sav1;
3276 *ptr3 = sav;
/* Leave *ptr on the last character used; the (*ptr)++ further down steps over it. */
3277 *ptr = ptr3 - 1;
3278 if (*ptr3) {
3279 /* Final terminator is optional. */
3280 *ptr += charlen;
3282 break;
/* &: handled as `s', reusing the stored hsubl and hsubr. */
3284 case '&':
3285 c = 's';
3286 break;
/* g: option passed on to subst() as its last argument. */
3288 case 'g':
3289 (*ptr)++;
3290 gbal = 1;
3291 break;
/* w: apply the modifier to each word found by findword(). */
3293 case 'w':
3294 wall = 1;
3295 (*ptr)++;
3296 break;
/*
 * W: as w, but with a separator string taken from the modifier text.
 * get_strarg() presumably locates the end of the delimited argument
 * -- confirm against its definition.
 */
3297 case 'W':
3298 wall = 1;
3299 (*ptr)++;
3300 ptr1 = get_strarg(ptr2 = *ptr, &charlen);
3301 if ((sav = *ptr1))
3302 *ptr1 = '\0';
3303 sep = dupstring(ptr2 + charlen);
3304 if (sav)
3305 *ptr1 = sav;
3306 *ptr = ptr1 + charlen;
3307 c = '\0';
3308 break;
/* f: keep re-applying the modifier until the string stops changing. */
3310 case 'f':
3311 rec = -1;
3312 (*ptr)++;
3313 break;
/* F: the repeat count comes from get_intarg() (argument syntax not visible here). */
3314 case 'F':
3315 (*ptr)++;
3316 rec = get_intarg(ptr, &dellen);
3317 break;
/* Not a recognised character: rewind to the colon and stop. */
3318 default:
3319 *ptr = lptr;
3320 return;
3323 (*ptr)++;
/* Options were seen but no modifier letter followed: rewind and stop. */
3324 if (!c) {
3325 *ptr = lptr;
3326 return;
/* For f, test holds the value from before the pass for comparison. */
3328 if (rec < 0)
3329 test = dupstring(*str);
3331 while (rec--) {
/* Word-wise: rebuild the string in `all', modifying one word at a time. */
3332 if (wall) {
3333 al = 0;
3334 all = NULL;
3335 for (t = e = *str; (tt = findword(&e, sep));) {
/* Terminate the word at e just long enough to copy it. */
3336 tc = *e;
3337 *e = '\0';
3338 if (c != 'l' && c != 'u')
3339 copy = dupstring(tt);
3340 *e = tc;
3341 switch (c) {
3342 case 'a':
3343 chabspath(&copy);
3344 break;
3345 case 'A':
3346 chrealpath(&copy);
3347 break;
3348 case 'h':
3349 remtpath(&copy);
3350 break;
3351 case 'r':
3352 remtext(&copy);
3353 break;
3354 case 'e':
3355 rembutext(&copy);
3356 break;
3357 case 't':
3358 remlpaths(&copy);
3359 break;
/*
 * NOTE(review): *e has already been restored at this point, so
 * casemodify() is handed tt without a terminator at the end of the
 * word.  It looks as if it will see the rest of the string rather
 * than just this word -- confirm.
 */
3360 case 'l':
3361 copy = casemodify(tt, CASMOD_LOWER);
3362 break;
3363 case 'u':
3364 copy = casemodify(tt, CASMOD_UPPER);
3365 break;
3366 case 's':
3367 if (hsubl && hsubr)
3368 subst(&copy, hsubl, hsubr, gbal);
3369 break;
3370 case 'q':
3371 copy = quotestring(copy, NULL, QT_BACKSLASH);
3372 break;
/* Q: parse with errors suppressed, then clean up with remnulargs()/untokenize(). */
3373 case 'Q':
3375 int one = noerrs, oef = errflag;
3377 noerrs = 1;
3378 parse_subst_string(copy);
3379 noerrs = one;
3380 errflag = oef;
3381 remnulargs(copy);
3382 untokenize(copy);
3384 break;
/*
 * Append to what has been built so far: the text between the end of
 * the previous word and the start of this one, then the modified word.
 */
3386 tc = *tt;
3387 *tt = '\0';
3388 nl = al + strlen(t) + strlen(copy);
3389 ptr1 = tmp = (char *)zhalloc(nl + 1);
3390 if (all)
3391 for (ptr2 = all; *ptr2;)
3392 *ptr1++ = *ptr2++;
3393 for (ptr2 = t; *ptr2;)
3394 *ptr1++ = *ptr2++;
3395 *tt = tc;
3396 for (ptr2 = copy; *ptr2;)
3397 *ptr1++ = *ptr2++;
3398 *ptr1 = '\0';
3399 al = nl;
3400 all = tmp;
3401 t = e;
/* NOTE(review): all is still NULL if findword() found no words -- confirm callers cope. */
3403 *str = all;
/* Otherwise apply the modifier to the whole string. */
3405 } else {
3406 switch (c) {
3407 case 'a':
3408 chabspath(str);
3409 break;
3410 case 'A':
3411 chrealpath(str);
3412 break;
3413 case 'h':
3414 remtpath(str);
3415 break;
3416 case 'r':
3417 remtext(str);
3418 break;
3419 case 'e':
3420 rembutext(str);
3421 break;
3422 case 't':
3423 remlpaths(str);
3424 break;
3425 case 'l':
3426 *str = casemodify(*str, CASMOD_LOWER);
3427 break;
3428 case 'u':
3429 *str = casemodify(*str, CASMOD_UPPER);
3430 break;
3431 case 's':
3432 if (hsubl && hsubr)
3433 subst(str, hsubl, hsubr, gbal);
3434 break;
3435 case 'q':
3436 *str = quotestring(*str, NULL, QT_BACKSLASH);
3437 break;
/* Q: parse with errors suppressed, then clean up with remnulargs()/untokenize(). */
3438 case 'Q':
3440 int one = noerrs, oef = errflag;
3442 noerrs = 1;
3443 parse_subst_string(*str);
3444 noerrs = one;
3445 errflag = oef;
3446 remnulargs(*str);
3447 untokenize(*str);
3449 break;
/* f: stop once a pass leaves the string unchanged, else remember the new value. */
3452 if (rec < 0) {
3453 if (!strcmp(test, *str))
3454 rec = 0;
3455 else
3456 test = dupstring(*str);
3462 /* get a directory stack entry */
3464 /**/
3465 static char *
3466 dstackent(char ch, int val)
3468 int backwards;
3469 LinkNode end=(LinkNode)dirstack, n;
3471 backwards = ch == (isset(PUSHDMINUS) ? '+' : '-');
3472 if(!backwards && !val--)
3473 return pwd;
3474 if (backwards)
3475 for (n=lastnode(dirstack); n != end && val; val--, n=prevnode(n));
3476 else
3477 for (end=NULL, n=firstnode(dirstack); n && val; val--, n=nextnode(n));
3478 if (n == end) {
3479 if (backwards && !val)
3480 return pwd;
3481 if (isset(NOMATCH))
3482 zerr("not enough directory stack entries.");
3483 return NULL;
3485 return (char *)getdata(n);