Src/lex.c

   1 /*
   2  * lex.c - lexical analysis
   3  *
   4  * This file is part of zsh, the Z shell.
   5  *
   6  * Copyright (c) 1992-1997 Paul Falstad
   7  * All rights reserved.
   8  *
   9  * Permission is hereby granted, without written agreement and without
  10  * license or royalty fees, to use, copy, modify, and distribute this
  11  * software and to distribute modified versions of this software for any
  12  * purpose, provided that the above copyright notice and the following
  13  * two paragraphs appear in all copies of this software.
  14  *
  15  * In no event shall Paul Falstad or the Zsh Development Group be liable
  16  * to any party for direct, indirect, special, incidental, or consequential
  17  * damages arising out of the use of this software and its documentation,
  18  * even if Paul Falstad and the Zsh Development Group have been advised of
  19  * the possibility of such damage.
  20  *
  21  * Paul Falstad and the Zsh Development Group specifically disclaim any
  22  * warranties, including, but not limited to, the implied warranties of
  23  * merchantability and fitness for a particular purpose.  The software
  24  * provided hereunder is on an "as is" basis, and Paul Falstad and the
  25  * Zsh Development Group have no obligation to provide maintenance,
  26  * support, updates, enhancements, or modifications.
  27  *
  28  */
  29
  30 #include "zsh.mdh"
  31 #include "lex.pro"
  32
  33 /* tokens */
     /*
      * ztokens holds the literal character for each tokenised character;
      * cmd_or_math() indexes it with (token - Pound) to turn a token back
      * into the character it stands for.
      */
  34
  35 /**/
  36 mod_export char ztokens[] = "#$^*()$=|{}[]`<>>?~`,'\"\\\\";
  37
  38 /* parts of the current token */
     /*
      * tokstr: text of the current token; gettok() resets it to NULL and the
      *         string-building paths point it at the buffer filled by add().
      * tok:    type of the current token, assigned by zshlex().
      * tokfd:  the file descriptor digit gettok() found in front of a
      *         redirection operator, or -1 if there was none.
      */
  39
  40 /**/
  41 char *zshlextext;
  42 /**/
  43 mod_export char *tokstr;
  44 /**/
  45 mod_export int tok;
  46 /**/
  47 mod_export int tokfd;
  48
  49 /*
  50  * Line number at which the first character of a token was found.
  51  * We always set this in gettok(), which is always called from
  52  * zshlex() unless we have reached an error.  So it is always
  53  * valid when parsing.  It is not useful during execution
  54  * of the parsed structure.
  55  */
  56
  57 /**/
  58 zlong toklineno;
  59
  60 /* lexical analyzer error flag */
     /*
      * gettok() treats a set lexstop as the end of the input: it returns
      * LEXERR only if errflag is set as well, otherwise ENDINPUT.  The
      * lookahead code in this file clears it again after pushing a
      * character back.
      */
  61
  62 /**/
  63 mod_export int lexstop;
  64
  65 /* if != 0, this is the first line of the command */
  66
  67 /**/
  68 mod_export int isfirstln;
  69
  70 /* if != 0, this is the first char of the command (not including white space) */
  71
  72 /**/
  73 int isfirstch;
  74
  75 /* flag that an alias should be expanded after expansion ending in space */
  76
  77 /**/
  78 int inalmore;
  79
  80 /*
  81  * Don't do spelling correction.
  82  * Bit 1 is only valid for the current word.  It's
  83  * set when we detect a lookahead that stops the word from
  84  * needing correction.
      * (Bit 1 is the value 2: zshlex() masks nocorrect with 1 after every
      * token it reads, which clears that bit again.)
  85  */
  86
  87 /**/
  88 int nocorrect;
  89
  90 /*
  91  * Cursor position and line length in zle when the line is
  92  * metafied for access from the main shell.
  93  */
  94
  95 /**/
  96 mod_export int zlemetacs, zlemetall;
  97
  98 /* inwhat says what exactly we are in     *
  99  * (its value is one of the IN_* things). */
 100
 101 /**/
 102 mod_export int inwhat;
 103
 104 /* 1 if x added to complete in a blank between words */
 105
 106 /**/
 107 mod_export int addedx;
 108
 109 /* wb and we hold the beginning/end position of the word we are completing. */
 110
 111 /**/
 112 mod_export int wb, we;
 113
 114 /* 1 if aliases should not be expanded */
 115
 116 /**/
 117 mod_export int noaliases;
 118
 119 /* we are parsing a line sent to us by the editor */
 120
 121 /**/
 122 mod_export int zleparse;
 123
     /*
      * Value of inbufct recorded by gettok() at the first character of the
      * current word, less one if that character is bangchar and qbang is set.
      */
 124 /**/
 125 mod_export int wordbeg;
 126
     /*
      * parbegin and parend are set to inbufct by the SETPARBEGIN and
      * SETPAREND macros in this file while zleparse is set.  SETPAREND only
      * acts while parbegin is not -1 and parend is still -1, and may reset
      * parbegin to -1.
      */
 127 /**/
 128 mod_export int parbegin;
 129
 130 /**/
 131 mod_export int parend;
 132
 133 /* don't recognize comments */
 134
 135 /**/
 136 mod_export int nocomments;
 137
 138 /* text of punctuation tokens */
     /*
      * The entries follow the token numbers shown in the comments on the
      * right, so the table is presumably meant to be indexed by token
      * number.  Only entries 0 to 32 (NULLTOK to SEMIBAR) are given; the
      * array is declared with WHILE + 1 elements, so any elements after
      * SEMIBAR are implicitly NULL.
      * Note that the NEWLIN entry is the two characters backslash and n,
      * not a newline character.
      */
 139
 140 /**/
 141 mod_export char *tokstrings[WHILE + 1] = {
 142     NULL,       /* NULLTOK        0  */
 143     ";",        /* SEPER             */
 144     "\\n",      /* NEWLIN            */
 145     ";",        /* SEMI              */
 146     ";;",       /* DSEMI             */
 147     "&",        /* AMPER          5  */
 148     "(",        /* INPAR             */
 149     ")",        /* OUTPAR            */
 150     "||",       /* DBAR              */
 151     "&&",       /* DAMPER            */
 152     ">",        /* OUTANG         10 */
 153     ">|",       /* OUTANGBANG        */
 154     ">>",       /* DOUTANG           */
 155     ">>|",      /* DOUTANGBANG       */
 156     "<",        /* INANG             */
 157     "<>",       /* INOUTANG       15 */
 158     "<<",       /* DINANG            */
 159     "<<-",      /* DINANGDASH        */
 160     "<&",       /* INANGAMP          */
 161     ">&",       /* OUTANGAMP         */
 162     "&>",       /* AMPOUTANG      20 */
 163     "&>|",      /* OUTANGAMPBANG     */
 164     ">>&",      /* DOUTANGAMP        */
 165     ">>&|",     /* DOUTANGAMPBANG    */
 166     "<<<",      /* TRINANG           */
 167     "|",        /* BAR            25 */
 168     "|&",       /* BARAMP            */
 169     "()",       /* INOUTPAR          */
 170     "((",       /* DINPAR            */
 171     "))",       /* DOUTPAR           */
 172     "&|",       /* AMPERBANG      30 */
 173     ";&",       /* SEMIAMP           */
 174     ";|",       /* SEMIBAR           */
 175 };
 176
 177 /* lexical state */
     /*
      * dbparens: set by gettok() when it returns DINPAR for the "((" of a
      *           for loop (infor set); while it is set gettok() reads
      *           arithmetic text instead of ordinary tokens, and clears it
      *           when it returns DOUTPAR.
      * len, bsiz, bptr: number of characters stored in the token buffer,
      *           the buffer's allocated size, and the position add() writes
      *           the next character to.
      */
 178
 179 static int dbparens;
 180 static int len = 0, bsiz = 256;
 181 static char *bptr;
 182
     /*
      * One saved copy of the lexer, history and parse-buffer state.
      * lexsave() fills an entry from the corresponding globals and pushes it
      * onto lstack; lexrestore() copies the top entry back and pops it.
      * Most members have the same name as the global they hold; the
      * exceptions are hline (saves chline), cstack (saves cmdstack) and
      * csp (saves cmdsp).
      */
 183 struct lexstack {
 184     struct lexstack *next;
 185
 186     int incmdpos;
 187     int incond;
 188     int incasepat;
 189     int dbparens;
 190     int isfirstln;
 191     int isfirstch;
 192     int histactive;
 193     int histdone;
 194     int stophist;
 195     int hlinesz;
 196     char *hline;
 197     char *hptr;
 198     int tok;
 199     int isnewlin;
 200     char *tokstr;
 201     char *zshlextext;
 202     char *bptr;
 203     int bsiz;
 204     int len;
 205     short *chwords;
 206     int chwordlen;
 207     int chwordpos;
 208     int hwgetword;
 209     int lexstop;
 210     struct heredocs *hdocs;
     /* the input/history function pointers in use when the state was saved */
 211     int (*hgetc) _((void));
 212     void (*hungetc) _((int));
 213     void (*hwaddc) _((int));
 214     void (*hwbegin) _((int));
 215     void (*hwend) _((void));
 216     void (*addtoline) _((int));
 217
     /* the ec* parse-buffer globals */
 218     int eclen, ecused, ecnpats;
 219     Wordcode ecbuf;
 220     Eccstr ecstrs;
 221     int ecsoffs, ecssub, ecnfunc;
 222
 223     unsigned char *cstack;
 224     int csp;
 225     zlong toklineno;
 226 };
 227
     /* top of the stack of saved states; NULL when nothing is saved */
 228 static struct lexstack *lstack = NULL;
 229
 230 /* save the lexical state */
 231
 232 /* is this a hack or what? */
 233
 234 /**/
 235 mod_export void
 236 lexsave(void)
 237 {
 238     struct lexstack *ls;
 239
 240     ls = (struct lexstack *)malloc(sizeof(struct lexstack));
 241
 242     ls->incmdpos = incmdpos;
 243     ls->incond = incond;
 244     ls->incasepat = incasepat;
 245     ls->dbparens = dbparens;
 246     ls->isfirstln = isfirstln;
 247     ls->isfirstch = isfirstch;
 248     ls->histactive = histactive;
 249     ls->histdone = histdone;
 250     ls->stophist = stophist;
 251     ls->hline = chline;
 252     ls->hptr = hptr;
 253     ls->hlinesz = hlinesz;
 254     ls->cstack = cmdstack;
 255     ls->csp = cmdsp;
 256     cmdstack = (unsigned char *)zalloc(CMDSTACKSZ);
 257     ls->tok = tok;
 258     ls->isnewlin = isnewlin;
 259     ls->tokstr = tokstr;
 260     ls->zshlextext = zshlextext;
 261     ls->bptr = bptr;
 262     ls->bsiz = bsiz;
 263     ls->len = len;
 264     ls->chwords = chwords;
 265     ls->chwordlen = chwordlen;
 266     ls->chwordpos = chwordpos;
 267     ls->hwgetword = hwgetword;
 268     ls->lexstop = lexstop;
 269     ls->hdocs = hdocs;
 270     ls->hgetc = hgetc;
 271     ls->hungetc = hungetc;
 272     ls->hwaddc = hwaddc;
 273     ls->hwbegin = hwbegin;
 274     ls->hwend = hwend;
 275     ls->addtoline = addtoline;
 276     ls->eclen = eclen;
 277     ls->ecused = ecused;
 278     ls->ecnpats = ecnpats;
 279     ls->ecbuf = ecbuf;
 280     ls->ecstrs = ecstrs;
 281     ls->ecsoffs = ecsoffs;
 282     ls->ecssub = ecssub;
 283     ls->ecnfunc = ecnfunc;
 284     ls->toklineno = toklineno;
 285     cmdsp = 0;
 286     inredir = 0;
 287     hdocs = NULL;
 288     histactive = 0;
 289     ecbuf = NULL;
 290
 291     ls->next = lstack;
 292     lstack = ls;
 293 }
 294
 295 /* restore lexical state */
 296
 297 /**/
 298 mod_export void
 299 lexrestore(void)
 300 {
 301     struct lexstack *ln;
 302
 303     DPUTS(!lstack, "BUG: lexrestore() without lexsave()");
 304     incmdpos = lstack->incmdpos;
 305     incond = lstack->incond;
 306     incasepat = lstack->incasepat;
 307     dbparens = lstack->dbparens;
 308     isfirstln = lstack->isfirstln;
 309     isfirstch = lstack->isfirstch;
 310     histactive = lstack->histactive;
 311     histdone = lstack->histdone;
 312     stophist = lstack->stophist;
 313     chline = lstack->hline;
 314     hptr = lstack->hptr;
 315     if (cmdstack)
 316         free(cmdstack);
 317     cmdstack = lstack->cstack;
 318     cmdsp = lstack->csp;
 319     tok = lstack->tok;
 320     isnewlin = lstack->isnewlin;
 321     tokstr = lstack->tokstr;
 322     zshlextext = lstack->zshlextext;
 323     bptr = lstack->bptr;
 324     bsiz = lstack->bsiz;
 325     len = lstack->len;
 326     chwords = lstack->chwords;
 327     chwordlen = lstack->chwordlen;
 328     chwordpos = lstack->chwordpos;
 329     hwgetword = lstack->hwgetword;
 330     lexstop = lstack->lexstop;
 331     hdocs = lstack->hdocs;
 332     hgetc = lstack->hgetc;
 333     hungetc = lstack->hungetc;
 334     hwaddc = lstack->hwaddc;
 335     hwbegin = lstack->hwbegin;
 336     hwend = lstack->hwend;
 337     addtoline = lstack->addtoline;
 338     if (ecbuf)
 339         zfree(ecbuf, eclen);
 340     eclen = lstack->eclen;
 341     ecused = lstack->ecused;
 342     ecnpats = lstack->ecnpats;
 343     ecbuf = lstack->ecbuf;
 344     ecstrs = lstack->ecstrs;
 345     ecsoffs = lstack->ecsoffs;
 346     ecssub = lstack->ecssub;
 347     ecnfunc = lstack->ecnfunc;
 348     hlinesz = lstack->hlinesz;
 349     toklineno = lstack->toklineno;
 350     errflag = 0;
 351
 352     ln = lstack->next;
 353     free(lstack);
 354     lstack = ln;
 355 }
 356
 357 /**/
 358 void
 359 zshlex(void)
 360 {
 361     if (tok == LEXERR)
 362         return;
 363     do
 364         tok = gettok();
 365     while (tok != ENDINPUT && exalias());
 366     nocorrect &= 1;
 367     if (tok == NEWLIN || tok == ENDINPUT) {
 368         while (hdocs) {
 369             struct heredocs *next = hdocs->next;
 370             char *name;
 371
 372             hwbegin(0);
 373             cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
 374             STOPHIST
 375             name = gethere(hdocs->str, hdocs->type);
 376             ALLOWHIST
 377             cmdpop();
 378             hwend();
 379             if (!name) {
 380                 zerr("here document too large");
 381                 while (hdocs) {
 382                     next = hdocs->next;
 383                     zfree(hdocs, sizeof(struct heredocs));
 384                     hdocs = next;
 385                 }
 386                 tok = LEXERR;
 387                 break;
 388             }
 389             setheredoc(hdocs->pc, REDIR_HERESTR, name);
 390             zfree(hdocs, sizeof(struct heredocs));
 391             hdocs = next;
 392         }
 393     }
 394     if (tok != NEWLIN)
 395         isnewlin = 0;
 396     else
 397         isnewlin = (inbufct) ? -1 : 1;
 398     if (tok == SEMI || tok == NEWLIN)
 399         tok = SEPER;
 400 }
 401
 402 /**/
 403 mod_export void
 404 ctxtlex(void)
 405 {
 406     static int oldpos;
 407
 408     zshlex();
 409     switch (tok) {
 410     case SEPER:
 411     case NEWLIN:
 412     case SEMI:
 413     case DSEMI:
 414     case SEMIAMP:
 415     case SEMIBAR:
 416     case AMPER:
 417     case AMPERBANG:
 418     case INPAR:
 419     case INBRACE:
 420     case DBAR:
 421     case DAMPER:
 422     case BAR:
 423     case BARAMP:
 424     case INOUTPAR:
 425     case DOLOOP:
 426     case THEN:
 427     case ELIF:
 428     case ELSE:
 429     case DOUTBRACK:
 430         incmdpos = 1;
 431         break;
 432     case STRING:
 433  /* case ENVSTRING: */
 434     case ENVARRAY:
 435     case OUTPAR:
 436     case CASE:
 437     case DINBRACK:
 438         incmdpos = 0;
 439         break;
 440     }
 441     if (tok != DINPAR)
 442         infor = tok == FOR ? 2 : 0;
 443     if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
 444         inredir = 1;
 445         oldpos = incmdpos;
 446         incmdpos = 0;
 447     } else if (inredir) {
 448         incmdpos = oldpos;
 449         inredir = 0;
 450     }
 451 }
 452
     /*
      * Action codes for the first character of a token, stored in lexact1[]
      * and switched on in gettok().  initlextabs() assigns each character of
      * its lx1 string its position in that string, so every LX1_ value must
      * equal the position of the matching character there.  LX1_OTHER is
      * the default for all remaining characters.
      */
 453 #define LX1_BKSLASH 0
 454 #define LX1_COMMENT 1
 455 #define LX1_NEWLIN 2
 456 #define LX1_SEMI 3
 457 #define LX1_AMPER 5
 458 #define LX1_BAR 6
 459 #define LX1_INPAR 7
 460 #define LX1_OUTPAR 8
 461 #define LX1_INANG 13
 462 #define LX1_OUTANG 14
 463 #define LX1_OTHER 15
 464
     /*
      * Action codes for the characters inside a word, stored in lexact2[].
      * LX2_BREAK to LX2_COMMA are likewise positions in the lx2 string of
      * initlextabs(); LX2_OTHER is the default, and LX2_META is assigned
      * explicitly to the Meta character.
      */
 465 #define LX2_BREAK 0
 466 #define LX2_OUTPAR 1
 467 #define LX2_BAR 2
 468 #define LX2_STRING 3
 469 #define LX2_INBRACK 4
 470 #define LX2_OUTBRACK 5
 471 #define LX2_TILDE 6
 472 #define LX2_INPAR 7
 473 #define LX2_INBRACE 8
 474 #define LX2_OUTBRACE 9
 475 #define LX2_OUTANG 10
 476 #define LX2_INANG 11
 477 #define LX2_EQUALS 12
 478 #define LX2_BKSLASH 13
 479 #define LX2_QUOTE 14
 480 #define LX2_DQUOTE 15
 481 #define LX2_BQUOTE 16
 482 #define LX2_COMMA 17
 483 #define LX2_OTHER 18
 484 #define LX2_META 19
 485
     /*
      * Lookup tables indexed by character value and filled in by
      * initlextabs(): lexact1/lexact2 give the action codes above, lextok2
      * gives the character to store for an input character (the character
      * itself unless initlextabs() maps it to a token).
      */
 486 static unsigned char lexact1[256], lexact2[256], lextok2[256];
 487
 488 /**/
 489 void
 490 initlextabs(void)
 491 {
 492     int t0;
 493     static char *lx1 = "\\q\n;!&|(){}[]<>";
 494     static char *lx2 = ";)|$[]~({}><=\\\'\"`,";
 495
 496     for (t0 = 0; t0 != 256; t0++) {
 497         lexact1[t0] = LX1_OTHER;
 498         lexact2[t0] = LX2_OTHER;
 499         lextok2[t0] = t0;
 500     }
 501     for (t0 = 0; lx1[t0]; t0++)
 502         lexact1[(int)lx1[t0]] = t0;
 503     for (t0 = 0; lx2[t0]; t0++)
 504         lexact2[(int)lx2[t0]] = t0;
 505     lexact2['&'] = LX2_BREAK;
 506     lexact2[STOUC(Meta)] = LX2_META;
 507     lextok2['*'] = Star;
 508     lextok2['?'] = Quest;
 509     lextok2['{'] = Inbrace;
 510     lextok2['['] = Inbrack;
 511     lextok2['$'] = String;
 512     lextok2['~'] = Tilde;
 513     lextok2['#'] = Pound;
 514     lextok2['^'] = Hat;
 515 }
 516
 517 /* initialize lexical state */
 518
 519 /**/
 520 void
 521 lexinit(void)
 522 {
 523     incond = incasepat = nocorrect =
 524     infor = dbparens = lexstop = 0;
 525     incmdpos = 1;
 526     tok = ENDINPUT;
 527 }
 528
 529 /* add a char to the string buffer */
 530
 531 /**/
 532 void
 533 add(int c)
 534 {
 535     *bptr++ = c;
 536     if (bsiz == ++len) {
 537 #if 0
 538         int newbsiz;
 539
 540         newbsiz = bsiz * 8;
 541         while (newbsiz < inbufct)
 542             newbsiz *= 2;
 543         bptr = len + (tokstr = (char *)hrealloc(tokstr, bsiz, newbsiz));
 544         bsiz = newbsiz;
 545 #endif
 546
 547         int newbsiz = bsiz * 2;
 548
 549         if (newbsiz > inbufct && inbufct > bsiz)
 550             newbsiz = inbufct;
 551
 552         bptr = len + (tokstr = (char *)hrealloc(tokstr, bsiz, newbsiz));
 553         bsiz = newbsiz;
 554     }
 555 }
 556
     /*
      * SETPARBEGIN: while parsing a line for the editor (zleparse set) that
      * does not come from an alias (INP_ALIAS clear in inbufflags), record
      * inbufct in parbegin if zlemetacs >= zlemetall + 1 - inbufct.
      *
      * SETPAREND: under the same zleparse/alias conditions, and only while
      * parbegin is set and parend is still -1: if the same cursor test
      * holds, reset parbegin to -1, otherwise record inbufct in parend.
      *
      * NOTE(review): the cursor test presumably means "the cursor is at or
      * beyond the current input position" -- confirm against the callers.
      * Both macros expand to a brace block rather than a single statement.
      */
 557 #define SETPARBEGIN {if (zleparse && !(inbufflags & INP_ALIAS) && zlemetacs >= zlemetall+1-inbufct) parbegin = inbufct;}
 558 #define SETPAREND {\
 559             if (zleparse && !(inbufflags & INP_ALIAS) && parbegin != -1 && parend == -1) {\
 560                 if (zlemetacs >= zlemetall + 1 - inbufct)\
 561                     parbegin = -1;\
 562                 else\
 563                     parend = inbufct;} }
 564
 565 /*
 566  * Return 1 for math, 0 for a command, 2 for an error.  If it couldn't be
 567  * parsed as math, but there was no gross error, it's a command.
 568  */
 569
 570 static int
 571 cmd_or_math(int cs_type)
 572 {
 573     int oldlen = len;
 574     int c;
 575
 576     cmdpush(cs_type);
 577     c = dquote_parse(')', 0);
 578     cmdpop();
 579     *bptr = '\0';
 580     if (!c) {
 581         /* Successfully parsed, see if it was math */
 582         c = hgetc();
 583         if (c == ')')
 584             return 1; /* yes */
 585         hungetc(c);
 586         lexstop = 0;
 587         c = ')';
 588     } else if (lexstop) {
 589         /* we haven't got anything to unget */
 590         return 2;
 591     }
 592     /* else unsuccessful: unget the whole thing */
 593     hungetc(c);
 594     lexstop = 0;
 595     while (len > oldlen) {
 596         len--;
 597         hungetc(itok(*--bptr) ? ztokens[*bptr - Pound] : *bptr);
 598     }
 599     hungetc('(');
 600     return 0;
 601 }
 602
 603
 604 /*
 605  * Parse either a $(( ... )) or a $(...)
 606  * Return 0 on success, 1 on failure.
 607  */
 608 static int
 609 cmd_or_math_sub(void)
 610 {
 611     int c = hgetc(), ret;
 612
 613     if (c == '(') {
 614         add(Inpar);
 615         add('(');
 616         if ((ret = cmd_or_math(CS_MATHSUBST)) == 1) {
 617             add(')');
 618             return 0;
 619         }
 620         if (ret == 2)
 621             return 1;
 622         bptr -= 2;
 623         len -= 2;
 624     } else {
 625         hungetc(c);
 626         lexstop = 0;
 627     }
 628     return skipcomm();
 629 }
 630
 631 /* Check whether we're looking at valid numeric globbing syntax      *
 632  * (/\<[0-9]*-[0-9]*\>/).  Call pointing just after the opening "<". *
 633  * Leaves the input in the same place, returning 0 or 1.             */
 634
 635 /**/
 636 static int
 637 isnumglob(void)
 638 {
 639     int c, ec = '-', ret = 0;
 640     int tbs = 256, n = 0;
 641     char *tbuf = (char *)zalloc(tbs);
 642
 643     while(1) {
 644         c = hgetc();
 645         if(lexstop) {
 646             lexstop = 0;
 647             break;
 648         }
 649         tbuf[n++] = c;
 650         if(!idigit(c)) {
 651             if(c != ec)
 652                 break;
 653             if(ec == '>') {
 654                 ret = 1;
 655                 break;
 656             }
 657             ec = '>';
 658         }
 659         if(n == tbs)
 660             tbuf = (char *)realloc(tbuf, tbs *= 2);
 661     }
 662     while(n--)
 663         hungetc(tbuf[n]);
 664     zfree(tbuf, tbs);
 665     return ret;
 666 }
 667
     /*
      * Read one raw token from the input and return its type.
      *
      * Leading blanks are skipped and toklineno is set.  At the end of the
      * input (lexstop) the result is LEXERR if errflag is set and ENDINPUT
      * otherwise.  While dbparens is set the input is read as arithmetic
      * text.  A leading digit directly followed by a redirection operator
      * is remembered in peekfd and reported through tokfd.  Comments are
      * consumed here and returned as NEWLIN.  Punctuation is recognised
      * from the first character via lexact1[]; anything else is handed to
      * gettokstr() to be read as a string.
      */
 668 /**/
 669 static int
 670 gettok(void)
 671 {
 672     int c, d;
 673     int peekfd = -1, peek;
 674
 675   beginning:
 676     tokstr = NULL;
 677     while (iblank(c = hgetc()) && !lexstop);
 678     toklineno = lineno;
 679     if (lexstop)
 680         return (errflag) ? LEXERR : ENDINPUT;
 681     isfirstln = 0;
 682     wordbeg = inbufct - (qbang && c == bangchar);
 683     hwbegin(-1-(qbang && c == bangchar));
 684     /* word includes the last character read and possibly \ before ! */
 685     if (dbparens) {
             /*
              * Inside for ((...)): read arithmetic text up to the next ';'
              * while infor says more sections follow, else up to ')'.
              */
 686         len = 0;
 687         bptr = tokstr = (char *) hcalloc(bsiz = 32);
 688         hungetc(c);
 689         cmdpush(CS_MATH);
 690         c = dquote_parse(infor ? ';' : ')', 0);
 691         cmdpop();
 692         *bptr = '\0';
 693         if (!c && infor) {
 694             infor--;
 695             return DINPAR;
 696         }
             /* the last section has to be closed by a second ')' */
 697         if (c || (c = hgetc()) != ')') {
 698             hungetc(c);
 699             return LEXERR;
 700         }
 701         dbparens = 0;
 702         return DOUTPAR;
 703     } else if (idigit(c)) {     /* handle 1< foo */
 704         d = hgetc();
 705         if(d == '&') {
 706             d = hgetc();
 707             if(d == '>') {
                     /* digit&> : lex it as &> with the digit as descriptor */
 708                 peekfd = c - '0';
 709                 hungetc('>');
 710                 c = '&';
 711             } else {
 712                 hungetc(d);
 713                 lexstop = 0;
 714                 hungetc('&');
 715             }
 716         } else if (d == '>' || d == '<') {
 717             peekfd = c - '0';
 718             c = d;
 719         } else {
 720             hungetc(d);
 721             lexstop = 0;
 722         }
 723     }
 724
 725     /* chars in initial position in word */
 726
 727     if (c == hashchar && !nocomments &&
 728         (isset(INTERACTIVECOMMENTS) ||
 729          (!zleparse && !expanding &&
 730           (!interact || unset(SHINSTDIN) || strin)))) {
 731         /* History is handled here to prevent extra  *
 732          * newlines being inserted into the history. */
 733
             /* read the rest of the comment line straight from the input */
 734         while ((c = ingetc()) != '\n' && !lexstop) {
 735             hwaddc(c);
 736             addtoline(c);
 737         }
 738
 739         if (errflag)
 740             peek = LEXERR;
 741         else {
 742             hwend();
 743             hwbegin(0);
 744             hwaddc('\n');
 745             addtoline('\n');
 746             peek = NEWLIN;
 747         }
 748         return peek;
 749     }
 750     switch (lexact1[STOUC(c)]) {
 751     case LX1_BKSLASH:
             /* backslash-newline is a line continuation: start again */
 752         d = hgetc();
 753         if (d == '\n')
 754             goto beginning;
 755         hungetc(d);
 756         lexstop = 0;
 757         break;
 758     case LX1_NEWLIN:
 759         return NEWLIN;
 760     case LX1_SEMI:
 761         d = hgetc();
 762         if(d == ';')
 763             return DSEMI;
 764         else if(d == '&')
 765             return SEMIAMP;
 766         else if (d == '|')
 767             return SEMIBAR;
 768         hungetc(d);
 769         lexstop = 0;
 770         return SEMI;
 771     case LX1_AMPER:
 772         d = hgetc();
 773         if (d == '&')
 774             return DAMPER;
 775         else if (d == '!' || d == '|')
 776             return AMPERBANG;
 777         else if (d == '>') {
                 /* &> and its variants; peekfd is still -1 unless digit&> was seen */
 778             tokfd = peekfd;
 779             d = hgetc();
 780             if (d == '!' || d == '|')
 781                 return OUTANGAMPBANG;
 782             else if (d == '>') {
 783                 d = hgetc();
 784                 if (d == '!' || d == '|')
 785                     return DOUTANGAMPBANG;
 786                 hungetc(d);
 787                 lexstop = 0;
 788                 return DOUTANGAMP;
 789             }
 790             hungetc(d);
 791             lexstop = 0;
 792             return AMPOUTANG;
 793         }
 794         hungetc(d);
 795         lexstop = 0;
 796         return AMPER;
 797     case LX1_BAR:
 798         d = hgetc();
 799         if (d == '|')
 800             return DBAR;
 801         else if (d == '&')
 802             return BARAMP;
 803         hungetc(d);
 804         lexstop = 0;
 805         return BAR;
 806     case LX1_INPAR:
 807         d = hgetc();
 808         if (d == '(') {
 809             if (infor) {
 810                 dbparens = 1;
 811                 return DINPAR;
 812             }
 813             if (incmdpos) {
                     /* "((" in command position: math or a nested subshell? */
 814                 len = 0;
 815                 bptr = tokstr = (char *) hcalloc(bsiz = 32);
 816                 switch (cmd_or_math(CS_MATH)) {
 817                 case 1:
 818                     return DINPAR;
 819
 820                 case 0:
 821                     return INPAR;
 822
 823                 default:
 824                     return LEXERR;
 825                 }
 826             }
 827         } else if (d == ')')
 828             return INOUTPAR;
 829         hungetc(d);
 830         lexstop = 0;
             /* elsewhere the '(' just starts an ordinary word */
 831         if (!(incond == 1 || incmdpos))
 832             break;
 833         return INPAR;
 834     case LX1_OUTPAR:
 835         return OUTPAR;
 836     case LX1_INANG:
 837         d = hgetc();
 838         if (d == '(') {
 839             hungetc(d);
 840             lexstop = 0;
 841             unpeekfd:
                 /*
                  * This is not a redirection after all.  If a digit was
                  * taken as a file descriptor, push the operator character
                  * back and continue with the digit as the start of a word.
                  */
 842             if(peekfd != -1) {
 843                 hungetc(c);
 844                 c = '0' + peekfd;
 845             }
 846             break;
 847         }
 848         if (d == '>') {
 849             peek = INOUTANG;
 850         } else if (d == '<') {
 851             int e = hgetc();
 852
 853             if (e == '(') {
                     /* "<<(": return INANG for the first '<', leave "<(" in the input */
 854                 hungetc(e);
 855                 hungetc(d);
 856                 peek = INANG;
 857             } else if (e == '<')
 858                 peek = TRINANG;
 859             else if (e == '-')
 860                 peek = DINANGDASH;
 861             else {
 862                 hungetc(e);
 863                 lexstop = 0;
 864                 peek = DINANG;
 865             }
 866         } else if (d == '&') {
 867             peek = INANGAMP;
 868         } else {
 869             hungetc(d);
                 /* a numeric glob such as <1-10> is part of a word */
 870             if(isnumglob())
 871                 goto unpeekfd;
 872             peek = INANG;
 873         }
 874         tokfd = peekfd;
 875         return peek;
 876     case LX1_OUTANG:
 877         d = hgetc();
 878         if (d == '(') {
 879             hungetc(d);
 880             goto unpeekfd;
 881         } else if (d == '&') {
 882             d = hgetc();
 883             if (d == '!' || d == '|')
 884                 peek = OUTANGAMPBANG;
 885             else {
 886                 hungetc(d);
 887                 lexstop = 0;
 888                 peek = OUTANGAMP;
 889             }
 890         } else if (d == '!' || d == '|')
 891             peek = OUTANGBANG;
 892         else if (d == '>') {
 893             d = hgetc();
 894             if (d == '&') {
 895                 d = hgetc();
 896                 if (d == '!' || d == '|')
 897                     peek = DOUTANGAMPBANG;
 898                 else {
 899                     hungetc(d);
 900                     lexstop = 0;
 901                     peek = DOUTANGAMP;
 902                 }
 903             } else if (d == '!' || d == '|')
 904                 peek = DOUTANGBANG;
 905             else if (d == '(') {
                     /* ">>(": return OUTANG for the first '>', leave ">(" in the input */
 906                 hungetc(d);
 907                 hungetc('>');
 908                 peek = OUTANG;
 909             } else {
 910                 hungetc(d);
 911                 lexstop = 0;
 912                 peek = DOUTANG;
                     /* with HISTALLOWCLOBBER a '|' is added to the history text */
 913                 if (isset(HISTALLOWCLOBBER))
 914                     hwaddc('|');
 915             }
 916         } else {
 917             hungetc(d);
 918             lexstop = 0;
 919             peek = OUTANG;
 920             if (!incond && isset(HISTALLOWCLOBBER))
 921                 hwaddc('|');
 922         }
 923         tokfd = peekfd;
 924         return peek;
 925     }
 926
 927     /* we've started a string, now get the *
 928      * rest of it, performing tokenization */
 929     return gettokstr(c, 0);
 930 }
 931
 932 /*
 933  * Get the remains of a token string.  This has two uses.
 934  * When called from gettok(), with sub = 0, we have already identified
 935  * any interesting initial character and want to get the rest of
 936  * what we now know is a string.  However, the string may still include
 937  * metacharacters and potentially substitutions.
 938  *
 939  * When called from parse_subst_string() with sub = 1, we are not
 940  * fully parsing a command line, merely tokenizing a string.
 941  * In this case we always add characters to the parsed string
 942  * unless there is a parse error.
 943  */
 944
 945 /**/
 946 static int
 947 gettokstr(int c, int sub)
 948 {
 949     int bct = 0, pct = 0, brct = 0, fdpar = 0;
 950     int intpos = 1, in_brace_param = 0;
 951     int peek, inquote, unmatched = 0;
 952 #ifdef DEBUG
 953     int ocmdsp = cmdsp;
 954 #endif
 955
 956     peek = STRING;
 957     if (!sub) {
 958         len = 0;
 959         bptr = tokstr = (char *) hcalloc(bsiz = 32);
 960     }
 961     for (;;) {
 962         int act;
 963         int e;
 964         int inbl = inblank(c);
 965
 966         if (fdpar && !inbl && c != ')')
 967             fdpar = 0;
 968
 969         if (inbl && !in_brace_param && !pct)
 970             act = LX2_BREAK;
 971         else {
 972             act = lexact2[STOUC(c)];
 973             c = lextok2[STOUC(c)];
 974         }
 975         switch (act) {
 976         case LX2_BREAK:
 977             if (!in_brace_param && !sub)
 978                 goto brk;
 979             break;
 980         case LX2_META:
 981             c = hgetc();
 982 #ifdef DEBUG
 983             if (lexstop) {
 984                 fputs("BUG: input terminated by Meta\n", stderr);
 985                 fflush(stderr);
 986                 goto brk;
 987             }
 988 #endif
 989             add(Meta);
 990             break;
 991         case LX2_OUTPAR:
 992             if (fdpar) {
 993                 /* this is a single word `(   )', treat as INOUTPAR */
 994                 add(c);
 995                 *bptr = '\0';
 996                 return INOUTPAR;
 997             }
 998             if ((sub || in_brace_param) && isset(SHGLOB))
 999                 break;
1000             if (!in_brace_param && !pct--) {
1001                 if (sub) {
1002                     pct = 0;
1003                     break;
1004                 } else
1005                     goto brk;
1006             }
1007             c = Outpar;
1008             break;
1009         case LX2_BAR:
1010             if (!pct && !in_brace_param) {
1011                 if (sub)
1012                     break;
1013                 else
1014                     goto brk;
1015             }
1016             if (unset(SHGLOB) || (!sub && !in_brace_param))
1017                 c = Bar;
1018             break;
1019         case LX2_STRING:
1020             e = hgetc();
1021             if (e == '[') {
1022                 cmdpush(CS_MATHSUBST);
1023                 add(String);
1024                 add(Inbrack);
1025                 c = dquote_parse(']', sub);
1026                 cmdpop();
1027                 if (c) {
1028                     peek = LEXERR;
1029                     goto brk;
1030                 }
1031                 c = Outbrack;
1032             } else if (e == '(') {
1033                 add(String);
1034                 c = cmd_or_math_sub();
1035                 if (c) {
1036                     peek = LEXERR;
1037                     goto brk;
1038                 }
1039                 c = Outpar;
1040             } else {
1041                 if (e == '{') {
1042                     add(c);
1043                     c = Inbrace;
1044                     ++bct;
1045                     cmdpush(CS_BRACEPAR);
1046                     if (!in_brace_param)
1047                         in_brace_param = bct;
1048                 } else {
1049                     hungetc(e);
1050                     lexstop = 0;
1051                 }
1052             }
1053             break;
1054         case LX2_INBRACK:
1055             if (!in_brace_param)
1056                 brct++;
1057             c = Inbrack;
1058             break;
1059         case LX2_OUTBRACK:
1060             if (!in_brace_param)
1061                 brct--;
1062             if (brct < 0)
1063                 brct = 0;
1064             c = Outbrack;
1065             break;
1066         case LX2_INPAR:
1067             if (isset(SHGLOB)) {
1068                 if (sub || in_brace_param)
1069                     break;
1070                 if (incasepat && !len)
1071                     return INPAR;
1072             }
1073             if (!in_brace_param) {
1074                 if (!sub) {
1075                     e = hgetc();
1076                     hungetc(e);
1077                     lexstop = 0;
1078                     /* For command words, parentheses are only
1079                      * special at the start.  But now we're tokenising
1080                      * the remaining string.  So I don't see what
1081                      * the old incmdpos test here is for.
1082                      *   pws 1999/6/8
1083                      *
1084                      * Oh, no.
1085                      *  func1(   )
1086                      * is a valid function definition in [k]sh.  The best
1087                      * thing we can do, without really nasty lookahead tricks,
1088                      * is break if we find a blank after a parenthesis.  At
1089                      * least this can't happen inside braces or brackets.  We
1090                      * only allow this with SHGLOB (set for both sh and ksh).
1091                      *
1092                      * Things like `print @( |foo)' should still
1093                      * work, because [k]sh don't allow multiple words
1094                      * in a function definition, so we only do this
1095                      * in command position.
1096                      *   pws 1999/6/14
1097                      */
1098                     if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
1099                                      !brct && !intpos && incmdpos)) {
1100                         /*
1101                          * Either a () token, or a command word with
1102                          * something suspiciously like a ksh function
1103                          * definition.
1104                          * The current word isn't spellcheckable.
1105                          */
1106                         nocorrect |= 2;
1107                         goto brk;
1108                     }
1109                 }
1110                 /*
1111                  * This also handles the [k]sh `foo( )' function definition.
1112                  * Maintain a variable fdpar, set as long as a single set of
1113                  * parentheses contains only space.  Then if we get to the
1114                  * closing parenthesis and it is still set, we can assume we
1115                  * have a function definition.  Only do this at the start of
1116                  * the word, since the (...) must be a separate token.
1117                  */
1118                 if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
1119                     fdpar = 1;
1120             }
1121             c = Inpar;
1122             break;
1123         case LX2_INBRACE:
1124             if (isset(IGNOREBRACES) || sub)
1125                 c = '{';
1126             else {
1127                 if (!len && incmdpos) {
1128                     add('{');
1129                     *bptr = '\0';
1130                     return STRING;
1131                 }
1132                 if (in_brace_param) {
1133                     cmdpush(CS_BRACE);
1134                 }
1135                 bct++;
1136             }
1137             break;
1138         case LX2_OUTBRACE:
1139             if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
1140                 break;
1141             if (!bct)
1142                 break;
1143             if (in_brace_param) {
1144                 cmdpop();
1145             }
1146             if (bct-- == in_brace_param)
1147                 in_brace_param = 0;
1148             c = Outbrace;
1149             break;
1150         case LX2_COMMA:
1151             if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
1152                 c = Comma;
1153             break;
1154         case LX2_OUTANG:
1155             if (in_brace_param || sub)
1156                 break;
1157             e = hgetc();
1158             if (e != '(') {
1159                 hungetc(e);
1160                 lexstop = 0;
1161                 goto brk;
1162             }
1163             add(OutangProc);
1164             if (skipcomm()) {
1165                 peek = LEXERR;
1166                 goto brk;
1167             }
1168             c = Outpar;
1169             break;
1170         case LX2_INANG:
1171             if (isset(SHGLOB) && sub)
1172                 break;
1173             e = hgetc();
1174             if (!(in_brace_param || sub) && e == '(') {
1175                 add(Inang);
1176                 if (skipcomm()) {
1177                     peek = LEXERR;
1178                     goto brk;
1179                 }
1180                 c = Outpar;
1181                 break;
1182             }
1183             hungetc(e);
1184             if(isnumglob()) {
1185                 add(Inang);
1186                 while ((c = hgetc()) != '>')
1187                     add(c);
1188                 c = Outang;
1189                 break;
1190             }
1191             lexstop = 0;
1192             if (in_brace_param || sub)
1193                 break;
1194             goto brk;
1195         case LX2_EQUALS:
1196             if (intpos) {
1197                 e = hgetc();
1198                 if (e != '(') {
1199                     hungetc(e);
1200                     lexstop = 0;
1201                     c = Equals;
1202                 } else {
1203                     add(Equals);
1204                     if (skipcomm()) {
1205                         peek = LEXERR;
1206                         goto brk;
1207                     }
1208                     c = Outpar;
1209                 }
1210             } else if (!sub && peek != ENVSTRING &&
1211                        incmdpos && !bct && !brct) {
1212                 char *t = tokstr;
1213                 if (idigit(*t))
1214                     while (++t < bptr && idigit(*t));
1215                 else {
1216                     int sav = *bptr;
1217                     *bptr = '\0';
1218                     t = itype_end(t, IIDENT, 0);
1219                     if (t < bptr) {
1220                         skipparens(Inbrack, Outbrack, &t);
1221                     } else {
1222                         *bptr = sav;
1223                     }
1224                 }
1225                 if (*t == '+')
1226                     t++;
1227                 if (t == bptr) {
1228                     e = hgetc();
1229                     if (e == '(' && incmdpos) {
1230                         *bptr = '\0';
1231                         return ENVARRAY;
1232                     }
1233                     hungetc(e);
1234                     lexstop = 0;
1235                     peek = ENVSTRING;
1236                     intpos = 2;
1237                 } else
1238                     c = Equals;
1239             } else
1240                 c = Equals;
1241             break;
1242         case LX2_BKSLASH:
1243             c = hgetc();
1244             if (c == '\n') {
1245                 c = hgetc();
1246                 if (!lexstop)
1247                     continue;
1248             } else
1249                 add(Bnull);
1250             if (lexstop)
1251                 goto brk;
1252             break;
1253         case LX2_QUOTE: {
1254             int strquote = (len && bptr[-1] == String);
1255
1256             add(Snull);
1257             cmdpush(CS_QUOTE);
1258             for (;;) {
1259                 STOPHIST
1260                 while ((c = hgetc()) != '\'' && !lexstop) {
1261                     if (strquote && c == '\\') {
1262                         c = hgetc();
1263                         if (lexstop)
1264                             break;
1265                         /*
1266                          * Mostly we don't need to do anything special
1267                          * with escape backslashes or closing quotes
1268                          * inside $'...'; however in completion we
1269                          * need to be able to strip multiple backslashes
1270                          * neatly.
1271                          */
1272                         if (c == '\\' || c == '\'')
1273                             add(Bnull);
1274                         else
1275                             add('\\');
1276                     } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1277                         if (bptr[-1] == '\\')
1278                             bptr--, len--;
1279                         else
1280                             break;
1281                     }
1282                     add(c);
1283                 }
1284                 ALLOWHIST
1285                 if (c != '\'') {
1286                     unmatched = '\'';
1287                     peek = LEXERR;
1288                     cmdpop();
1289                     goto brk;
1290                 }
1291                 e = hgetc();
1292                 if (e != '\'' || unset(RCQUOTES) || strquote)
1293                     break;
1294                 add(c);
1295             }
1296             cmdpop();
1297             hungetc(e);
1298             lexstop = 0;
1299             c = Snull;
1300             break;
1301         }
1302         case LX2_DQUOTE:
1303             add(Dnull);
1304             cmdpush(CS_DQUOTE);
1305             c = dquote_parse('"', sub);
1306             cmdpop();
1307             if (c) {
1308                 unmatched = '"';
1309                 peek = LEXERR;
1310                 goto brk;
1311             }
1312             c = Dnull;
1313             break;
1314         case LX2_BQUOTE:
1315             add(Tick);
1316             cmdpush(CS_BQUOTE);
1317             SETPARBEGIN
1318             inquote = 0;
1319             while ((c = hgetc()) != '`' && !lexstop) {
1320                 if (c == '\\') {
1321                     c = hgetc();
1322                     if (c != '\n') {
1323                         add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
1324                         add(c);
1325                     }
1326                     else if (!sub && isset(CSHJUNKIEQUOTES))
1327                         add(c);
1328                 } else {
1329                     if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1330                         break;
1331                     }
1332                     add(c);
1333                     if (c == '\'') {
1334                         if ((inquote = !inquote))
1335                             STOPHIST
1336                         else
1337                             ALLOWHIST
1338                     }
1339                 }
1340             }
1341             if (inquote)
1342                 ALLOWHIST
1343             cmdpop();
1344             if (c != '`') {
1345                 unmatched = '`';
1346                 peek = LEXERR;
1347                 goto brk;
1348             }
1349             c = Tick;
1350             SETPAREND
1351             break;
1352         }
1353         add(c);
1354         c = hgetc();
1355         if (intpos)
1356             intpos--;
1357         if (lexstop)
1358             break;
1359     }
1360   brk:
1361     hungetc(c);
1362     if (unmatched)
1363         zerr("unmatched %c", unmatched);
1364     if (in_brace_param) {
1365         while(bct-- >= in_brace_param)
1366             cmdpop();
1367         zerr("closing brace expected");
1368     } else if (unset(IGNOREBRACES) && !sub && len > 1 &&
1369                peek == STRING && bptr[-1] == '}' && bptr[-2] != Bnull) {
1370         /* hack to get {foo} command syntax work */
1371         bptr--;
1372         len--;
1373         lexstop = 0;
1374         hungetc('}');
1375     }
1376     *bptr = '\0';
1377     DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
1378     return peek;
1379 }
1380
1381
1382 /* Return non-zero for error (character to unget), else zero */
1383
1384 /**/
1385 static int
1386 dquote_parse(char endchar, int sub)
1387 {
1388     int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
1389     int c;
1390     int math = endchar == ')' || endchar == ']';
1391     int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
1392
1393     while (((c = hgetc()) != endchar || bct ||
1394             (math && ((pct > 0) || (brct > 0))) ||
1395             intick) && !lexstop) {
1396       cont:
1397         switch (c) {
1398         case '\\':
1399             c = hgetc();
1400             if (c != '\n') {
1401                 if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
1402                     c == endchar || c == '`' ||
1403                     (endchar == ']' && (c == '[' || c == ']' ||
1404                                         c == '(' || c == ')' ||
1405                                         c == '{' || c == '}' ||
1406                                         (c == '"' && sub))))
1407                     add(Bnull);
1408                 else {
1409                     /* lexstop is implicitly handled here */
1410                     add('\\');
1411                     goto cont;
1412                 }
1413             } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
1414                 continue;
1415             break;
1416         case '\n':
1417             err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
1418             break;
1419         case '$':
1420             if (intick)
1421                 break;
1422             c = hgetc();
1423             if (c == '(') {
1424                 add(Qstring);
1425                 err = cmd_or_math_sub();
1426                 c = Outpar;
1427             } else if (c == '[') {
1428                 add(String);
1429                 add(Inbrack);
1430                 cmdpush(CS_MATHSUBST);
1431                 err = dquote_parse(']', sub);
1432                 cmdpop();
1433                 c = Outbrack;
1434             } else if (c == '{') {
1435                 add(Qstring);
1436                 c = Inbrace;
1437                 cmdpush(CS_BRACEPAR);
1438                 bct++;
1439             } else if (c == '$')
1440                 add(Qstring);
1441             else {
1442                 hungetc(c);
1443                 lexstop = 0;
1444                 c = Qstring;
1445             }
1446             break;
1447         case '}':
1448             if (intick || !bct)
1449                 break;
1450             c = Outbrace;
1451             bct--;
1452             cmdpop();
1453             break;
1454         case '`':
1455             c = Qtick;
1456             if (intick == 2)
1457                 ALLOWHIST
1458             if ((intick = !intick)) {
1459                 SETPARBEGIN
1460                 cmdpush(CS_BQUOTE);
1461             } else {
1462                 SETPAREND
1463                 cmdpop();
1464             }
1465             break;
1466         case '\'':
1467             if (!intick)
1468                 break;
1469             if (intick == 1)
1470                 intick = 2, STOPHIST
1471             else
1472                 intick = 1, ALLOWHIST
1473             break;
1474         case '(':
1475             if (!math || !bct)
1476                 pct++;
1477             break;
1478         case ')':
1479             if (!math || !bct)
1480                 err = (!pct-- && math);
1481             break;
1482         case '[':
1483             if (!math || !bct)
1484                 brct++;
1485             break;
1486         case ']':
1487             if (!math || !bct)
1488                 err = (!brct-- && math);
1489             break;
1490         case '"':
1491             if (intick || ((endchar == ']' || !endchar) && !bct))
1492                 break;
1493             if (bct) {
1494                 add(Dnull);
1495                 cmdpush(CS_DQUOTE);
1496                 err = dquote_parse('"', sub);
1497                 cmdpop();
1498                 c = Dnull;
1499             } else
1500                 err = 1;
1501             break;
1502         }
1503         if (err || lexstop)
1504             break;
1505         add(c);
1506     }
1507     if (intick == 2)
1508         ALLOWHIST
1509     if (intick) {
1510         cmdpop();
1511     }
1512     while (bct--)
1513         cmdpop();
1514     if (lexstop)
1515         err = intick || endchar || err;
1516     else if (err == 1) {
1517         /*
1518          * TODO: as far as I can see, this hack is used in gettokstr()
1519          * to hungetc() a character on an error.  However, I don't
1520          * understand what that actually gets us, and we can't guarantee
1521          * it's a character anyway, because of the previous test.
1522          *
1523          * We use the same feature in cmd_or_math where we actually do
1524          * need to unget if we decide it's really a command substitution.
1525          * We try to handle the other case by testing for lexstop.
1526          */
1527         err = c;
1528     }
1529     if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
1530         inwhat = IN_MATH;
1531     return err;
1532 }
1533
1534 /* Tokenize a string given in s. Parsing is done as in double *
1535  * quotes.  This is usually called before singsub().          */
1536
1537 /**/
1538 mod_export int
1539 parsestr(char *s)
1540 {
1541     int err;
1542
1543     if ((err = parsestrnoerr(s))) {
1544         untokenize(s);
1545         if (err > 32 && err < 127)
1546             zerr("parse error near `%c'", err);
1547         else
1548             zerr("parse error");
1549     }
1550     return err;
1551 }
1552
1553 /**/
1554 mod_export int
1555 parsestrnoerr(char *s)
1556 {
1557     int l = strlen(s), err;
1558
1559     lexsave();
1560     untokenize(s);
1561     inpush(dupstring(s), 0, NULL);
1562     strinbeg(0);
1563     len = 0;
1564     bptr = tokstr = s;
1565     bsiz = l + 1;
1566     err = dquote_parse('\0', 1);
1567     *bptr = '\0';
1568     strinend();
1569     inpop();
1570     DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
1571     lexrestore();
1572     return err;
1573 }
1574
1575 /**/
1576 mod_export char *
1577 parse_subscript(char *s, int sub)
1578 {
1579     int l = strlen(s), err;
1580     char *t;
1581
1582     if (!*s || *s == ']')
1583         return 0;
1584     lexsave();
1585     untokenize(t = dupstring(s));
1586     inpush(t, 0, NULL);
1587     strinbeg(0);
1588     len = 0;
1589     bptr = tokstr = s;
1590     bsiz = l + 1;
1591     err = dquote_parse(']', sub);
1592     if (err) {
1593         err = *bptr;
1594         *bptr = 0;
1595         untokenize(s);
1596         *bptr = err;
1597         s = 0;
1598     } else
1599         s = bptr;
1600     strinend();
1601     inpop();
1602     DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
1603     lexrestore();
1604     return s;
1605 }
1606
1607 /* Tokenize a string given in s. Parsing is done as if s were a normal *
1608  * command-line argument but it may contain separators.  This is used  *
1609  * to parse the right-hand side of ${...%...} substitutions.           */
1610
1611 /**/
1612 mod_export int
1613 parse_subst_string(char *s)
1614 {
1615     int c, l = strlen(s), err, olen, lexstop_ret;
1616     char *ptr;
1617
1618     if (!*s || !strcmp(s, nulstring))
1619         return 0;
1620     lexsave();
1621     untokenize(s);
1622     inpush(dupstring(s), 0, NULL);
1623     strinbeg(0);
1624     len = 0;
1625     bptr = tokstr = s;
1626     bsiz = l + 1;
1627     c = hgetc();
1628     lexstop_ret = lexstop;
1629     c = gettokstr(c, 1);
1630     err = errflag;
1631     strinend();
1632     inpop();
1633     DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
1634     olen = len;
1635     lexrestore();
1636     errflag = err;
1637     if (c == LEXERR) {
1638         untokenize(s);
1639         return 1;
1640     }
1641 #ifdef DEBUG
1642     /*
1643      * Historical note: we used to check here for olen == l, but
1644      * that's not necessarily the case if we stripped an RCQUOTE.
1645      */
1646     if (c != STRING || (errflag && !noerrs)) {
1647         fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
1648                 errflag ? "errflag" : "c != STRING");
1649         fflush(stderr);
1650         untokenize(s);
1651         return 1;
1652     }
1653 #endif
1654     /* Check for $'...' quoting.  This needs special handling. */
1655     for (ptr = s; *ptr; )
1656     {
1657         if (*ptr == String && ptr[1] == Snull)
1658         {
1659             char *t;
1660             int len, tlen, diff;
1661             t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
1662             len += 2;
1663             tlen = strlen(t);
1664             diff = len - tlen;
1665             /*
1666              * Yuk.
1667              * parse_subst_string() currently handles strings in-place.
1668              * That's not so easy to fix without knowing whether
1669              * additional memory should come off the heap or
1670              * otherwise.  So we cheat by copying the unquoted string
1671              * into place, unless it's too long.  That's not the
1672              * normal case, but I'm worried there are are pathological
1673              * cases with converting metafied multibyte strings.
1674              * If someone can prove there aren't I will be very happy.
1675              */
1676             if (diff < 0) {
1677                 DPUTS(1, "$'...' subst too long: fix get_parse_string()");
1678                 return 1;
1679             }
1680             memcpy(ptr, t, tlen);
1681             ptr += tlen;
1682             if (diff > 0) {
1683                 char *dptr = ptr;
1684                 char *sptr = ptr + diff;
1685                 while ((*dptr++ = *sptr++))
1686                     ;
1687             }
1688         } else
1689             ptr++;
1690     }
1691     return 0;
1692 }
1693
1694 /* Called below to report word positions. */
1695
1696 /**/
1697 mod_export void
1698 gotword(void)
1699 {
1700     we = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
1701     if (zlemetacs <= we) {
1702         wb = zlemetall - wordbeg + addedx;
1703         zleparse = 0;
1704     }
1705 }
1706
1707 /* expand aliases and reserved words */
1708
1709 /**/
1710 int
1711 exalias(void)
1712 {
1713     Alias an;
1714     Reswd rw;
1715
1716     hwend();
1717     if (interact && isset(SHINSTDIN) && !strin && !incasepat &&
1718         tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
1719         (isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
1720         spckword(&tokstr, 1, incmdpos, 1);
1721
1722     if (!tokstr) {
1723         zshlextext = tokstrings[tok];
1724
1725         return 0;
1726     } else {
1727         VARARR(char, copy, (strlen(tokstr) + 1));
1728
1729         if (has_token(tokstr)) {
1730             char *p, *t;
1731
1732             zshlextext = p = copy;
1733             for (t = tokstr;
1734                  (*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++););
1735         } else
1736             zshlextext = tokstr;
1737
1738         if (zleparse && !(inbufflags & INP_ALIAS)) {
1739             int zp = zleparse;
1740
1741             gotword();
1742             if (zp == 1 && !zleparse) {
1743                 if (zshlextext == copy)
1744                     zshlextext = tokstr;
1745                 return 0;
1746             }
1747         }
1748
1749         if (tok == STRING) {
1750             /* Check for an alias */
1751             if (!noaliases && isset(ALIASESOPT) &&
1752                 (!isset(POSIXALIASES) ||
1753                  !reswdtab->getnode(reswdtab, zshlextext))) {
1754                 char *suf;
1755
1756                 an = (Alias) aliastab->getnode(aliastab, zshlextext);
1757                 if (an && !an->inuse &&
1758                     ((an->node.flags & ALIAS_GLOBAL) || incmdpos || inalmore)) {
1759                     inpush(an->text, INP_ALIAS, an);
1760                     if (an->text[0] == ' ')
1761                         aliasspaceflag = 1;
1762                     lexstop = 0;
1763                     if (zshlextext == copy)
1764                         zshlextext = tokstr;
1765                     return 1;
1766                 }
1767                 if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
1768                     suf > zshlextext && suf[-1] != Meta &&
1769                     (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
1770                     !an->inuse && incmdpos) {
1771                     inpush(dupstring(zshlextext), INP_ALIAS, NULL);
1772                     inpush(" ", INP_ALIAS, NULL);
1773                     inpush(an->text, INP_ALIAS, an);
1774                     lexstop = 0;
1775                     if (zshlextext == copy)
1776                         zshlextext = tokstr;
1777                     return 1;
1778                 }
1779             }
1780
1781             /* Then check for a reserved word */
1782             if ((incmdpos ||
1783                  (unset(IGNOREBRACES) && zshlextext[0] == '}' && !zshlextext[1])) &&
1784                 (rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
1785                 tok = rw->token;
1786                 if (tok == DINBRACK)
1787                     incond = 1;
1788             } else if (incond && !strcmp(zshlextext, "]]")) {
1789                 tok = DOUTBRACK;
1790                 incond = 0;
1791             } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1])
1792                 tok = BANG;
1793         }
1794         inalmore = 0;
1795         if (zshlextext == copy)
1796             zshlextext = tokstr;
1797     }
1798     return 0;
1799 }
1800
1801 /* skip (...) */
1802
1803 /**/
1804 static int
1805 skipcomm(void)
1806 {
1807     int pct = 1, c, start = 1;
1808
1809     cmdpush(CS_CMDSUBST);
1810     SETPARBEGIN
1811     c = Inpar;
1812     do {
1813         int iswhite;
1814         add(c);
1815         c = hgetc();
1816         if (itok(c) || lexstop)
1817             break;
1818         iswhite = inblank(c);
1819         switch (c) {
1820         case '(':
1821             pct++;
1822             break;
1823         case ')':
1824             pct--;
1825             break;
1826         case '\\':
1827             add(c);
1828             c = hgetc();
1829             break;
1830         case '\'': {
1831             int strquote = bptr[-1] == '$';
1832             add(c);
1833             STOPHIST
1834             while ((c = hgetc()) != '\'' && !lexstop) {
1835                 if (c == '\\' && strquote) {
1836                     add(c);
1837                     c = hgetc();
1838                 }
1839                 add(c);
1840             }
1841             ALLOWHIST
1842             break;
1843         }
1844         case '\"':
1845             add(c);
1846             while ((c = hgetc()) != '\"' && !lexstop)
1847                 if (c == '\\') {
1848                     add(c);
1849                     add(hgetc());
1850                 } else
1851                     add(c);
1852             break;
1853         case '`':
1854             add(c);
1855             while ((c = hgetc()) != '`' && !lexstop)
1856                 if (c == '\\')
1857                     add(c), add(hgetc());
1858                 else
1859                     add(c);
1860             break;
1861         case '#':
1862             if (start) {
1863                 add(c);
1864                 while ((c = hgetc()) != '\n' && !lexstop)
1865                     add(c);
1866                 iswhite = 1;
1867             }
1868             break;
1869         }
1870         start = iswhite;
1871     }
1872     while (pct);
1873     if (!lexstop)
1874         SETPAREND
1875     cmdpop();
1876     return lexstop;
1877 }