asm/parser.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2019 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * parser.c   source line parser for the Netwide Assembler
  36  */
  37
  38 #include "compiler.h"
  39
  40 #include "nctype.h"
  41
  42 #include "nasm.h"
  43 #include "insns.h"
  44 #include "nasmlib.h"
  45 #include "error.h"
  46 #include "stdscan.h"
  47 #include "eval.h"
  48 #include "parser.h"
  49 #include "float.h"
  50 #include "assemble.h"
  51 #include "tables.h"
  52
  53
/*
 * Forward declaration: parse_eops() below calls this before its
 * definition appears (the definition is not in this part of the file).
 */
static int end_expression_next(void);

/*
 * The token most recently produced by the scanner.  Every parsing
 * routine in this file reads it, and it is refilled by passing its
 * address to stdscan() or evaluate().
 */
static struct tokenval tokval;
  57
  58 static int prefix_slot(int prefix)
  59 {
  60     switch (prefix) {
  61     case P_WAIT:
  62         return PPS_WAIT;
  63     case R_CS:
  64     case R_DS:
  65     case R_SS:
  66     case R_ES:
  67     case R_FS:
  68     case R_GS:
  69         return PPS_SEG;
  70     case P_LOCK:
  71         return PPS_LOCK;
  72     case P_REP:
  73     case P_REPE:
  74     case P_REPZ:
  75     case P_REPNE:
  76     case P_REPNZ:
  77     case P_XACQUIRE:
  78     case P_XRELEASE:
  79     case P_BND:
  80     case P_NOBND:
  81         return PPS_REP;
  82     case P_O16:
  83     case P_O32:
  84     case P_O64:
  85     case P_OSP:
  86         return PPS_OSIZE;
  87     case P_A16:
  88     case P_A32:
  89     case P_A64:
  90     case P_ASP:
  91         return PPS_ASIZE;
  92     case P_EVEX:
  93     case P_VEX3:
  94     case P_VEX2:
  95         return PPS_VEX;
  96     default:
  97         nasm_panic("Invalid value %d passed to prefix_slot()", prefix);
  98         return -1;
  99     }
 100 }
 101
 102 static void process_size_override(insn *result, operand *op)
 103 {
 104     if (tasm_compatible_mode) {
 105         switch (tokval.t_integer) {
 106             /* For TASM compatibility a size override inside the
 107              * brackets changes the size of the operand, not the
 108              * address type of the operand as it does in standard
 109              * NASM syntax. Hence:
 110              *
 111              *  mov     eax,[DWORD val]
 112              *
 113              * is valid syntax in TASM compatibility mode. Note that
 114              * you lose the ability to override the default address
 115              * type for the instruction, but we never use anything
 116              * but 32-bit flat model addressing in our code.
 117              */
 118         case S_BYTE:
 119             op->type |= BITS8;
 120             break;
 121         case S_WORD:
 122             op->type |= BITS16;
 123             break;
 124         case S_DWORD:
 125         case S_LONG:
 126             op->type |= BITS32;
 127             break;
 128         case S_QWORD:
 129             op->type |= BITS64;
 130             break;
 131         case S_TWORD:
 132             op->type |= BITS80;
 133             break;
 134         case S_OWORD:
 135             op->type |= BITS128;
 136             break;
 137         default:
 138             nasm_nonfatal("invalid operand size specification");
 139             break;
 140         }
 141     } else {
 142         /* Standard NASM compatible syntax */
 143         switch (tokval.t_integer) {
 144         case S_NOSPLIT:
 145             op->eaflags |= EAF_TIMESTWO;
 146             break;
 147         case S_REL:
 148             op->eaflags |= EAF_REL;
 149             break;
 150         case S_ABS:
 151             op->eaflags |= EAF_ABS;
 152             break;
 153         case S_BYTE:
 154             op->disp_size = 8;
 155             op->eaflags |= EAF_BYTEOFFS;
 156             break;
 157         case P_A16:
 158         case P_A32:
 159         case P_A64:
 160             if (result->prefixes[PPS_ASIZE] &&
 161                 result->prefixes[PPS_ASIZE] != tokval.t_integer)
 162                 nasm_nonfatal("conflicting address size specifications");
 163             else
 164                 result->prefixes[PPS_ASIZE] = tokval.t_integer;
 165             break;
 166         case S_WORD:
 167             op->disp_size = 16;
 168             op->eaflags |= EAF_WORDOFFS;
 169             break;
 170         case S_DWORD:
 171         case S_LONG:
 172             op->disp_size = 32;
 173             op->eaflags |= EAF_WORDOFFS;
 174             break;
 175         case S_QWORD:
 176             op->disp_size = 64;
 177             op->eaflags |= EAF_WORDOFFS;
 178             break;
 179         default:
 180             nasm_nonfatal("invalid size specification in"
 181                           " effective address");
 182             break;
 183         }
 184     }
 185 }
 186
 187 /*
 188  * Brace decorators are are parsed here.  opmask and zeroing
 189  * decorators can be placed in any order.  e.g. zmm1 {k2}{z} or zmm2
 190  * {z}{k3} decorator(s) are placed at the end of an operand.
 191  */
 192 static bool parse_braces(decoflags_t *decoflags)
 193 {
 194     int i, j;
 195
 196     i = tokval.t_type;
 197
 198     while (true) {
 199         switch (i) {
 200         case TOKEN_OPMASK:
 201             if (*decoflags & OPMASK_MASK) {
 202                 nasm_nonfatal("opmask k%"PRIu64" is already set",
 203                               *decoflags & OPMASK_MASK);
 204                 *decoflags &= ~OPMASK_MASK;
 205             }
 206             *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
 207             break;
 208         case TOKEN_DECORATOR:
 209             j = tokval.t_integer;
 210             switch (j) {
 211             case BRC_Z:
 212                 *decoflags |= Z_MASK;
 213                 break;
 214             case BRC_1TO2:
 215             case BRC_1TO4:
 216             case BRC_1TO8:
 217             case BRC_1TO16:
 218                 *decoflags |= BRDCAST_MASK | VAL_BRNUM(j - BRC_1TO2);
 219                 break;
 220             default:
 221                 nasm_nonfatal("{%s} is not an expected decorator",
 222                               tokval.t_charptr);
 223                 break;
 224             }
 225             break;
 226         case ',':
 227         case TOKEN_EOS:
 228             return false;
 229         default:
 230             nasm_nonfatal("only a series of valid decorators expected");
 231             return true;
 232         }
 233         i = stdscan(NULL, &tokval);
 234     }
 235 }
 236
 237 static inline const expr *next_expr(const expr *e, const expr **next_list)
 238 {
 239     e++;
 240     if (!e->type) {
 241         if (next_list) {
 242             e = *next_list;
 243             *next_list = NULL;
 244         } else {
 245             e = NULL;
 246         }
 247     }
 248     return e;
 249 }
 250
 251 static inline void init_operand(operand *op)
 252 {
 253     memset(op, 0, sizeof *op);
 254
 255     op->basereg  = -1;
 256     op->indexreg = -1;
 257     op->segment  = NO_SEG;
 258     op->wrt      = NO_SEG;
 259 }
 260
 261 static int parse_mref(operand *op, const expr *e)
 262 {
 263     int b, i, s;        /* basereg, indexreg, scale */
 264     int64_t o;          /* offset */
 265
 266     b = op->basereg;
 267     i = op->indexreg;
 268     s = op->scale;
 269     o = op->offset;
 270
 271     for (; e->type; e++) {
 272         if (e->type <= EXPR_REG_END) {
 273             bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
 274
 275             if (is_gpr && e->value == 1 && b == -1) {
 276                 /* It can be basereg */
 277                 b = e->type;
 278             } else if (i == -1) {
 279                 /* Must be index register */
 280                 i = e->type;
 281                 s = e->value;
 282             } else {
 283                 if (b == -1)
 284                     nasm_nonfatal("invalid effective address: two index registers");
 285                 else if (!is_gpr)
 286                     nasm_nonfatal("invalid effective address: impossible register");
 287                 else
 288                     nasm_nonfatal("invalid effective address: too many registers");
 289                 return -1;
 290             }
 291         } else if (e->type == EXPR_UNKNOWN) {
 292             op->opflags |= OPFLAG_UNKNOWN;
 293         } else if (e->type == EXPR_SIMPLE) {
 294             o += e->value;
 295         } else if  (e->type == EXPR_WRT) {
 296             op->wrt = e->value;
 297         } else if (e->type >= EXPR_SEGBASE) {
 298             if (e->value == 1) {
 299                 if (op->segment != NO_SEG) {
 300                     nasm_nonfatal("invalid effective address: multiple base segments");
 301                     return -1;
 302                 }
 303                 op->segment = e->type - EXPR_SEGBASE;
 304             } else if (e->value == -1 &&
 305                        e->type == location.segment + EXPR_SEGBASE &&
 306                        !(op->opflags & OPFLAG_RELATIVE)) {
 307                 op->opflags |= OPFLAG_RELATIVE;
 308             } else {
 309                 nasm_nonfatal("invalid effective address: impossible segment base multiplier");
 310                 return -1;
 311             }
 312         } else {
 313             nasm_nonfatal("invalid effective address: bad subexpression type");
 314             return -1;
 315         }
 316    }
 317
 318     op->basereg  = b;
 319     op->indexreg = i;
 320     op->scale    = s;
 321     op->offset   = o;
 322     return 0;
 323 }
 324
 325 static void mref_set_optype(operand *op)
 326 {
 327     int b = op->basereg;
 328     int i = op->indexreg;
 329     int s = op->scale;
 330
 331     /* It is memory, but it can match any r/m operand */
 332     op->type |= MEMORY_ANY;
 333
 334     if (b == -1 && (i == -1 || s == 0)) {
 335         int is_rel = globalbits == 64 &&
 336             !(op->eaflags & EAF_ABS) &&
 337             ((globalrel &&
 338               !(op->eaflags & EAF_FSGS)) ||
 339              (op->eaflags & EAF_REL));
 340
 341         op->type |= is_rel ? IP_REL : MEM_OFFS;
 342     }
 343
 344     if (i != -1) {
 345         opflags_t iclass = nasm_reg_flags[i];
 346
 347         if (is_class(XMMREG,iclass))
 348             op->type |= XMEM;
 349         else if (is_class(YMMREG,iclass))
 350             op->type |= YMEM;
 351         else if (is_class(ZMMREG,iclass))
 352             op->type |= ZMEM;
 353     }
 354 }
 355
 356 /*
 357  * Convert an expression vector returned from evaluate() into an
 358  * extop structure.  Return zero on success.  Note that the eop
 359  * already has dup and elem set, so we can't clear it here.
 360  */
 361 static int value_to_extop(expr *vect, extop *eop, int32_t myseg)
 362 {
 363     eop->type = EOT_DB_NUMBER;
 364     eop->val.num.offset = 0;
 365     eop->val.num.segment = eop->val.num.wrt = NO_SEG;
 366     eop->val.num.relative = false;
 367
 368     for (; vect->type; vect++) {
 369         if (!vect->value)       /* zero term, safe to ignore */
 370             continue;
 371
 372         if (vect->type <= EXPR_REG_END) /* false if a register is present */
 373             return -1;
 374
 375         if (vect->type == EXPR_UNKNOWN) /* something we can't resolve yet */
 376             return 0;
 377
 378         if (vect->type == EXPR_SIMPLE) {
 379             /* Simple number expression */
 380             eop->val.num.offset += vect->value;
 381             continue;
 382         }
 383         if (eop->val.num.wrt == NO_SEG && !eop->val.num.relative &&
 384             vect->type == EXPR_WRT) {
 385             /* WRT term */
 386             eop->val.num.wrt = vect->value;
 387             continue;
 388         }
 389
 390         if (!eop->val.num.relative &&
 391             vect->type == EXPR_SEGBASE + myseg && vect->value == -1) {
 392             /* Expression of the form: foo - $ */
 393             eop->val.num.relative = true;
 394             continue;
 395         }
 396
 397         if (eop->val.num.segment == NO_SEG &&
 398             vect->type >= EXPR_SEGBASE && vect->value == 1) {
 399             eop->val.num.segment = vect->type - EXPR_SEGBASE;
 400             continue;
 401         }
 402
 403         /* Otherwise, badness */
 404         return -1;
 405     }
 406
 407     /* We got to the end and it was all okay */
 408     return 0;
 409 }
 410
 411 /*
 412  * Parse an extended expression, used by db et al. "elem" is the element
 413  * size; initially comes from the specific opcode (e.g. db == 1) but
 414  * can be overridden.
 415  */
 416 static int parse_eops(extop **result, bool critical, int elem)
 417 {
 418     extop *eop = NULL, *prev = NULL;
 419     extop **tail = result;
 420     int sign;
 421     int i = tokval.t_type;
 422     int oper_num = 0;
 423     bool do_subexpr = false;
 424
 425     *tail = NULL;
 426
 427     /* End of string is obvious; ) ends a sub-expression list e.g. DUP */
 428     for (i = tokval.t_type; i != TOKEN_EOS; i = stdscan(NULL, &tokval)) {
 429         char endparen = ')';   /* Is a right paren the end of list? */
 430
 431         if (i == ')')
 432             break;
 433
 434         if (!eop) {
 435             nasm_new(eop);
 436             eop->dup  = 1;
 437             eop->elem = elem;
 438             do_subexpr = false;
 439         }
 440         sign = +1;
 441
 442         /*
 443          * end_expression_next() here is to distinguish this from
 444          * a string used as part of an expression...
 445          */
 446         if (i == TOKEN_QMARK) {
 447             eop->type = EOT_DB_RESERVE;
 448         } else if (do_subexpr && i == '(') {
 449             extop *subexpr;
 450
 451             stdscan(NULL, &tokval); /* Skip paren */
 452             if (parse_eops(&eop->val.subexpr, critical, eop->elem) < 0)
 453                 goto fail;
 454
 455             subexpr = eop->val.subexpr;
 456             if (!subexpr) {
 457                 /* Subexpression is empty */
 458                 eop->type = EOT_NOTHING;
 459             } else if (!subexpr->next) {
 460                 /* Subexpression is a single element, flatten */
 461                 eop->val   = subexpr->val;
 462                 eop->type  = subexpr->type;
 463                 eop->dup  *= subexpr->dup;
 464                 nasm_free(subexpr);
 465             } else {
 466                 eop->type = EOT_EXTOP;
 467             }
 468
 469             /* We should have ended on a closing paren */
 470             if (tokval.t_type != ')') {
 471                 nasm_nonfatal("expected `)' after subexpression, got `%s'",
 472                               i == TOKEN_EOS ?
 473                               "end of line" : tokval.t_charptr);
 474                 goto fail;
 475             }
 476             endparen = 0;       /* This time the paren is not the end */
 477         } else if (i == '%') {
 478             /* %(expression_list) */
 479             do_subexpr = true;
 480             continue;
 481         } else if (i == TOKEN_SIZE) {
 482             /* Element size override */
 483             eop->elem = tokval.t_inttwo;
 484             do_subexpr = true;
 485             continue;
 486         } else if (i == TOKEN_STR && end_expression_next()) {
 487             eop->type            = EOT_DB_STRING;
 488             eop->val.string.data = tokval.t_charptr;
 489             eop->val.string.len  = tokval.t_inttwo;
 490         } else if (i == TOKEN_STRFUNC) {
 491             bool parens = false;
 492             const char *funcname = tokval.t_charptr;
 493             enum strfunc func = tokval.t_integer;
 494
 495             i = stdscan(NULL, &tokval);
 496             if (i == '(') {
 497                 parens = true;
 498                 endparen = 0;
 499                 i = stdscan(NULL, &tokval);
 500             }
 501             if (i != TOKEN_STR) {
 502                 nasm_nonfatal("%s must be followed by a string constant",
 503                               funcname);
 504                 eop->type = EOT_NOTHING;
 505             } else {
 506                 eop->type = EOT_DB_STRING_FREE;
 507                 eop->val.string.len =
 508                     string_transform(tokval.t_charptr, tokval.t_inttwo,
 509                                      &eop->val.string.data, func);
 510                 if (eop->val.string.len == (size_t)-1) {
 511                     nasm_nonfatal("invalid input string to %s", funcname);
 512                     eop->type = EOT_NOTHING;
 513                 }
 514             }
 515             if (parens && i && i != ')') {
 516                 i = stdscan(NULL, &tokval);
 517                 if (i != ')')
 518                     nasm_nonfatal("unterminated %s function", funcname);
 519             }
 520         } else if (i == '-' || i == '+') {
 521             char *save = stdscan_get();
 522             struct tokenval tmptok;
 523
 524             sign = (i == '-') ? -1 : 1;
 525             if (stdscan(NULL, &tmptok) != TOKEN_FLOAT) {
 526                 stdscan_set(save);
 527                 goto is_expression;
 528             } else {
 529                 tokval = tmptok;
 530                 goto is_float;
 531             }
 532         } else if (i == TOKEN_FLOAT) {
 533         is_float:
 534             eop->type = EOT_DB_FLOAT;
 535
 536             if (eop->elem > 16) {
 537                 nasm_nonfatal("no %d-bit floating-point format supported",
 538                               eop->elem << 3);
 539                 eop->val.string.len = 0;
 540             } else if (eop->elem < 1) {
 541                 nasm_nonfatal("floating-point constant"
 542                               " encountered in unknown instruction");
 543                 /*
 544                  * fix suggested by Pedro Gimeno... original line was:
 545                  * eop->type = EOT_NOTHING;
 546                  */
 547                 eop->val.string.len = 0;
 548             } else {
 549                 eop->val.string.len = eop->elem;
 550
 551                 eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len);
 552                 eop->val.string.data = (char *)eop + sizeof(extop);
 553                 if (!float_const(tokval.t_charptr, sign,
 554                                  (uint8_t *)eop->val.string.data,
 555                                  eop->val.string.len))
 556                     eop->val.string.len = 0;
 557             }
 558             if (!eop->val.string.len)
 559                 eop->type = EOT_NOTHING;
 560         } else {
 561             /* anything else, assume it is an expression */
 562             expr *value;
 563
 564         is_expression:
 565             value = evaluate(stdscan, NULL, &tokval, NULL,
 566                              critical, NULL);
 567             i = tokval.t_type;
 568             if (!value)                  /* Error in evaluator */
 569                 goto fail;
 570             if (tokval.t_flag & TFLAG_DUP) {
 571                 /* Expression followed by DUP */
 572                 if (!is_simple(value)) {
 573                     nasm_nonfatal("non-constant argument supplied to DUP");
 574                     goto fail;
 575                 } else if (value->value < 0) {
 576                     nasm_nonfatal("negative argument supplied to DUP");
 577                     goto fail;
 578                 }
 579                 eop->dup *= (size_t)value->value;
 580                 do_subexpr = true;
 581                 continue;
 582             }
 583             if (value_to_extop(value, eop, location.segment)) {
 584                 nasm_nonfatal("expression is not simple or relocatable");
 585             }
 586         }
 587
 588         if (eop->dup == 0 || eop->type == EOT_NOTHING) {
 589             nasm_free(eop);
 590         } else if (eop->type == EOT_DB_RESERVE &&
 591                    prev && prev->type == EOT_DB_RESERVE &&
 592                    prev->elem == eop->elem) {
 593             /* Coalesce multiple EOT_DB_RESERVE */
 594             prev->dup += eop->dup;
 595             nasm_free(eop);
 596         } else {
 597             /* Add this eop to the end of the chain */
 598             prev = eop;
 599             *tail = eop;
 600             tail = &eop->next;
 601         }
 602
 603         oper_num++;
 604         eop = NULL;             /* Done with this operand */
 605
 606         /*
 607          * We're about to call stdscan(), which will eat the
 608          * comma that we're currently sitting on between
 609          * arguments. However, we'd better check first that it
 610          * _is_ a comma.
 611          */
 612         if (i == TOKEN_EOS || i == endparen)    /* Already at end? */
 613             break;
 614         if (i != ',') {
 615             i = stdscan(NULL, &tokval);         /* eat the comma or final paren */
 616             if (i == TOKEN_EOS || i == ')')     /* got end of expression */
 617                 break;
 618             if (i != ',') {
 619                 nasm_nonfatal("comma expected after operand");
 620                 goto fail;
 621             }
 622         }
 623     }
 624
 625     return oper_num;
 626
 627 fail:
 628     if (eop)
 629         nasm_free(eop);
 630     return -1;
 631 }
 632
 633 insn *parse_line(char *buffer, insn *result)
 634 {
 635     bool insn_is_label = false;
 636     struct eval_hints hints;
 637     int opnum;
 638     bool critical;
 639     bool first;
 640     bool recover;
 641     bool far_jmp_ok;
 642     int i;
 643
 644     nasm_static_assert(P_none == 0);
 645
 646 restart_parse:
 647     first               = true;
 648     result->forw_ref    = false;
 649
 650     stdscan_reset();
 651     stdscan_set(buffer);
 652     i = stdscan(NULL, &tokval);
 653
 654     memset(result->prefixes, P_none, sizeof(result->prefixes));
 655     result->times       = 1;    /* No TIMES either yet */
 656     result->label       = NULL; /* Assume no label */
 657     result->eops        = NULL; /* must do this, whatever happens */
 658     result->operands    = 0;    /* must initialize this */
 659     result->evex_rm     = 0;    /* Ensure EVEX rounding mode is reset */
 660     result->evex_brerop = -1;   /* Reset EVEX broadcasting/ER op position */
 661
 662     /* Ignore blank lines */
 663     if (i == TOKEN_EOS)
 664         goto fail;
 665
 666     if (i != TOKEN_ID       &&
 667         i != TOKEN_INSN     &&
 668         i != TOKEN_PREFIX   &&
 669         (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) {
 670         nasm_nonfatal("label or instruction expected at start of line");
 671         goto fail;
 672     }
 673
 674     if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) {
 675         /* there's a label here */
 676         first = false;
 677         result->label = tokval.t_charptr;
 678         i = stdscan(NULL, &tokval);
 679         if (i == ':') {         /* skip over the optional colon */
 680             i = stdscan(NULL, &tokval);
 681         } else if (i == 0) {
 682             /*!
 683              *!label-orphan [on] labels alone on lines without trailing `:'
 684              *!=orphan-labels
 685              *!  warns about source lines which contain no instruction but define
 686              *!  a label without a trailing colon. This is most likely indicative
 687              *!  of a typo, but is technically correct NASM syntax (see \k{syntax}.)
 688              */
 689             nasm_warn(WARN_LABEL_ORPHAN ,
 690                        "label alone on a line without a colon might be in error");
 691         }
 692         if (i != TOKEN_INSN || tokval.t_integer != I_EQU) {
 693             /*
 694              * FIXME: location.segment could be NO_SEG, in which case
 695              * it is possible we should be passing 'absolute.segment'. Look into this.
 696              * Work out whether that is *really* what we should be doing.
 697              * Generally fix things. I think this is right as it is, but
 698              * am still not certain.
 699              */
 700             define_label(result->label,
 701                          in_absolute ? absolute.segment : location.segment,
 702                          location.offset, true);
 703         }
 704     }
 705
 706     /* Just a label here */
 707     if (i == TOKEN_EOS)
 708         goto fail;
 709
 710     while (i == TOKEN_PREFIX ||
 711            (i == TOKEN_REG && IS_SREG(tokval.t_integer))) {
 712         first = false;
 713
 714         /*
 715          * Handle special case: the TIMES prefix.
 716          */
 717         if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
 718             expr *value;
 719
 720             i = stdscan(NULL, &tokval);
 721             value = evaluate(stdscan, NULL, &tokval, NULL, pass_stable(), NULL);
 722             i = tokval.t_type;
 723             if (!value)                  /* Error in evaluator */
 724                 goto fail;
 725             if (!is_simple(value)) {
 726                 nasm_nonfatal("non-constant argument supplied to TIMES");
 727                 result->times = 1L;
 728             } else {
 729                 result->times = value->value;
 730                 if (value->value < 0) {
 731                     nasm_nonfatalf(ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
 732                     result->times = 0;
 733                 }
 734             }
 735         } else {
 736             int slot = prefix_slot(tokval.t_integer);
 737             if (result->prefixes[slot]) {
 738                if (result->prefixes[slot] == tokval.t_integer)
 739                     nasm_warn(WARN_OTHER, "instruction has redundant prefixes");
 740                else
 741                     nasm_nonfatal("instruction has conflicting prefixes");
 742             }
 743             result->prefixes[slot] = tokval.t_integer;
 744             i = stdscan(NULL, &tokval);
 745         }
 746     }
 747
 748     if (i != TOKEN_INSN) {
 749         int j;
 750         enum prefixes pfx;
 751
 752         for (j = 0; j < MAXPREFIX; j++) {
 753             if ((pfx = result->prefixes[j]) != P_none)
 754                 break;
 755         }
 756
 757         if (i == 0 && pfx != P_none) {
 758             /*
 759              * Instruction prefixes are present, but no actual
 760              * instruction. This is allowed: at this point we
 761              * invent a notional instruction of RESB 0.
 762              */
 763             result->opcode          = I_RESB;
 764             result->operands        = 1;
 765             nasm_zero(result->oprs);
 766             result->oprs[0].type    = IMMEDIATE;
 767             result->oprs[0].offset  = 0L;
 768             result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
 769             return result;
 770         } else {
 771             nasm_nonfatal("parser: instruction expected");
 772             goto fail;
 773         }
 774     }
 775
 776     result->opcode = tokval.t_integer;
 777     result->condition = tokval.t_inttwo;
 778
 779     /*
 780      * INCBIN cannot be satisfied with incorrectly
 781      * evaluated operands, since the correct values _must_ be known
 782      * on the first pass. Hence, even in pass one, we set the
 783      * `critical' flag on calling evaluate(), so that it will bomb
 784      * out on undefined symbols.
 785      */
 786     critical = pass_final() || (result->opcode == I_INCBIN);
 787
 788     if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) {
 789         int oper_num;
 790
 791         i = stdscan(NULL, &tokval);
 792
 793         if (first && i == ':') {
 794             /* Really a label */
 795             insn_is_label = true;
 796             goto restart_parse;
 797         }
 798         first = false;
 799         oper_num = parse_eops(&result->eops, critical, db_bytes(result->opcode));
 800         if (oper_num < 0)
 801             goto fail;
 802
 803         if (result->opcode == I_INCBIN) {
 804             /*
 805              * Correct syntax for INCBIN is that there should be
 806              * one string operand, followed by one or two numeric
 807              * operands.
 808              */
 809             if (!result->eops || result->eops->type != EOT_DB_STRING)
 810                 nasm_nonfatal("`incbin' expects a file name");
 811             else if (result->eops->next &&
 812                      result->eops->next->type != EOT_DB_NUMBER)
 813                 nasm_nonfatal("`incbin': second parameter is"
 814                               " non-numeric");
 815             else if (result->eops->next && result->eops->next->next &&
 816                      result->eops->next->next->type != EOT_DB_NUMBER)
 817                 nasm_nonfatal("`incbin': third parameter is"
 818                               " non-numeric");
 819             else if (result->eops->next && result->eops->next->next &&
 820                      result->eops->next->next->next)
 821                 nasm_nonfatal("`incbin': more than three parameters");
 822             else
 823                 return result;
 824             /*
 825              * If we reach here, one of the above errors happened.
 826              * Throw the instruction away.
 827              */
 828             goto fail;
 829         } else {
 830             /* DB et al */
 831             result->operands = oper_num;
 832             if (oper_num == 0)
 833                 nasm_warn(WARN_OTHER, "no operand for data declaration");
 834         }
 835         return result;
 836     }
 837
 838     /*
 839      * Now we begin to parse the operands. There may be up to four
 840      * of these, separated by commas, and terminated by a zero token.
 841      */
 842     far_jmp_ok = result->opcode == I_JMP || result->opcode == I_CALL;
 843
 844     for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
 845         operand *op = &result->oprs[opnum];
 846         expr *value;            /* used most of the time */
 847         bool mref = false;      /* is this going to be a memory ref? */
 848         int bracket = 0;        /* is it a [] mref, or a "naked" mref? */
 849         bool mib;               /* compound (mib) mref? */
 850         int setsize = 0;
 851         decoflags_t brace_flags = 0;    /* flags for decorators in braces */
 852
 853         init_operand(op);
 854
 855         i = stdscan(NULL, &tokval);
 856         if (i == TOKEN_EOS)
 857             break;              /* end of operands: get out of here */
 858         else if (first && i == ':') {
 859             insn_is_label = true;
 860             goto restart_parse;
 861         }
 862         first = false;
 863         op->type = 0; /* so far, no override */
 864         /* size specifiers */
 865         while (i == TOKEN_SPECIAL || i == TOKEN_SIZE) {
 866             switch (tokval.t_integer) {
 867             case S_BYTE:
 868                 if (!setsize)   /* we want to use only the first */
 869                     op->type |= BITS8;
 870                 setsize = 1;
 871                 break;
 872             case S_WORD:
 873                 if (!setsize)
 874                     op->type |= BITS16;
 875                 setsize = 1;
 876                 break;
 877             case S_DWORD:
 878             case S_LONG:
 879                 if (!setsize)
 880                     op->type |= BITS32;
 881                 setsize = 1;
 882                 break;
 883             case S_QWORD:
 884                 if (!setsize)
 885                     op->type |= BITS64;
 886                 setsize = 1;
 887                 break;
 888             case S_TWORD:
 889                 if (!setsize)
 890                     op->type |= BITS80;
 891                 setsize = 1;
 892                 break;
 893             case S_OWORD:
 894                 if (!setsize)
 895                     op->type |= BITS128;
 896                 setsize = 1;
 897                 break;
 898             case S_YWORD:
 899                 if (!setsize)
 900                     op->type |= BITS256;
 901                 setsize = 1;
 902                 break;
 903             case S_ZWORD:
 904                 if (!setsize)
 905                     op->type |= BITS512;
 906                 setsize = 1;
 907                 break;
 908             case S_TO:
 909                 op->type |= TO;
 910                 break;
 911             case S_STRICT:
 912                 op->type |= STRICT;
 913                 break;
 914             case S_FAR:
 915                 op->type |= FAR;
 916                 break;
 917             case S_NEAR:
 918                 op->type |= NEAR;
 919                 break;
 920             case S_SHORT:
 921                 op->type |= SHORT;
 922                 break;
 923             default:
 924                 nasm_nonfatal("invalid operand size specification");
 925             }
 926             i = stdscan(NULL, &tokval);
 927         }
 928
 929         if (i == '[' || i == TOKEN_MASM_PTR || i == '&') {
 930             /* memory reference */
 931             mref = true;
 932             bracket += (i == '[');
 933             i = stdscan(NULL, &tokval);
 934         }
 935
 936     mref_more:
 937         if (mref) {
 938             bool done = false;
 939             bool nofw = false;
 940
 941             while (!done) {
 942                 switch (i) {
 943                 case TOKEN_SPECIAL:
 944                 case TOKEN_SIZE:
 945                 case TOKEN_PREFIX:
 946                     process_size_override(result, op);
 947                     break;
 948
 949                 case '[':
 950                     bracket++;
 951                     break;
 952
 953                 case ',':
 954                     tokval.t_type = TOKEN_NUM;
 955                     tokval.t_integer = 0;
 956                     stdscan_set(stdscan_get() - 1);     /* rewind the comma */
 957                     done = nofw = true;
 958                     break;
 959
 960                 case TOKEN_MASM_FLAT:
 961                     i = stdscan(NULL, &tokval);
 962                     if (i != ':') {
 963                         nasm_nonfatal("unknown use of FLAT in MASM emulation");
 964                         nofw = true;
 965                     }
 966                     done = true;
 967                     break;
 968
 969                 default:
 970                     done = nofw = true;
 971                     break;
 972                 }
 973
 974                 if (!nofw)
 975                     i = stdscan(NULL, &tokval);
 976             }
 977         }
 978
 979         value = evaluate(stdscan, NULL, &tokval,
 980                          &op->opflags, critical, &hints);
 981         i = tokval.t_type;
 982         if (op->opflags & OPFLAG_FORWARD) {
 983             result->forw_ref = true;
 984         }
 985         if (!value)                  /* Error in evaluator */
 986             goto fail;
 987
 988         if (i == '[' && !bracket) {
 989             /* displacement[regs] syntax */
 990             mref = true;
 991             parse_mref(op, value); /* Process what we have so far */
 992             goto mref_more;
 993         }
 994
 995         if (i == ':' && (mref || !far_jmp_ok)) {
 996             /* segment override? */
 997             mref = true;
 998
 999             /*
1000              * Process the segment override.
1001              */
1002             if (value[1].type   != 0    ||
1003                 value->value    != 1    ||
1004                 !IS_SREG(value->type))
1005                 nasm_nonfatal("invalid segment override");
1006             else if (result->prefixes[PPS_SEG])
1007                 nasm_nonfatal("instruction has conflicting segment overrides");
1008             else {
1009                 result->prefixes[PPS_SEG] = value->type;
1010                 if (IS_FSGS(value->type))
1011                     op->eaflags |= EAF_FSGS;
1012             }
1013
1014             i = stdscan(NULL, &tokval); /* then skip the colon */
1015             goto mref_more;
1016         }
1017
1018         mib = false;
1019         if (mref && bracket && i == ',') {
1020             /* [seg:base+offset,index*scale] syntax (mib) */
1021             operand o2;         /* Index operand */
1022
1023             if (parse_mref(op, value))
1024                 goto fail;
1025
1026             i = stdscan(NULL, &tokval); /* Eat comma */
1027             value = evaluate(stdscan, NULL, &tokval, &op->opflags,
1028                              critical, &hints);
1029             i = tokval.t_type;
1030             if (!value)
1031                 goto fail;
1032
1033             init_operand(&o2);
1034             if (parse_mref(&o2, value))
1035                 goto fail;
1036
1037             if (o2.basereg != -1 && o2.indexreg == -1) {
1038                 o2.indexreg = o2.basereg;
1039                 o2.scale = 1;
1040                 o2.basereg = -1;
1041             }
1042
1043             if (op->indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
1044                 o2.segment != NO_SEG || o2.wrt != NO_SEG) {
1045                 nasm_nonfatal("invalid mib expression");
1046                 goto fail;
1047             }
1048
1049             op->indexreg = o2.indexreg;
1050             op->scale = o2.scale;
1051
1052             if (op->basereg != -1) {
1053                 op->hintbase = op->basereg;
1054                 op->hinttype = EAH_MAKEBASE;
1055             } else if (op->indexreg != -1) {
1056                 op->hintbase = op->indexreg;
1057                 op->hinttype = EAH_NOTBASE;
1058             } else {
1059                 op->hintbase = -1;
1060                 op->hinttype = EAH_NOHINT;
1061             }
1062
1063             mib = true;
1064         }
1065
1066         recover = false;
1067         if (mref) {
1068             if (bracket == 1) {
1069                 if (i == ']') {
1070                     bracket--;
1071                     i = stdscan(NULL, &tokval);
1072                 } else {
1073                     nasm_nonfatal("expecting ] at end of memory operand");
1074                     recover = true;
1075                 }
1076             } else if (bracket == 0) {
1077                 /* Do nothing */
1078             } else if (bracket > 0) {
1079                 nasm_nonfatal("excess brackets in memory operand");
1080                 recover = true;
1081             } else if (bracket < 0) {
1082                 nasm_nonfatal("unmatched ] in memory operand");
1083                 recover = true;
1084             }
1085
1086             if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
1087                 /* parse opmask (and zeroing) after an operand */
1088                 recover = parse_braces(&brace_flags);
1089                 i = tokval.t_type;
1090             }
1091             if (!recover && i != 0 && i != ',') {
1092                 nasm_nonfatal("comma, decorator or end of line expected, got %d", i);
1093                 recover = true;
1094             }
1095         } else {                /* immediate operand */
1096             if (i != 0 && i != ',' && i != ':' &&
1097                 i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
1098                 nasm_nonfatal("comma, colon, decorator or end of "
1099                               "line expected after operand");
1100                 recover = true;
1101             } else if (i == ':') {
1102                 op->type |= COLON;
1103             } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
1104                 /* parse opmask (and zeroing) after an operand */
1105                 recover = parse_braces(&brace_flags);
1106             }
1107         }
1108         if (recover) {
1109             do {                /* error recovery */
1110                 i = stdscan(NULL, &tokval);
1111             } while (i != 0 && i != ',');
1112         }
1113
1114         /*
1115          * now convert the exprs returned from evaluate()
1116          * into operand descriptions...
1117          */
1118         op->decoflags |= brace_flags;
1119
1120         if (mref) {             /* it's a memory reference */
1121             /* A mib reference was fully parsed already */
1122             if (!mib) {
1123                 if (parse_mref(op, value))
1124                     goto fail;
1125                 op->hintbase = hints.base;
1126                 op->hinttype = hints.type;
1127             }
1128             mref_set_optype(op);
1129         } else if ((op->type & FAR) && !far_jmp_ok) {
1130                 nasm_nonfatal("invalid use of FAR operand specifier");
1131                 recover = true;
1132         } else {                /* it's not a memory reference */
1133             if (is_just_unknown(value)) {       /* it's immediate but unknown */
1134                 op->type      |= IMMEDIATE;
1135                 op->opflags   |= OPFLAG_UNKNOWN;
1136                 op->offset    = 0;        /* don't care */
1137                 op->segment   = NO_SEG;   /* don't care again */
1138                 op->wrt       = NO_SEG;   /* still don't care */
1139
1140                 if(optimizing.level >= 0 && !(op->type & STRICT)) {
1141                     /* Be optimistic */
1142                     op->type |=
1143                         UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD;
1144                 }
1145             } else if (is_reloc(value)) {       /* it's immediate */
1146                 uint64_t n = reloc_value(value);
1147
1148                 op->type      |= IMMEDIATE;
1149                 op->offset    = n;
1150                 op->segment   = reloc_seg(value);
1151                 op->wrt       = reloc_wrt(value);
1152                 op->opflags   |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
1153
1154                 if (is_simple(value)) {
1155                     if (n == 1)
1156                         op->type |= UNITY;
1157                     if (optimizing.level >= 0 && !(op->type & STRICT)) {
1158                         if ((uint32_t) (n + 128) <= 255)
1159                             op->type |= SBYTEDWORD;
1160                         if ((uint16_t) (n + 128) <= 255)
1161                             op->type |= SBYTEWORD;
1162                         if (n <= UINT64_C(0xFFFFFFFF))
1163                             op->type |= UDWORD;
1164                         if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF))
1165                             op->type |= SDWORD;
1166                     }
1167                 }
1168             } else if (value->type == EXPR_RDSAE) {
1169                 /*
1170                  * it's not an operand but a rounding or SAE decorator.
1171                  * put the decorator information in the (opflag_t) type field
1172                  * of previous operand.
1173                  */
1174                 opnum--; op--;
1175                 switch (value->value) {
1176                 case BRC_RN:
1177                 case BRC_RU:
1178                 case BRC_RD:
1179                 case BRC_RZ:
1180                 case BRC_SAE:
1181                     op->decoflags |= (value->value == BRC_SAE ? SAE : ER);
1182                     result->evex_rm = value->value;
1183                     break;
1184                 default:
1185                     nasm_nonfatal("invalid decorator");
1186                     break;
1187                 }
1188             } else {            /* it's a register */
1189                 opflags_t rs;
1190                 uint64_t regset_size = 0;
1191
1192                 if (value->type >= EXPR_SIMPLE || value->value != 1) {
1193                     nasm_nonfatal("invalid operand type");
1194                     goto fail;
1195                 }
1196
1197                 /*
1198                  * We do not allow any kind of expression, except for
1199                  * reg+value in which case it is a register set.
1200                  */
1201                 for (i = 1; value[i].type; i++) {
1202                     if (!value[i].value)
1203                         continue;
1204
1205                     switch (value[i].type) {
1206                     case EXPR_SIMPLE:
1207                         if (!regset_size) {
1208                             regset_size = value[i].value + 1;
1209                             break;
1210                         }
1211                         /* fallthrough */
1212                     default:
1213                         nasm_nonfatal("invalid operand type");
1214                         goto fail;
1215                     }
1216                 }
1217
1218                 if ((regset_size & (regset_size - 1)) ||
1219                     regset_size >= (UINT64_C(1) << REGSET_BITS)) {
1220                     nasm_nonfatalf(ERR_PASS2, "invalid register set size");
1221                     regset_size = 0;
1222                 }
1223
1224                 /* clear overrides, except TO which applies to FPU regs */
1225                 if (op->type & ~TO) {
1226                     /*
1227                      * we want to produce a warning iff the specified size
1228                      * is different from the register size
1229                      */
1230                     rs = op->type & SIZE_MASK;
1231                 } else {
1232                     rs = 0;
1233                 }
1234
1235                 /*
1236                  * Make sure we're not out of nasm_reg_flags, still
1237                  * probably this should be fixed when we're defining
1238                  * the label.
1239                  *
1240                  * An easy trigger is
1241                  *
1242                  *      e equ 0x80000000:0
1243                  *      pshufw word e-0
1244                  *
1245                  */
1246                 if (value->type < EXPR_REG_START ||
1247                     value->type > EXPR_REG_END) {
1248                         nasm_nonfatal("invalid operand type");
1249                         goto fail;
1250                 }
1251
1252                 op->type      &= TO;
1253                 op->type      |= REGISTER;
1254                 op->type      |= nasm_reg_flags[value->type];
1255                 op->type      |= (regset_size >> 1) << REGSET_SHIFT;
1256                 op->decoflags |= brace_flags;
1257                 op->basereg   = value->type;
1258
1259                 if (rs) {
1260                     opflags_t opsize = nasm_reg_flags[value->type] & SIZE_MASK;
1261                     if (!opsize) {
1262                         op->type |= rs; /* For non-size-specific registers, permit size override */
1263                     } else if (opsize != rs) {
1264                         /*!
1265                          *!regsize [on] register size specification ignored
1266                          *!
1267                          *!  warns about a register with implicit size (such as \c{EAX}, which is always 32 bits)
1268                          *!  been given an explicit size specification which is inconsistent with the size
1269                          *!  of the named register, e.g. \c{WORD EAX}. \c{DWORD EAX} or \c{WORD AX} are
1270                          *!  permitted, and do not trigger this warning. Some registers which \e{do not} imply
1271                          *!  a specific size, such as \c{K0}, may need this specification unless the instruction
1272                          *!  itself implies the instruction size:
1273                          *!-
1274                          *!  \c      KMOVW K0,[foo]          ; Permitted, KMOVW implies 16 bits
1275                          *!  \c      KMOV  WORD K0,[foo]     ; Permitted, WORD K0 specifies instruction size
1276                          *!  \c      KMOV  K0,WORD [foo]     ; Permitted, WORD [foo] specifies instruction size
1277                          *!  \c      KMOV  K0,[foo]          ; Not permitted, instruction size ambiguous
1278                          */
1279                         nasm_warn(WARN_REGSIZE, "invalid register size specification ignored");
1280                     }
1281                 }
1282             }
1283         }
1284
1285         /* remember the position of operand having broadcasting/ER mode */
1286         if (op->decoflags & (BRDCAST_MASK | ER | SAE))
1287             result->evex_brerop = opnum;
1288     }
1289
1290     result->operands = opnum; /* set operand count */
1291
1292     /* clear remaining operands */
1293     while (opnum < MAX_OPERANDS)
1294         result->oprs[opnum++].type = 0;
1295
1296     return result;
1297
1298 fail:
1299     result->opcode = I_none;
1300     return result;
1301 }
1302
1303 static int end_expression_next(void)
1304 {
1305     struct tokenval tv;
1306     char *p;
1307     int i;
1308
1309     p = stdscan_get();
1310     i = stdscan(NULL, &tv);
1311     stdscan_set(p);
1312
1313     return (i == ',' || i == ';' || i == ')' || !i);
1314 }
1315
1316 static void free_eops(extop *e)
1317 {
1318     extop *next;
1319
1320     while (e) {
1321         next = e->next;
1322         switch (e->type) {
1323         case EOT_EXTOP:
1324             free_eops(e->val.subexpr);
1325             break;
1326
1327         case EOT_DB_STRING_FREE:
1328             nasm_free(e->val.string.data);
1329             break;
1330
1331         default:
1332             break;
1333         }
1334
1335         nasm_free(e);
1336         e = next;
1337     }
1338 }
1339
1340 void cleanup_insn(insn * i)
1341 {
1342     free_eops(i->eops);
1343 }