asm/parser.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * parser.c   source line parser for the Netwide Assembler
  36  */
  37
  38 #include "compiler.h"
  39
  40 #include "nctype.h"
  41
  42 #include "nasm.h"
  43 #include "insns.h"
  44 #include "nasmlib.h"
  45 #include "error.h"
  46 #include "stdscan.h"
  47 #include "eval.h"
  48 #include "parser.h"
  49 #include "floats.h"
  50 #include "assemble.h"
  51 #include "tables.h"
  52
  53
   /*
    * Forward declaration: parse_eops() calls this helper to tell a
    * stand-alone string operand from a string that is part of a larger
    * expression.
    */
   54 static int end_expression_next(void);
   55
   /*
    * Current token.  Its address is handed to stdscan() and evaluate(),
    * and the parsing routines in this file read the result from here.
    */
   56 static struct tokenval tokval;
  57
  58 static int prefix_slot(int prefix)
  59 {
  60     switch (prefix) {
  61     case P_WAIT:
  62         return PPS_WAIT;
  63     case R_CS:
  64     case R_DS:
  65     case R_SS:
  66     case R_ES:
  67     case R_FS:
  68     case R_GS:
  69         return PPS_SEG;
  70     case P_LOCK:
  71         return PPS_LOCK;
  72     case P_REP:
  73     case P_REPE:
  74     case P_REPZ:
  75     case P_REPNE:
  76     case P_REPNZ:
  77     case P_XACQUIRE:
  78     case P_XRELEASE:
  79     case P_BND:
  80     case P_NOBND:
  81         return PPS_REP;
  82     case P_O16:
  83     case P_O32:
  84     case P_O64:
  85     case P_OSP:
  86         return PPS_OSIZE;
  87     case P_A16:
  88     case P_A32:
  89     case P_A64:
  90     case P_ASP:
  91         return PPS_ASIZE;
  92     case P_EVEX:
  93     case P_VEX3:
  94     case P_VEX2:
  95         return PPS_VEX;
  96     default:
  97         nasm_panic("Invalid value %d passed to prefix_slot()", prefix);
  98         return -1;
  99     }
 100 }
 101
 102 static void process_size_override(insn *result, operand *op)
 103 {
 104     if (tasm_compatible_mode) {
 105         switch (tokval.t_integer) {
 106             /* For TASM compatibility a size override inside the
 107              * brackets changes the size of the operand, not the
 108              * address type of the operand as it does in standard
 109              * NASM syntax. Hence:
 110              *
 111              *  mov     eax,[DWORD val]
 112              *
 113              * is valid syntax in TASM compatibility mode. Note that
 114              * you lose the ability to override the default address
 115              * type for the instruction, but we never use anything
 116              * but 32-bit flat model addressing in our code.
 117              */
 118         case S_BYTE:
 119             op->type |= BITS8;
 120             break;
 121         case S_WORD:
 122             op->type |= BITS16;
 123             break;
 124         case S_DWORD:
 125         case S_LONG:
 126             op->type |= BITS32;
 127             break;
 128         case S_QWORD:
 129             op->type |= BITS64;
 130             break;
 131         case S_TWORD:
 132             op->type |= BITS80;
 133             break;
 134         case S_OWORD:
 135             op->type |= BITS128;
 136             break;
 137         default:
 138             nasm_nonfatal("invalid operand size specification");
 139             break;
 140         }
 141     } else {
 142         /* Standard NASM compatible syntax */
 143         switch (tokval.t_integer) {
 144         case S_NOSPLIT:
 145             op->eaflags |= EAF_TIMESTWO;
 146             break;
 147         case S_REL:
 148             op->eaflags |= EAF_REL;
 149             break;
 150         case S_ABS:
 151             op->eaflags |= EAF_ABS;
 152             break;
 153         case S_BYTE:
 154             op->disp_size = 8;
 155             op->eaflags |= EAF_BYTEOFFS;
 156             break;
 157         case P_A16:
 158         case P_A32:
 159         case P_A64:
 160             if (result->prefixes[PPS_ASIZE] &&
 161                 result->prefixes[PPS_ASIZE] != tokval.t_integer)
 162                 nasm_nonfatal("conflicting address size specifications");
 163             else
 164                 result->prefixes[PPS_ASIZE] = tokval.t_integer;
 165             break;
 166         case S_WORD:
 167             op->disp_size = 16;
 168             op->eaflags |= EAF_WORDOFFS;
 169             break;
 170         case S_DWORD:
 171         case S_LONG:
 172             op->disp_size = 32;
 173             op->eaflags |= EAF_WORDOFFS;
 174             break;
 175         case S_QWORD:
 176             op->disp_size = 64;
 177             op->eaflags |= EAF_WORDOFFS;
 178             break;
 179         default:
 180             nasm_nonfatal("invalid size specification in"
 181                           " effective address");
 182             break;
 183         }
 184     }
 185 }
 186
 187 /*
 188  * Brace decorators are are parsed here.  opmask and zeroing
 189  * decorators can be placed in any order.  e.g. zmm1 {k2}{z} or zmm2
 190  * {z}{k3} decorator(s) are placed at the end of an operand.
 191  */
 192 static bool parse_braces(decoflags_t *decoflags)
 193 {
 194     int i, j;
 195
 196     i = tokval.t_type;
 197
 198     while (true) {
 199         switch (i) {
 200         case TOKEN_OPMASK:
 201             if (*decoflags & OPMASK_MASK) {
 202                 nasm_nonfatal("opmask k%"PRIu64" is already set",
 203                               *decoflags & OPMASK_MASK);
 204                 *decoflags &= ~OPMASK_MASK;
 205             }
 206             *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
 207             break;
 208         case TOKEN_DECORATOR:
 209             j = tokval.t_integer;
 210             switch (j) {
 211             case BRC_Z:
 212                 *decoflags |= Z_MASK;
 213                 break;
 214             case BRC_1TO2:
 215             case BRC_1TO4:
 216             case BRC_1TO8:
 217             case BRC_1TO16:
 218                 *decoflags |= BRDCAST_MASK | VAL_BRNUM(j - BRC_1TO2);
 219                 break;
 220             default:
 221                 nasm_nonfatal("{%s} is not an expected decorator",
 222                               tokval.t_charptr);
 223                 break;
 224             }
 225             break;
 226         case ',':
 227         case TOKEN_EOS:
 228             return false;
 229         default:
 230             nasm_nonfatal("only a series of valid decorators expected");
 231             return true;
 232         }
 233         i = stdscan(NULL, &tokval);
 234     }
 235 }
 236
 237 static inline unused_func
 238 const expr *next_expr(const expr *e, const expr **next_list)
 239 {
 240     e++;
 241     if (!e->type) {
 242         if (next_list) {
 243             e = *next_list;
 244             *next_list = NULL;
 245         } else {
 246             e = NULL;
 247         }
 248     }
 249     return e;
 250 }
 251
 252 static inline void init_operand(operand *op)
 253 {
 254     memset(op, 0, sizeof *op);
 255
 256     op->basereg  = -1;
 257     op->indexreg = -1;
 258     op->segment  = NO_SEG;
 259     op->wrt      = NO_SEG;
 260 }
 261
 262 static int parse_mref(operand *op, const expr *e)
 263 {
 264     int b, i, s;        /* basereg, indexreg, scale */
 265     int64_t o;          /* offset */
 266
 267     b = op->basereg;
 268     i = op->indexreg;
 269     s = op->scale;
 270     o = op->offset;
 271
 272     for (; e->type; e++) {
 273         if (e->type <= EXPR_REG_END) {
 274             bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
 275
 276             if (is_gpr && e->value == 1 && b == -1) {
 277                 /* It can be basereg */
 278                 b = e->type;
 279             } else if (i == -1) {
 280                 /* Must be index register */
 281                 i = e->type;
 282                 s = e->value;
 283             } else {
 284                 if (b == -1)
 285                     nasm_nonfatal("invalid effective address: two index registers");
 286                 else if (!is_gpr)
 287                     nasm_nonfatal("invalid effective address: impossible register");
 288                 else
 289                     nasm_nonfatal("invalid effective address: too many registers");
 290                 return -1;
 291             }
 292         } else if (e->type == EXPR_UNKNOWN) {
 293             op->opflags |= OPFLAG_UNKNOWN;
 294         } else if (e->type == EXPR_SIMPLE) {
 295             o += e->value;
 296         } else if  (e->type == EXPR_WRT) {
 297             op->wrt = e->value;
 298         } else if (e->type >= EXPR_SEGBASE) {
 299             if (e->value == 1) {
 300                 if (op->segment != NO_SEG) {
 301                     nasm_nonfatal("invalid effective address: multiple base segments");
 302                     return -1;
 303                 }
 304                 op->segment = e->type - EXPR_SEGBASE;
 305             } else if (e->value == -1 &&
 306                        e->type == location.segment + EXPR_SEGBASE &&
 307                        !(op->opflags & OPFLAG_RELATIVE)) {
 308                 op->opflags |= OPFLAG_RELATIVE;
 309             } else {
 310                 nasm_nonfatal("invalid effective address: impossible segment base multiplier");
 311                 return -1;
 312             }
 313         } else {
 314             nasm_nonfatal("invalid effective address: bad subexpression type");
 315             return -1;
 316         }
 317    }
 318
 319     op->basereg  = b;
 320     op->indexreg = i;
 321     op->scale    = s;
 322     op->offset   = o;
 323     return 0;
 324 }
 325
 326 static void mref_set_optype(operand *op)
 327 {
 328     int b = op->basereg;
 329     int i = op->indexreg;
 330     int s = op->scale;
 331
 332     /* It is memory, but it can match any r/m operand */
 333     op->type |= MEMORY_ANY;
 334
 335     if (b == -1 && (i == -1 || s == 0)) {
 336         int is_rel = globalbits == 64 &&
 337             !(op->eaflags & EAF_ABS) &&
 338             ((globalrel &&
 339               !(op->eaflags & EAF_FSGS)) ||
 340              (op->eaflags & EAF_REL));
 341
 342         op->type |= is_rel ? IP_REL : MEM_OFFS;
 343     }
 344
 345     if (i != -1) {
 346         opflags_t iclass = nasm_reg_flags[i];
 347
 348         if (is_class(XMMREG,iclass))
 349             op->type |= XMEM;
 350         else if (is_class(YMMREG,iclass))
 351             op->type |= YMEM;
 352         else if (is_class(ZMMREG,iclass))
 353             op->type |= ZMEM;
 354     }
 355 }
 356
 357 /*
 358  * Convert an expression vector returned from evaluate() into an
 359  * extop structure.  Return zero on success.  Note that the eop
 360  * already has dup and elem set, so we can't clear it here.
 361  */
 362 static int value_to_extop(expr *vect, extop *eop, int32_t myseg)
 363 {
 364     eop->type = EOT_DB_NUMBER;
 365     eop->val.num.offset = 0;
 366     eop->val.num.segment = eop->val.num.wrt = NO_SEG;
 367     eop->val.num.relative = false;
 368
 369     for (; vect->type; vect++) {
 370         if (!vect->value)       /* zero term, safe to ignore */
 371             continue;
 372
 373         if (vect->type <= EXPR_REG_END) /* false if a register is present */
 374             return -1;
 375
 376         if (vect->type == EXPR_UNKNOWN) /* something we can't resolve yet */
 377             return 0;
 378
 379         if (vect->type == EXPR_SIMPLE) {
 380             /* Simple number expression */
 381             eop->val.num.offset += vect->value;
 382             continue;
 383         }
 384         if (eop->val.num.wrt == NO_SEG && !eop->val.num.relative &&
 385             vect->type == EXPR_WRT) {
 386             /* WRT term */
 387             eop->val.num.wrt = vect->value;
 388             continue;
 389         }
 390
 391         if (!eop->val.num.relative &&
 392             vect->type == EXPR_SEGBASE + myseg && vect->value == -1) {
 393             /* Expression of the form: foo - $ */
 394             eop->val.num.relative = true;
 395             continue;
 396         }
 397
 398         if (eop->val.num.segment == NO_SEG &&
 399             vect->type >= EXPR_SEGBASE && vect->value == 1) {
 400             eop->val.num.segment = vect->type - EXPR_SEGBASE;
 401             continue;
 402         }
 403
 404         /* Otherwise, badness */
 405         return -1;
 406     }
 407
 408     /* We got to the end and it was all okay */
 409     return 0;
 410 }
 411
 412 /*
 413  * Parse an extended expression, used by db et al. "elem" is the element
 414  * size; initially comes from the specific opcode (e.g. db == 1) but
 415  * can be overridden.
 416  */
 417 static int parse_eops(extop **result, bool critical, int elem)
 418 {
 419     extop *eop = NULL, *prev = NULL;
 420     extop **tail = result;
 421     int sign;
 422     int i = tokval.t_type;
 423     int oper_num = 0;
 424     bool do_subexpr = false;
 425
 426     *tail = NULL;
 427
 428     /* End of string is obvious; ) ends a sub-expression list e.g. DUP */
 429     for (i = tokval.t_type; i != TOKEN_EOS; i = stdscan(NULL, &tokval)) {
 430         char endparen = ')';   /* Is a right paren the end of list? */
 431
 432         if (i == ')')
 433             break;
 434
 435         if (!eop) {
 436             nasm_new(eop);
 437             eop->dup  = 1;
 438             eop->elem = elem;
 439             do_subexpr = false;
 440         }
 441         sign = +1;
 442
 443         /*
 444          * end_expression_next() here is to distinguish this from
 445          * a string used as part of an expression...
 446          */
 447         if (i == TOKEN_QMARK) {
 448             eop->type = EOT_DB_RESERVE;
 449         } else if (do_subexpr && i == '(') {
 450             extop *subexpr;
 451
 452             stdscan(NULL, &tokval); /* Skip paren */
 453             if (parse_eops(&eop->val.subexpr, critical, eop->elem) < 0)
 454                 goto fail;
 455
 456             subexpr = eop->val.subexpr;
 457             if (!subexpr) {
 458                 /* Subexpression is empty */
 459                 eop->type = EOT_NOTHING;
 460             } else if (!subexpr->next) {
 461                 /* Subexpression is a single element, flatten */
 462                 eop->val   = subexpr->val;
 463                 eop->type  = subexpr->type;
 464                 eop->dup  *= subexpr->dup;
 465                 nasm_free(subexpr);
 466             } else {
 467                 eop->type = EOT_EXTOP;
 468             }
 469
 470             /* We should have ended on a closing paren */
 471             if (tokval.t_type != ')') {
 472                 nasm_nonfatal("expected `)' after subexpression, got `%s'",
 473                               i == TOKEN_EOS ?
 474                               "end of line" : tokval.t_charptr);
 475                 goto fail;
 476             }
 477             endparen = 0;       /* This time the paren is not the end */
 478         } else if (i == '%') {
 479             /* %(expression_list) */
 480             do_subexpr = true;
 481             continue;
 482         } else if (i == TOKEN_SIZE) {
 483             /* Element size override */
 484             eop->elem = tokval.t_inttwo;
 485             do_subexpr = true;
 486             continue;
 487         } else if (i == TOKEN_STR && end_expression_next()) {
 488             eop->type            = EOT_DB_STRING;
 489             eop->val.string.data = tokval.t_charptr;
 490             eop->val.string.len  = tokval.t_inttwo;
 491         } else if (i == TOKEN_STRFUNC) {
 492             bool parens = false;
 493             const char *funcname = tokval.t_charptr;
 494             enum strfunc func = tokval.t_integer;
 495
 496             i = stdscan(NULL, &tokval);
 497             if (i == '(') {
 498                 parens = true;
 499                 endparen = 0;
 500                 i = stdscan(NULL, &tokval);
 501             }
 502             if (i != TOKEN_STR) {
 503                 nasm_nonfatal("%s must be followed by a string constant",
 504                               funcname);
 505                 eop->type = EOT_NOTHING;
 506             } else {
 507                 eop->type = EOT_DB_STRING_FREE;
 508                 eop->val.string.len =
 509                     string_transform(tokval.t_charptr, tokval.t_inttwo,
 510                                      &eop->val.string.data, func);
 511                 if (eop->val.string.len == (size_t)-1) {
 512                     nasm_nonfatal("invalid input string to %s", funcname);
 513                     eop->type = EOT_NOTHING;
 514                 }
 515             }
 516             if (parens && i && i != ')') {
 517                 i = stdscan(NULL, &tokval);
 518                 if (i != ')')
 519                     nasm_nonfatal("unterminated %s function", funcname);
 520             }
 521         } else if (i == '-' || i == '+') {
 522             char *save = stdscan_get();
 523             struct tokenval tmptok;
 524
 525             sign = (i == '-') ? -1 : 1;
 526             if (stdscan(NULL, &tmptok) != TOKEN_FLOAT) {
 527                 stdscan_set(save);
 528                 goto is_expression;
 529             } else {
 530                 tokval = tmptok;
 531                 goto is_float;
 532             }
 533         } else if (i == TOKEN_FLOAT) {
 534         is_float:
 535             eop->type = EOT_DB_FLOAT;
 536
 537             if (eop->elem > 16) {
 538                 nasm_nonfatal("no %d-bit floating-point format supported",
 539                               eop->elem << 3);
 540                 eop->val.string.len = 0;
 541             } else if (eop->elem < 1) {
 542                 nasm_nonfatal("floating-point constant"
 543                               " encountered in unknown instruction");
 544                 /*
 545                  * fix suggested by Pedro Gimeno... original line was:
 546                  * eop->type = EOT_NOTHING;
 547                  */
 548                 eop->val.string.len = 0;
 549             } else {
 550                 eop->val.string.len = eop->elem;
 551
 552                 eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len);
 553                 eop->val.string.data = (char *)eop + sizeof(extop);
 554                 if (!float_const(tokval.t_charptr, sign,
 555                                  (uint8_t *)eop->val.string.data,
 556                                  eop->val.string.len))
 557                     eop->val.string.len = 0;
 558             }
 559             if (!eop->val.string.len)
 560                 eop->type = EOT_NOTHING;
 561         } else {
 562             /* anything else, assume it is an expression */
 563             expr *value;
 564
 565         is_expression:
 566             value = evaluate(stdscan, NULL, &tokval, NULL,
 567                              critical, NULL);
 568             i = tokval.t_type;
 569             if (!value)                  /* Error in evaluator */
 570                 goto fail;
 571             if (tokval.t_flag & TFLAG_DUP) {
 572                 /* Expression followed by DUP */
 573                 if (!is_simple(value)) {
 574                     nasm_nonfatal("non-constant argument supplied to DUP");
 575                     goto fail;
 576                 } else if (value->value < 0) {
 577                     nasm_nonfatal("negative argument supplied to DUP");
 578                     goto fail;
 579                 }
 580                 eop->dup *= (size_t)value->value;
 581                 do_subexpr = true;
 582                 continue;
 583             }
 584             if (value_to_extop(value, eop, location.segment)) {
 585                 nasm_nonfatal("expression is not simple or relocatable");
 586             }
 587         }
 588
 589         if (eop->dup == 0 || eop->type == EOT_NOTHING) {
 590             nasm_free(eop);
 591         } else if (eop->type == EOT_DB_RESERVE &&
 592                    prev && prev->type == EOT_DB_RESERVE &&
 593                    prev->elem == eop->elem) {
 594             /* Coalesce multiple EOT_DB_RESERVE */
 595             prev->dup += eop->dup;
 596             nasm_free(eop);
 597         } else {
 598             /* Add this eop to the end of the chain */
 599             prev = eop;
 600             *tail = eop;
 601             tail = &eop->next;
 602         }
 603
 604         oper_num++;
 605         eop = NULL;             /* Done with this operand */
 606
 607         /*
 608          * We're about to call stdscan(), which will eat the
 609          * comma that we're currently sitting on between
 610          * arguments. However, we'd better check first that it
 611          * _is_ a comma.
 612          */
 613         if (i == TOKEN_EOS || i == endparen)    /* Already at end? */
 614             break;
 615         if (i != ',') {
 616             i = stdscan(NULL, &tokval);         /* eat the comma or final paren */
 617             if (i == TOKEN_EOS || i == ')')     /* got end of expression */
 618                 break;
 619             if (i != ',') {
 620                 nasm_nonfatal("comma expected after operand");
 621                 goto fail;
 622             }
 623         }
 624     }
 625
 626     return oper_num;
 627
 628 fail:
 629     if (eop)
 630         nasm_free(eop);
 631     return -1;
 632 }
 633
 634 insn *parse_line(char *buffer, insn *result)
 635 {
 636     bool insn_is_label = false;
 637     struct eval_hints hints;
 638     int opnum;
 639     bool critical;
 640     bool first;
 641     bool recover;
 642     bool far_jmp_ok;
 643     int i;
 644
 645     nasm_static_assert(P_none == 0);
 646
 647 restart_parse:
 648     first               = true;
 649     result->forw_ref    = false;
 650
 651     stdscan_reset();
 652     stdscan_set(buffer);
 653     i = stdscan(NULL, &tokval);
 654
 655     memset(result->prefixes, P_none, sizeof(result->prefixes));
 656     result->times       = 1;    /* No TIMES either yet */
 657     result->label       = NULL; /* Assume no label */
 658     result->eops        = NULL; /* must do this, whatever happens */
 659     result->operands    = 0;    /* must initialize this */
 660     result->evex_rm     = 0;    /* Ensure EVEX rounding mode is reset */
 661     result->evex_brerop = -1;   /* Reset EVEX broadcasting/ER op position */
 662
 663     /* Ignore blank lines */
 664     if (i == TOKEN_EOS)
 665         goto fail;
 666
 667     if (i != TOKEN_ID       &&
 668         i != TOKEN_INSN     &&
 669         i != TOKEN_PREFIX   &&
 670         (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) {
 671         nasm_nonfatal("label or instruction expected at start of line");
 672         goto fail;
 673     }
 674
 675     if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) {
 676         /* there's a label here */
 677         first = false;
 678         result->label = tokval.t_charptr;
 679         i = stdscan(NULL, &tokval);
 680         if (i == ':') {         /* skip over the optional colon */
 681             i = stdscan(NULL, &tokval);
 682         } else if (i == 0) {
 683             /*!
 684              *!label-orphan [on] labels alone on lines without trailing `:'
 685              *!=orphan-labels
 686              *!  warns about source lines which contain no instruction but define
 687              *!  a label without a trailing colon. This is most likely indicative
 688              *!  of a typo, but is technically correct NASM syntax (see \k{syntax}.)
 689              */
 690             nasm_warn(WARN_LABEL_ORPHAN ,
 691                        "label alone on a line without a colon might be in error");
 692         }
 693         if (i != TOKEN_INSN || tokval.t_integer != I_EQU) {
 694             /*
 695              * FIXME: location.segment could be NO_SEG, in which case
 696              * it is possible we should be passing 'absolute.segment'. Look into this.
 697              * Work out whether that is *really* what we should be doing.
 698              * Generally fix things. I think this is right as it is, but
 699              * am still not certain.
 700              */
 701             define_label(result->label,
 702                          in_absolute ? absolute.segment : location.segment,
 703                          location.offset, true);
 704         }
 705     }
 706
 707     /* Just a label here */
 708     if (i == TOKEN_EOS)
 709         goto fail;
 710
 711     while (i == TOKEN_PREFIX ||
 712            (i == TOKEN_REG && IS_SREG(tokval.t_integer))) {
 713         first = false;
 714
 715         /*
 716          * Handle special case: the TIMES prefix.
 717          */
 718         if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
 719             expr *value;
 720
 721             i = stdscan(NULL, &tokval);
 722             value = evaluate(stdscan, NULL, &tokval, NULL, pass_stable(), NULL);
 723             i = tokval.t_type;
 724             if (!value)                  /* Error in evaluator */
 725                 goto fail;
 726             if (!is_simple(value)) {
 727                 nasm_nonfatal("non-constant argument supplied to TIMES");
 728                 result->times = 1L;
 729             } else {
 730                 result->times = value->value;
 731                 if (value->value < 0) {
 732                     nasm_nonfatalf(ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
 733                     result->times = 0;
 734                 }
 735             }
 736         } else {
 737             int slot = prefix_slot(tokval.t_integer);
 738             if (result->prefixes[slot]) {
 739                if (result->prefixes[slot] == tokval.t_integer)
 740                     nasm_warn(WARN_OTHER, "instruction has redundant prefixes");
 741                else
 742                     nasm_nonfatal("instruction has conflicting prefixes");
 743             }
 744             result->prefixes[slot] = tokval.t_integer;
 745             i = stdscan(NULL, &tokval);
 746         }
 747     }
 748
 749     if (i != TOKEN_INSN) {
 750         int j;
 751         enum prefixes pfx;
 752
 753         for (j = 0; j < MAXPREFIX; j++) {
 754             if ((pfx = result->prefixes[j]) != P_none)
 755                 break;
 756         }
 757
 758         if (i == 0 && pfx != P_none) {
 759             /*
 760              * Instruction prefixes are present, but no actual
 761              * instruction. This is allowed: at this point we
 762              * invent a notional instruction of RESB 0.
 763              */
 764             result->opcode          = I_RESB;
 765             result->operands        = 1;
 766             nasm_zero(result->oprs);
 767             result->oprs[0].type    = IMMEDIATE;
 768             result->oprs[0].offset  = 0L;
 769             result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
 770             return result;
 771         } else {
 772             nasm_nonfatal("parser: instruction expected");
 773             goto fail;
 774         }
 775     }
 776
 777     result->opcode = tokval.t_integer;
 778     result->condition = tokval.t_inttwo;
 779
 780     /*
 781      * INCBIN cannot be satisfied with incorrectly
 782      * evaluated operands, since the correct values _must_ be known
 783      * on the first pass. Hence, even in pass one, we set the
 784      * `critical' flag on calling evaluate(), so that it will bomb
 785      * out on undefined symbols.
 786      */
 787     critical = pass_final() || (result->opcode == I_INCBIN);
 788
 789     if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) {
 790         int oper_num;
 791
 792         i = stdscan(NULL, &tokval);
 793
 794         if (first && i == ':') {
 795             /* Really a label */
 796             insn_is_label = true;
 797             goto restart_parse;
 798         }
 799         first = false;
 800         oper_num = parse_eops(&result->eops, critical, db_bytes(result->opcode));
 801         if (oper_num < 0)
 802             goto fail;
 803
 804         if (result->opcode == I_INCBIN) {
 805             /*
 806              * Correct syntax for INCBIN is that there should be
 807              * one string operand, followed by one or two numeric
 808              * operands.
 809              */
 810             if (!result->eops || result->eops->type != EOT_DB_STRING)
 811                 nasm_nonfatal("`incbin' expects a file name");
 812             else if (result->eops->next &&
 813                      result->eops->next->type != EOT_DB_NUMBER)
 814                 nasm_nonfatal("`incbin': second parameter is"
 815                               " non-numeric");
 816             else if (result->eops->next && result->eops->next->next &&
 817                      result->eops->next->next->type != EOT_DB_NUMBER)
 818                 nasm_nonfatal("`incbin': third parameter is"
 819                               " non-numeric");
 820             else if (result->eops->next && result->eops->next->next &&
 821                      result->eops->next->next->next)
 822                 nasm_nonfatal("`incbin': more than three parameters");
 823             else
 824                 return result;
 825             /*
 826              * If we reach here, one of the above errors happened.
 827              * Throw the instruction away.
 828              */
 829             goto fail;
 830         } else {
 831             /* DB et al */
 832             result->operands = oper_num;
 833             if (oper_num == 0)
 834                 /*!
 835                  *!db-empty [on] no operand for data declaration
 836                  *!  warns about a \c{DB}, \c{DW}, etc declaration
 837                  *!  with no operands, producing no output.
 838                  *!  This is permitted, but often indicative of an error.
 839                  *!  See \k{db}.
 840                  */
 841                 nasm_warn(WARN_DB_EMPTY, "no operand for data declaration");
 842         }
 843         return result;
 844     }
 845
 846     /*
 847      * Now we begin to parse the operands. There may be up to four
 848      * of these, separated by commas, and terminated by a zero token.
 849      */
 850     far_jmp_ok = result->opcode == I_JMP || result->opcode == I_CALL;
 851
 852     for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
 853         operand *op = &result->oprs[opnum];
 854         expr *value;            /* used most of the time */
 855         bool mref = false;      /* is this going to be a memory ref? */
 856         int bracket = 0;        /* is it a [] mref, or a "naked" mref? */
 857         bool mib;               /* compound (mib) mref? */
 858         int setsize = 0;
 859         decoflags_t brace_flags = 0;    /* flags for decorators in braces */
 860
 861         init_operand(op);
 862
 863         i = stdscan(NULL, &tokval);
 864         if (i == TOKEN_EOS)
 865             break;              /* end of operands: get out of here */
 866         else if (first && i == ':') {
 867             insn_is_label = true;
 868             goto restart_parse;
 869         }
 870         first = false;
 871         op->type = 0; /* so far, no override */
 872         /* size specifiers */
 873         while (i == TOKEN_SPECIAL || i == TOKEN_SIZE) {
 874             switch (tokval.t_integer) {
 875             case S_BYTE:
 876                 if (!setsize)   /* we want to use only the first */
 877                     op->type |= BITS8;
 878                 setsize = 1;
 879                 break;
 880             case S_WORD:
 881                 if (!setsize)
 882                     op->type |= BITS16;
 883                 setsize = 1;
 884                 break;
 885             case S_DWORD:
 886             case S_LONG:
 887                 if (!setsize)
 888                     op->type |= BITS32;
 889                 setsize = 1;
 890                 break;
 891             case S_QWORD:
 892                 if (!setsize)
 893                     op->type |= BITS64;
 894                 setsize = 1;
 895                 break;
 896             case S_TWORD:
 897                 if (!setsize)
 898                     op->type |= BITS80;
 899                 setsize = 1;
 900                 break;
 901             case S_OWORD:
 902                 if (!setsize)
 903                     op->type |= BITS128;
 904                 setsize = 1;
 905                 break;
 906             case S_YWORD:
 907                 if (!setsize)
 908                     op->type |= BITS256;
 909                 setsize = 1;
 910                 break;
 911             case S_ZWORD:
 912                 if (!setsize)
 913                     op->type |= BITS512;
 914                 setsize = 1;
 915                 break;
 916             case S_TO:
 917                 op->type |= TO;
 918                 break;
 919             case S_STRICT:
 920                 op->type |= STRICT;
 921                 break;
 922             case S_FAR:
 923                 op->type |= FAR;
 924                 break;
 925             case S_NEAR:
 926                 op->type |= NEAR;
 927                 break;
 928             case S_SHORT:
 929                 op->type |= SHORT;
 930                 break;
 931             default:
 932                 nasm_nonfatal("invalid operand size specification");
 933             }
 934             i = stdscan(NULL, &tokval);
 935         }
 936
 937         if (i == '[' || i == TOKEN_MASM_PTR || i == '&') {
 938             /* memory reference */
 939             mref = true;
 940             bracket += (i == '[');
 941             i = stdscan(NULL, &tokval);
 942         }
 943
 944     mref_more:
 945         if (mref) {
 946             bool done = false;
 947             bool nofw = false;
 948
 949             while (!done) {
 950                 switch (i) {
 951                 case TOKEN_SPECIAL:
 952                 case TOKEN_SIZE:
 953                 case TOKEN_PREFIX:
 954                     process_size_override(result, op);
 955                     break;
 956
 957                 case '[':
 958                     bracket++;
 959                     break;
 960
 961                 case ',':
 962                     tokval.t_type = TOKEN_NUM;
 963                     tokval.t_integer = 0;
 964                     stdscan_set(stdscan_get() - 1);     /* rewind the comma */
 965                     done = nofw = true;
 966                     break;
 967
 968                 case TOKEN_MASM_FLAT:
 969                     i = stdscan(NULL, &tokval);
 970                     if (i != ':') {
 971                         nasm_nonfatal("unknown use of FLAT in MASM emulation");
 972                         nofw = true;
 973                     }
 974                     done = true;
 975                     break;
 976
 977                 default:
 978                     done = nofw = true;
 979                     break;
 980                 }
 981
 982                 if (!nofw)
 983                     i = stdscan(NULL, &tokval);
 984             }
 985         }
 986
 987         value = evaluate(stdscan, NULL, &tokval,
 988                          &op->opflags, critical, &hints);
 989         i = tokval.t_type;
 990         if (op->opflags & OPFLAG_FORWARD) {
 991             result->forw_ref = true;
 992         }
 993         if (!value)                  /* Error in evaluator */
 994             goto fail;
 995
 996         if (i == '[' && !bracket) {
 997             /* displacement[regs] syntax */
 998             mref = true;
 999             parse_mref(op, value); /* Process what we have so far */
1000             goto mref_more;
1001         }
1002
1003         if (i == ':' && (mref || !far_jmp_ok)) {
1004             /* segment override? */
1005             mref = true;
1006
1007             /*
1008              * Process the segment override.
1009              */
1010             if (value[1].type   != 0    ||
1011                 value->value    != 1    ||
1012                 !IS_SREG(value->type))
1013                 nasm_nonfatal("invalid segment override");
1014             else if (result->prefixes[PPS_SEG])
1015                 nasm_nonfatal("instruction has conflicting segment overrides");
1016             else {
1017                 result->prefixes[PPS_SEG] = value->type;
1018                 if (IS_FSGS(value->type))
1019                     op->eaflags |= EAF_FSGS;
1020             }
1021
1022             i = stdscan(NULL, &tokval); /* then skip the colon */
1023             goto mref_more;
1024         }
1025
1026         mib = false;
1027         if (mref && bracket && i == ',') {
1028             /* [seg:base+offset,index*scale] syntax (mib) */
1029             operand o2;         /* Index operand */
1030
1031             if (parse_mref(op, value))
1032                 goto fail;
1033
1034             i = stdscan(NULL, &tokval); /* Eat comma */
1035             value = evaluate(stdscan, NULL, &tokval, &op->opflags,
1036                              critical, &hints);
1037             i = tokval.t_type;
1038             if (!value)
1039                 goto fail;
1040
1041             init_operand(&o2);
1042             if (parse_mref(&o2, value))
1043                 goto fail;
1044
1045             if (o2.basereg != -1 && o2.indexreg == -1) {
1046                 o2.indexreg = o2.basereg;
1047                 o2.scale = 1;
1048                 o2.basereg = -1;
1049             }
1050
1051             if (op->indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
1052                 o2.segment != NO_SEG || o2.wrt != NO_SEG) {
1053                 nasm_nonfatal("invalid mib expression");
1054                 goto fail;
1055             }
1056
1057             op->indexreg = o2.indexreg;
1058             op->scale = o2.scale;
1059
1060             if (op->basereg != -1) {
1061                 op->hintbase = op->basereg;
1062                 op->hinttype = EAH_MAKEBASE;
1063             } else if (op->indexreg != -1) {
1064                 op->hintbase = op->indexreg;
1065                 op->hinttype = EAH_NOTBASE;
1066             } else {
1067                 op->hintbase = -1;
1068                 op->hinttype = EAH_NOHINT;
1069             }
1070
1071             mib = true;
1072         }
1073
1074         recover = false;
1075         if (mref) {
1076             if (bracket == 1) {
1077                 if (i == ']') {
1078                     bracket--;
1079                     i = stdscan(NULL, &tokval);
1080                 } else {
1081                     nasm_nonfatal("expecting ] at end of memory operand");
1082                     recover = true;
1083                 }
1084             } else if (bracket == 0) {
1085                 /* Do nothing */
1086             } else if (bracket > 0) {
1087                 nasm_nonfatal("excess brackets in memory operand");
1088                 recover = true;
1089             } else if (bracket < 0) {
1090                 nasm_nonfatal("unmatched ] in memory operand");
1091                 recover = true;
1092             }
1093
1094             if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
1095                 /* parse opmask (and zeroing) after an operand */
1096                 recover = parse_braces(&brace_flags);
1097                 i = tokval.t_type;
1098             }
1099             if (!recover && i != 0 && i != ',') {
1100                 nasm_nonfatal("comma, decorator or end of line expected, got %d", i);
1101                 recover = true;
1102             }
1103         } else {                /* immediate operand */
1104             if (i != 0 && i != ',' && i != ':' &&
1105                 i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
1106                 nasm_nonfatal("comma, colon, decorator or end of "
1107                               "line expected after operand");
1108                 recover = true;
1109             } else if (i == ':') {
1110                 op->type |= COLON;
1111             } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
1112                 /* parse opmask (and zeroing) after an operand */
1113                 recover = parse_braces(&brace_flags);
1114             }
1115         }
1116         if (recover) {
1117             do {                /* error recovery */
1118                 i = stdscan(NULL, &tokval);
1119             } while (i != 0 && i != ',');
1120         }
1121
1122         /*
1123          * now convert the exprs returned from evaluate()
1124          * into operand descriptions...
1125          */
1126         op->decoflags |= brace_flags;
1127
1128         if (mref) {             /* it's a memory reference */
1129             /* A mib reference was fully parsed already */
1130             if (!mib) {
1131                 if (parse_mref(op, value))
1132                     goto fail;
1133                 op->hintbase = hints.base;
1134                 op->hinttype = hints.type;
1135             }
1136             mref_set_optype(op);
1137         } else if ((op->type & FAR) && !far_jmp_ok) {
1138                 nasm_nonfatal("invalid use of FAR operand specifier");
1139                 recover = true;
1140         } else {                /* it's not a memory reference */
1141             if (is_just_unknown(value)) {       /* it's immediate but unknown */
1142                 op->type      |= IMMEDIATE;
1143                 op->opflags   |= OPFLAG_UNKNOWN;
1144                 op->offset    = 0;        /* don't care */
1145                 op->segment   = NO_SEG;   /* don't care again */
1146                 op->wrt       = NO_SEG;   /* still don't care */
1147
1148                 if(optimizing.level >= 0 && !(op->type & STRICT)) {
1149                     /* Be optimistic */
1150                     op->type |=
1151                         UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD;
1152                 }
1153             } else if (is_reloc(value)) {       /* it's immediate */
1154                 uint64_t n = reloc_value(value);
1155
1156                 op->type      |= IMMEDIATE;
1157                 op->offset    = n;
1158                 op->segment   = reloc_seg(value);
1159                 op->wrt       = reloc_wrt(value);
1160                 op->opflags   |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
1161
1162                 if (is_simple(value)) {
1163                     if (n == 1)
1164                         op->type |= UNITY;
1165                     if (optimizing.level >= 0 && !(op->type & STRICT)) {
1166                         if ((uint32_t) (n + 128) <= 255)
1167                             op->type |= SBYTEDWORD;
1168                         if ((uint16_t) (n + 128) <= 255)
1169                             op->type |= SBYTEWORD;
1170                         if (n <= UINT64_C(0xFFFFFFFF))
1171                             op->type |= UDWORD;
1172                         if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF))
1173                             op->type |= SDWORD;
1174                     }
1175                 }
1176             } else if (value->type == EXPR_RDSAE) {
1177                 /*
1178                  * it's not an operand but a rounding or SAE decorator.
1179                  * put the decorator information in the (opflag_t) type field
1180                  * of previous operand.
1181                  */
1182                 opnum--; op--;
1183                 switch (value->value) {
1184                 case BRC_RN:
1185                 case BRC_RU:
1186                 case BRC_RD:
1187                 case BRC_RZ:
1188                 case BRC_SAE:
1189                     op->decoflags |= (value->value == BRC_SAE ? SAE : ER);
1190                     result->evex_rm = value->value;
1191                     break;
1192                 default:
1193                     nasm_nonfatal("invalid decorator");
1194                     break;
1195                 }
1196             } else {            /* it's a register */
1197                 opflags_t rs;
1198                 uint64_t regset_size = 0;
1199
1200                 if (value->type >= EXPR_SIMPLE || value->value != 1) {
1201                     nasm_nonfatal("invalid operand type");
1202                     goto fail;
1203                 }
1204
1205                 /*
1206                  * We do not allow any kind of expression, except for
1207                  * reg+value in which case it is a register set.
1208                  */
1209                 for (i = 1; value[i].type; i++) {
1210                     if (!value[i].value)
1211                         continue;
1212
1213                     switch (value[i].type) {
1214                     case EXPR_SIMPLE:
1215                         if (!regset_size) {
1216                             regset_size = value[i].value + 1;
1217                             break;
1218                         }
1219                         /* fallthrough */
1220                     default:
1221                         nasm_nonfatal("invalid operand type");
1222                         goto fail;
1223                     }
1224                 }
1225
1226                 if ((regset_size & (regset_size - 1)) ||
1227                     regset_size >= (UINT64_C(1) << REGSET_BITS)) {
1228                     nasm_nonfatalf(ERR_PASS2, "invalid register set size");
1229                     regset_size = 0;
1230                 }
1231
1232                 /* clear overrides, except TO which applies to FPU regs */
1233                 if (op->type & ~TO) {
1234                     /*
1235                      * we want to produce a warning iff the specified size
1236                      * is different from the register size
1237                      */
1238                     rs = op->type & SIZE_MASK;
1239                 } else {
1240                     rs = 0;
1241                 }
1242
1243                 /*
1244                  * Make sure we're not out of nasm_reg_flags, still
1245                  * probably this should be fixed when we're defining
1246                  * the label.
1247                  *
1248                  * An easy trigger is
1249                  *
1250                  *      e equ 0x80000000:0
1251                  *      pshufw word e-0
1252                  *
1253                  */
1254                 if (value->type < EXPR_REG_START ||
1255                     value->type > EXPR_REG_END) {
1256                         nasm_nonfatal("invalid operand type");
1257                         goto fail;
1258                 }
1259
1260                 op->type      &= TO;
1261                 op->type      |= REGISTER;
1262                 op->type      |= nasm_reg_flags[value->type];
1263                 op->type      |= (regset_size >> 1) << REGSET_SHIFT;
1264                 op->decoflags |= brace_flags;
1265                 op->basereg   = value->type;
1266
1267                 if (rs) {
1268                     opflags_t opsize = nasm_reg_flags[value->type] & SIZE_MASK;
1269                     if (!opsize) {
1270                         op->type |= rs; /* For non-size-specific registers, permit size override */
1271                     } else if (opsize != rs) {
1272                         /*!
1273                          *!regsize [on] register size specification ignored
1274                          *!
1275                          *!  warns about a register with implicit size (such as \c{EAX}, which is always 32 bits)
1276                          *!  been given an explicit size specification which is inconsistent with the size
1277                          *!  of the named register, e.g. \c{WORD EAX}. \c{DWORD EAX} or \c{WORD AX} are
1278                          *!  permitted, and do not trigger this warning. Some registers which \e{do not} imply
1279                          *!  a specific size, such as \c{K0}, may need this specification unless the instruction
1280                          *!  itself implies the instruction size:
1281                          *!-
1282                          *!  \c      KMOVW K0,[foo]          ; Permitted, KMOVW implies 16 bits
1283                          *!  \c      KMOV  WORD K0,[foo]     ; Permitted, WORD K0 specifies instruction size
1284                          *!  \c      KMOV  K0,WORD [foo]     ; Permitted, WORD [foo] specifies instruction size
1285                          *!  \c      KMOV  K0,[foo]          ; Not permitted, instruction size ambiguous
1286                          */
1287                         nasm_warn(WARN_REGSIZE, "invalid register size specification ignored");
1288                     }
1289                 }
1290             }
1291         }
1292
1293         /* remember the position of operand having broadcasting/ER mode */
1294         if (op->decoflags & (BRDCAST_MASK | ER | SAE))
1295             result->evex_brerop = opnum;
1296     }
1297
1298     result->operands = opnum; /* set operand count */
1299
1300     /* clear remaining operands */
1301     while (opnum < MAX_OPERANDS)
1302         result->oprs[opnum++].type = 0;
1303
1304     return result;
1305
1306 fail:
1307     result->opcode = I_none;
1308     return result;
1309 }
1310
1311 static int end_expression_next(void)
1312 {
1313     struct tokenval tv;
1314     char *p;
1315     int i;
1316
1317     p = stdscan_get();
1318     i = stdscan(NULL, &tv);
1319     stdscan_set(p);
1320
1321     return (i == ',' || i == ';' || i == ')' || !i);
1322 }
1323
1324 static void free_eops(extop *e)
1325 {
1326     extop *next;
1327
1328     while (e) {
1329         next = e->next;
1330         switch (e->type) {
1331         case EOT_EXTOP:
1332             free_eops(e->val.subexpr);
1333             break;
1334
1335         case EOT_DB_STRING_FREE:
1336             nasm_free(e->val.string.data);
1337             break;
1338
1339         default:
1340             break;
1341         }
1342
1343         nasm_free(e);
1344         e = next;
1345     }
1346 }
1347
1348 void cleanup_insn(insn * i)
1349 {
1350     free_eops(i->eops);
1351 }