asm/parser.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * parser.c   source line parser for the Netwide Assembler
  36  */
  37
  38 #include "compiler.h"
  39
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <stddef.h>
  43 #include <string.h>
  44 #include <ctype.h>
  45
  46 #include "nasm.h"
  47 #include "insns.h"
  48 #include "nasmlib.h"
  49 #include "error.h"
  50 #include "stdscan.h"
  51 #include "eval.h"
  52 #include "parser.h"
  53 #include "float.h"
  54 #include "assemble.h"
  55 #include "tables.h"
  56
  57
  58 static int is_comma_next(void);
  59
  60 static struct tokenval tokval;
  61
  62 static int prefix_slot(int prefix)
  63 {
  64     switch (prefix) {
  65     case P_WAIT:
  66         return PPS_WAIT;
  67     case R_CS:
  68     case R_DS:
  69     case R_SS:
  70     case R_ES:
  71     case R_FS:
  72     case R_GS:
  73         return PPS_SEG;
  74     case P_LOCK:
  75         return PPS_LOCK;
  76     case P_REP:
  77     case P_REPE:
  78     case P_REPZ:
  79     case P_REPNE:
  80     case P_REPNZ:
  81     case P_XACQUIRE:
  82     case P_XRELEASE:
  83     case P_BND:
  84     case P_NOBND:
  85         return PPS_REP;
  86     case P_O16:
  87     case P_O32:
  88     case P_O64:
  89     case P_OSP:
  90         return PPS_OSIZE;
  91     case P_A16:
  92     case P_A32:
  93     case P_A64:
  94     case P_ASP:
  95         return PPS_ASIZE;
  96     case P_EVEX:
  97     case P_VEX3:
  98     case P_VEX2:
  99         return PPS_VEX;
 100     default:
 101         nasm_panic("Invalid value %d passed to prefix_slot()", prefix);
 102         return -1;
 103     }
 104 }
 105
 106 static void process_size_override(insn *result, operand *op)
 107 {
 108     if (tasm_compatible_mode) {
 109         switch (tokval.t_integer) {
 110             /* For TASM compatibility a size override inside the
 111              * brackets changes the size of the operand, not the
 112              * address type of the operand as it does in standard
 113              * NASM syntax. Hence:
 114              *
 115              *  mov     eax,[DWORD val]
 116              *
 117              * is valid syntax in TASM compatibility mode. Note that
 118              * you lose the ability to override the default address
 119              * type for the instruction, but we never use anything
 120              * but 32-bit flat model addressing in our code.
 121              */
 122         case S_BYTE:
 123             op->type |= BITS8;
 124             break;
 125         case S_WORD:
 126             op->type |= BITS16;
 127             break;
 128         case S_DWORD:
 129         case S_LONG:
 130             op->type |= BITS32;
 131             break;
 132         case S_QWORD:
 133             op->type |= BITS64;
 134             break;
 135         case S_TWORD:
 136             op->type |= BITS80;
 137             break;
 138         case S_OWORD:
 139             op->type |= BITS128;
 140             break;
 141         default:
 142             nasm_error(ERR_NONFATAL,
 143                        "invalid operand size specification");
 144             break;
 145         }
 146     } else {
 147         /* Standard NASM compatible syntax */
 148         switch (tokval.t_integer) {
 149         case S_NOSPLIT:
 150             op->eaflags |= EAF_TIMESTWO;
 151             break;
 152         case S_REL:
 153             op->eaflags |= EAF_REL;
 154             break;
 155         case S_ABS:
 156             op->eaflags |= EAF_ABS;
 157             break;
 158         case S_BYTE:
 159             op->disp_size = 8;
 160             op->eaflags |= EAF_BYTEOFFS;
 161             break;
 162         case P_A16:
 163         case P_A32:
 164         case P_A64:
 165             if (result->prefixes[PPS_ASIZE] &&
 166                 result->prefixes[PPS_ASIZE] != tokval.t_integer)
 167                 nasm_error(ERR_NONFATAL,
 168                            "conflicting address size specifications");
 169             else
 170                 result->prefixes[PPS_ASIZE] = tokval.t_integer;
 171             break;
 172         case S_WORD:
 173             op->disp_size = 16;
 174             op->eaflags |= EAF_WORDOFFS;
 175             break;
 176         case S_DWORD:
 177         case S_LONG:
 178             op->disp_size = 32;
 179             op->eaflags |= EAF_WORDOFFS;
 180             break;
 181         case S_QWORD:
 182             op->disp_size = 64;
 183             op->eaflags |= EAF_WORDOFFS;
 184             break;
 185         default:
 186             nasm_error(ERR_NONFATAL, "invalid size specification in"
 187                        " effective address");
 188             break;
 189         }
 190     }
 191 }
 192
 193 /*
 194  * Brace decorators are are parsed here.  opmask and zeroing
 195  * decorators can be placed in any order.  e.g. zmm1 {k2}{z} or zmm2
 196  * {z}{k3} decorator(s) are placed at the end of an operand.
 197  */
 198 static bool parse_braces(decoflags_t *decoflags)
 199 {
 200     int i, j;
 201
 202     i = tokval.t_type;
 203
 204     while (true) {
 205         switch (i) {
 206         case TOKEN_OPMASK:
 207             if (*decoflags & OPMASK_MASK) {
 208                 nasm_error(ERR_NONFATAL,
 209                            "opmask k%"PRIu64" is already set",
 210                            *decoflags & OPMASK_MASK);
 211                 *decoflags &= ~OPMASK_MASK;
 212             }
 213             *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
 214             break;
 215         case TOKEN_DECORATOR:
 216             j = tokval.t_integer;
 217             switch (j) {
 218             case BRC_Z:
 219                 *decoflags |= Z_MASK;
 220                 break;
 221             case BRC_1TO2:
 222             case BRC_1TO4:
 223             case BRC_1TO8:
 224             case BRC_1TO16:
 225                 *decoflags |= BRDCAST_MASK | VAL_BRNUM(j - BRC_1TO2);
 226                 break;
 227             default:
 228                 nasm_error(ERR_NONFATAL,
 229                            "{%s} is not an expected decorator",
 230                            tokval.t_charptr);
 231                 break;
 232             }
 233             break;
 234         case ',':
 235         case TOKEN_EOS:
 236             return false;
 237         default:
 238             nasm_error(ERR_NONFATAL,
 239                        "only a series of valid decorators expected");
 240             return true;
 241         }
 242         i = stdscan(NULL, &tokval);
 243     }
 244 }
 245
 246 static int parse_mref(operand *op, const expr *e)
 247 {
 248     int b, i, s;        /* basereg, indexreg, scale */
 249     int64_t o;          /* offset */
 250
 251     b = i = -1;
 252     o = s = 0;
 253     op->segment = op->wrt = NO_SEG;
 254
 255     if (e->type && e->type <= EXPR_REG_END) {   /* this bit's a register */
 256         bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
 257
 258         if (is_gpr && e->value == 1)
 259             b = e->type;        /* It can be basereg */
 260         else                    /* No, it has to be indexreg */
 261             i = e->type, s = e->value;
 262         e++;
 263     }
 264     if (e->type && e->type <= EXPR_REG_END) {   /* it's a 2nd register */
 265         bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
 266
 267         if (b != -1)    /* If the first was the base, ... */
 268             i = e->type, s = e->value;  /* second has to be indexreg */
 269
 270         else if (!is_gpr || e->value != 1) {
 271             /* If both want to be index */
 272             nasm_error(ERR_NONFATAL,
 273                        "invalid effective address: two index registers");
 274             return -1;
 275         } else
 276             b = e->type;
 277         e++;
 278     }
 279
 280     if (e->type) {                     /* is there an offset? */
 281         if (e->type <= EXPR_REG_END) {  /* in fact, is there an error? */
 282             nasm_error(ERR_NONFATAL,
 283                        "invalid effective address: impossible register");
 284             return -1;
 285         } else {
 286             if (e->type == EXPR_UNKNOWN) {
 287                 op->opflags |= OPFLAG_UNKNOWN;
 288                 o = 0;  /* doesn't matter what */
 289                 while (e->type)
 290                     e++;        /* go to the end of the line */
 291             } else {
 292                 if (e->type == EXPR_SIMPLE) {
 293                     o = e->value;
 294                     e++;
 295                 }
 296                 if (e->type == EXPR_WRT) {
 297                     op->wrt = e->value;
 298                     e++;
 299                 }
 300                 /*
 301                  * Look for a segment base type.
 302                  */
 303                 for (; e->type; e++) {
 304                     if (!e->value)
 305                         continue;
 306
 307                     if (e->type <= EXPR_REG_END) {
 308                         nasm_error(ERR_NONFATAL,
 309                                    "invalid effective address: too many registers");
 310                         return -1;
 311                     } else if (e->type < EXPR_SEGBASE) {
 312                         nasm_error(ERR_NONFATAL,
 313                                    "invalid effective address: bad subexpression type");
 314                         return -1;
 315                     } else if (e->value == 1) {
 316                         if (op->segment != NO_SEG) {
 317                             nasm_error(ERR_NONFATAL,
 318                                        "invalid effective address: multiple base segments");
 319                             return -1;
 320                         }
 321                         op->segment = e->type - EXPR_SEGBASE;
 322                     } else if (e->value == -1 &&
 323                                e->type == location.segment + EXPR_SEGBASE &&
 324                                !(op->opflags & OPFLAG_RELATIVE)) {
 325                         op->opflags |= OPFLAG_RELATIVE;
 326                     } else {
 327                         nasm_error(ERR_NONFATAL,
 328                                    "invalid effective address: impossible segment base multiplier");
 329                         return -1;
 330                     }
 331                 }
 332             }
 333         }
 334     }
 335
 336     nasm_assert(!e->type);      /* We should be at the end */
 337
 338     op->basereg = b;
 339     op->indexreg = i;
 340     op->scale = s;
 341     op->offset = o;
 342     return 0;
 343 }
 344
 345 static void mref_set_optype(operand *op)
 346 {
 347     int b = op->basereg;
 348     int i = op->indexreg;
 349     int s = op->scale;
 350
 351     /* It is memory, but it can match any r/m operand */
 352     op->type |= MEMORY_ANY;
 353
 354     if (b == -1 && (i == -1 || s == 0)) {
 355         int is_rel = globalbits == 64 &&
 356             !(op->eaflags & EAF_ABS) &&
 357             ((globalrel &&
 358               !(op->eaflags & EAF_FSGS)) ||
 359              (op->eaflags & EAF_REL));
 360
 361         op->type |= is_rel ? IP_REL : MEM_OFFS;
 362     }
 363
 364     if (i != -1) {
 365         opflags_t iclass = nasm_reg_flags[i];
 366
 367         if (is_class(XMMREG,iclass))
 368             op->type |= XMEM;
 369         else if (is_class(YMMREG,iclass))
 370             op->type |= YMEM;
 371         else if (is_class(ZMMREG,iclass))
 372             op->type |= ZMEM;
 373     }
 374 }
 375
 376 /*
 377  * Convert an expression vector returned from evaluate() into an
 378  * extop structure.  Return zero on success.
 379  */
 380 static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
 381 {
 382     eop->type = EOT_DB_NUMBER;
 383     eop->offset = 0;
 384     eop->segment = eop->wrt = NO_SEG;
 385     eop->relative = false;
 386
 387     for (; vect->type; vect++) {
 388         if (!vect->value)       /* zero term, safe to ignore */
 389             continue;
 390
 391         if (vect->type <= EXPR_REG_END) /* false if a register is present */
 392             return -1;
 393
 394         if (vect->type == EXPR_UNKNOWN) /* something we can't resolve yet */
 395             return 0;
 396
 397         if (vect->type == EXPR_SIMPLE) {
 398             /* Simple number expression */
 399             eop->offset += vect->value;
 400             continue;
 401         }
 402         if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) {
 403             /* WRT term */
 404             eop->wrt = vect->value;
 405             continue;
 406         }
 407
 408         if (!eop->relative &&
 409             vect->type == EXPR_SEGBASE + myseg && vect->value == -1) {
 410             /* Expression of the form: foo - $ */
 411             eop->relative = true;
 412             continue;
 413         }
 414
 415         if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE &&
 416             vect->value == 1) {
 417             eop->segment = vect->type - EXPR_SEGBASE;
 418             continue;
 419         }
 420
 421         /* Otherwise, badness */
 422         return -1;
 423     }
 424
 425     /* We got to the end and it was all okay */
 426     return 0;
 427 }
 428
 429 insn *parse_line(int pass, char *buffer, insn *result)
 430 {
 431     bool insn_is_label = false;
 432     struct eval_hints hints;
 433     int opnum;
 434     int critical;
 435     bool first;
 436     bool recover;
 437     int i;
 438
 439     nasm_static_assert(P_none == 0);
 440
 441 restart_parse:
 442     first               = true;
 443     result->forw_ref    = false;
 444
 445     stdscan_reset();
 446     stdscan_set(buffer);
 447     i = stdscan(NULL, &tokval);
 448
 449     memset(result->prefixes, P_none, sizeof(result->prefixes));
 450     result->times       = 1;    /* No TIMES either yet */
 451     result->label       = NULL; /* Assume no label */
 452     result->eops        = NULL; /* must do this, whatever happens */
 453     result->operands    = 0;    /* must initialize this */
 454     result->evex_rm     = 0;    /* Ensure EVEX rounding mode is reset */
 455     result->evex_brerop = -1;   /* Reset EVEX broadcasting/ER op position */
 456
 457     /* Ignore blank lines */
 458     if (i == TOKEN_EOS)
 459         goto fail;
 460
 461     if (i != TOKEN_ID       &&
 462         i != TOKEN_INSN     &&
 463         i != TOKEN_PREFIX   &&
 464         (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) {
 465         nasm_error(ERR_NONFATAL,
 466                    "label or instruction expected at start of line");
 467         goto fail;
 468     }
 469
 470     if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) {
 471         /* there's a label here */
 472         first = false;
 473         result->label = tokval.t_charptr;
 474         i = stdscan(NULL, &tokval);
 475         if (i == ':') {         /* skip over the optional colon */
 476             i = stdscan(NULL, &tokval);
 477         } else if (i == 0) {
 478             nasm_error(ERR_WARNING | ERR_WARN_OL | ERR_PASS1,
 479                   "label alone on a line without a colon might be in error");
 480         }
 481         if (i != TOKEN_INSN || tokval.t_integer != I_EQU) {
 482             /*
 483              * FIXME: location.segment could be NO_SEG, in which case
 484              * it is possible we should be passing 'absolute.segment'. Look into this.
 485              * Work out whether that is *really* what we should be doing.
 486              * Generally fix things. I think this is right as it is, but
 487              * am still not certain.
 488              */
 489             define_label(result->label,
 490                          in_absolute ? absolute.segment : location.segment,
 491                          location.offset, true);
 492         }
 493     }
 494
 495     /* Just a label here */
 496     if (i == TOKEN_EOS)
 497         goto fail;
 498
 499     while (i == TOKEN_PREFIX ||
 500            (i == TOKEN_REG && IS_SREG(tokval.t_integer))) {
 501         first = false;
 502
 503         /*
 504          * Handle special case: the TIMES prefix.
 505          */
 506         if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
 507             expr *value;
 508
 509             i = stdscan(NULL, &tokval);
 510             value = evaluate(stdscan, NULL, &tokval, NULL, pass0, NULL);
 511             i = tokval.t_type;
 512             if (!value)                  /* Error in evaluator */
 513                 goto fail;
 514             if (!is_simple(value)) {
 515                 nasm_error(ERR_NONFATAL,
 516                       "non-constant argument supplied to TIMES");
 517                 result->times = 1L;
 518             } else {
 519                 result->times = value->value;
 520                 if (value->value < 0) {
 521                     nasm_error(ERR_NONFATAL|ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
 522                     result->times = 0;
 523                 }
 524             }
 525         } else {
 526             int slot = prefix_slot(tokval.t_integer);
 527             if (result->prefixes[slot]) {
 528                if (result->prefixes[slot] == tokval.t_integer)
 529                     nasm_error(ERR_WARNING | ERR_PASS1,
 530                                "instruction has redundant prefixes");
 531                else
 532                     nasm_error(ERR_NONFATAL,
 533                                "instruction has conflicting prefixes");
 534             }
 535             result->prefixes[slot] = tokval.t_integer;
 536             i = stdscan(NULL, &tokval);
 537         }
 538     }
 539
 540     if (i != TOKEN_INSN) {
 541         int j;
 542         enum prefixes pfx;
 543
 544         for (j = 0; j < MAXPREFIX; j++) {
 545             if ((pfx = result->prefixes[j]) != P_none)
 546                 break;
 547         }
 548
 549         if (i == 0 && pfx != P_none) {
 550             /*
 551              * Instruction prefixes are present, but no actual
 552              * instruction. This is allowed: at this point we
 553              * invent a notional instruction of RESB 0.
 554              */
 555             result->opcode          = I_RESB;
 556             result->operands        = 1;
 557             nasm_zero(result->oprs);
 558             result->oprs[0].type    = IMMEDIATE;
 559             result->oprs[0].offset  = 0L;
 560             result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
 561             return result;
 562         } else {
 563             nasm_error(ERR_NONFATAL, "parser: instruction expected");
 564             goto fail;
 565         }
 566     }
 567
 568     result->opcode = tokval.t_integer;
 569     result->condition = tokval.t_inttwo;
 570
 571     /*
 572      * INCBIN cannot be satisfied with incorrectly
 573      * evaluated operands, since the correct values _must_ be known
 574      * on the first pass. Hence, even in pass one, we set the
 575      * `critical' flag on calling evaluate(), so that it will bomb
 576      * out on undefined symbols.
 577      */
 578     if (result->opcode == I_INCBIN) {
 579         critical = (pass0 < 2 ? 1 : 2);
 580
 581     } else
 582         critical = (pass == 2 ? 2 : 0);
 583
 584     if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) {
 585         extop *eop, **tail = &result->eops, **fixptr;
 586         int oper_num = 0;
 587         int32_t sign;
 588
 589         result->eops_float = false;
 590
 591         /*
 592          * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands.
 593          */
 594         while (1) {
 595             i = stdscan(NULL, &tokval);
 596             if (i == TOKEN_EOS)
 597                 break;
 598             else if (first && i == ':') {
 599                 insn_is_label = true;
 600                 goto restart_parse;
 601             }
 602             first = false;
 603             fixptr = tail;
 604             eop = *tail = nasm_malloc(sizeof(extop));
 605             tail = &eop->next;
 606             eop->next = NULL;
 607             eop->type = EOT_NOTHING;
 608             oper_num++;
 609             sign = +1;
 610
 611             /*
 612              * is_comma_next() here is to distinguish this from
 613              * a string used as part of an expression...
 614              */
 615             if (i == TOKEN_STR && is_comma_next()) {
 616                 eop->type       = EOT_DB_STRING;
 617                 eop->stringval  = tokval.t_charptr;
 618                 eop->stringlen  = tokval.t_inttwo;
 619                 i = stdscan(NULL, &tokval);     /* eat the comma */
 620             } else if (i == TOKEN_STRFUNC) {
 621                 bool parens = false;
 622                 const char *funcname = tokval.t_charptr;
 623                 enum strfunc func = tokval.t_integer;
 624                 i = stdscan(NULL, &tokval);
 625                 if (i == '(') {
 626                     parens = true;
 627                     i = stdscan(NULL, &tokval);
 628                 }
 629                 if (i != TOKEN_STR) {
 630                     nasm_error(ERR_NONFATAL,
 631                                "%s must be followed by a string constant",
 632                                funcname);
 633                         eop->type = EOT_NOTHING;
 634                 } else {
 635                     eop->type = EOT_DB_STRING_FREE;
 636                     eop->stringlen =
 637                         string_transform(tokval.t_charptr, tokval.t_inttwo,
 638                                          &eop->stringval, func);
 639                     if (eop->stringlen == (size_t)-1) {
 640                         nasm_error(ERR_NONFATAL, "invalid string for transform");
 641                         eop->type = EOT_NOTHING;
 642                     }
 643                 }
 644                 if (parens && i && i != ')') {
 645                     i = stdscan(NULL, &tokval);
 646                     if (i != ')') {
 647                         nasm_error(ERR_NONFATAL, "unterminated %s function",
 648                                    funcname);
 649                     }
 650                 }
 651                 if (i && i != ',')
 652                     i = stdscan(NULL, &tokval);
 653             } else if (i == '-' || i == '+') {
 654                 char *save = stdscan_get();
 655                 int token = i;
 656                 sign = (i == '-') ? -1 : 1;
 657                 i = stdscan(NULL, &tokval);
 658                 if (i != TOKEN_FLOAT) {
 659                     stdscan_set(save);
 660                     i = tokval.t_type = token;
 661                     goto is_expression;
 662                 } else {
 663                     goto is_float;
 664                 }
 665             } else if (i == TOKEN_FLOAT) {
 666 is_float:
 667                 eop->type = EOT_DB_STRING;
 668                 result->eops_float = true;
 669
 670                 eop->stringlen = db_bytes(result->opcode);
 671                 if (eop->stringlen > 16) {
 672                     nasm_error(ERR_NONFATAL, "floating-point constant"
 673                                " encountered in DY or DZ instruction");
 674                     eop->stringlen = 0;
 675                 } else if (eop->stringlen < 1) {
 676                     nasm_error(ERR_NONFATAL, "floating-point constant"
 677                                " encountered in unknown instruction");
 678                     /*
 679                      * fix suggested by Pedro Gimeno... original line was:
 680                      * eop->type = EOT_NOTHING;
 681                      */
 682                     eop->stringlen = 0;
 683                 }
 684
 685                 eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
 686                 tail = &eop->next;
 687                 *fixptr = eop;
 688                 eop->stringval = (char *)eop + sizeof(extop);
 689                 if (!eop->stringlen ||
 690                     !float_const(tokval.t_charptr, sign,
 691                                  (uint8_t *)eop->stringval, eop->stringlen))
 692                     eop->type = EOT_NOTHING;
 693                 i = stdscan(NULL, &tokval); /* eat the comma */
 694             } else {
 695                 /* anything else, assume it is an expression */
 696                 expr *value;
 697
 698 is_expression:
 699                 value = evaluate(stdscan, NULL, &tokval, NULL,
 700                                  critical, NULL);
 701                 i = tokval.t_type;
 702                 if (!value)                  /* Error in evaluator */
 703                     goto fail;
 704                 if (value_to_extop(value, eop, location.segment)) {
 705                     nasm_error(ERR_NONFATAL,
 706                                "operand %d: expression is not simple or relocatable",
 707                                oper_num);
 708                 }
 709             }
 710
 711             /*
 712              * We're about to call stdscan(), which will eat the
 713              * comma that we're currently sitting on between
 714              * arguments. However, we'd better check first that it
 715              * _is_ a comma.
 716              */
 717             if (i == TOKEN_EOS) /* also could be EOL */
 718                 break;
 719             if (i != ',') {
 720                 nasm_error(ERR_NONFATAL, "comma expected after operand %d",
 721                            oper_num);
 722                 goto fail;
 723             }
 724         }
 725
 726         if (result->opcode == I_INCBIN) {
 727             /*
 728              * Correct syntax for INCBIN is that there should be
 729              * one string operand, followed by one or two numeric
 730              * operands.
 731              */
 732             if (!result->eops || result->eops->type != EOT_DB_STRING)
 733                 nasm_error(ERR_NONFATAL, "`incbin' expects a file name");
 734             else if (result->eops->next &&
 735                      result->eops->next->type != EOT_DB_NUMBER)
 736                 nasm_error(ERR_NONFATAL, "`incbin': second parameter is"
 737                            " non-numeric");
 738             else if (result->eops->next && result->eops->next->next &&
 739                      result->eops->next->next->type != EOT_DB_NUMBER)
 740                 nasm_error(ERR_NONFATAL, "`incbin': third parameter is"
 741                            " non-numeric");
 742             else if (result->eops->next && result->eops->next->next &&
 743                      result->eops->next->next->next)
 744                 nasm_error(ERR_NONFATAL,
 745                            "`incbin': more than three parameters");
 746             else
 747                 return result;
 748             /*
 749              * If we reach here, one of the above errors happened.
 750              * Throw the instruction away.
 751              */
 752             goto fail;
 753         } else /* DB ... */ if (oper_num == 0)
 754             nasm_error(ERR_WARNING | ERR_PASS1,
 755                   "no operand for data declaration");
 756         else
 757             result->operands = oper_num;
 758
 759         return result;
 760     }
 761
 762     /*
 763      * Now we begin to parse the operands. There may be up to four
 764      * of these, separated by commas, and terminated by a zero token.
 765      */
 766
 767     for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
 768         operand *op = &result->oprs[opnum];
 769         expr *value;            /* used most of the time */
 770         bool mref;              /* is this going to be a memory ref? */
 771         bool bracket;           /* is it a [] mref, or a & mref? */
 772         bool mib;               /* compound (mib) mref? */
 773         int setsize = 0;
 774         decoflags_t brace_flags = 0;    /* flags for decorators in braces */
 775
 776         op->disp_size = 0;    /* have to zero this whatever */
 777         op->eaflags   = 0;    /* and this */
 778         op->opflags   = 0;
 779         op->decoflags = 0;
 780
 781         i = stdscan(NULL, &tokval);
 782         if (i == TOKEN_EOS)
 783             break;              /* end of operands: get out of here */
 784         else if (first && i == ':') {
 785             insn_is_label = true;
 786             goto restart_parse;
 787         }
 788         first = false;
 789         op->type = 0; /* so far, no override */
 790         while (i == TOKEN_SPECIAL) {    /* size specifiers */
 791             switch (tokval.t_integer) {
 792             case S_BYTE:
 793                 if (!setsize)   /* we want to use only the first */
 794                     op->type |= BITS8;
 795                 setsize = 1;
 796                 break;
 797             case S_WORD:
 798                 if (!setsize)
 799                     op->type |= BITS16;
 800                 setsize = 1;
 801                 break;
 802             case S_DWORD:
 803             case S_LONG:
 804                 if (!setsize)
 805                     op->type |= BITS32;
 806                 setsize = 1;
 807                 break;
 808             case S_QWORD:
 809                 if (!setsize)
 810                     op->type |= BITS64;
 811                 setsize = 1;
 812                 break;
 813             case S_TWORD:
 814                 if (!setsize)
 815                     op->type |= BITS80;
 816                 setsize = 1;
 817                 break;
 818             case S_OWORD:
 819                 if (!setsize)
 820                     op->type |= BITS128;
 821                 setsize = 1;
 822                 break;
 823             case S_YWORD:
 824                 if (!setsize)
 825                     op->type |= BITS256;
 826                 setsize = 1;
 827                 break;
 828             case S_ZWORD:
 829                 if (!setsize)
 830                     op->type |= BITS512;
 831                 setsize = 1;
 832                 break;
 833             case S_TO:
 834                 op->type |= TO;
 835                 break;
 836             case S_STRICT:
 837                 op->type |= STRICT;
 838                 break;
 839             case S_FAR:
 840                 op->type |= FAR;
 841                 break;
 842             case S_NEAR:
 843                 op->type |= NEAR;
 844                 break;
 845             case S_SHORT:
 846                 op->type |= SHORT;
 847                 break;
 848             default:
 849                 nasm_error(ERR_NONFATAL, "invalid operand size specification");
 850             }
 851             i = stdscan(NULL, &tokval);
 852         }
 853
 854         if (i == '[' || i == '&') {     /* memory reference */
 855             mref = true;
 856             bracket = (i == '[');
 857             i = stdscan(NULL, &tokval); /* then skip the colon */
 858             while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
 859                 process_size_override(result, op);
 860                 i = stdscan(NULL, &tokval);
 861             }
 862             /* when a comma follows an opening bracket - [ , eax*4] */
 863             if (i == ',') {
 864                 /* treat as if there is a zero displacement virtually */
 865                 tokval.t_type = TOKEN_NUM;
 866                 tokval.t_integer = 0;
 867                 stdscan_set(stdscan_get() - 1);     /* rewind the comma */
 868             }
 869         } else {                /* immediate operand, or register */
 870             mref = false;
 871             bracket = false;    /* placate optimisers */
 872         }
 873
 874         if ((op->type & FAR) && !mref &&
 875             result->opcode != I_JMP && result->opcode != I_CALL) {
 876             nasm_error(ERR_NONFATAL, "invalid use of FAR operand specifier");
 877         }
 878
 879         value = evaluate(stdscan, NULL, &tokval,
 880                          &op->opflags, critical, &hints);
 881         i = tokval.t_type;
 882         if (op->opflags & OPFLAG_FORWARD) {
 883             result->forw_ref = true;
 884         }
 885         if (!value)                  /* Error in evaluator */
 886             goto fail;
 887         if (i == ':' && mref) { /* it was seg:offset */
 888             /*
 889              * Process the segment override.
 890              */
 891             if (value[1].type   != 0    ||
 892                 value->value    != 1    ||
 893                 !IS_SREG(value->type))
 894                 nasm_error(ERR_NONFATAL, "invalid segment override");
 895             else if (result->prefixes[PPS_SEG])
 896                 nasm_error(ERR_NONFATAL,
 897                       "instruction has conflicting segment overrides");
 898             else {
 899                 result->prefixes[PPS_SEG] = value->type;
 900                 if (IS_FSGS(value->type))
 901                     op->eaflags |= EAF_FSGS;
 902             }
 903
 904             i = stdscan(NULL, &tokval); /* then skip the colon */
 905             while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
 906                 process_size_override(result, op);
 907                 i = stdscan(NULL, &tokval);
 908             }
 909             value = evaluate(stdscan, NULL, &tokval,
 910                              &op->opflags, critical, &hints);
 911             i = tokval.t_type;
 912             if (op->opflags & OPFLAG_FORWARD) {
 913                 result->forw_ref = true;
 914             }
 915             /* and get the offset */
 916             if (!value)                  /* Error in evaluator */
 917                 goto fail;
 918         }
 919
 920         mib = false;
 921         if (mref && bracket && i == ',') {
 922             /* [seg:base+offset,index*scale] syntax (mib) */
 923
 924             operand o1, o2;     /* Partial operands */
 925
 926             if (parse_mref(&o1, value))
 927                 goto fail;
 928
 929             i = stdscan(NULL, &tokval); /* Eat comma */
 930             value = evaluate(stdscan, NULL, &tokval, &op->opflags,
 931                              critical, &hints);
 932             i = tokval.t_type;
 933             if (!value)
 934                 goto fail;
 935
 936             if (parse_mref(&o2, value))
 937                 goto fail;
 938
 939             if (o2.basereg != -1 && o2.indexreg == -1) {
 940                 o2.indexreg = o2.basereg;
 941                 o2.scale = 1;
 942                 o2.basereg = -1;
 943             }
 944
 945             if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
 946                 o2.segment != NO_SEG || o2.wrt != NO_SEG) {
 947                 nasm_error(ERR_NONFATAL, "invalid mib expression");
 948                 goto fail;
 949             }
 950
 951             op->basereg = o1.basereg;
 952             op->indexreg = o2.indexreg;
 953             op->scale = o2.scale;
 954             op->offset = o1.offset;
 955             op->segment = o1.segment;
 956             op->wrt = o1.wrt;
 957
 958             if (op->basereg != -1) {
 959                 op->hintbase = op->basereg;
 960                 op->hinttype = EAH_MAKEBASE;
 961             } else if (op->indexreg != -1) {
 962                 op->hintbase = op->indexreg;
 963                 op->hinttype = EAH_NOTBASE;
 964             } else {
 965                 op->hintbase = -1;
 966                 op->hinttype = EAH_NOHINT;
 967             }
 968
 969             mib = true;
 970         }
 971
 972         recover = false;
 973         if (mref && bracket) {  /* find ] at the end */
 974             if (i != ']') {
 975                 nasm_error(ERR_NONFATAL, "parser: expecting ]");
 976                 recover = true;
 977             } else {            /* we got the required ] */
 978                 i = stdscan(NULL, &tokval);
 979                 if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
 980                     /* parse opmask (and zeroing) after an operand */
 981                     recover = parse_braces(&brace_flags);
 982                     i = tokval.t_type;
 983                 }
 984                 if (i != 0 && i != ',') {
 985                     nasm_error(ERR_NONFATAL, "comma or end of line expected");
 986                     recover = true;
 987                 }
 988             }
 989         } else {                /* immediate operand */
 990             if (i != 0 && i != ',' && i != ':' &&
 991                 i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
 992                 nasm_error(ERR_NONFATAL, "comma, colon, decorator or end of "
 993                                          "line expected after operand");
 994                 recover = true;
 995             } else if (i == ':') {
 996                 op->type |= COLON;
 997             } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
 998                 /* parse opmask (and zeroing) after an operand */
 999                 recover = parse_braces(&brace_flags);
1000             }
1001         }
1002         if (recover) {
1003             do {                /* error recovery */
1004                 i = stdscan(NULL, &tokval);
1005             } while (i != 0 && i != ',');
1006         }
1007
1008         /*
1009          * now convert the exprs returned from evaluate()
1010          * into operand descriptions...
1011          */
1012         op->decoflags |= brace_flags;
1013
1014         if (mref) {             /* it's a memory reference */
1015             /* A mib reference was fully parsed already */
1016             if (!mib) {
1017                 if (parse_mref(op, value))
1018                     goto fail;
1019                 op->hintbase = hints.base;
1020                 op->hinttype = hints.type;
1021             }
1022             mref_set_optype(op);
1023         } else {                /* it's not a memory reference */
1024             if (is_just_unknown(value)) {       /* it's immediate but unknown */
1025                 op->type      |= IMMEDIATE;
1026                 op->opflags   |= OPFLAG_UNKNOWN;
1027                 op->offset    = 0;        /* don't care */
1028                 op->segment   = NO_SEG;   /* don't care again */
1029                 op->wrt       = NO_SEG;   /* still don't care */
1030
1031                 if(optimizing.level >= 0 && !(op->type & STRICT)) {
1032                     /* Be optimistic */
1033                     op->type |=
1034                         UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD;
1035                 }
1036             } else if (is_reloc(value)) {       /* it's immediate */
1037                 uint64_t n = reloc_value(value);
1038
1039                 op->type      |= IMMEDIATE;
1040                 op->offset    = n;
1041                 op->segment   = reloc_seg(value);
1042                 op->wrt       = reloc_wrt(value);
1043                 op->opflags   |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
1044
1045                 if (is_simple(value)) {
1046                     if (n == 1)
1047                         op->type |= UNITY;
1048                     if (optimizing.level >= 0 && !(op->type & STRICT)) {
1049                         if ((uint32_t) (n + 128) <= 255)
1050                             op->type |= SBYTEDWORD;
1051                         if ((uint16_t) (n + 128) <= 255)
1052                             op->type |= SBYTEWORD;
1053                         if (n <= UINT64_C(0xFFFFFFFF))
1054                             op->type |= UDWORD;
1055                         if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF))
1056                             op->type |= SDWORD;
1057                     }
1058                 }
1059             } else if (value->type == EXPR_RDSAE) {
1060                 /*
1061                  * it's not an operand but a rounding or SAE decorator.
1062                  * put the decorator information in the (opflag_t) type field
1063                  * of previous operand.
1064                  */
1065                 opnum--; op--;
1066                 switch (value->value) {
1067                 case BRC_RN:
1068                 case BRC_RU:
1069                 case BRC_RD:
1070                 case BRC_RZ:
1071                 case BRC_SAE:
1072                     op->decoflags |= (value->value == BRC_SAE ? SAE : ER);
1073                     result->evex_rm = value->value;
1074                     break;
1075                 default:
1076                     nasm_error(ERR_NONFATAL, "invalid decorator");
1077                     break;
1078                 }
1079             } else {            /* it's a register */
1080                 opflags_t rs;
1081                 uint64_t regset_size = 0;
1082
1083                 if (value->type >= EXPR_SIMPLE || value->value != 1) {
1084                     nasm_error(ERR_NONFATAL, "invalid operand type");
1085                     goto fail;
1086                 }
1087
1088                 /*
1089                  * We do not allow any kind of expression, except for
1090                  * reg+value in which case it is a register set.
1091                  */
1092                 for (i = 1; value[i].type; i++) {
1093                     if (!value[i].value)
1094                         continue;
1095
1096                     switch (value[i].type) {
1097                     case EXPR_SIMPLE:
1098                         if (!regset_size) {
1099                             regset_size = value[i].value + 1;
1100                             break;
1101                         }
1102                         /* fallthrough */
1103                     default:
1104                         nasm_error(ERR_NONFATAL, "invalid operand type");
1105                         goto fail;
1106                     }
1107                 }
1108
1109                 if ((regset_size & (regset_size - 1)) ||
1110                     regset_size >= (UINT64_C(1) << REGSET_BITS)) {
1111                     nasm_error(ERR_NONFATAL | ERR_PASS2,
1112                                "invalid register set size");
1113                     regset_size = 0;
1114                 }
1115
1116                 /* clear overrides, except TO which applies to FPU regs */
1117                 if (op->type & ~TO) {
1118                     /*
1119                      * we want to produce a warning iff the specified size
1120                      * is different from the register size
1121                      */
1122                     rs = op->type & SIZE_MASK;
1123                 } else {
1124                     rs = 0;
1125                 }
1126
1127                 /*
1128                  * Make sure we're not out of nasm_reg_flags, still
1129                  * probably this should be fixed when we're defining
1130                  * the label.
1131                  *
1132                  * An easy trigger is
1133                  *
1134                  *      e equ 0x80000000:0
1135                  *      pshufw word e-0
1136                  *
1137                  */
1138                 if (value->type < EXPR_REG_START ||
1139                     value->type > EXPR_REG_END) {
1140                         nasm_error(ERR_NONFATAL, "invalid operand type");
1141                         goto fail;
1142                 }
1143
1144                 op->type      &= TO;
1145                 op->type      |= REGISTER;
1146                 op->type      |= nasm_reg_flags[value->type];
1147                 op->type      |= (regset_size >> 1) << REGSET_SHIFT;
1148                 op->decoflags |= brace_flags;
1149                 op->basereg   = value->type;
1150
1151                 if (rs && (op->type & SIZE_MASK) != rs)
1152                     nasm_error(ERR_WARNING | ERR_PASS1,
1153                           "register size specification ignored");
1154             }
1155         }
1156
1157         /* remember the position of operand having broadcasting/ER mode */
1158         if (op->decoflags & (BRDCAST_MASK | ER | SAE))
1159             result->evex_brerop = opnum;
1160     }
1161
1162     result->operands = opnum; /* set operand count */
1163
1164     /* clear remaining operands */
1165     while (opnum < MAX_OPERANDS)
1166         result->oprs[opnum++].type = 0;
1167
1168     /*
1169      * Transform RESW, RESD, RESQ, REST, RESO, RESY, RESZ into RESB.
1170      */
1171     if (opcode_is_resb(result->opcode)) {
1172         result->oprs[0].offset *= resb_bytes(result->opcode);
1173         result->oprs[0].offset *= result->times;
1174         result->times = 1;
1175         result->opcode = I_RESB;
1176     }
1177
1178     return result;
1179
1180 fail:
1181     result->opcode = I_none;
1182     return result;
1183 }
1184
1185 static int is_comma_next(void)
1186 {
1187     struct tokenval tv;
1188     char *p;
1189     int i;
1190
1191     p = stdscan_get();
1192     i = stdscan(NULL, &tv);
1193     stdscan_set(p);
1194
1195     return (i == ',' || i == ';' || !i);
1196 }
1197
1198 void cleanup_insn(insn * i)
1199 {
1200     extop *e;
1201
1202     while ((e = i->eops)) {
1203         i->eops = e->next;
1204         if (e->type == EOT_DB_STRING_FREE)
1205             nasm_free(e->stringval);
1206         nasm_free(e);
1207     }
1208 }