asm/parser.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * parser.c   source line parser for the Netwide Assembler
  36  */
  37
  38 #include "compiler.h"
  39
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <stddef.h>
  43 #include <string.h>
  44 #include <ctype.h>
  45
  46 #include "nasm.h"
  47 #include "insns.h"
  48 #include "nasmlib.h"
  49 #include "error.h"
  50 #include "stdscan.h"
  51 #include "eval.h"
  52 #include "parser.h"
  53 #include "float.h"
  54 #include "assemble.h"
  55 #include "tables.h"
  56
  57
  58 static int is_comma_next(void);   /* forward declaration: parse_line() calls this when reading DB-style string operands */
  59
  60 static struct tokenval tokval;    /* most recently scanned token: written through stdscan()/evaluate(), read by every parsing routine below */
  61
  62 static int prefix_slot(int prefix)
  63 {
  64     switch (prefix) {
  65     case P_WAIT:
  66         return PPS_WAIT;
  67     case R_CS:
  68     case R_DS:
  69     case R_SS:
  70     case R_ES:
  71     case R_FS:
  72     case R_GS:
  73         return PPS_SEG;
  74     case P_LOCK:
  75         return PPS_LOCK;
  76     case P_REP:
  77     case P_REPE:
  78     case P_REPZ:
  79     case P_REPNE:
  80     case P_REPNZ:
  81     case P_XACQUIRE:
  82     case P_XRELEASE:
  83     case P_BND:
  84     case P_NOBND:
  85         return PPS_REP;
  86     case P_O16:
  87     case P_O32:
  88     case P_O64:
  89     case P_OSP:
  90         return PPS_OSIZE;
  91     case P_A16:
  92     case P_A32:
  93     case P_A64:
  94     case P_ASP:
  95         return PPS_ASIZE;
  96     case P_EVEX:
  97     case P_VEX3:
  98     case P_VEX2:
  99         return PPS_VEX;
 100     default:
 101         nasm_panic(0, "Invalid value %d passed to prefix_slot()", prefix);
 102         return -1;
 103     }
 104 }
 105
 106 static void process_size_override(insn *result, operand *op)
 107 {
 108     if (tasm_compatible_mode) {
 109         switch (tokval.t_integer) {
 110             /* For TASM compatibility a size override inside the
 111              * brackets changes the size of the operand, not the
 112              * address type of the operand as it does in standard
 113              * NASM syntax. Hence:
 114              *
 115              *  mov     eax,[DWORD val]
 116              *
 117              * is valid syntax in TASM compatibility mode. Note that
 118              * you lose the ability to override the default address
 119              * type for the instruction, but we never use anything
 120              * but 32-bit flat model addressing in our code.
 121              */
 122         case S_BYTE:
 123             op->type |= BITS8;
 124             break;
 125         case S_WORD:
 126             op->type |= BITS16;
 127             break;
 128         case S_DWORD:
 129         case S_LONG:
 130             op->type |= BITS32;
 131             break;
 132         case S_QWORD:
 133             op->type |= BITS64;
 134             break;
 135         case S_TWORD:
 136             op->type |= BITS80;
 137             break;
 138         case S_OWORD:
 139             op->type |= BITS128;
 140             break;
 141         default:
 142             nasm_error(ERR_NONFATAL,
 143                        "invalid operand size specification");
 144             break;
 145         }
 146     } else {
 147         /* Standard NASM compatible syntax */
 148         switch (tokval.t_integer) {
 149         case S_NOSPLIT:
 150             op->eaflags |= EAF_TIMESTWO;
 151             break;
 152         case S_REL:
 153             op->eaflags |= EAF_REL;
 154             break;
 155         case S_ABS:
 156             op->eaflags |= EAF_ABS;
 157             break;
 158         case S_BYTE:
 159             op->disp_size = 8;
 160             op->eaflags |= EAF_BYTEOFFS;
 161             break;
 162         case P_A16:
 163         case P_A32:
 164         case P_A64:
 165             if (result->prefixes[PPS_ASIZE] &&
 166                 result->prefixes[PPS_ASIZE] != tokval.t_integer)
 167                 nasm_error(ERR_NONFATAL,
 168                            "conflicting address size specifications");
 169             else
 170                 result->prefixes[PPS_ASIZE] = tokval.t_integer;
 171             break;
 172         case S_WORD:
 173             op->disp_size = 16;
 174             op->eaflags |= EAF_WORDOFFS;
 175             break;
 176         case S_DWORD:
 177         case S_LONG:
 178             op->disp_size = 32;
 179             op->eaflags |= EAF_WORDOFFS;
 180             break;
 181         case S_QWORD:
 182             op->disp_size = 64;
 183             op->eaflags |= EAF_WORDOFFS;
 184             break;
 185         default:
 186             nasm_error(ERR_NONFATAL, "invalid size specification in"
 187                        " effective address");
 188             break;
 189         }
 190     }
 191 }
 192
 193 /*
 194  * Brace decorators are are parsed here.  opmask and zeroing
 195  * decorators can be placed in any order.  e.g. zmm1 {k2}{z} or zmm2
 196  * {z}{k3} decorator(s) are placed at the end of an operand.
 197  */
 198 static bool parse_braces(decoflags_t *decoflags)
 199 {
 200     int i, j;
 201
 202     i = tokval.t_type;
 203
 204     while (true) {
 205         switch (i) {
 206         case TOKEN_OPMASK:
 207             if (*decoflags & OPMASK_MASK) {
 208                 nasm_error(ERR_NONFATAL,
 209                            "opmask k%"PRIu64" is already set",
 210                            *decoflags & OPMASK_MASK);
 211                 *decoflags &= ~OPMASK_MASK;
 212             }
 213             *decoflags |= VAL_OPMASK(nasm_regvals[tokval.t_integer]);
 214             break;
 215         case TOKEN_DECORATOR:
 216             j = tokval.t_integer;
 217             switch (j) {
 218             case BRC_Z:
 219                 *decoflags |= Z_MASK;
 220                 break;
 221             case BRC_1TO2:
 222             case BRC_1TO4:
 223             case BRC_1TO8:
 224             case BRC_1TO16:
 225                 *decoflags |= BRDCAST_MASK | VAL_BRNUM(j - BRC_1TO2);
 226                 break;
 227             default:
 228                 nasm_error(ERR_NONFATAL,
 229                            "{%s} is not an expected decorator",
 230                            tokval.t_charptr);
 231                 break;
 232             }
 233             break;
 234         case ',':
 235         case TOKEN_EOS:
 236             return false;
 237         default:
 238             nasm_error(ERR_NONFATAL,
 239                        "only a series of valid decorators expected");
 240             return true;
 241         }
 242         i = stdscan(NULL, &tokval);
 243     }
 244 }
 245
 246 static int parse_mref(operand *op, const expr *e)
 247 {
 248     int b, i, s;        /* basereg, indexreg, scale */
 249     int64_t o;          /* offset */
 250
 251     b = i = -1;
 252     o = s = 0;
 253     op->segment = op->wrt = NO_SEG;
 254
 255     if (e->type && e->type <= EXPR_REG_END) {   /* this bit's a register */
 256         bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
 257
 258         if (is_gpr && e->value == 1)
 259             b = e->type;        /* It can be basereg */
 260         else                    /* No, it has to be indexreg */
 261             i = e->type, s = e->value;
 262         e++;
 263     }
 264     if (e->type && e->type <= EXPR_REG_END) {   /* it's a 2nd register */
 265         bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
 266
 267         if (b != -1)    /* If the first was the base, ... */
 268             i = e->type, s = e->value;  /* second has to be indexreg */
 269
 270         else if (!is_gpr || e->value != 1) {
 271             /* If both want to be index */
 272             nasm_error(ERR_NONFATAL,
 273                        "invalid effective address: two index registers");
 274             return -1;
 275         } else
 276             b = e->type;
 277         e++;
 278     }
 279
 280     if (e->type) {                     /* is there an offset? */
 281         if (e->type <= EXPR_REG_END) {  /* in fact, is there an error? */
 282             nasm_error(ERR_NONFATAL,
 283                        "invalid effective address: impossible register");
 284             return -1;
 285         } else {
 286             if (e->type == EXPR_UNKNOWN) {
 287                 op->opflags |= OPFLAG_UNKNOWN;
 288                 o = 0;  /* doesn't matter what */
 289                 while (e->type)
 290                     e++;        /* go to the end of the line */
 291             } else {
 292                 if (e->type == EXPR_SIMPLE) {
 293                     o = e->value;
 294                     e++;
 295                 }
 296                 if (e->type == EXPR_WRT) {
 297                     op->wrt = e->value;
 298                     e++;
 299                 }
 300                 /*
 301                  * Look for a segment base type.
 302                  */
 303                 for (; e->type; e++) {
 304                     if (!e->value)
 305                         continue;
 306
 307                     if (e->type <= EXPR_REG_END) {
 308                         nasm_error(ERR_NONFATAL,
 309                                    "invalid effective address: too many registers");
 310                         return -1;
 311                     } else if (e->type < EXPR_SEGBASE) {
 312                         nasm_error(ERR_NONFATAL,
 313                                    "invalid effective address: bad subexpression type");
 314                         return -1;
 315                     } else if (e->value == 1) {
 316                         if (op->segment != NO_SEG) {
 317                             nasm_error(ERR_NONFATAL,
 318                                        "invalid effective address: multiple base segments");
 319                             return -1;
 320                         }
 321                         op->segment = e->type - EXPR_SEGBASE;
 322                     } else if (e->value == -1 &&
 323                                e->type == location.segment + EXPR_SEGBASE &&
 324                                !(op->opflags & OPFLAG_RELATIVE)) {
 325                         op->opflags |= OPFLAG_RELATIVE;
 326                     } else {
 327                         nasm_error(ERR_NONFATAL,
 328                                    "invalid effective address: impossible segment base multiplier");
 329                         return -1;
 330                     }
 331                 }
 332             }
 333         }
 334     }
 335
 336     nasm_assert(!e->type);      /* We should be at the end */
 337
 338     op->basereg = b;
 339     op->indexreg = i;
 340     op->scale = s;
 341     op->offset = o;
 342     return 0;
 343 }
 344
 345 static void mref_set_optype(operand *op)
 346 {
 347     int b = op->basereg;
 348     int i = op->indexreg;
 349     int s = op->scale;
 350
 351     /* It is memory, but it can match any r/m operand */
 352     op->type |= MEMORY_ANY;
 353
 354     if (b == -1 && (i == -1 || s == 0)) {
 355         int is_rel = globalbits == 64 &&
 356             !(op->eaflags & EAF_ABS) &&
 357             ((globalrel &&
 358               !(op->eaflags & EAF_FSGS)) ||
 359              (op->eaflags & EAF_REL));
 360
 361         op->type |= is_rel ? IP_REL : MEM_OFFS;
 362     }
 363
 364     if (i != -1) {
 365         opflags_t iclass = nasm_reg_flags[i];
 366
 367         if (is_class(XMMREG,iclass))
 368             op->type |= XMEM;
 369         else if (is_class(YMMREG,iclass))
 370             op->type |= YMEM;
 371         else if (is_class(ZMMREG,iclass))
 372             op->type |= ZMEM;
 373     }
 374 }
 375
 376 /*
 377  * Convert an expression vector returned from evaluate() into an
 378  * extop structure.  Return zero on success.
 379  */
 380 static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
 381 {
 382     eop->type = EOT_DB_NUMBER;
 383     eop->offset = 0;
 384     eop->segment = eop->wrt = NO_SEG;
 385     eop->relative = false;
 386
 387     for (; vect->type; vect++) {
 388         if (!vect->value)       /* zero term, safe to ignore */
 389             continue;
 390
 391         if (vect->type <= EXPR_REG_END) /* false if a register is present */
 392             return -1;
 393
 394         if (vect->type == EXPR_UNKNOWN) /* something we can't resolve yet */
 395             return 0;
 396
 397         if (vect->type == EXPR_SIMPLE) {
 398             /* Simple number expression */
 399             eop->offset += vect->value;
 400             continue;
 401         }
 402         if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) {
 403             /* WRT term */
 404             eop->wrt = vect->value;
 405             continue;
 406         }
 407
 408         if (!eop->relative &&
 409             vect->type == EXPR_SEGBASE + myseg && vect->value == -1) {
 410             /* Expression of the form: foo - $ */
 411             eop->relative = true;
 412             continue;
 413         }
 414
 415         if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE &&
 416             vect->value == 1) {
 417             eop->segment = vect->type - EXPR_SEGBASE;
 418             continue;
 419         }
 420
 421         /* Otherwise, badness */
 422         return -1;
 423     }
 424
 425     /* We got to the end and it was all okay */
 426     return 0;
 427 }
 428
 429 insn *parse_line(int pass, char *buffer, insn *result, ldfunc ldef)
 430 {
 431     bool insn_is_label = false;
 432     struct eval_hints hints;
 433     int opnum;
 434     int critical;
 435     bool first;
 436     bool recover;
 437     int i;
 438
 439 restart_parse:
 440     first               = true;
 441     result->forw_ref    = false;
 442
 443     stdscan_reset();
 444     stdscan_set(buffer);
 445     i = stdscan(NULL, &tokval);
 446
 447     nasm_static_assert(P_none == 0);
 448     memset(result->prefixes, P_none, sizeof(result->prefixes));
 449     result->times       = 1;    /* No TIMES either yet */
 450     result->label       = NULL; /* Assume no label */
 451     result->eops        = NULL; /* must do this, whatever happens */
 452     result->operands    = 0;    /* must initialize this */
 453     result->evex_rm     = 0;    /* Ensure EVEX rounding mode is reset */
 454     result->evex_brerop = -1;   /* Reset EVEX broadcasting/ER op position */
 455
 456     /* Ignore blank lines */
 457     if (i == TOKEN_EOS)
 458         goto fail;
 459
 460     if (i != TOKEN_ID       &&
 461         i != TOKEN_INSN     &&
 462         i != TOKEN_PREFIX   &&
 463         (i != TOKEN_REG || !IS_SREG(tokval.t_integer))) {
 464         nasm_error(ERR_NONFATAL,
 465                    "label or instruction expected at start of line");
 466         goto fail;
 467     }
 468
 469     if (i == TOKEN_ID || (insn_is_label && i == TOKEN_INSN)) {
 470         /* there's a label here */
 471         first = false;
 472         result->label = tokval.t_charptr;
 473         i = stdscan(NULL, &tokval);
 474         if (i == ':') {         /* skip over the optional colon */
 475             i = stdscan(NULL, &tokval);
 476         } else if (i == 0) {
 477             nasm_error(ERR_WARNING | ERR_WARN_OL | ERR_PASS1,
 478                   "label alone on a line without a colon might be in error");
 479         }
 480         if (i != TOKEN_INSN || tokval.t_integer != I_EQU) {
 481             /*
 482              * FIXME: location.segment could be NO_SEG, in which case
 483              * it is possible we should be passing 'absolute.segment'. Look into this.
 484              * Work out whether that is *really* what we should be doing.
 485              * Generally fix things. I think this is right as it is, but
 486              * am still not certain.
 487              */
 488             ldef(result->label, in_absolute ? absolute.segment : location.segment,
 489                  location.offset, NULL, true, false);
 490         }
 491     }
 492
 493     /* Just a label here */
 494     if (i == TOKEN_EOS)
 495         goto fail;
 496
 497     while (i == TOKEN_PREFIX ||
 498            (i == TOKEN_REG && IS_SREG(tokval.t_integer))) {
 499         first = false;
 500
 501         /*
 502          * Handle special case: the TIMES prefix.
 503          */
 504         if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
 505             expr *value;
 506
 507             i = stdscan(NULL, &tokval);
 508             value = evaluate(stdscan, NULL, &tokval, NULL, pass0, NULL);
 509             i = tokval.t_type;
 510             if (!value)                  /* Error in evaluator */
 511                 goto fail;
 512             if (!is_simple(value)) {
 513                 nasm_error(ERR_NONFATAL,
 514                       "non-constant argument supplied to TIMES");
 515                 result->times = 1L;
 516             } else {
 517                 result->times = value->value;
 518                 if (value->value < 0) {
 519                     nasm_error(ERR_NONFATAL|ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
 520                     result->times = 0;
 521                 }
 522             }
 523         } else {
 524             int slot = prefix_slot(tokval.t_integer);
 525             if (result->prefixes[slot]) {
 526                if (result->prefixes[slot] == tokval.t_integer)
 527                     nasm_error(ERR_WARNING | ERR_PASS1,
 528                                "instruction has redundant prefixes");
 529                else
 530                     nasm_error(ERR_NONFATAL,
 531                                "instruction has conflicting prefixes");
 532             }
 533             result->prefixes[slot] = tokval.t_integer;
 534             i = stdscan(NULL, &tokval);
 535         }
 536     }
 537
 538     if (i != TOKEN_INSN) {
 539         int j;
 540         enum prefixes pfx;
 541
 542         for (j = 0; j < MAXPREFIX; j++) {
 543             if ((pfx = result->prefixes[j]) != P_none)
 544                 break;
 545         }
 546
 547         if (i == 0 && pfx != P_none) {
 548             /*
 549              * Instruction prefixes are present, but no actual
 550              * instruction. This is allowed: at this point we
 551              * invent a notional instruction of RESB 0.
 552              */
 553             result->opcode          = I_RESB;
 554             result->operands        = 1;
 555             nasm_zero(result->oprs);
 556             result->oprs[0].type    = IMMEDIATE;
 557             result->oprs[0].offset  = 0L;
 558             result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
 559             return result;
 560         } else {
 561             nasm_error(ERR_NONFATAL, "parser: instruction expected");
 562             goto fail;
 563         }
 564     }
 565
 566     result->opcode = tokval.t_integer;
 567     result->condition = tokval.t_inttwo;
 568
 569     /*
 570      * INCBIN cannot be satisfied with incorrectly
 571      * evaluated operands, since the correct values _must_ be known
 572      * on the first pass. Hence, even in pass one, we set the
 573      * `critical' flag on calling evaluate(), so that it will bomb
 574      * out on undefined symbols.
 575      */
 576     if (result->opcode == I_INCBIN) {
 577         critical = (pass0 < 2 ? 1 : 2);
 578
 579     } else
 580         critical = (pass == 2 ? 2 : 0);
 581
 582     if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) {
 583         extop *eop, **tail = &result->eops, **fixptr;
 584         int oper_num = 0;
 585         int32_t sign;
 586
 587         result->eops_float = false;
 588
 589         /*
 590          * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands.
 591          */
 592         while (1) {
 593             i = stdscan(NULL, &tokval);
 594             if (i == TOKEN_EOS)
 595                 break;
 596             else if (first && i == ':') {
 597                 insn_is_label = true;
 598                 goto restart_parse;
 599             }
 600             first = false;
 601             fixptr = tail;
 602             eop = *tail = nasm_malloc(sizeof(extop));
 603             tail = &eop->next;
 604             eop->next = NULL;
 605             eop->type = EOT_NOTHING;
 606             oper_num++;
 607             sign = +1;
 608
 609             /*
 610              * is_comma_next() here is to distinguish this from
 611              * a string used as part of an expression...
 612              */
 613             if (i == TOKEN_STR && is_comma_next()) {
 614                 eop->type       = EOT_DB_STRING;
 615                 eop->stringval  = tokval.t_charptr;
 616                 eop->stringlen  = tokval.t_inttwo;
 617                 i = stdscan(NULL, &tokval);     /* eat the comma */
 618             } else if (i == TOKEN_STRFUNC) {
 619                 bool parens = false;
 620                 const char *funcname = tokval.t_charptr;
 621                 enum strfunc func = tokval.t_integer;
 622                 i = stdscan(NULL, &tokval);
 623                 if (i == '(') {
 624                     parens = true;
 625                     i = stdscan(NULL, &tokval);
 626                 }
 627                 if (i != TOKEN_STR) {
 628                     nasm_error(ERR_NONFATAL,
 629                                "%s must be followed by a string constant",
 630                                funcname);
 631                         eop->type = EOT_NOTHING;
 632                 } else {
 633                     eop->type = EOT_DB_STRING_FREE;
 634                     eop->stringlen =
 635                         string_transform(tokval.t_charptr, tokval.t_inttwo,
 636                                          &eop->stringval, func);
 637                     if (eop->stringlen == (size_t)-1) {
 638                         nasm_error(ERR_NONFATAL, "invalid string for transform");
 639                         eop->type = EOT_NOTHING;
 640                     }
 641                 }
 642                 if (parens && i && i != ')') {
 643                     i = stdscan(NULL, &tokval);
 644                     if (i != ')') {
 645                         nasm_error(ERR_NONFATAL, "unterminated %s function",
 646                                    funcname);
 647                     }
 648                 }
 649                 if (i && i != ',')
 650                     i = stdscan(NULL, &tokval);
 651             } else if (i == '-' || i == '+') {
 652                 char *save = stdscan_get();
 653                 int token = i;
 654                 sign = (i == '-') ? -1 : 1;
 655                 i = stdscan(NULL, &tokval);
 656                 if (i != TOKEN_FLOAT) {
 657                     stdscan_set(save);
 658                     i = tokval.t_type = token;
 659                     goto is_expression;
 660                 } else {
 661                     goto is_float;
 662                 }
 663             } else if (i == TOKEN_FLOAT) {
 664 is_float:
 665                 eop->type = EOT_DB_STRING;
 666                 result->eops_float = true;
 667
 668                 eop->stringlen = db_bytes(result->opcode);
 669                 if (eop->stringlen > 16) {
 670                     nasm_error(ERR_NONFATAL, "floating-point constant"
 671                                " encountered in DY or DZ instruction");
 672                     eop->stringlen = 0;
 673                 } else if (eop->stringlen < 1) {
 674                     nasm_error(ERR_NONFATAL, "floating-point constant"
 675                                " encountered in unknown instruction");
 676                     /*
 677                      * fix suggested by Pedro Gimeno... original line was:
 678                      * eop->type = EOT_NOTHING;
 679                      */
 680                     eop->stringlen = 0;
 681                 }
 682
 683                 eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
 684                 tail = &eop->next;
 685                 *fixptr = eop;
 686                 eop->stringval = (char *)eop + sizeof(extop);
 687                 if (!eop->stringlen ||
 688                     !float_const(tokval.t_charptr, sign,
 689                                  (uint8_t *)eop->stringval, eop->stringlen))
 690                     eop->type = EOT_NOTHING;
 691                 i = stdscan(NULL, &tokval); /* eat the comma */
 692             } else {
 693                 /* anything else, assume it is an expression */
 694                 expr *value;
 695
 696 is_expression:
 697                 value = evaluate(stdscan, NULL, &tokval, NULL,
 698                                  critical, NULL);
 699                 i = tokval.t_type;
 700                 if (!value)                  /* Error in evaluator */
 701                     goto fail;
 702                 if (value_to_extop(value, eop, location.segment)) {
 703                     nasm_error(ERR_NONFATAL,
 704                                "operand %d: expression is not simple or relocatable",
 705                                oper_num);
 706                 }
 707             }
 708
 709             /*
 710              * We're about to call stdscan(), which will eat the
 711              * comma that we're currently sitting on between
 712              * arguments. However, we'd better check first that it
 713              * _is_ a comma.
 714              */
 715             if (i == TOKEN_EOS) /* also could be EOL */
 716                 break;
 717             if (i != ',') {
 718                 nasm_error(ERR_NONFATAL, "comma expected after operand %d",
 719                            oper_num);
 720                 goto fail;
 721             }
 722         }
 723
 724         if (result->opcode == I_INCBIN) {
 725             /*
 726              * Correct syntax for INCBIN is that there should be
 727              * one string operand, followed by one or two numeric
 728              * operands.
 729              */
 730             if (!result->eops || result->eops->type != EOT_DB_STRING)
 731                 nasm_error(ERR_NONFATAL, "`incbin' expects a file name");
 732             else if (result->eops->next &&
 733                      result->eops->next->type != EOT_DB_NUMBER)
 734                 nasm_error(ERR_NONFATAL, "`incbin': second parameter is"
 735                            " non-numeric");
 736             else if (result->eops->next && result->eops->next->next &&
 737                      result->eops->next->next->type != EOT_DB_NUMBER)
 738                 nasm_error(ERR_NONFATAL, "`incbin': third parameter is"
 739                            " non-numeric");
 740             else if (result->eops->next && result->eops->next->next &&
 741                      result->eops->next->next->next)
 742                 nasm_error(ERR_NONFATAL,
 743                            "`incbin': more than three parameters");
 744             else
 745                 return result;
 746             /*
 747              * If we reach here, one of the above errors happened.
 748              * Throw the instruction away.
 749              */
 750             goto fail;
 751         } else /* DB ... */ if (oper_num == 0)
 752             nasm_error(ERR_WARNING | ERR_PASS1,
 753                   "no operand for data declaration");
 754         else
 755             result->operands = oper_num;
 756
 757         return result;
 758     }
 759
 760     /*
 761      * Now we begin to parse the operands. There may be up to four
 762      * of these, separated by commas, and terminated by a zero token.
 763      */
 764
 765     for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
 766         operand *op = &result->oprs[opnum];
 767         expr *value;            /* used most of the time */
 768         bool mref;              /* is this going to be a memory ref? */
 769         bool bracket;           /* is it a [] mref, or a & mref? */
 770         bool mib;               /* compound (mib) mref? */
 771         int setsize = 0;
 772         decoflags_t brace_flags = 0;    /* flags for decorators in braces */
 773
 774         op->disp_size = 0;    /* have to zero this whatever */
 775         op->eaflags   = 0;    /* and this */
 776         op->opflags   = 0;
 777         op->decoflags = 0;
 778
 779         i = stdscan(NULL, &tokval);
 780         if (i == TOKEN_EOS)
 781             break;              /* end of operands: get out of here */
 782         else if (first && i == ':') {
 783             insn_is_label = true;
 784             goto restart_parse;
 785         }
 786         first = false;
 787         op->type = 0; /* so far, no override */
 788         while (i == TOKEN_SPECIAL) {    /* size specifiers */
 789             switch (tokval.t_integer) {
 790             case S_BYTE:
 791                 if (!setsize)   /* we want to use only the first */
 792                     op->type |= BITS8;
 793                 setsize = 1;
 794                 break;
 795             case S_WORD:
 796                 if (!setsize)
 797                     op->type |= BITS16;
 798                 setsize = 1;
 799                 break;
 800             case S_DWORD:
 801             case S_LONG:
 802                 if (!setsize)
 803                     op->type |= BITS32;
 804                 setsize = 1;
 805                 break;
 806             case S_QWORD:
 807                 if (!setsize)
 808                     op->type |= BITS64;
 809                 setsize = 1;
 810                 break;
 811             case S_TWORD:
 812                 if (!setsize)
 813                     op->type |= BITS80;
 814                 setsize = 1;
 815                 break;
 816             case S_OWORD:
 817                 if (!setsize)
 818                     op->type |= BITS128;
 819                 setsize = 1;
 820                 break;
 821             case S_YWORD:
 822                 if (!setsize)
 823                     op->type |= BITS256;
 824                 setsize = 1;
 825                 break;
 826             case S_ZWORD:
 827                 if (!setsize)
 828                     op->type |= BITS512;
 829                 setsize = 1;
 830                 break;
 831             case S_TO:
 832                 op->type |= TO;
 833                 break;
 834             case S_STRICT:
 835                 op->type |= STRICT;
 836                 break;
 837             case S_FAR:
 838                 op->type |= FAR;
 839                 break;
 840             case S_NEAR:
 841                 op->type |= NEAR;
 842                 break;
 843             case S_SHORT:
 844                 op->type |= SHORT;
 845                 break;
 846             default:
 847                 nasm_error(ERR_NONFATAL, "invalid operand size specification");
 848             }
 849             i = stdscan(NULL, &tokval);
 850         }
 851
 852         if (i == '[' || i == '&') {     /* memory reference */
 853             mref = true;
 854             bracket = (i == '[');
 855             i = stdscan(NULL, &tokval); /* then skip the colon */
 856             while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
 857                 process_size_override(result, op);
 858                 i = stdscan(NULL, &tokval);
 859             }
 860             /* when a comma follows an opening bracket - [ , eax*4] */
 861             if (i == ',') {
 862                 /* treat as if there is a zero displacement virtually */
 863                 tokval.t_type = TOKEN_NUM;
 864                 tokval.t_integer = 0;
 865                 stdscan_set(stdscan_get() - 1);     /* rewind the comma */
 866             }
 867         } else {                /* immediate operand, or register */
 868             mref = false;
 869             bracket = false;    /* placate optimisers */
 870         }
 871
 872         if ((op->type & FAR) && !mref &&
 873             result->opcode != I_JMP && result->opcode != I_CALL) {
 874             nasm_error(ERR_NONFATAL, "invalid use of FAR operand specifier");
 875         }
 876
 877         value = evaluate(stdscan, NULL, &tokval,
 878                          &op->opflags, critical, &hints);
 879         i = tokval.t_type;
 880         if (op->opflags & OPFLAG_FORWARD) {
 881             result->forw_ref = true;
 882         }
 883         if (!value)                  /* Error in evaluator */
 884             goto fail;
 885         if (i == ':' && mref) { /* it was seg:offset */
 886             /*
 887              * Process the segment override.
 888              */
 889             if (value[1].type   != 0    ||
 890                 value->value    != 1    ||
 891                 !IS_SREG(value->type))
 892                 nasm_error(ERR_NONFATAL, "invalid segment override");
 893             else if (result->prefixes[PPS_SEG])
 894                 nasm_error(ERR_NONFATAL,
 895                       "instruction has conflicting segment overrides");
 896             else {
 897                 result->prefixes[PPS_SEG] = value->type;
 898                 if (IS_FSGS(value->type))
 899                     op->eaflags |= EAF_FSGS;
 900             }
 901
 902             i = stdscan(NULL, &tokval); /* then skip the colon */
 903             while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
 904                 process_size_override(result, op);
 905                 i = stdscan(NULL, &tokval);
 906             }
 907             value = evaluate(stdscan, NULL, &tokval,
 908                              &op->opflags, critical, &hints);
 909             i = tokval.t_type;
 910             if (op->opflags & OPFLAG_FORWARD) {
 911                 result->forw_ref = true;
 912             }
 913             /* and get the offset */
 914             if (!value)                  /* Error in evaluator */
 915                 goto fail;
 916         }
 917
 918         mib = false;
 919         if (mref && bracket && i == ',') {
 920             /* [seg:base+offset,index*scale] syntax (mib) */
 921
 922             operand o1, o2;     /* Partial operands */
 923
 924             if (parse_mref(&o1, value))
 925                 goto fail;
 926
 927             i = stdscan(NULL, &tokval); /* Eat comma */
 928             value = evaluate(stdscan, NULL, &tokval, &op->opflags,
 929                              critical, &hints);
 930             i = tokval.t_type;
 931             if (!value)
 932                 goto fail;
 933
 934             if (parse_mref(&o2, value))
 935                 goto fail;
 936
 937             if (o2.basereg != -1 && o2.indexreg == -1) {
 938                 o2.indexreg = o2.basereg;
 939                 o2.scale = 1;
 940                 o2.basereg = -1;
 941             }
 942
 943             if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
 944                 o2.segment != NO_SEG || o2.wrt != NO_SEG) {
 945                 nasm_error(ERR_NONFATAL, "invalid mib expression");
 946                 goto fail;
 947             }
 948
 949             op->basereg = o1.basereg;
 950             op->indexreg = o2.indexreg;
 951             op->scale = o2.scale;
 952             op->offset = o1.offset;
 953             op->segment = o1.segment;
 954             op->wrt = o1.wrt;
 955
 956             if (op->basereg != -1) {
 957                 op->hintbase = op->basereg;
 958                 op->hinttype = EAH_MAKEBASE;
 959             } else if (op->indexreg != -1) {
 960                 op->hintbase = op->indexreg;
 961                 op->hinttype = EAH_NOTBASE;
 962             } else {
 963                 op->hintbase = -1;
 964                 op->hinttype = EAH_NOHINT;
 965             }
 966
 967             mib = true;
 968         }
 969
 970         recover = false;
 971         if (mref && bracket) {  /* find ] at the end */
 972             if (i != ']') {
 973                 nasm_error(ERR_NONFATAL, "parser: expecting ]");
 974                 recover = true;
 975             } else {            /* we got the required ] */
 976                 i = stdscan(NULL, &tokval);
 977                 if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
 978                     /* parse opmask (and zeroing) after an operand */
 979                     recover = parse_braces(&brace_flags);
 980                     i = tokval.t_type;
 981                 }
 982                 if (i != 0 && i != ',') {
 983                     nasm_error(ERR_NONFATAL, "comma or end of line expected");
 984                     recover = true;
 985                 }
 986             }
 987         } else {                /* immediate operand */
 988             if (i != 0 && i != ',' && i != ':' &&
 989                 i != TOKEN_DECORATOR && i != TOKEN_OPMASK) {
 990                 nasm_error(ERR_NONFATAL, "comma, colon, decorator or end of "
 991                                          "line expected after operand");
 992                 recover = true;
 993             } else if (i == ':') {
 994                 op->type |= COLON;
 995             } else if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
 996                 /* parse opmask (and zeroing) after an operand */
 997                 recover = parse_braces(&brace_flags);
 998             }
 999         }
1000         if (recover) {
1001             do {                /* error recovery */
1002                 i = stdscan(NULL, &tokval);
1003             } while (i != 0 && i != ',');
1004         }
1005
1006         /*
1007          * now convert the exprs returned from evaluate()
1008          * into operand descriptions...
1009          */
1010         op->decoflags |= brace_flags;
1011
1012         if (mref) {             /* it's a memory reference */
1013             /* A mib reference was fully parsed already */
1014             if (!mib) {
1015                 if (parse_mref(op, value))
1016                     goto fail;
1017                 op->hintbase = hints.base;
1018                 op->hinttype = hints.type;
1019             }
1020             mref_set_optype(op);
1021         } else {                /* it's not a memory reference */
1022             if (is_just_unknown(value)) {       /* it's immediate but unknown */
1023                 op->type      |= IMMEDIATE;
1024                 op->opflags   |= OPFLAG_UNKNOWN;
1025                 op->offset    = 0;        /* don't care */
1026                 op->segment   = NO_SEG;   /* don't care again */
1027                 op->wrt       = NO_SEG;   /* still don't care */
1028
1029                 if(optimizing >= 0 && !(op->type & STRICT)) {
1030                     /* Be optimistic */
1031                     op->type |=
1032                         UNITY | SBYTEWORD | SBYTEDWORD | UDWORD | SDWORD;
1033                 }
1034             } else if (is_reloc(value)) {       /* it's immediate */
1035                 uint64_t n = reloc_value(value);
1036
1037                 op->type      |= IMMEDIATE;
1038                 op->offset    = n;
1039                 op->segment   = reloc_seg(value);
1040                 op->wrt       = reloc_wrt(value);
1041                 op->opflags   |= is_self_relative(value) ? OPFLAG_RELATIVE : 0;
1042
1043                 if (is_simple(value)) {
1044                     if (n == 1)
1045                         op->type |= UNITY;
1046                     if (optimizing >= 0 && !(op->type & STRICT)) {
1047                         if ((uint32_t) (n + 128) <= 255)
1048                             op->type |= SBYTEDWORD;
1049                         if ((uint16_t) (n + 128) <= 255)
1050                             op->type |= SBYTEWORD;
1051                         if (n <= UINT64_C(0xFFFFFFFF))
1052                             op->type |= UDWORD;
1053                         if (n + UINT64_C(0x80000000) <= UINT64_C(0xFFFFFFFF))
1054                             op->type |= SDWORD;
1055                     }
1056                 }
1057             } else if (value->type == EXPR_RDSAE) {
1058                 /*
1059                  * it's not an operand but a rounding or SAE decorator.
1060                  * put the decorator information in the (opflag_t) type field
1061                  * of previous operand.
1062                  */
1063                 opnum--; op--;
1064                 switch (value->value) {
1065                 case BRC_RN:
1066                 case BRC_RU:
1067                 case BRC_RD:
1068                 case BRC_RZ:
1069                 case BRC_SAE:
1070                     op->decoflags |= (value->value == BRC_SAE ? SAE : ER);
1071                     result->evex_rm = value->value;
1072                     break;
1073                 default:
1074                     nasm_error(ERR_NONFATAL, "invalid decorator");
1075                     break;
1076                 }
1077             } else {            /* it's a register */
1078                 opflags_t rs;
1079
1080                 if (value->type >= EXPR_SIMPLE || value->value != 1) {
1081                     nasm_error(ERR_NONFATAL, "invalid operand type");
1082                     goto fail;
1083                 }
1084
1085                 /*
1086                  * check that its only 1 register, not an expression...
1087                  */
1088                 for (i = 1; value[i].type; i++)
1089                     if (value[i].value) {
1090                         nasm_error(ERR_NONFATAL, "invalid operand type");
1091                         goto fail;
1092                     }
1093
1094                 /* clear overrides, except TO which applies to FPU regs */
1095                 if (op->type & ~TO) {
1096                     /*
1097                      * we want to produce a warning iff the specified size
1098                      * is different from the register size
1099                      */
1100                     rs = op->type & SIZE_MASK;
1101                 } else
1102                     rs = 0;
1103
1104                 op->type      &= TO;
1105                 op->type      |= REGISTER;
1106                 op->type      |= nasm_reg_flags[value->type];
1107                 op->decoflags |= brace_flags;
1108                 op->basereg   = value->type;
1109
1110                 if (rs && (op->type & SIZE_MASK) != rs)
1111                     nasm_error(ERR_WARNING | ERR_PASS1,
1112                           "register size specification ignored");
1113             }
1114         }
1115
1116         /* remember the position of operand having broadcasting/ER mode */
1117         if (op->decoflags & (BRDCAST_MASK | ER | SAE))
1118             result->evex_brerop = opnum;
1119     }
1120
1121     result->operands = opnum; /* set operand count */
1122
1123     /* clear remaining operands */
1124     while (opnum < MAX_OPERANDS)
1125         result->oprs[opnum++].type = 0;
1126
1127     /*
1128      * Transform RESW, RESD, RESQ, REST, RESO, RESY, RESZ into RESB.
1129      */
1130     if (opcode_is_resb(result->opcode)) {
1131         result->oprs[0].offset *= resb_bytes(result->opcode);
1132         result->oprs[0].offset *= result->times;
1133         result->times = 1;
1134         result->opcode = I_RESB;
1135     }
1136
1137     return result;
1138
1139 fail:
1140     result->opcode = I_none;
1141     return result;
1142 }
1143
1144 static int is_comma_next(void)
1145 {
1146     struct tokenval tv;
1147     char *p;
1148     int i;
1149
1150     p = stdscan_get();
1151     i = stdscan(NULL, &tv);
1152     stdscan_set(p);
1153
1154     return (i == ',' || i == ';' || !i);
1155 }
1156
1157 void cleanup_insn(insn * i)
1158 {
1159     extop *e;
1160
1161     while ((e = i->eops)) {
1162         i->eops = e->next;
1163         if (e->type == EOT_DB_STRING_FREE)
1164             nasm_free(e->stringval);
1165         nasm_free(e);
1166     }
1167 }