assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2012 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \172\ab       - the register number from operand a in bits 7..4, with
  70  *                 the 4-bit immediate from operand b in bits 3..0.
  71  * \173\xab      - the register number from operand a in bits 7..4, with
  72  *                 the value b in bits 3..0.
  73  * \174..\177    - the register number from operand 0..3 in bits 7..4, and
  74  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  75  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  76  *                 field equal to digit b.
  77  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  78  *                 is not equal to the truncated and sign-extended 32-bit
  79  *                 operand; used for 32-bit immediates in 64-bit mode.
  80  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  81  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  82  *                 V field taken from operand 0..3.
  83  * \270          - this instruction uses VEX/XOP rather than REX, with the
  84  *                 V field set to 1111b.
  85  *
  86  * VEX/XOP prefixes are followed by the sequence:
  87  * \tmm\wlp        where mm is the M field; and wlp is:
  88  *                 00 wwl lpp
  89  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  90  *                 [l1]  ll = 1 for L = 1 (.256)
  91  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  92  *
  93  *                 [w0]  ww = 0 for W = 0
  94  *                 [w1 ] ww = 1 for W = 1
  95  *                 [wig] ww = 2 for W don't care (always assembled as 0)
  96  *                 [ww]  ww = 3 for W used as REX.W
  97  *
  98  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
  99  *
 100  * \271          - instruction takes XRELEASE (F3) with or without lock
 101  * \272          - instruction takes XACQUIRE/XRELEASE with or without lock
 102  * \273          - instruction takes XACQUIRE/XRELEASE with lock only
 103  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 104  *                 which is to be extended to the operand size.
 105  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 106  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 107  * \312          - (disassembler only) invalid with non-default address size.
 108  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 109  * \314          - (disassembler only) invalid with REX.B
 110  * \315          - (disassembler only) invalid with REX.X
 111  * \316          - (disassembler only) invalid with REX.R
 112  * \317          - (disassembler only) invalid with REX.W
 113  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 114  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 115  * \322          - indicates that this instruction is only valid when the
 116  *                 operand size is the default (instruction to disassembler,
 117  *                 generates no code in the assembler)
 118  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 119  * \324          - indicates 64-bit operand size requiring REX prefix.
 120  * \325          - instruction which always uses spl/bpl/sil/dil
 121  * \330          - a literal byte follows in the code stream, to be added
 122  *                 to the condition code value of the instruction.
 123  * \331          - instruction not valid with REP prefix.  Hint for
 124  *                 disassembler only; for SSE instructions.
 125  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 126  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 127  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 128  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 129  * \336          - force a REP(E) prefix (0xF3) even if not specified.
 130  * \337          - force a REPNE prefix (0xF2) even if not specified.
 131  *                 \336-\337 are still listed as prefixes in the disassembler.
 132  * \340          - reserve <operand 0> bytes of uninitialized storage.
 133  *                 Operand 0 had better be a segmentless constant.
 134  * \341          - this instruction needs a WAIT "prefix"
 135  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 136  *                 (POP is never used for CS) depending on operand 0
 137  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 138  *                 on operand 0
 139  * \360          - no SSE prefix (== \364\331)
 140  * \361          - 66 SSE prefix (== \366\331)
 141  * \362          - F2 SSE prefix (== \364\332)
 142  * \363          - F3 SSE prefix (== \364\333)
 143  * \364          - operand-size prefix (0x66) not permitted
 144  * \365          - address-size prefix (0x67) not permitted
 145  * \366          - operand-size prefix (0x66) used as opcode extension
 146  * \367          - address-size prefix (0x67) used as opcode extension
 147  * \370,\371     - match only if operand 0 meets byte jump criteria.
 148  *                 370 is used for Jcc, 371 is used for JMP.
 149  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 150  *                 used for conditional jump over longer jump
 151  * \374          - this instruction takes an XMM VSIB memory EA
 152  * \375          - this instruction takes an YMM VSIB memory EA
 153  */
 154
 155 #include "compiler.h"
 156
 157 #include <stdio.h>
 158 #include <string.h>
 159 #include <inttypes.h>
 160
 161 #include "nasm.h"
 162 #include "nasmlib.h"
 163 #include "assemble.h"
 164 #include "insns.h"
 165 #include "tables.h"
 166
 167 enum match_result {
 168     /*
 169      * Matching errors.  These should be sorted so that more specific
 170      * errors come later in the sequence.
 171      */
 172     MERR_INVALOP,
 173     MERR_OPSIZEMISSING,
 174     MERR_OPSIZEMISMATCH,
 175     MERR_BADCPU,
 176     MERR_BADMODE,
 177     MERR_BADHLE,
 178     /*
 179      * Matching success; the conditional ones first
 180      */
 181     MOK_JUMP,   /* Matching OK but needs jmp_match() */
 182     MOK_GOOD    /* Matching unconditionally OK */
 183 };
 184
 185 typedef struct {
 186     enum ea_type type;            /* what kind of EA is this? */
 187     int sib_present;              /* is a SIB byte necessary? */
 188     int bytes;                    /* # of bytes of offset needed */
 189     int size;                     /* lazy - this is sib+bytes+1 */
 190     uint8_t modrm, sib, rex, rip; /* the bytes themselves */
 191 } ea;
 192
 193 #define GEN_SIB(scale, index, base)                 \
 194         (((scale) << 6) | ((index) << 3) | ((base)))
 195
 196 #define GEN_MODRM(mod, reg, rm)                     \
 197         (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
 198
 199 static uint32_t cpu;            /* cpu level received from nasm.c */
 200 static efunc errfunc;
 201 static struct ofmt *outfmt;
 202 static ListGen *list;
 203
 204 static int64_t calcsize(int32_t, int64_t, int, insn *,
 205                         const struct itemplate *);
 206 static void gencode(int32_t segment, int64_t offset, int bits,
 207                     insn * ins, const struct itemplate *temp,
 208                     int64_t insn_end);
 209 static enum match_result find_match(const struct itemplate **tempp,
 210                                     insn *instruction,
 211                                     int32_t segment, int64_t offset, int bits);
 212 static enum match_result matches(const struct itemplate *, insn *, int bits);
 213 static opflags_t regflag(const operand *);
 214 static int32_t regval(const operand *);
 215 static int rexflags(int, opflags_t, int);
 216 static int op_rexflags(const operand *, int);
 217 static void add_asp(insn *, int);
 218
 219 static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
 220
 221 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 222 {
 223     return ins->prefixes[pos] == prefix;
 224 }
 225
 226 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 227 {
 228     if (ins->prefixes[pos])
 229         errfunc(ERR_NONFATAL, "invalid %s prefix",
 230                 prefix_name(ins->prefixes[pos]));
 231 }
 232
 233 static const char *size_name(int size)
 234 {
 235     switch (size) {
 236     case 1:
 237         return "byte";
 238     case 2:
 239         return "word";
 240     case 4:
 241         return "dword";
 242     case 8:
 243         return "qword";
 244     case 10:
 245         return "tword";
 246     case 16:
 247         return "oword";
 248     case 32:
 249         return "yword";
 250     default:
 251         return "???";
 252     }
 253 }
 254
 255 static void warn_overflow(int pass, int size)
 256 {
 257     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 258             "%s data exceeds bounds", size_name(size));
 259 }
 260
 261 static void warn_overflow_const(int64_t data, int size)
 262 {
 263     if (overflow_general(data, size))
 264         warn_overflow(ERR_PASS1, size);
 265 }
 266
 267 static void warn_overflow_opd(const struct operand *o, int size)
 268 {
 269     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 270         if (overflow_general(o->offset, size))
 271             warn_overflow(ERR_PASS2, size);
 272     }
 273 }
 274
 275 /*
 276  * This routine wrappers the real output format's output routine,
 277  * in order to pass a copy of the data off to the listing file
 278  * generator at the same time.
 279  */
 280 static void out(int64_t offset, int32_t segto, const void *data,
 281                 enum out_type type, uint64_t size,
 282                 int32_t segment, int32_t wrt)
 283 {
 284     static int32_t lineno = 0;     /* static!!! */
 285     static char *lnfname = NULL;
 286     uint8_t p[8];
 287
 288     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 289         /*
 290          * This is a non-relocated address, and we're going to
 291          * convert it into RAWDATA format.
 292          */
 293         uint8_t *q = p;
 294
 295         if (size > 8) {
 296             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 297             return;
 298         }
 299
 300         WRITEADDR(q, *(int64_t *)data, size);
 301         data = p;
 302         type = OUT_RAWDATA;
 303     }
 304
 305     list->output(offset, data, type, size);
 306
 307     /*
 308      * this call to src_get determines when we call the
 309      * debug-format-specific "linenum" function
 310      * it updates lineno and lnfname to the current values
 311      * returning 0 if "same as last time", -2 if lnfname
 312      * changed, and the amount by which lineno changed,
 313      * if it did. thus, these variables must be static
 314      */
 315
 316     if (src_get(&lineno, &lnfname))
 317         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 318
 319     outfmt->output(segto, data, type, size, segment, wrt);
 320 }
 321
 322 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 323                       insn * ins, const struct itemplate *temp)
 324 {
 325     int64_t isize;
 326     const uint8_t *code = temp->code;
 327     uint8_t c = code[0];
 328
 329     if (((c & ~1) != 0370) || (ins->oprs[0].type & STRICT))
 330         return false;
 331     if (!optimizing)
 332         return false;
 333     if (optimizing < 0 && c == 0371)
 334         return false;
 335
 336     isize = calcsize(segment, offset, bits, ins, temp);
 337
 338     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 339         /* Be optimistic in pass 1 */
 340         return true;
 341
 342     if (ins->oprs[0].segment != segment)
 343         return false;
 344
 345     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 346     return (isize >= -128 && isize <= 127); /* is it byte size? */
 347 }
 348
 349 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 350                  insn * instruction, struct ofmt *output, efunc error,
 351                  ListGen * listgen)
 352 {
 353     const struct itemplate *temp;
 354     int j;
 355     enum match_result m;
 356     int64_t insn_end;
 357     int32_t itimes;
 358     int64_t start = offset;
 359     int64_t wsize;              /* size for DB etc. */
 360
 361     errfunc = error;            /* to pass to other functions */
 362     cpu = cp;
 363     outfmt = output;            /* likewise */
 364     list = listgen;             /* and again */
 365
 366     wsize = idata_bytes(instruction->opcode);
 367     if (wsize == -1)
 368         return 0;
 369
 370     if (wsize) {
 371         extop *e;
 372         int32_t t = instruction->times;
 373         if (t < 0)
 374             errfunc(ERR_PANIC,
 375                     "instruction->times < 0 (%ld) in assemble()", t);
 376
 377         while (t--) {           /* repeat TIMES times */
 378             list_for_each(e, instruction->eops) {
 379                 if (e->type == EOT_DB_NUMBER) {
 380                     if (wsize > 8) {
 381                         errfunc(ERR_NONFATAL,
 382                                 "integer supplied to a DT, DO or DY"
 383                                 " instruction");
 384                     } else {
 385                         out(offset, segment, &e->offset,
 386                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 387                         offset += wsize;
 388                     }
 389                 } else if (e->type == EOT_DB_STRING ||
 390                            e->type == EOT_DB_STRING_FREE) {
 391                     int align;
 392
 393                     out(offset, segment, e->stringval,
 394                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 395                     align = e->stringlen % wsize;
 396
 397                     if (align) {
 398                         align = wsize - align;
 399                         out(offset, segment, zero_buffer,
 400                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 401                     }
 402                     offset += e->stringlen + align;
 403                 }
 404             }
 405             if (t > 0 && t == instruction->times - 1) {
 406                 /*
 407                  * Dummy call to list->output to give the offset to the
 408                  * listing module.
 409                  */
 410                 list->output(offset, NULL, OUT_RAWDATA, 0);
 411                 list->uplevel(LIST_TIMES);
 412             }
 413         }
 414         if (instruction->times > 1)
 415             list->downlevel(LIST_TIMES);
 416         return offset - start;
 417     }
 418
 419     if (instruction->opcode == I_INCBIN) {
 420         const char *fname = instruction->eops->stringval;
 421         FILE *fp;
 422
 423         fp = fopen(fname, "rb");
 424         if (!fp) {
 425             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 426                   fname);
 427         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 428             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 429                   fname);
 430         } else {
 431             static char buf[4096];
 432             size_t t = instruction->times;
 433             size_t base = 0;
 434             size_t len;
 435
 436             len = ftell(fp);
 437             if (instruction->eops->next) {
 438                 base = instruction->eops->next->offset;
 439                 len -= base;
 440                 if (instruction->eops->next->next &&
 441                     len > (size_t)instruction->eops->next->next->offset)
 442                     len = (size_t)instruction->eops->next->next->offset;
 443             }
 444             /*
 445              * Dummy call to list->output to give the offset to the
 446              * listing module.
 447              */
 448             list->output(offset, NULL, OUT_RAWDATA, 0);
 449             list->uplevel(LIST_INCBIN);
 450             while (t--) {
 451                 size_t l;
 452
 453                 fseek(fp, base, SEEK_SET);
 454                 l = len;
 455                 while (l > 0) {
 456                     int32_t m;
 457                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 458                     if (!m) {
 459                         /*
 460                          * This shouldn't happen unless the file
 461                          * actually changes while we are reading
 462                          * it.
 463                          */
 464                         error(ERR_NONFATAL,
 465                               "`incbin': unexpected EOF while"
 466                               " reading file `%s'", fname);
 467                         t = 0;  /* Try to exit cleanly */
 468                         break;
 469                     }
 470                     out(offset, segment, buf, OUT_RAWDATA, m,
 471                         NO_SEG, NO_SEG);
 472                     l -= m;
 473                 }
 474             }
 475             list->downlevel(LIST_INCBIN);
 476             if (instruction->times > 1) {
 477                 /*
 478                  * Dummy call to list->output to give the offset to the
 479                  * listing module.
 480                  */
 481                 list->output(offset, NULL, OUT_RAWDATA, 0);
 482                 list->uplevel(LIST_TIMES);
 483                 list->downlevel(LIST_TIMES);
 484             }
 485             fclose(fp);
 486             return instruction->times * len;
 487         }
 488         return 0;               /* if we're here, there's an error */
 489     }
 490
 491     /* Check to see if we need an address-size prefix */
 492     add_asp(instruction, bits);
 493
 494     m = find_match(&temp, instruction, segment, offset, bits);
 495
 496     if (m == MOK_GOOD) {
 497         /* Matches! */
 498         int64_t insn_size = calcsize(segment, offset, bits, instruction, temp);
 499         itimes = instruction->times;
 500         if (insn_size < 0)  /* shouldn't be, on pass two */
 501             error(ERR_PANIC, "errors made it through from pass one");
 502         else
 503             while (itimes--) {
 504                 for (j = 0; j < MAXPREFIX; j++) {
 505                     uint8_t c = 0;
 506                     switch (instruction->prefixes[j]) {
 507                     case P_WAIT:
 508                         c = 0x9B;
 509                         break;
 510                     case P_LOCK:
 511                         c = 0xF0;
 512                         break;
 513                     case P_REPNE:
 514                     case P_REPNZ:
 515                     case P_XACQUIRE:
 516                         c = 0xF2;
 517                         break;
 518                     case P_REPE:
 519                     case P_REPZ:
 520                     case P_REP:
 521                     case P_XRELEASE:
 522                         c = 0xF3;
 523                         break;
 524                     case R_CS:
 525                         if (bits == 64) {
 526                             error(ERR_WARNING | ERR_PASS2,
 527                                   "cs segment base generated, but will be ignored in 64-bit mode");
 528                         }
 529                         c = 0x2E;
 530                         break;
 531                     case R_DS:
 532                         if (bits == 64) {
 533                             error(ERR_WARNING | ERR_PASS2,
 534                                   "ds segment base generated, but will be ignored in 64-bit mode");
 535                         }
 536                         c = 0x3E;
 537                         break;
 538                     case R_ES:
 539                         if (bits == 64) {
 540                             error(ERR_WARNING | ERR_PASS2,
 541                                   "es segment base generated, but will be ignored in 64-bit mode");
 542                         }
 543                         c = 0x26;
 544                         break;
 545                     case R_FS:
 546                         c = 0x64;
 547                         break;
 548                     case R_GS:
 549                         c = 0x65;
 550                         break;
 551                     case R_SS:
 552                         if (bits == 64) {
 553                             error(ERR_WARNING | ERR_PASS2,
 554                                   "ss segment base generated, but will be ignored in 64-bit mode");
 555                         }
 556                         c = 0x36;
 557                         break;
 558                     case R_SEGR6:
 559                     case R_SEGR7:
 560                         error(ERR_NONFATAL,
 561                               "segr6 and segr7 cannot be used as prefixes");
 562                         break;
 563                     case P_A16:
 564                         if (bits == 64) {
 565                             error(ERR_NONFATAL,
 566                                   "16-bit addressing is not supported "
 567                                   "in 64-bit mode");
 568                         } else if (bits != 16)
 569                             c = 0x67;
 570                         break;
 571                     case P_A32:
 572                         if (bits != 32)
 573                             c = 0x67;
 574                         break;
 575                     case P_A64:
 576                         if (bits != 64) {
 577                             error(ERR_NONFATAL,
 578                                   "64-bit addressing is only supported "
 579                                   "in 64-bit mode");
 580                         }
 581                         break;
 582                     case P_ASP:
 583                         c = 0x67;
 584                         break;
 585                     case P_O16:
 586                         if (bits != 16)
 587                             c = 0x66;
 588                         break;
 589                     case P_O32:
 590                         if (bits == 16)
 591                             c = 0x66;
 592                         break;
 593                     case P_O64:
 594                         /* REX.W */
 595                         break;
 596                     case P_OSP:
 597                         c = 0x66;
 598                         break;
 599                     case P_none:
 600                         break;
 601                     default:
 602                         error(ERR_PANIC, "invalid instruction prefix");
 603                     }
 604                     if (c != 0) {
 605                         out(offset, segment, &c, OUT_RAWDATA, 1,
 606                             NO_SEG, NO_SEG);
 607                         offset++;
 608                     }
 609                 }
 610                 insn_end = offset + insn_size;
 611                 gencode(segment, offset, bits, instruction,
 612                         temp, insn_end);
 613                 offset += insn_size;
 614                 if (itimes > 0 && itimes == instruction->times - 1) {
 615                     /*
 616                      * Dummy call to list->output to give the offset to the
 617                      * listing module.
 618                      */
 619                     list->output(offset, NULL, OUT_RAWDATA, 0);
 620                     list->uplevel(LIST_TIMES);
 621                 }
 622             }
 623         if (instruction->times > 1)
 624             list->downlevel(LIST_TIMES);
 625         return offset - start;
 626     } else {
 627         /* No match */
 628         switch (m) {
 629         case MERR_OPSIZEMISSING:
 630             error(ERR_NONFATAL, "operation size not specified");
 631             break;
 632         case MERR_OPSIZEMISMATCH:
 633             error(ERR_NONFATAL, "mismatch in operand sizes");
 634             break;
 635         case MERR_BADCPU:
 636             error(ERR_NONFATAL, "no instruction for this cpu level");
 637             break;
 638         case MERR_BADMODE:
 639             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 640                   bits);
 641             break;
 642         default:
 643             error(ERR_NONFATAL,
 644                   "invalid combination of opcode and operands");
 645             break;
 646         }
 647     }
 648     return 0;
 649 }
 650
 651 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 652                   insn * instruction, efunc error)
 653 {
 654     const struct itemplate *temp;
 655     enum match_result m;
 656
 657     errfunc = error;            /* to pass to other functions */
 658     cpu = cp;
 659
 660     if (instruction->opcode == I_none)
 661         return 0;
 662
 663     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 664         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 665         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 666         instruction->opcode == I_DY) {
 667         extop *e;
 668         int32_t isize, osize, wsize;
 669
 670         isize = 0;
 671         wsize = idata_bytes(instruction->opcode);
 672
 673         list_for_each(e, instruction->eops) {
 674             int32_t align;
 675
 676             osize = 0;
 677             if (e->type == EOT_DB_NUMBER) {
 678                 osize = 1;
 679                 warn_overflow_const(e->offset, wsize);
 680             } else if (e->type == EOT_DB_STRING ||
 681                        e->type == EOT_DB_STRING_FREE)
 682                 osize = e->stringlen;
 683
 684             align = (-osize) % wsize;
 685             if (align < 0)
 686                 align += wsize;
 687             isize += osize + align;
 688         }
 689         return isize * instruction->times;
 690     }
 691
 692     if (instruction->opcode == I_INCBIN) {
 693         const char *fname = instruction->eops->stringval;
 694         FILE *fp;
 695         int64_t val = 0;
 696         size_t len;
 697
 698         fp = fopen(fname, "rb");
 699         if (!fp)
 700             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 701                   fname);
 702         else if (fseek(fp, 0L, SEEK_END) < 0)
 703             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 704                   fname);
 705         else {
 706             len = ftell(fp);
 707             if (instruction->eops->next) {
 708                 len -= instruction->eops->next->offset;
 709                 if (instruction->eops->next->next &&
 710                     len > (size_t)instruction->eops->next->next->offset) {
 711                     len = (size_t)instruction->eops->next->next->offset;
 712                 }
 713             }
 714             val = instruction->times * len;
 715         }
 716         if (fp)
 717             fclose(fp);
 718         return val;
 719     }
 720
 721     /* Check to see if we need an address-size prefix */
 722     add_asp(instruction, bits);
 723
 724     m = find_match(&temp, instruction, segment, offset, bits);
 725     if (m == MOK_GOOD) {
 726         /* we've matched an instruction. */
 727         int64_t isize;
 728         int j;
 729
 730         isize = calcsize(segment, offset, bits, instruction, temp);
 731         if (isize < 0)
 732             return -1;
 733         for (j = 0; j < MAXPREFIX; j++) {
 734             switch (instruction->prefixes[j]) {
 735             case P_A16:
 736                 if (bits != 16)
 737                     isize++;
 738                 break;
 739             case P_A32:
 740                 if (bits != 32)
 741                     isize++;
 742                 break;
 743             case P_O16:
 744                 if (bits != 16)
 745                     isize++;
 746                 break;
 747             case P_O32:
 748                 if (bits == 16)
 749                     isize++;
 750                 break;
 751             case P_A64:
 752             case P_O64:
 753             case P_none:
 754                 break;
 755             default:
 756                 isize++;
 757                 break;
 758             }
 759         }
 760         return isize * instruction->times;
 761     } else {
 762         return -1;                  /* didn't match any instruction */
 763     }
 764 }
 765
 766 static bool possible_sbyte(operand *o)
 767 {
 768     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 769         !(o->opflags & OPFLAG_UNKNOWN) &&
 770         optimizing >= 0 && !(o->type & STRICT);
 771 }
 772
 773 /* check that opn[op]  is a signed byte of size 16 or 32 */
 774 static bool is_sbyte16(operand *o)
 775 {
 776     int16_t v;
 777
 778     if (!possible_sbyte(o))
 779         return false;
 780
 781     v = o->offset;
 782     return v >= -128 && v <= 127;
 783 }
 784
 785 static bool is_sbyte32(operand *o)
 786 {
 787     int32_t v;
 788
 789     if (!possible_sbyte(o))
 790         return false;
 791
 792     v = o->offset;
 793     return v >= -128 && v <= 127;
 794 }
 795
 796 static void bad_hle_warn(const insn * ins, uint8_t hleok)
 797 {
 798     enum prefixes rep_pfx = ins->prefixes[PPS_REP];
 799     enum whatwarn { w_none, w_lock, w_inval } ww;
 800     static const enum whatwarn warn[2][4] =
 801     {
 802         { w_inval, w_inval, w_none, w_lock }, /* XACQUIRE */
 803         { w_inval, w_none,  w_none, w_lock }, /* XRELEASE */
 804     };
 805     unsigned int n;
 806
 807     n = (unsigned int)rep_pfx - P_XACQUIRE;
 808     if (n > 1)
 809         return;                 /* Not XACQUIRE/XRELEASE */
 810
 811     ww = warn[n][hleok];
 812     if (!is_class(MEMORY, ins->oprs[0].type))
 813         ww = w_inval;           /* HLE requires operand 0 to be memory */
 814
 815     switch (ww) {
 816     case w_none:
 817         break;
 818
 819     case w_lock:
 820         if (ins->prefixes[PPS_LOCK] != P_LOCK) {
 821             errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 822                     "%s with this instruction requires lock",
 823                     prefix_name(rep_pfx));
 824         }
 825         break;
 826
 827     case w_inval:
 828         errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 829                 "%s invalid with this instruction",
 830                 prefix_name(rep_pfx));
 831         break;
 832     }
 833 }
 834
/*
 * Common construct: expands to four consecutive case labels for the
 * values x, x+1, x+2 and x+3.  The final label is left without its
 * colon, so the macro is written as `case4(010):' at the use site.
 */
#define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 837
/*
 * Compute the number of bytes instruction `ins' occupies when encoded
 * with template `temp', by interpreting the template's byte-code
 * string (temp->code) up to its terminating zero byte.
 *
 * segment, offset - unused here (explicitly voided below).
 * bits            - current mode (compared against 16, 32 and 64).
 * ins             - the instruction; NOT read-only.  ins->rex is reset
 *                   and rebuilt, and vexreg, vex_cm, vex_wlp and empty
 *                   slots of ins->prefixes[] (PPS_OSIZE, PPS_REP,
 *                   PPS_WAIT) may be filled in as a side effect.
 * temp            - the matched instruction template.
 *
 * Returns the length in bytes, or -1 if the instruction cannot be
 * encoded (most, but not all, failure paths report through errfunc
 * first).  Bytes for the prefixes recorded in ins->prefixes[] are not
 * added here; the visible caller adds those after this returns.
 * Lock and HLE prefix sanity warnings are also issued from here.
 */
static int64_t calcsize(int32_t segment, int64_t offset, int bits,
                        insn * ins, const struct itemplate *temp)
{
    const uint8_t *codes = temp->code;
    int64_t length = 0;
    uint8_t c;
    int rex_mask = ~0;          /* REX bits allowed to survive the loop */
    int op1, op2;
    struct operand *opx;
    uint8_t opex = 0;           /* operand-index extension from codes 05..07 */
    enum ea_type eat;
    uint8_t hleok = 0;          /* column for bad_hle_warn(); set by 0271..0273 */
    bool lockcheck = true;

    ins->rex = 0;               /* Ensure REX is reset */
    eat = EA_SCALAR;            /* Expect a scalar EA */

    if (ins->prefixes[PPS_OSIZE] == P_O64)
        ins->rex |= REX_W;

    (void)segment;              /* Don't warn that this parameter is unused */
    (void)offset;               /* Don't warn that this parameter is unused */

    while (*codes) {
        c = *codes++;
        /*
         * The low two bits of c select operand op1 and bits 3-4 select
         * op2; a preceding 05..07 code (opex) adds 4 to either index.
         */
        op1 = (c & 3) + ((opex & 1) << 2);
        op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
        opx = &ins->oprs[op1];
        opex = 0;               /* For the next iteration */

        switch (c) {
        /* c literal bytes follow in the template: skip and count them */
        case 01:
        case 02:
        case 03:
        case 04:
            codes += c, length += c;
            break;

        /* operand-index extension applied to the next code only */
        case 05:
        case 06:
        case 07:
            opex = c;
            break;

        /* one template byte follows; the operand may contribute REX flags */
        case4(010):
            ins->rex |=
                op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
            codes++, length++;
            break;

        case4(014):
        case4(020):
        case4(024):
            length++;
            break;

        case4(030):
            length += 2;
            break;

        /* 2 or 4 bytes: explicit operand size wins, else the mode decides */
        case4(034):
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        case4(040):
            length += 4;
            break;

        /* as many bytes as the instruction's address size (in bits) / 8 */
        case4(044):
            length += ins->addr_size >> 3;
            break;

        case4(050):
            length++;
            break;

        case4(054):
            length += 8; /* MOV reg64/imm */
            break;

        case4(060):
            length += 2;
            break;

        /* 2 or 4 bytes, chosen the same way as for 034 above */
        case4(064):
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        case4(070):
            length += 4;
            break;

        case4(074):
            length += 2;
            break;

        /* 1 byte if the operand qualifies as a signed byte, otherwise 2 */
        case4(0140):
            length += is_sbyte16(opx) ? 1 : 2;
            break;

        /* one template byte follows and is counted */
        case4(0144):
            codes++;
            length++;
            break;

        /* 1 byte if the operand qualifies as a signed byte, otherwise 4 */
        case4(0150):
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        /* one template byte follows and is counted */
        case4(0154):
            codes++;
            length++;
            break;

        /* one parameter byte follows in the template; one byte is emitted */
        case 0172:
        case 0173:
            codes++;
            length++;
            break;

        case4(0174):
            length++;
            break;

        /* 1 byte if the operand qualifies as a signed byte, otherwise 4 */
        case4(0250):
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        case4(0254):
            length += 4;
            break;

        /*
         * REX_V flags a VEX-style prefix whose size is added after the
         * loop.  Two parameter bytes follow in the template and are kept
         * in vex_cm and vex_wlp; the register value comes from opx.
         */
        case4(0260):
            ins->rex |= REX_V;
            ins->vexreg = regval(opx);
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        /* same as 0260 but with no register operand (vexreg is 0) */
        case 0270:
            ins->rex |= REX_V;
            ins->vexreg = 0;
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        /* remember which HLE column (1..3) applies; see bad_hle_warn() */
        case 0271:
        case 0272:
        case 0273:
            hleok = c & 3;
            break;

        case4(0274):
            length++;
            break;

        case4(0300):
            break;

        /*
         * Not encodable at all in 64-bit mode.  Otherwise one extra byte
         * unless the mode is 16-bit or an explicit A16 prefix is present
         * (an explicit prefix is not counted here).
         */
        case 0310:
            if (bits == 64)
                return -1;
            length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
            break;

        /* one extra byte unless the mode is 32-bit or A32 is explicit */
        case 0311:
            length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
            break;

        case 0312:
            break;

        /* only acceptable in 64-bit mode with no A16/A32 prefix */
        case 0313:
            if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
                has_prefix(ins, PPS_ASIZE, P_A32))
                return -1;
            break;

        case4(0314):
            break;

        /*
         * Default the operand-size prefix slot to O16 when it is empty;
         * warn when a different operand-size prefix is already there.
         */
        case 0320:
        {
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O16)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O16;
            break;
        }

        /* as 0320, but for O32 */
        case 0321:
        {
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O32)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O32;
            break;
        }

        case 0322:
            break;

        /* REX.W will be stripped from the final REX value after the loop */
        case 0323:
            rex_mask &= ~REX_W;
            break;

        case 0324:
            ins->rex |= REX_W;
            break;

        /* high byte registers are not allowed; checked after the loop */
        case 0325:
            ins->rex |= REX_NH;
            break;

        /* one template byte follows and is counted */
        case 0330:
            codes++, length++;
            break;

        case 0331:
            break;

        case 0332:
        case 0333:
            length++;
            break;

        case 0334:
            ins->rex |= REX_L;
            break;

        case 0335:
            break;

        /* fill an empty REP prefix slot with P_REP */
        case 0336:
            if (!ins->prefixes[PPS_REP])
                ins->prefixes[PPS_REP] = P_REP;
            break;

        /* fill an empty REP prefix slot with P_REPNE */
        case 0337:
            if (!ins->prefixes[PPS_REP])
                ins->prefixes[PPS_REP] = P_REPNE;
            break;

        /* space reservation: operand 0 must be a constant byte count */
        case 0340:
            if (ins->oprs[0].segment != NO_SEG)
                errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
                        " quantity of BSS space");
            else
                length += ins->oprs[0].offset;
            break;

        /* fill an empty WAIT prefix slot with P_WAIT */
        case 0341:
            if (!ins->prefixes[PPS_WAIT])
                ins->prefixes[PPS_WAIT] = P_WAIT;
            break;

        case4(0344):
            length++;
            break;

        case 0360:
            break;

        case 0361:
        case 0362:
        case 0363:
            length++;
            break;

        case 0364:
        case 0365:
            break;

        case 0366:
        case 0367:
            length++;
            break;

        case 0370:
        case 0371:
        case 0372:
            break;

        case 0373:
            length++;
            break;

        /* the effective address that follows must be of XMM VSIB type */
        case 0374:
            eat = EA_XMMVSIB;
            break;

        /* the effective address that follows must be of YMM VSIB type */
        case 0375:
            eat = EA_YMMVSIB;
            break;

        /* effective address: size and REX bits come from process_ea() */
        case4(0100):
        case4(0110):
        case4(0120):
        case4(0130):
        case4(0200):
        case4(0204):
        case4(0210):
        case4(0214):
        case4(0220):
        case4(0224):
        case4(0230):
        case4(0234):
            {
                ea ea_data;
                int rfield;
                opflags_t rflags;
                struct operand *opy = &ins->oprs[op2];

                ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */

                if (c <= 0177) {
                    /* pick rfield from operand b (opx) */
                    rflags = regflag(opx);
                    rfield = nasm_regvals[opx->basereg];
                } else {
                    /* rfield is a constant taken from the low 3 bits of c */
                    rflags = 0;
                    rfield = c & 7;
                }
                /* the EA kind found must match the one expected (eat) */
                if (process_ea(opy, &ea_data, bits,ins->addr_size,
                               rfield, rflags) != eat) {
                    errfunc(ERR_NONFATAL, "invalid effective address");
                    return -1;
                } else {
                    ins->rex |= ea_data.rex;
                    length += ea_data.size;
                }
            }
            break;

        default:
            errfunc(ERR_PANIC, "internal instruction table corrupt"
                    ": instruction code \\%o (0x%02X) given", c, c);
            break;
        }
    }

    /* drop any REX bits a template code asked to suppress (see 0323) */
    ins->rex &= rex_mask;

    if (ins->rex & REX_NH) {
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "instruction cannot use high registers");
            return -1;
        }
        ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
    }

    if (ins->rex & REX_V) {
        /* REX bits that may not be set outside 64-bit mode */
        int bad32 = REX_R|REX_W|REX_X|REX_B;

        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
            return -1;
        }
        /* bits 4-5 of vex_wlp choose how the W bit is determined */
        switch (ins->vex_wlp & 060) {
        case 000:
        case 040:
            ins->rex &= ~REX_W;
            break;
        case 020:
            ins->rex |= REX_W;
            bad32 &= ~REX_W;
            break;
        case 060:
            /* Follow REX_W */
            break;
        }

        if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
        /* 3-byte prefix form when vex_cm != 1 or W/X/B is needed, else 2 */
        if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
            length += 3;
        else
            length += 2;
    } else if (ins->rex & REX_REAL) {
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
            return -1;
        } else if (bits == 64) {
            length++;
        } else if ((ins->rex & REX_L) &&
                   !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
                   cpu >= IF_X86_64) {
            /* LOCK-as-REX.R */
            assert_no_prefix(ins, PPS_LOCK);
            lockcheck = false;  /* Already errored, no need for warning */
            length++;
        } else {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
    }

    /*
     * An explicit LOCK is only accepted silently when the template is
     * flagged IF_LOCK and operand 0 is a memory operand.
     */
    if (has_prefix(ins, PPS_LOCK, P_LOCK) && lockcheck &&
        (!(temp->flags & IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) {
        errfunc(ERR_WARNING | ERR_WARN_LOCK | ERR_PASS2 ,
                "instruction is not lockable");
    }

    bad_hle_warn(ins, hleok);

    return length;
}
1259
/*
 * Emit the pending REX prefix byte, if one is needed.
 *
 * A byte is written only when the instruction is not using a VEX-style
 * prefix (REX_V clear), at least one of the REX_REAL bits is set, and
 * the mode is 64-bit.  The byte is the REX_REAL bits of ins->rex with
 * REX_P OR-ed in.  Afterwards ins->rex is cleared, so a later
 * expansion for the same instruction emits nothing, and `offset' is
 * advanced by one.
 *
 * The macro relies on `ins', `bits', `offset' and `segment' being in
 * scope at the expansion site.  It is wrapped in do { } while (0) so
 * that `EMIT_REX();' behaves as a single statement, including as the
 * body of an if that has an else branch.
 */
#define EMIT_REX()                                                              \
    do {                                                                        \
        if (!(ins->rex & REX_V) && (ins->rex & REX_REAL) && (bits == 64)) {     \
            ins->rex = (ins->rex & REX_REAL)|REX_P;                             \
            out(offset, segment, &ins->rex, OUT_RAWDATA, 1, NO_SEG, NO_SEG);    \
            ins->rex = 0;                                                       \
            offset += 1;                                                        \
        }                                                                       \
    } while (0)
1267
1268 static void gencode(int32_t segment, int64_t offset, int bits,
1269                     insn * ins, const struct itemplate *temp,
1270                     int64_t insn_end)
1271 {
1272     static const char condval[] = {   /* conditional opcodes */
1273         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1274         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1275         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1276     };
1277     uint8_t c;
1278     uint8_t bytes[4];
1279     int64_t size;
1280     int64_t data;
1281     int op1, op2;
1282     struct operand *opx;
1283     const uint8_t *codes = temp->code;
1284     uint8_t opex = 0;
1285     enum ea_type eat = EA_SCALAR;
1286
1287     while (*codes) {
1288         c = *codes++;
1289         op1 = (c & 3) + ((opex & 1) << 2);
1290         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1291         opx = &ins->oprs[op1];
1292         opex = 0;                /* For the next iteration */
1293
1294         switch (c) {
1295         case 01:
1296         case 02:
1297         case 03:
1298         case 04:
1299             EMIT_REX();
1300             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1301             codes += c;
1302             offset += c;
1303             break;
1304
1305         case 05:
1306         case 06:
1307         case 07:
1308             opex = c;
1309             break;
1310
1311         case4(010):
1312             EMIT_REX();
1313             bytes[0] = *codes++ + (regval(opx) & 7);
1314             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1315             offset += 1;
1316             break;
1317
1318         case4(014):
1319             /*
1320              * The test for BITS8 and SBYTE here is intended to avoid
1321              * warning on optimizer actions due to SBYTE, while still
1322              * warn on explicit BYTE directives.  Also warn, obviously,
1323              * if the optimizer isn't enabled.
1324              */
1325             if (((opx->type & BITS8) ||
1326                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1327                 (opx->offset < -128 || opx->offset > 127)) {
1328                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1329                         "signed byte value exceeds bounds");
1330             }
1331             if (opx->segment != NO_SEG) {
1332                 data = opx->offset;
1333                 out(offset, segment, &data, OUT_ADDRESS, 1,
1334                     opx->segment, opx->wrt);
1335             } else {
1336                 bytes[0] = opx->offset;
1337                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1338                     NO_SEG);
1339             }
1340             offset += 1;
1341             break;
1342
1343         case4(020):
1344             if (opx->offset < -256 || opx->offset > 255) {
1345                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1346                         "byte value exceeds bounds");
1347             }
1348             if (opx->segment != NO_SEG) {
1349                 data = opx->offset;
1350                 out(offset, segment, &data, OUT_ADDRESS, 1,
1351                     opx->segment, opx->wrt);
1352             } else {
1353                 bytes[0] = opx->offset;
1354                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1355                     NO_SEG);
1356             }
1357             offset += 1;
1358             break;
1359
1360         case4(024):
1361             if (opx->offset < 0 || opx->offset > 255)
1362                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1363                         "unsigned byte value exceeds bounds");
1364             if (opx->segment != NO_SEG) {
1365                 data = opx->offset;
1366                 out(offset, segment, &data, OUT_ADDRESS, 1,
1367                     opx->segment, opx->wrt);
1368             } else {
1369                 bytes[0] = opx->offset;
1370                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1371                     NO_SEG);
1372             }
1373             offset += 1;
1374             break;
1375
1376         case4(030):
1377             warn_overflow_opd(opx, 2);
1378             data = opx->offset;
1379             out(offset, segment, &data, OUT_ADDRESS, 2,
1380                 opx->segment, opx->wrt);
1381             offset += 2;
1382             break;
1383
1384         case4(034):
1385             if (opx->type & (BITS16 | BITS32))
1386                 size = (opx->type & BITS16) ? 2 : 4;
1387             else
1388                 size = (bits == 16) ? 2 : 4;
1389             warn_overflow_opd(opx, size);
1390             data = opx->offset;
1391             out(offset, segment, &data, OUT_ADDRESS, size,
1392                 opx->segment, opx->wrt);
1393             offset += size;
1394             break;
1395
1396         case4(040):
1397             warn_overflow_opd(opx, 4);
1398             data = opx->offset;
1399             out(offset, segment, &data, OUT_ADDRESS, 4,
1400                 opx->segment, opx->wrt);
1401             offset += 4;
1402             break;
1403
1404         case4(044):
1405             data = opx->offset;
1406             size = ins->addr_size >> 3;
1407             warn_overflow_opd(opx, size);
1408             out(offset, segment, &data, OUT_ADDRESS, size,
1409                 opx->segment, opx->wrt);
1410             offset += size;
1411             break;
1412
1413         case4(050):
1414             if (opx->segment != segment) {
1415                 data = opx->offset;
1416                 out(offset, segment, &data,
1417                     OUT_REL1ADR, insn_end - offset,
1418                     opx->segment, opx->wrt);
1419             } else {
1420                 data = opx->offset - insn_end;
1421                 if (data > 127 || data < -128)
1422                     errfunc(ERR_NONFATAL, "short jump is out of range");
1423                 out(offset, segment, &data,
1424                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1425             }
1426             offset += 1;
1427             break;
1428
1429         case4(054):
1430             data = (int64_t)opx->offset;
1431             out(offset, segment, &data, OUT_ADDRESS, 8,
1432                 opx->segment, opx->wrt);
1433             offset += 8;
1434             break;
1435
1436         case4(060):
1437             if (opx->segment != segment) {
1438                 data = opx->offset;
1439                 out(offset, segment, &data,
1440                     OUT_REL2ADR, insn_end - offset,
1441                     opx->segment, opx->wrt);
1442             } else {
1443                 data = opx->offset - insn_end;
1444                 out(offset, segment, &data,
1445                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1446             }
1447             offset += 2;
1448             break;
1449
1450         case4(064):
1451             if (opx->type & (BITS16 | BITS32 | BITS64))
1452                 size = (opx->type & BITS16) ? 2 : 4;
1453             else
1454                 size = (bits == 16) ? 2 : 4;
1455             if (opx->segment != segment) {
1456                 data = opx->offset;
1457                 out(offset, segment, &data,
1458                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1459                     insn_end - offset, opx->segment, opx->wrt);
1460             } else {
1461                 data = opx->offset - insn_end;
1462                 out(offset, segment, &data,
1463                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1464             }
1465             offset += size;
1466             break;
1467
1468         case4(070):
1469             if (opx->segment != segment) {
1470                 data = opx->offset;
1471                 out(offset, segment, &data,
1472                     OUT_REL4ADR, insn_end - offset,
1473                     opx->segment, opx->wrt);
1474             } else {
1475                 data = opx->offset - insn_end;
1476                 out(offset, segment, &data,
1477                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1478             }
1479             offset += 4;
1480             break;
1481
1482         case4(074):
1483             if (opx->segment == NO_SEG)
1484                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1485                         " relocatable");
1486             data = 0;
1487             out(offset, segment, &data, OUT_ADDRESS, 2,
1488                 outfmt->segbase(1 + opx->segment),
1489                 opx->wrt);
1490             offset += 2;
1491             break;
1492
1493         case4(0140):
1494             data = opx->offset;
1495             warn_overflow_opd(opx, 2);
1496             if (is_sbyte16(opx)) {
1497                 bytes[0] = data;
1498                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1499                     NO_SEG);
1500                 offset++;
1501             } else {
1502                 out(offset, segment, &data, OUT_ADDRESS, 2,
1503                     opx->segment, opx->wrt);
1504                 offset += 2;
1505             }
1506             break;
1507
1508         case4(0144):
1509             EMIT_REX();
1510             bytes[0] = *codes++;
1511             if (is_sbyte16(opx))
1512                 bytes[0] |= 2;  /* s-bit */
1513             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1514             offset++;
1515             break;
1516
1517         case4(0150):
1518             data = opx->offset;
1519             warn_overflow_opd(opx, 4);
1520             if (is_sbyte32(opx)) {
1521                 bytes[0] = data;
1522                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1523                     NO_SEG);
1524                 offset++;
1525             } else {
1526                 out(offset, segment, &data, OUT_ADDRESS, 4,
1527                     opx->segment, opx->wrt);
1528                 offset += 4;
1529             }
1530             break;
1531
1532         case4(0154):
1533             EMIT_REX();
1534             bytes[0] = *codes++;
1535             if (is_sbyte32(opx))
1536                 bytes[0] |= 2;  /* s-bit */
1537             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1538             offset++;
1539             break;
1540
1541         case 0172:
1542             c = *codes++;
1543             opx = &ins->oprs[c >> 3];
1544             bytes[0] = nasm_regvals[opx->basereg] << 4;
1545             opx = &ins->oprs[c & 7];
1546             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1547                 errfunc(ERR_NONFATAL,
1548                         "non-absolute expression not permitted as argument %d",
1549                         c & 7);
1550             } else {
1551                 if (opx->offset & ~15) {
1552                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1553                             "four-bit argument exceeds bounds");
1554                 }
1555                 bytes[0] |= opx->offset & 15;
1556             }
1557             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1558             offset++;
1559             break;
1560
1561         case 0173:
1562             c = *codes++;
1563             opx = &ins->oprs[c >> 4];
1564             bytes[0] = nasm_regvals[opx->basereg] << 4;
1565             bytes[0] |= c & 15;
1566             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1567             offset++;
1568             break;
1569
1570         case4(0174):
1571             bytes[0] = nasm_regvals[opx->basereg] << 4;
1572             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1573             offset++;
1574             break;
1575
1576         case4(0250):
1577             data = opx->offset;
1578             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1579                 (int32_t)data != (int64_t)data) {
1580                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1581                         "signed dword immediate exceeds bounds");
1582             }
1583             if (is_sbyte32(opx)) {
1584                 bytes[0] = data;
1585                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1586                     NO_SEG);
1587                 offset++;
1588             } else {
1589                 out(offset, segment, &data, OUT_ADDRESS, 4,
1590                     opx->segment, opx->wrt);
1591                 offset += 4;
1592             }
1593             break;
1594
1595         case4(0254):
1596             data = opx->offset;
1597             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1598                 (int32_t)data != (int64_t)data) {
1599                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1600                         "signed dword immediate exceeds bounds");
1601             }
1602             out(offset, segment, &data, OUT_ADDRESS, 4,
1603                 opx->segment, opx->wrt);
1604             offset += 4;
1605             break;
1606
1607         case4(0260):
1608         case 0270:
1609             codes += 2;
1610             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1611                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1612                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1613                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1614                     ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
1615                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1616                 offset += 3;
1617             } else {
1618                 bytes[0] = 0xc5;
1619                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1620                     ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
1621                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1622                 offset += 2;
1623             }
1624             break;
1625
1626         case 0271:
1627         case 0272:
1628         case 0273:
1629             break;
1630
1631         case4(0274):
1632         {
1633             uint64_t uv, um;
1634             int s;
1635
1636             if (ins->rex & REX_W)
1637                 s = 64;
1638             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1639                 s = 16;
1640             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1641                 s = 32;
1642             else
1643                 s = bits;
1644
1645             um = (uint64_t)2 << (s-1);
1646             uv = opx->offset;
1647
1648             if (uv > 127 && uv < (uint64_t)-128 &&
1649                 (uv < um-128 || uv > um-1)) {
1650                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1651                         "signed byte value exceeds bounds");
1652             }
1653             if (opx->segment != NO_SEG) {
1654                 data = uv;
1655                 out(offset, segment, &data, OUT_ADDRESS, 1,
1656                     opx->segment, opx->wrt);
1657             } else {
1658                 bytes[0] = uv;
1659                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1660                     NO_SEG);
1661             }
1662             offset += 1;
1663             break;
1664         }
1665
1666         case4(0300):
1667             break;
1668
1669         case 0310:
1670             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1671                 *bytes = 0x67;
1672                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1673                 offset += 1;
1674             } else
1675                 offset += 0;
1676             break;
1677
1678         case 0311:
1679             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1680                 *bytes = 0x67;
1681                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1682                 offset += 1;
1683             } else
1684                 offset += 0;
1685             break;
1686
1687         case 0312:
1688             break;
1689
1690         case 0313:
1691             ins->rex = 0;
1692             break;
1693
1694         case4(0314):
1695             break;
1696
1697         case 0320:
1698         case 0321:
1699             break;
1700
1701         case 0322:
1702         case 0323:
1703             break;
1704
1705         case 0324:
1706             ins->rex |= REX_W;
1707             break;
1708
1709         case 0325:
1710             break;
1711
1712         case 0330:
1713             *bytes = *codes++ ^ condval[ins->condition];
1714             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1715             offset += 1;
1716             break;
1717
1718         case 0331:
1719             break;
1720
1721         case 0332:
1722         case 0333:
1723             *bytes = c - 0332 + 0xF2;
1724             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1725             offset += 1;
1726             break;
1727
1728         case 0334:
1729             if (ins->rex & REX_R) {
1730                 *bytes = 0xF0;
1731                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1732                 offset += 1;
1733             }
1734             ins->rex &= ~(REX_L|REX_R);
1735             break;
1736
1737         case 0335:
1738             break;
1739
1740         case 0336:
1741         case 0337:
1742             break;
1743
1744         case 0340:
1745             if (ins->oprs[0].segment != NO_SEG)
1746                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1747             else {
1748                 int64_t size = ins->oprs[0].offset;
1749                 if (size > 0)
1750                     out(offset, segment, NULL,
1751                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1752                 offset += size;
1753             }
1754             break;
1755
1756         case 0341:
1757             break;
1758
1759         case 0344:
1760         case 0345:
1761             bytes[0] = c & 1;
1762             switch (ins->oprs[0].basereg) {
1763             case R_CS:
1764                 bytes[0] += 0x0E;
1765                 break;
1766             case R_DS:
1767                 bytes[0] += 0x1E;
1768                 break;
1769             case R_ES:
1770                 bytes[0] += 0x06;
1771                 break;
1772             case R_SS:
1773                 bytes[0] += 0x16;
1774                 break;
1775             default:
1776                 errfunc(ERR_PANIC,
1777                         "bizarre 8086 segment register received");
1778             }
1779             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1780             offset++;
1781             break;
1782
1783         case 0346:
1784         case 0347:
1785             bytes[0] = c & 1;
1786             switch (ins->oprs[0].basereg) {
1787             case R_FS:
1788                 bytes[0] += 0xA0;
1789                 break;
1790             case R_GS:
1791                 bytes[0] += 0xA8;
1792                 break;
1793             default:
1794                 errfunc(ERR_PANIC,
1795                         "bizarre 386 segment register received");
1796             }
1797             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1798             offset++;
1799             break;
1800
1801         case 0360:
1802             break;
1803
1804         case 0361:
1805             bytes[0] = 0x66;
1806             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1807             offset += 1;
1808             break;
1809
1810         case 0362:
1811         case 0363:
1812             bytes[0] = c - 0362 + 0xf2;
1813             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1814             offset += 1;
1815             break;
1816
1817         case 0364:
1818         case 0365:
1819             break;
1820
1821         case 0366:
1822         case 0367:
1823             *bytes = c - 0366 + 0x66;
1824             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1825             offset += 1;
1826             break;
1827
1828         case 0370:
1829         case 0371:
1830             break;
1831
1832         case 0373:
1833             *bytes = bits == 16 ? 3 : 5;
1834             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1835             offset += 1;
1836             break;
1837
1838         case 0374:
1839             eat = EA_XMMVSIB;
1840             break;
1841
1842         case 0375:
1843             eat = EA_YMMVSIB;
1844             break;
1845
1846         case4(0100):
1847         case4(0110):
1848         case4(0120):
1849         case4(0130):
1850         case4(0200):
1851         case4(0204):
1852         case4(0210):
1853         case4(0214):
1854         case4(0220):
1855         case4(0224):
1856         case4(0230):
1857         case4(0234):
1858             {
1859                 ea ea_data;
1860                 int rfield;
1861                 opflags_t rflags;
1862                 uint8_t *p;
1863                 int32_t s;
1864                 struct operand *opy = &ins->oprs[op2];
1865
1866                 if (c <= 0177) {
1867                     /* pick rfield from operand b (opx) */
1868                     rflags = regflag(opx);
1869                     rfield = nasm_regvals[opx->basereg];
1870                 } else {
1871                     /* rfield is constant */
1872                     rflags = 0;
1873                     rfield = c & 7;
1874                 }
1875
1876                 if (process_ea(opy, &ea_data, bits, ins->addr_size,
1877                                rfield, rflags) != eat)
1878                     errfunc(ERR_NONFATAL, "invalid effective address");
1879
1880                 p = bytes;
1881                 *p++ = ea_data.modrm;
1882                 if (ea_data.sib_present)
1883                     *p++ = ea_data.sib;
1884
1885                 s = p - bytes;
1886                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1887
1888                 /*
1889                  * Make sure the address gets the right offset in case
1890                  * the line breaks in the .lst file (BR 1197827)
1891                  */
1892                 offset += s;
1893                 s = 0;
1894
1895                 switch (ea_data.bytes) {
1896                 case 0:
1897                     break;
1898                 case 1:
1899                 case 2:
1900                 case 4:
1901                 case 8:
1902                     data = opy->offset;
1903                     s += ea_data.bytes;
1904                     if (ea_data.rip) {
1905                         if (opy->segment == segment) {
1906                             data -= insn_end;
1907                             if (overflow_signed(data, ea_data.bytes))
1908                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1909                             out(offset, segment, &data, OUT_ADDRESS,
1910                                 ea_data.bytes, NO_SEG, NO_SEG);
1911                         } else {
1912                             /* overflow check in output/linker? */
1913                             out(offset, segment, &data,        OUT_REL4ADR,
1914                                 insn_end - offset, opy->segment, opy->wrt);
1915                         }
1916                     } else {
1917                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1918                             signed_bits(opy->offset, ins->addr_size) !=
1919                             signed_bits(opy->offset, ea_data.bytes * 8))
1920                             warn_overflow(ERR_PASS2, ea_data.bytes);
1921
1922                         out(offset, segment, &data, OUT_ADDRESS,
1923                             ea_data.bytes, opy->segment, opy->wrt);
1924                     }
1925                     break;
1926                 default:
1927                     /* Impossible! */
1928                     errfunc(ERR_PANIC,
1929                             "Invalid amount of bytes (%d) for offset?!",
1930                             ea_data.bytes);
1931                     break;
1932                 }
1933                 offset += s;
1934             }
1935             break;
1936
1937         default:
1938             errfunc(ERR_PANIC, "internal instruction table corrupt"
1939                     ": instruction code \\%o (0x%02X) given", c, c);
1940             break;
1941         }
1942     }
1943 }
1944
1945 static opflags_t regflag(const operand * o)
1946 {
1947     if (!is_register(o->basereg))
1948         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1949     return nasm_reg_flags[o->basereg];
1950 }
1951
1952 static int32_t regval(const operand * o)
1953 {
1954     if (!is_register(o->basereg))
1955         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1956     return nasm_regvals[o->basereg];
1957 }
1958
1959 static int op_rexflags(const operand * o, int mask)
1960 {
1961     opflags_t flags;
1962     int val;
1963
1964     if (!is_register(o->basereg))
1965         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1966
1967     flags = nasm_reg_flags[o->basereg];
1968     val = nasm_regvals[o->basereg];
1969
1970     return rexflags(val, flags, mask);
1971 }
1972
1973 static int rexflags(int val, opflags_t flags, int mask)
1974 {
1975     int rex = 0;
1976
1977     if (val >= 8)
1978         rex |= REX_B|REX_X|REX_R;
1979     if (flags & BITS64)
1980         rex |= REX_W;
1981     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1982         rex |= REX_H;
1983     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1984         rex |= REX_P;
1985
1986     return rex & mask;
1987 }
1988
1989 static enum match_result find_match(const struct itemplate **tempp,
1990                                     insn *instruction,
1991                                     int32_t segment, int64_t offset, int bits)
1992 {
1993     const struct itemplate *temp;
1994     enum match_result m, merr;
1995     opflags_t xsizeflags[MAX_OPERANDS];
1996     bool opsizemissing = false;
1997     int i;
1998
1999     for (i = 0; i < instruction->operands; i++)
2000         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
2001
2002     merr = MERR_INVALOP;
2003
2004     for (temp = nasm_instructions[instruction->opcode];
2005          temp->opcode != I_none; temp++) {
2006         m = matches(temp, instruction, bits);
2007         if (m == MOK_JUMP) {
2008             if (jmp_match(segment, offset, bits, instruction, temp))
2009                 m = MOK_GOOD;
2010             else
2011                 m = MERR_INVALOP;
2012         } else if (m == MERR_OPSIZEMISSING &&
2013                    (temp->flags & IF_SMASK) != IF_SX) {
2014             /*
2015              * Missing operand size and a candidate for fuzzy matching...
2016              */
2017             for (i = 0; i < temp->operands; i++) {
2018                 if ((temp->opd[i] & SAME_AS) == 0)
2019                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
2020             }
2021             opsizemissing = true;
2022         }
2023         if (m > merr)
2024             merr = m;
2025         if (merr == MOK_GOOD)
2026             goto done;
2027     }
2028
2029     /* No match, but see if we can get a fuzzy operand size match... */
2030     if (!opsizemissing)
2031         goto done;
2032
2033     for (i = 0; i < instruction->operands; i++) {
2034         /*
2035          * We ignore extrinsic operand sizes on registers, so we should
2036          * never try to fuzzy-match on them.  This also resolves the case
2037          * when we have e.g. "xmmrm128" in two different positions.
2038          */
2039         if (is_class(REGISTER, instruction->oprs[i].type))
2040             continue;
2041
2042         /* This tests if xsizeflags[i] has more than one bit set */
2043         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2044             goto done;                /* No luck */
2045
2046         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2047     }
2048
2049     /* Try matching again... */
2050     for (temp = nasm_instructions[instruction->opcode];
2051          temp->opcode != I_none; temp++) {
2052         m = matches(temp, instruction, bits);
2053         if (m == MOK_JUMP) {
2054             if (jmp_match(segment, offset, bits, instruction, temp))
2055                 m = MOK_GOOD;
2056             else
2057                 m = MERR_INVALOP;
2058         }
2059         if (m > merr)
2060             merr = m;
2061         if (merr == MOK_GOOD)
2062             goto done;
2063     }
2064
2065 done:
2066     *tempp = temp;
2067     return merr;
2068 }
2069
2070 static enum match_result matches(const struct itemplate *itemp,
2071                                  insn *instruction, int bits)
2072 {
2073     opflags_t size[MAX_OPERANDS], asize;
2074     bool opsizemissing = false;
2075     int i, oprs;
2076
2077     /*
2078      * Check the opcode
2079      */
2080     if (itemp->opcode != instruction->opcode)
2081         return MERR_INVALOP;
2082
2083     /*
2084      * Count the operands
2085      */
2086     if (itemp->operands != instruction->operands)
2087         return MERR_INVALOP;
2088
2089     /*
2090      * Is it legal?
2091      */
2092     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2093         return MERR_INVALOP;
2094
2095     /*
2096      * Check that no spurious colons or TOs are present
2097      */
2098     for (i = 0; i < itemp->operands; i++)
2099         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2100             return MERR_INVALOP;
2101
2102     /*
2103      * Process size flags
2104      */
2105     switch (itemp->flags & IF_SMASK) {
2106     case IF_SB:
2107         asize = BITS8;
2108         break;
2109     case IF_SW:
2110         asize = BITS16;
2111         break;
2112     case IF_SD:
2113         asize = BITS32;
2114         break;
2115     case IF_SQ:
2116         asize = BITS64;
2117         break;
2118     case IF_SO:
2119         asize = BITS128;
2120         break;
2121     case IF_SY:
2122         asize = BITS256;
2123         break;
2124     case IF_SZ:
2125         switch (bits) {
2126         case 16:
2127             asize = BITS16;
2128             break;
2129         case 32:
2130             asize = BITS32;
2131             break;
2132         case 64:
2133             asize = BITS64;
2134             break;
2135         default:
2136             asize = 0;
2137             break;
2138         }
2139         break;
2140     default:
2141         asize = 0;
2142         break;
2143     }
2144
2145     if (itemp->flags & IF_ARMASK) {
2146         /* S- flags only apply to a specific operand */
2147         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2148         memset(size, 0, sizeof size);
2149         size[i] = asize;
2150     } else {
2151         /* S- flags apply to all operands */
2152         for (i = 0; i < MAX_OPERANDS; i++)
2153             size[i] = asize;
2154     }
2155
2156     /*
2157      * Check that the operand flags all match up,
2158      * it's a bit tricky so lets be verbose:
2159      *
2160      * 1) Find out the size of operand. If instruction
2161      *    doesn't have one specified -- we're trying to
2162      *    guess it either from template (IF_S* flag) or
2163      *    from code bits.
2164      *
2165      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2166      *    (ie the same operand as was specified somewhere in template, and
2167      *    this referred operand index is being achieved via ~SAME_AS)
2168      *    we are to be sure that both registers (in template and instruction)
2169      *    do exactly match.
2170      *
2171      * 3) If template operand do not match the instruction OR
2172      *    template has an operand size specified AND this size differ
2173      *    from which instruction has (perhaps we got it from code bits)
2174      *    we are:
2175      *      a)  Check that only size of instruction and operand is differ
2176      *          other characteristics do match
2177      *      b)  Perhaps it's a register specified in instruction so
2178      *          for such a case we just mark that operand as "size
2179      *          missing" and this will turn on fuzzy operand size
2180      *          logic facility (handled by a caller)
2181      */
2182     for (i = 0; i < itemp->operands; i++) {
2183         opflags_t type = instruction->oprs[i].type;
2184         if (!(type & SIZE_MASK))
2185             type |= size[i];
2186
2187         if (itemp->opd[i] & SAME_AS) {
2188             int j = itemp->opd[i] & ~SAME_AS;
2189             if (type != instruction->oprs[j].type ||
2190                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2191                 return MERR_INVALOP;
2192         } else if (itemp->opd[i] & ~type ||
2193             ((itemp->opd[i] & SIZE_MASK) &&
2194              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2195             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2196                 return MERR_INVALOP;
2197             } else if (!is_class(REGISTER, type)) {
2198                 /*
2199                  * Note: we don't honor extrinsic operand sizes for registers,
2200                  * so "missing operand size" for a register should be
2201                  * considered a wildcard match rather than an error.
2202                  */
2203                 opsizemissing = true;
2204             }
2205         }
2206     }
2207
2208     if (opsizemissing)
2209         return MERR_OPSIZEMISSING;
2210
2211     /*
2212      * Check operand sizes
2213      */
2214     if (itemp->flags & (IF_SM | IF_SM2)) {
2215         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2216         for (i = 0; i < oprs; i++) {
2217             asize = itemp->opd[i] & SIZE_MASK;
2218             if (asize) {
2219                 for (i = 0; i < oprs; i++)
2220                     size[i] = asize;
2221                 break;
2222             }
2223         }
2224     } else {
2225         oprs = itemp->operands;
2226     }
2227
2228     for (i = 0; i < itemp->operands; i++) {
2229         if (!(itemp->opd[i] & SIZE_MASK) &&
2230             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2231             return MERR_OPSIZEMISMATCH;
2232     }
2233
2234     /*
2235      * Check template is okay at the set cpu level
2236      */
2237     if (((itemp->flags & IF_PLEVEL) > cpu))
2238         return MERR_BADCPU;
2239
2240     /*
2241      * Verify the appropriate long mode flag.
2242      */
2243     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2244         return MERR_BADMODE;
2245
2246     /*
2247      * If we have a HLE prefix, look for the NOHLE flag
2248      */
2249     if ((itemp->flags & IF_NOHLE) &&
2250         (has_prefix(instruction, PPS_REP, P_XACQUIRE) ||
2251          has_prefix(instruction, PPS_REP, P_XRELEASE)))
2252         return MERR_BADHLE;
2253
2254     /*
2255      * Check if special handling needed for Jumps
2256      */
2257     if ((itemp->code[0] & ~1) == 0370)
2258         return MOK_JUMP;
2259
2260     return MOK_GOOD;
2261 }
2262
2263 static enum ea_type process_ea(operand *input, ea *output, int bits,
2264                                int addrbits, int rfield, opflags_t rflags)
2265 {
2266     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2267
2268     output->type    = EA_SCALAR;
2269     output->rip     = false;
2270
2271     /* REX flags for the rfield operand */
2272     output->rex     |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2273
2274     if (is_class(REGISTER, input->type)) {
2275         /*
2276          * It's a direct register.
2277          */
2278         opflags_t f;
2279
2280         if (!is_register(input->basereg))
2281             goto err;
2282
2283         f = regflag(input);
2284
2285         if (!is_class(REG_EA, f))
2286             goto err;
2287
2288         output->rex         |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2289         output->sib_present = false;    /* no SIB necessary */
2290         output->bytes       = 0;        /* no offset necessary either */
2291         output->modrm       = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
2292     } else {
2293         /*
2294          * It's a memory reference.
2295          */
2296         if (input->basereg == -1 &&
2297             (input->indexreg == -1 || input->scale == 0)) {
2298             /*
2299              * It's a pure offset.
2300              */
2301             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2302                 input->segment == NO_SEG) {
2303                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2304                 input->type &= ~IP_REL;
2305                 input->type |= MEMORY;
2306             }
2307
2308             if (input->eaflags & EAF_BYTEOFFS ||
2309                 (input->eaflags & EAF_WORDOFFS &&
2310                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2311                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2312             }
2313
2314             if (bits == 64 && (~input->type & IP_REL)) {
2315                 output->sib_present = true;
2316                 output->sib         = GEN_SIB(0, 4, 5);
2317                 output->bytes       = 4;
2318                 output->modrm       = GEN_MODRM(0, rfield, 4);
2319                 output->rip         = false;
2320             } else {
2321                 output->sib_present = false;
2322                 output->bytes       = (addrbits != 16 ? 4 : 2);
2323                 output->modrm       = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6));
2324                 output->rip         = bits == 64;
2325             }
2326         } else {
2327             /*
2328              * It's an indirection.
2329              */
2330             int i = input->indexreg, b = input->basereg, s = input->scale;
2331             int32_t seg = input->segment;
2332             int hb = input->hintbase, ht = input->hinttype;
2333             int t, it, bt;              /* register numbers */
2334             opflags_t x, ix, bx;        /* register flags */
2335
2336             if (s == 0)
2337                 i = -1;         /* make this easy, at least */
2338
2339             if (is_register(i)) {
2340                 it = nasm_regvals[i];
2341                 ix = nasm_reg_flags[i];
2342             } else {
2343                 it = -1;
2344                 ix = 0;
2345             }
2346
2347             if (is_register(b)) {
2348                 bt = nasm_regvals[b];
2349                 bx = nasm_reg_flags[b];
2350             } else {
2351                 bt = -1;
2352                 bx = 0;
2353             }
2354
2355             /* if either one are a vector register... */
2356             if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
2357                 opflags_t sok = BITS32 | BITS64;
2358                 int32_t o = input->offset;
2359                 int mod, scale, index, base;
2360
2361                 /*
2362                  * For a vector SIB, one has to be a vector and the other,
2363                  * if present, a GPR.  The vector must be the index operand.
2364                  */
2365                 if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
2366                     if (s == 0)
2367                         s = 1;
2368                     else if (s != 1)
2369                         goto err;
2370
2371                     t = bt, bt = it, it = t;
2372                     x = bx, bx = ix, ix = x;
2373                 }
2374
2375                 if (bt != -1) {
2376                     if (REG_GPR & ~bx)
2377                         goto err;
2378                     if (!(REG64 & ~bx) || !(REG32 & ~bx))
2379                         sok &= bx;
2380                     else
2381                         goto err;
2382                 }
2383
2384                 /*
2385                  * While we're here, ensure the user didn't specify
2386                  * WORD or QWORD
2387                  */
2388                 if (input->disp_size == 16 || input->disp_size == 64)
2389                     goto err;
2390
2391                 if (addrbits == 16 ||
2392                     (addrbits == 32 && !(sok & BITS32)) ||
2393                     (addrbits == 64 && !(sok & BITS64)))
2394                     goto err;
2395
2396                 output->type = (ix & YMMREG & ~REG_EA)
2397                     ? EA_YMMVSIB : EA_XMMVSIB;
2398
2399                 output->rex |= rexflags(it, ix, REX_X);
2400                 output->rex |= rexflags(bt, bx, REX_B);
2401
2402                 index = it & 7; /* it is known to be != -1 */
2403
2404                 switch (s) {
2405                 case 1:
2406                     scale = 0;
2407                     break;
2408                 case 2:
2409                     scale = 1;
2410                     break;
2411                 case 4:
2412                     scale = 2;
2413                     break;
2414                 case 8:
2415                     scale = 3;
2416                     break;
2417                 default:   /* then what the smeg is it? */
2418                     goto err;    /* panic */
2419                 }
2420
2421                 if (bt == -1) {
2422                     base = 5;
2423                     mod = 0;
2424                 } else {
2425                     base = (bt & 7);
2426                     if (base != REG_NUM_EBP && o == 0 &&
2427                         seg == NO_SEG && !forw_ref &&
2428                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2429                         mod = 0;
2430                     else if (input->eaflags & EAF_BYTEOFFS ||
2431                              (o >= -128 && o <= 127 &&
2432                               seg == NO_SEG && !forw_ref &&
2433                               !(input->eaflags & EAF_WORDOFFS)))
2434                         mod = 1;
2435                     else
2436                         mod = 2;
2437                 }
2438
2439                 output->sib_present = true;
2440                 output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2441                 output->modrm       = GEN_MODRM(mod, rfield, 4);
2442                 output->sib         = GEN_SIB(scale, index, base);
2443             } else if ((ix|bx) & (BITS32|BITS64)) {
2444                 /*
2445                  * it must be a 32/64-bit memory reference. Firstly we have
2446                  * to check that all registers involved are type E/Rxx.
2447                  */
2448                 opflags_t sok = BITS32 | BITS64;
2449                 int32_t o = input->offset;
2450
2451                 if (it != -1) {
2452                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2453                         sok &= ix;
2454                     else
2455                         goto err;
2456                 }
2457
2458                 if (bt != -1) {
2459                     if (REG_GPR & ~bx)
2460                         goto err; /* Invalid register */
2461                     if (~sok & bx & SIZE_MASK)
2462                         goto err; /* Invalid size */
2463                     sok &= bx;
2464                 }
2465
2466                 /*
2467                  * While we're here, ensure the user didn't specify
2468                  * WORD or QWORD
2469                  */
2470                 if (input->disp_size == 16 || input->disp_size == 64)
2471                     goto err;
2472
2473                 if (addrbits == 16 ||
2474                     (addrbits == 32 && !(sok & BITS32)) ||
2475                     (addrbits == 64 && !(sok & BITS64)))
2476                     goto err;
2477
2478                 /* now reorganize base/index */
2479                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2480                     ((hb == b && ht == EAH_NOTBASE) ||
2481                      (hb == i && ht == EAH_MAKEBASE))) {
2482                     /* swap if hints say so */
2483                     t = bt, bt = it, it = t;
2484                     x = bx, bx = ix, ix = x;
2485                 }
2486                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2487                     bt = -1, bx = 0, s++;
2488                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2489                     /* make single reg base, unless hint */
2490                     bt = it, bx = ix, it = -1, ix = 0;
2491                 }
2492                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2493                       s == 3 || s == 5 || s == 9) && bt == -1)
2494                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2495                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2496                     (input->eaflags & EAF_TIMESTWO))
2497                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2498                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2499                 if (s == 1 && it == REG_NUM_ESP) {
2500                     /* swap ESP into base if scale is 1 */
2501                     t = it, it = bt, bt = t;
2502                     x = ix, ix = bx, bx = x;
2503                 }
2504                 if (it == REG_NUM_ESP ||
2505                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2506                     goto err;        /* wrong, for various reasons */
2507
2508                 output->rex |= rexflags(it, ix, REX_X);
2509                 output->rex |= rexflags(bt, bx, REX_B);
2510
2511                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2512                     /* no SIB needed */
2513                     int mod, rm;
2514
2515                     if (bt == -1) {
2516                         rm = 5;
2517                         mod = 0;
2518                     } else {
2519                         rm = (bt & 7);
2520                         if (rm != REG_NUM_EBP && o == 0 &&
2521                             seg == NO_SEG && !forw_ref &&
2522                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2523                             mod = 0;
2524                         else if (input->eaflags & EAF_BYTEOFFS ||
2525                                  (o >= -128 && o <= 127 &&
2526                                   seg == NO_SEG && !forw_ref &&
2527                                   !(input->eaflags & EAF_WORDOFFS)))
2528                             mod = 1;
2529                         else
2530                             mod = 2;
2531                     }
2532
2533                     output->sib_present = false;
2534                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2535                     output->modrm       = GEN_MODRM(mod, rfield, rm);
2536                 } else {
2537                     /* we need a SIB */
2538                     int mod, scale, index, base;
2539
2540                     if (it == -1)
2541                         index = 4, s = 1;
2542                     else
2543                         index = (it & 7);
2544
2545                     switch (s) {
2546                     case 1:
2547                         scale = 0;
2548                         break;
2549                     case 2:
2550                         scale = 1;
2551                         break;
2552                     case 4:
2553                         scale = 2;
2554                         break;
2555                     case 8:
2556                         scale = 3;
2557                         break;
2558                     default:   /* then what the smeg is it? */
2559                         goto err;    /* panic */
2560                     }
2561
2562                     if (bt == -1) {
2563                         base = 5;
2564                         mod = 0;
2565                     } else {
2566                         base = (bt & 7);
2567                         if (base != REG_NUM_EBP && o == 0 &&
2568                             seg == NO_SEG && !forw_ref &&
2569                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2570                             mod = 0;
2571                         else if (input->eaflags & EAF_BYTEOFFS ||
2572                                  (o >= -128 && o <= 127 &&
2573                                   seg == NO_SEG && !forw_ref &&
2574                                   !(input->eaflags & EAF_WORDOFFS)))
2575                             mod = 1;
2576                         else
2577                             mod = 2;
2578                     }
2579
2580                     output->sib_present = true;
2581                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2582                     output->modrm       = GEN_MODRM(mod, rfield, 4);
2583                     output->sib         = GEN_SIB(scale, index, base);
2584                 }
2585             } else {            /* it's 16-bit */
2586                 int mod, rm;
2587                 int16_t o = input->offset;
2588
2589                 /* check for 64-bit long mode */
2590                 if (addrbits == 64)
2591                     goto err;
2592
2593                 /* check all registers are BX, BP, SI or DI */
2594                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2595                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2596                     goto err;
2597
2598                 /* ensure the user didn't specify DWORD/QWORD */
2599                 if (input->disp_size == 32 || input->disp_size == 64)
2600                     goto err;
2601
2602                 if (s != 1 && i != -1)
2603                     goto err;        /* no can do, in 16-bit EA */
2604                 if (b == -1 && i != -1) {
2605                     int tmp = b;
2606                     b = i;
2607                     i = tmp;
2608                 }               /* swap */
2609                 if ((b == R_SI || b == R_DI) && i != -1) {
2610                     int tmp = b;
2611                     b = i;
2612                     i = tmp;
2613                 }
2614                 /* have BX/BP as base, SI/DI index */
2615                 if (b == i)
2616                     goto err;        /* shouldn't ever happen, in theory */
2617                 if (i != -1 && b != -1 &&
2618                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2619                     goto err;        /* invalid combinations */
2620                 if (b == -1)            /* pure offset: handled above */
2621                     goto err;        /* so if it gets to here, panic! */
2622
2623                 rm = -1;
2624                 if (i != -1)
2625                     switch (i * 256 + b) {
2626                     case R_SI * 256 + R_BX:
2627                         rm = 0;
2628                         break;
2629                     case R_DI * 256 + R_BX:
2630                         rm = 1;
2631                         break;
2632                     case R_SI * 256 + R_BP:
2633                         rm = 2;
2634                         break;
2635                     case R_DI * 256 + R_BP:
2636                         rm = 3;
2637                         break;
2638                 } else
2639                     switch (b) {
2640                     case R_SI:
2641                         rm = 4;
2642                         break;
2643                     case R_DI:
2644                         rm = 5;
2645                         break;
2646                     case R_BP:
2647                         rm = 6;
2648                         break;
2649                     case R_BX:
2650                         rm = 7;
2651                         break;
2652                     }
2653                 if (rm == -1)           /* can't happen, in theory */
2654                     goto err;        /* so panic if it does */
2655
2656                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2657                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2658                     mod = 0;
2659                 else if (input->eaflags & EAF_BYTEOFFS ||
2660                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2661                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2662                     mod = 1;
2663                 else
2664                     mod = 2;
2665
2666                 output->sib_present = false;    /* no SIB - it's 16-bit */
2667                 output->bytes       = mod;      /* bytes of offset needed */
2668                 output->modrm       = GEN_MODRM(mod, rfield, rm);
2669             }
2670         }
2671     }
2672
2673     output->size = 1 + output->sib_present + output->bytes;
2674     return output->type;
2675
2676 err:
2677     return output->type = EA_INVALID;
2678 }
2679
2680 static void add_asp(insn *ins, int addrbits)
2681 {
2682     int j, valid;
2683     int defdisp;
2684
2685     valid = (addrbits == 64) ? 64|32 : 32|16;
2686
2687     switch (ins->prefixes[PPS_ASIZE]) {
2688     case P_A16:
2689         valid &= 16;
2690         break;
2691     case P_A32:
2692         valid &= 32;
2693         break;
2694     case P_A64:
2695         valid &= 64;
2696         break;
2697     case P_ASP:
2698         valid &= (addrbits == 32) ? 16 : 32;
2699         break;
2700     default:
2701         break;
2702     }
2703
2704     for (j = 0; j < ins->operands; j++) {
2705         if (is_class(MEMORY, ins->oprs[j].type)) {
2706             opflags_t i, b;
2707
2708             /* Verify as Register */
2709             if (!is_register(ins->oprs[j].indexreg))
2710                 i = 0;
2711             else
2712                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2713
2714             /* Verify as Register */
2715             if (!is_register(ins->oprs[j].basereg))
2716                 b = 0;
2717             else
2718                 b = nasm_reg_flags[ins->oprs[j].basereg];
2719
2720             if (ins->oprs[j].scale == 0)
2721                 i = 0;
2722
2723             if (!i && !b) {
2724                 int ds = ins->oprs[j].disp_size;
2725                 if ((addrbits != 64 && ds > 8) ||
2726                     (addrbits == 64 && ds == 16))
2727                     valid &= ds;
2728             } else {
2729                 if (!(REG16 & ~b))
2730                     valid &= 16;
2731                 if (!(REG32 & ~b))
2732                     valid &= 32;
2733                 if (!(REG64 & ~b))
2734                     valid &= 64;
2735
2736                 if (!(REG16 & ~i))
2737                     valid &= 16;
2738                 if (!(REG32 & ~i))
2739                     valid &= 32;
2740                 if (!(REG64 & ~i))
2741                     valid &= 64;
2742             }
2743         }
2744     }
2745
2746     if (valid & addrbits) {
2747         ins->addr_size = addrbits;
2748     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2749         /* Add an address size prefix */
2750         ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;;
2751         ins->addr_size = (addrbits == 32) ? 16 : 32;
2752     } else {
2753         /* Impossible... */
2754         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2755         ins->addr_size = addrbits; /* Error recovery */
2756     }
2757
2758     defdisp = ins->addr_size == 16 ? 16 : 32;
2759
2760     for (j = 0; j < ins->operands; j++) {
2761         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2762             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2763             /*
2764              * mem_offs sizes must match the address size; if not,
2765              * strip the MEM_OFFS bit and match only EA instructions
2766              */
2767             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2768         }
2769     }
2770 }