assemble.c

   1 /* ----------------------------------------------------------------------- *
   2  *
   3  *   Copyright 1996-2012 The NASM Authors - All Rights Reserved
   4  *   See the file AUTHORS included with the NASM distribution for
   5  *   the specific copyright holders.
   6  *
   7  *   Redistribution and use in source and binary forms, with or without
   8  *   modification, are permitted provided that the following
   9  *   conditions are met:
  10  *
  11  *   * Redistributions of source code must retain the above copyright
  12  *     notice, this list of conditions and the following disclaimer.
  13  *   * Redistributions in binary form must reproduce the above
  14  *     copyright notice, this list of conditions and the following
  15  *     disclaimer in the documentation and/or other materials provided
  16  *     with the distribution.
  17  *
  18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  *
  32  * ----------------------------------------------------------------------- */
  33
  34 /*
  35  * assemble.c   code generation for the Netwide Assembler
  36  *
  37  * the actual codes (C syntax, i.e. octal):
  38  * \0            - terminates the code. (Unless it's a literal of course.)
  39  * \1..\4        - that many literal bytes follow in the code stream
  40  * \5            - add 4 to the primary operand number (b, low octdigit)
  41  * \6            - add 4 to the secondary operand number (a, middle octdigit)
  42  * \7            - add 4 to both the primary and the secondary operand number
  43  * \10..\13      - a literal byte follows in the code stream, to be added
  44  *                 to the register value of operand 0..3
  45  * \14..\17      - a signed byte immediate operand, from operand 0..3
  46  * \20..\23      - a byte immediate operand, from operand 0..3
  47  * \24..\27      - an unsigned byte immediate operand, from operand 0..3
  48  * \30..\33      - a word immediate operand, from operand 0..3
  49  * \34..\37      - select between \3[0-3] and \4[0-3] depending on 16/32 bit
  50  *                 assembly mode or the operand-size override on the operand
  51  * \40..\43      - a long immediate operand, from operand 0..3
  52  * \44..\47      - select between \3[0-3], \4[0-3] and \5[4-7]
  53  *                 depending on the address size of the instruction.
  54  * \50..\53      - a byte relative operand, from operand 0..3
  55  * \54..\57      - a qword immediate operand, from operand 0..3
  56  * \60..\63      - a word relative operand, from operand 0..3
  57  * \64..\67      - select between \6[0-3] and \7[0-3] depending on 16/32 bit
  58  *                 assembly mode or the operand-size override on the operand
  59  * \70..\73      - a long relative operand, from operand 0..3
  60  * \74..\77      - a word constant, from the _segment_ part of operand 0..3
  61  * \1ab          - a ModRM, calculated on EA in operand a, with the spare
  62  *                 field the register value of operand b.
  63  * \140..\143    - an immediate word or signed byte for operand 0..3
  64  * \144..\147    - or 2 (s-field) into opcode byte if operand 0..3
  65  *                  is a signed byte rather than a word.  Opcode byte follows.
  66  * \150..\153    - an immediate dword or signed byte for operand 0..3
  67  * \154..\157    - or 2 (s-field) into opcode byte if operand 0..3
  68  *                  is a signed byte rather than a dword.  Opcode byte follows.
  69  * \172\ab       - the register number from operand a in bits 7..4, with
  70  *                 the 4-bit immediate from operand b in bits 3..0.
  71  * \173\xab      - the register number from operand a in bits 7..4, with
  72  *                 the value b in bits 3..0.
  73  * \174..\177    - the register number from operand 0..3 in bits 7..4, and
  74  *                 an arbitrary value in bits 3..0 (assembled as zero.)
  75  * \2ab          - a ModRM, calculated on EA in operand a, with the spare
  76  *                 field equal to digit b.
  77  * \250..\253    - same as \150..\153, except warn if the 64-bit operand
  78  *                 is not equal to the truncated and sign-extended 32-bit
  79  *                 operand; used for 32-bit immediates in 64-bit mode.
  80  * \254..\257    - a signed 32-bit operand to be extended to 64 bits.
  81  * \260..\263    - this instruction uses VEX/XOP rather than REX, with the
  82  *                 V field taken from operand 0..3.
  83  * \264          - skip this instruction pattern if HLE prefixes present
  84  * \265          - instruction takes XRELEASE (F3) with or without lock
  85  * \266          - instruction takes XACQUIRE/XRELEASE with or without lock
  86  * \267          - instruction takes XACQUIRE/XRELEASE with lock only
  87  * \270          - this instruction uses VEX/XOP rather than REX, with the
  88  *                 V field set to 1111b.
  89  *
  90  * VEX/XOP prefixes are followed by the sequence:
  91  * \tmm\wlp        where mm is the M field; and wlp is:
  92  *                 00 wwl lpp
  93  *                 [l0]  ll = 0 for L = 0 (.128, .lz)
  94  *                 [l1]  ll = 1 for L = 1 (.256)
  95  *                 [lig] ll = 2 for L don't care (always assembled as 0)
  96  *
  97  *                 [w0]  ww = 0 for W = 0
  98  *                 [w1 ] ww = 1 for W = 1
  99  *                 [wig] ww = 2 for W don't care (always assembled as 0)
 100  *                 [ww]  ww = 3 for W used as REX.W
 101  *
 102  * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
 103  *
 104  * \274..\277    - a signed byte immediate operand, from operand 0..3,
 105  *                 which is to be extended to the operand size.
 106  * \310          - indicates fixed 16-bit address size, i.e. optional 0x67.
 107  * \311          - indicates fixed 32-bit address size, i.e. optional 0x67.
 108  * \312          - (disassembler only) invalid with non-default address size.
 109  * \313          - indicates fixed 64-bit address size, 0x67 invalid.
 110  * \314          - (disassembler only) invalid with REX.B
 111  * \315          - (disassembler only) invalid with REX.X
 112  * \316          - (disassembler only) invalid with REX.R
 113  * \317          - (disassembler only) invalid with REX.W
 114  * \320          - indicates fixed 16-bit operand size, i.e. optional 0x66.
 115  * \321          - indicates fixed 32-bit operand size, i.e. optional 0x66.
 116  * \322          - indicates that this instruction is only valid when the
 117  *                 operand size is the default (instruction to disassembler,
 118  *                 generates no code in the assembler)
 119  * \323          - indicates fixed 64-bit operand size, REX on extensions only.
 120  * \324          - indicates 64-bit operand size requiring REX prefix.
 121  * \325          - instruction which always uses spl/bpl/sil/dil
 122  * \330          - a literal byte follows in the code stream, to be added
 123  *                 to the condition code value of the instruction.
 124  * \331          - instruction not valid with REP prefix.  Hint for
 125  *                 disassembler only; for SSE instructions.
 126  * \332          - REP prefix (0xF2 byte) used as opcode extension.
 127  * \333          - REP prefix (0xF3 byte) used as opcode extension.
 128  * \334          - LOCK prefix used as REX.R (used in non-64-bit mode)
 129  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep.
 130  * \336          - force a REP(E) prefix (0xF3) even if not specified.
 131  * \337          - force a REPNE prefix (0xF2) even if not specified.
 132  *                 \336-\337 are still listed as prefixes in the disassembler.
 133  * \340          - reserve <operand 0> bytes of uninitialized storage.
 134  *                 Operand 0 had better be a segmentless constant.
 135  * \341          - this instruction needs a WAIT "prefix"
 136  * \344,\345     - the PUSH/POP (respectively) codes for CS, DS, ES, SS
 137  *                 (POP is never used for CS) depending on operand 0
 138  * \346,\347     - the second byte of PUSH/POP codes for FS, GS, depending
 139  *                 on operand 0
 140  * \360          - no SSE prefix (== \364\331)
 141  * \361          - 66 SSE prefix (== \366\331)
 142  * \362          - F2 SSE prefix (== \364\332)
 143  * \363          - F3 SSE prefix (== \364\333)
 144  * \364          - operand-size prefix (0x66) not permitted
 145  * \365          - address-size prefix (0x67) not permitted
 146  * \366          - operand-size prefix (0x66) used as opcode extension
 147  * \367          - address-size prefix (0x67) used as opcode extension
 148  * \370,\371     - match only if operand 0 meets byte jump criteria.
 149  *                 370 is used for Jcc, 371 is used for JMP.
 150  * \373          - assemble 0x03 if bits==16, 0x05 if bits==32;
 151  *                 used for conditional jump over longer jump
 152  * \374          - this instruction takes an XMM VSIB memory EA
 153  * \375          - this instruction takes an YMM VSIB memory EA
 154  */
 155
 156 #include "compiler.h"
 157
 158 #include <stdio.h>
 159 #include <string.h>
 160 #include <inttypes.h>
 161
 162 #include "nasm.h"
 163 #include "nasmlib.h"
 164 #include "assemble.h"
 165 #include "insns.h"
 166 #include "tables.h"
 167
/*
 * Result of trying to match an instruction against a template.
 *
 * All MERR_* failures are numerically smaller than the MOK_* successes.
 * assemble() maps the failures to diagnostics as follows:
 *   MERR_OPSIZEMISSING  -> "operation size not specified"
 *   MERR_OPSIZEMISMATCH -> "mismatch in operand sizes"
 *   MERR_BADCPU         -> "no instruction for this cpu level"
 *   MERR_BADMODE        -> "instruction not supported in %d-bit mode"
 *   anything else       -> "invalid combination of opcode and operands"
 */
enum match_result {
    /*
     * Matching errors.  These should be sorted so that more specific
     * errors come later in the sequence.
     */
    MERR_INVALOP,
    MERR_OPSIZEMISSING,
    MERR_OPSIZEMISMATCH,
    MERR_BADCPU,
    MERR_BADMODE,
    MERR_BADHLE,
    /*
     * Matching success; the conditional ones first
     */
    MOK_JUMP,   /* Matching OK but needs jmp_match() */
    MOK_GOOD    /* Matching unconditionally OK */
};
 185
/*
 * Description of an encoded effective address: its kind, how many
 * bytes it occupies, and the ModRM/SIB/REX bytes themselves.
 * NOTE(review): presumably filled in by process_ea(), which takes an
 * `ea *' argument -- confirm against its definition.
 */
typedef struct {
    enum ea_type type;            /* what kind of EA is this? */
    int sib_present;              /* is a SIB byte necessary? */
    int bytes;                    /* # of bytes of offset needed */
    int size;                     /* lazy - this is sib+bytes+1 */
    uint8_t modrm, sib, rex, rip; /* the bytes themselves */
} ea;
 193
/*
 * Compose a SIB byte value: `scale' is shifted left by 6, `index' is
 * shifted left by 3, and `base' is OR'ed in unshifted.  None of the
 * arguments is masked, so callers must pass values that already fit
 * their fields.
 */
#define GEN_SIB(scale, index, base)                 \
        (((scale) << 6) | ((index) << 3) | ((base)))
 196
/*
 * Compose a ModRM byte value: `mod' is shifted left by 6 (unmasked),
 * the low three bits of `reg' go into bits 5..3, and the low three
 * bits of `rm' go into bits 2..0.
 */
#define GEN_MODRM(mod, reg, rm)                     \
        (((mod) << 6) | (((reg) & 7) << 3) | ((rm) & 7))
 199
/*
 * File-scope state shared by the helpers in this file.  `cpu' and
 * `errfunc' are (re)assigned on every call to assemble() and
 * insn_size(); `outfmt' and `list' are assigned by assemble() only.
 */
static uint32_t cpu;            /* cpu level received from nasm.c */
static efunc errfunc;           /* error reporting callback */
static struct ofmt *outfmt;     /* output format used by out() */
static ListGen *list;           /* listing generator used by out() */
 204
/*
 * Forward declarations of file-local helpers.
 */

/* Size of the instruction for a given template; callers treat a
 * negative result as an error. */
static int64_t calcsize(int32_t, int64_t, int, insn *,
                        const struct itemplate *);
/* Emits the instruction bytes for a matched template. */
static void gencode(int32_t segment, int64_t offset, int bits,
                    insn * ins, const struct itemplate *temp,
                    int64_t insn_end);
/* Finds a template for the instruction; the template is returned
 * through *tempp and the outcome as a match_result. */
static enum match_result find_match(const struct itemplate **tempp,
                                    insn *instruction,
                                    int32_t segment, int64_t offset, int bits);
static enum match_result matches(const struct itemplate *, insn *, int bits);
static opflags_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, opflags_t, int);
static int op_rexflags(const operand *, int);
/* Called before matching to check whether an address-size prefix is
 * needed. */
static void add_asp(insn *, int);

static enum ea_type process_ea(operand *, ea *, int, int, int, opflags_t);
 221
 222 static int has_prefix(insn * ins, enum prefix_pos pos, int prefix)
 223 {
 224     return ins->prefixes[pos] == prefix;
 225 }
 226
 227 static void assert_no_prefix(insn * ins, enum prefix_pos pos)
 228 {
 229     if (ins->prefixes[pos])
 230         errfunc(ERR_NONFATAL, "invalid %s prefix",
 231                 prefix_name(ins->prefixes[pos]));
 232 }
 233
 234 static const char *size_name(int size)
 235 {
 236     switch (size) {
 237     case 1:
 238         return "byte";
 239     case 2:
 240         return "word";
 241     case 4:
 242         return "dword";
 243     case 8:
 244         return "qword";
 245     case 10:
 246         return "tword";
 247     case 16:
 248         return "oword";
 249     case 32:
 250         return "yword";
 251     default:
 252         return "???";
 253     }
 254 }
 255
 256 static void warn_overflow(int pass, int size)
 257 {
 258     errfunc(ERR_WARNING | pass | ERR_WARN_NOV,
 259             "%s data exceeds bounds", size_name(size));
 260 }
 261
 262 static void warn_overflow_const(int64_t data, int size)
 263 {
 264     if (overflow_general(data, size))
 265         warn_overflow(ERR_PASS1, size);
 266 }
 267
 268 static void warn_overflow_opd(const struct operand *o, int size)
 269 {
 270     if (o->wrt == NO_SEG && o->segment == NO_SEG) {
 271         if (overflow_general(o->offset, size))
 272             warn_overflow(ERR_PASS2, size);
 273     }
 274 }
 275
 276 /*
 277  * This routine wrappers the real output format's output routine,
 278  * in order to pass a copy of the data off to the listing file
 279  * generator at the same time.
 280  */
 281 static void out(int64_t offset, int32_t segto, const void *data,
 282                 enum out_type type, uint64_t size,
 283                 int32_t segment, int32_t wrt)
 284 {
 285     static int32_t lineno = 0;     /* static!!! */
 286     static char *lnfname = NULL;
 287     uint8_t p[8];
 288
 289     if (type == OUT_ADDRESS && segment == NO_SEG && wrt == NO_SEG) {
 290         /*
 291          * This is a non-relocated address, and we're going to
 292          * convert it into RAWDATA format.
 293          */
 294         uint8_t *q = p;
 295
 296         if (size > 8) {
 297             errfunc(ERR_PANIC, "OUT_ADDRESS with size > 8");
 298             return;
 299         }
 300
 301         WRITEADDR(q, *(int64_t *)data, size);
 302         data = p;
 303         type = OUT_RAWDATA;
 304     }
 305
 306     list->output(offset, data, type, size);
 307
 308     /*
 309      * this call to src_get determines when we call the
 310      * debug-format-specific "linenum" function
 311      * it updates lineno and lnfname to the current values
 312      * returning 0 if "same as last time", -2 if lnfname
 313      * changed, and the amount by which lineno changed,
 314      * if it did. thus, these variables must be static
 315      */
 316
 317     if (src_get(&lineno, &lnfname))
 318         outfmt->current_dfmt->linenum(lnfname, lineno, segto);
 319
 320     outfmt->output(segto, data, type, size, segment, wrt);
 321 }
 322
 323 static bool jmp_match(int32_t segment, int64_t offset, int bits,
 324                       insn * ins, const struct itemplate *temp)
 325 {
 326     int64_t isize;
 327     const uint8_t *code = temp->code;
 328     uint8_t c = code[0];
 329
 330     if (((c & ~1) != 0370) || (ins->oprs[0].type & STRICT))
 331         return false;
 332     if (!optimizing)
 333         return false;
 334     if (optimizing < 0 && c == 0371)
 335         return false;
 336
 337     isize = calcsize(segment, offset, bits, ins, temp);
 338
 339     if (ins->oprs[0].opflags & OPFLAG_UNKNOWN)
 340         /* Be optimistic in pass 1 */
 341         return true;
 342
 343     if (ins->oprs[0].segment != segment)
 344         return false;
 345
 346     isize = ins->oprs[0].offset - offset - isize; /* isize is delta */
 347     return (isize >= -128 && isize <= 127); /* is it byte size? */
 348 }
 349
 350 int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
 351                  insn * instruction, struct ofmt *output, efunc error,
 352                  ListGen * listgen)
 353 {
 354     const struct itemplate *temp;
 355     int j;
 356     enum match_result m;
 357     int64_t insn_end;
 358     int32_t itimes;
 359     int64_t start = offset;
 360     int64_t wsize;              /* size for DB etc. */
 361
 362     errfunc = error;            /* to pass to other functions */
 363     cpu = cp;
 364     outfmt = output;            /* likewise */
 365     list = listgen;             /* and again */
 366
 367     wsize = idata_bytes(instruction->opcode);
 368     if (wsize == -1)
 369         return 0;
 370
 371     if (wsize) {
 372         extop *e;
 373         int32_t t = instruction->times;
 374         if (t < 0)
 375             errfunc(ERR_PANIC,
 376                     "instruction->times < 0 (%ld) in assemble()", t);
 377
 378         while (t--) {           /* repeat TIMES times */
 379             list_for_each(e, instruction->eops) {
 380                 if (e->type == EOT_DB_NUMBER) {
 381                     if (wsize > 8) {
 382                         errfunc(ERR_NONFATAL,
 383                                 "integer supplied to a DT, DO or DY"
 384                                 " instruction");
 385                     } else {
 386                         out(offset, segment, &e->offset,
 387                             OUT_ADDRESS, wsize, e->segment, e->wrt);
 388                         offset += wsize;
 389                     }
 390                 } else if (e->type == EOT_DB_STRING ||
 391                            e->type == EOT_DB_STRING_FREE) {
 392                     int align;
 393
 394                     out(offset, segment, e->stringval,
 395                         OUT_RAWDATA, e->stringlen, NO_SEG, NO_SEG);
 396                     align = e->stringlen % wsize;
 397
 398                     if (align) {
 399                         align = wsize - align;
 400                         out(offset, segment, zero_buffer,
 401                             OUT_RAWDATA, align, NO_SEG, NO_SEG);
 402                     }
 403                     offset += e->stringlen + align;
 404                 }
 405             }
 406             if (t > 0 && t == instruction->times - 1) {
 407                 /*
 408                  * Dummy call to list->output to give the offset to the
 409                  * listing module.
 410                  */
 411                 list->output(offset, NULL, OUT_RAWDATA, 0);
 412                 list->uplevel(LIST_TIMES);
 413             }
 414         }
 415         if (instruction->times > 1)
 416             list->downlevel(LIST_TIMES);
 417         return offset - start;
 418     }
 419
 420     if (instruction->opcode == I_INCBIN) {
 421         const char *fname = instruction->eops->stringval;
 422         FILE *fp;
 423
 424         fp = fopen(fname, "rb");
 425         if (!fp) {
 426             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 427                   fname);
 428         } else if (fseek(fp, 0L, SEEK_END) < 0) {
 429             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 430                   fname);
 431         } else {
 432             static char buf[4096];
 433             size_t t = instruction->times;
 434             size_t base = 0;
 435             size_t len;
 436
 437             len = ftell(fp);
 438             if (instruction->eops->next) {
 439                 base = instruction->eops->next->offset;
 440                 len -= base;
 441                 if (instruction->eops->next->next &&
 442                     len > (size_t)instruction->eops->next->next->offset)
 443                     len = (size_t)instruction->eops->next->next->offset;
 444             }
 445             /*
 446              * Dummy call to list->output to give the offset to the
 447              * listing module.
 448              */
 449             list->output(offset, NULL, OUT_RAWDATA, 0);
 450             list->uplevel(LIST_INCBIN);
 451             while (t--) {
 452                 size_t l;
 453
 454                 fseek(fp, base, SEEK_SET);
 455                 l = len;
 456                 while (l > 0) {
 457                     int32_t m;
 458                     m = fread(buf, 1, l > sizeof(buf) ? sizeof(buf) : l, fp);
 459                     if (!m) {
 460                         /*
 461                          * This shouldn't happen unless the file
 462                          * actually changes while we are reading
 463                          * it.
 464                          */
 465                         error(ERR_NONFATAL,
 466                               "`incbin': unexpected EOF while"
 467                               " reading file `%s'", fname);
 468                         t = 0;  /* Try to exit cleanly */
 469                         break;
 470                     }
 471                     out(offset, segment, buf, OUT_RAWDATA, m,
 472                         NO_SEG, NO_SEG);
 473                     l -= m;
 474                 }
 475             }
 476             list->downlevel(LIST_INCBIN);
 477             if (instruction->times > 1) {
 478                 /*
 479                  * Dummy call to list->output to give the offset to the
 480                  * listing module.
 481                  */
 482                 list->output(offset, NULL, OUT_RAWDATA, 0);
 483                 list->uplevel(LIST_TIMES);
 484                 list->downlevel(LIST_TIMES);
 485             }
 486             fclose(fp);
 487             return instruction->times * len;
 488         }
 489         return 0;               /* if we're here, there's an error */
 490     }
 491
 492     /* Check to see if we need an address-size prefix */
 493     add_asp(instruction, bits);
 494
 495     m = find_match(&temp, instruction, segment, offset, bits);
 496
 497     if (m == MOK_GOOD) {
 498         /* Matches! */
 499         int64_t insn_size = calcsize(segment, offset, bits, instruction, temp);
 500         itimes = instruction->times;
 501         if (insn_size < 0)  /* shouldn't be, on pass two */
 502             error(ERR_PANIC, "errors made it through from pass one");
 503         else
 504             while (itimes--) {
 505                 for (j = 0; j < MAXPREFIX; j++) {
 506                     uint8_t c = 0;
 507                     switch (instruction->prefixes[j]) {
 508                     case P_WAIT:
 509                         c = 0x9B;
 510                         break;
 511                     case P_LOCK:
 512                         c = 0xF0;
 513                         break;
 514                     case P_REPNE:
 515                     case P_REPNZ:
 516                     case P_XACQUIRE:
 517                         c = 0xF2;
 518                         break;
 519                     case P_REPE:
 520                     case P_REPZ:
 521                     case P_REP:
 522                     case P_XRELEASE:
 523                         c = 0xF3;
 524                         break;
 525                     case R_CS:
 526                         if (bits == 64) {
 527                             error(ERR_WARNING | ERR_PASS2,
 528                                   "cs segment base generated, but will be ignored in 64-bit mode");
 529                         }
 530                         c = 0x2E;
 531                         break;
 532                     case R_DS:
 533                         if (bits == 64) {
 534                             error(ERR_WARNING | ERR_PASS2,
 535                                   "ds segment base generated, but will be ignored in 64-bit mode");
 536                         }
 537                         c = 0x3E;
 538                         break;
 539                     case R_ES:
 540                         if (bits == 64) {
 541                             error(ERR_WARNING | ERR_PASS2,
 542                                   "es segment base generated, but will be ignored in 64-bit mode");
 543                         }
 544                         c = 0x26;
 545                         break;
 546                     case R_FS:
 547                         c = 0x64;
 548                         break;
 549                     case R_GS:
 550                         c = 0x65;
 551                         break;
 552                     case R_SS:
 553                         if (bits == 64) {
 554                             error(ERR_WARNING | ERR_PASS2,
 555                                   "ss segment base generated, but will be ignored in 64-bit mode");
 556                         }
 557                         c = 0x36;
 558                         break;
 559                     case R_SEGR6:
 560                     case R_SEGR7:
 561                         error(ERR_NONFATAL,
 562                               "segr6 and segr7 cannot be used as prefixes");
 563                         break;
 564                     case P_A16:
 565                         if (bits == 64) {
 566                             error(ERR_NONFATAL,
 567                                   "16-bit addressing is not supported "
 568                                   "in 64-bit mode");
 569                         } else if (bits != 16)
 570                             c = 0x67;
 571                         break;
 572                     case P_A32:
 573                         if (bits != 32)
 574                             c = 0x67;
 575                         break;
 576                     case P_A64:
 577                         if (bits != 64) {
 578                             error(ERR_NONFATAL,
 579                                   "64-bit addressing is only supported "
 580                                   "in 64-bit mode");
 581                         }
 582                         break;
 583                     case P_ASP:
 584                         c = 0x67;
 585                         break;
 586                     case P_O16:
 587                         if (bits != 16)
 588                             c = 0x66;
 589                         break;
 590                     case P_O32:
 591                         if (bits == 16)
 592                             c = 0x66;
 593                         break;
 594                     case P_O64:
 595                         /* REX.W */
 596                         break;
 597                     case P_OSP:
 598                         c = 0x66;
 599                         break;
 600                     case P_none:
 601                         break;
 602                     default:
 603                         error(ERR_PANIC, "invalid instruction prefix");
 604                     }
 605                     if (c != 0) {
 606                         out(offset, segment, &c, OUT_RAWDATA, 1,
 607                             NO_SEG, NO_SEG);
 608                         offset++;
 609                     }
 610                 }
 611                 insn_end = offset + insn_size;
 612                 gencode(segment, offset, bits, instruction,
 613                         temp, insn_end);
 614                 offset += insn_size;
 615                 if (itimes > 0 && itimes == instruction->times - 1) {
 616                     /*
 617                      * Dummy call to list->output to give the offset to the
 618                      * listing module.
 619                      */
 620                     list->output(offset, NULL, OUT_RAWDATA, 0);
 621                     list->uplevel(LIST_TIMES);
 622                 }
 623             }
 624         if (instruction->times > 1)
 625             list->downlevel(LIST_TIMES);
 626         return offset - start;
 627     } else {
 628         /* No match */
 629         switch (m) {
 630         case MERR_OPSIZEMISSING:
 631             error(ERR_NONFATAL, "operation size not specified");
 632             break;
 633         case MERR_OPSIZEMISMATCH:
 634             error(ERR_NONFATAL, "mismatch in operand sizes");
 635             break;
 636         case MERR_BADCPU:
 637             error(ERR_NONFATAL, "no instruction for this cpu level");
 638             break;
 639         case MERR_BADMODE:
 640             error(ERR_NONFATAL, "instruction not supported in %d-bit mode",
 641                   bits);
 642             break;
 643         default:
 644             error(ERR_NONFATAL,
 645                   "invalid combination of opcode and operands");
 646             break;
 647         }
 648     }
 649     return 0;
 650 }
 651
 652 int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
 653                   insn * instruction, efunc error)
 654 {
 655     const struct itemplate *temp;
 656     enum match_result m;
 657
 658     errfunc = error;            /* to pass to other functions */
 659     cpu = cp;
 660
 661     if (instruction->opcode == I_none)
 662         return 0;
 663
 664     if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
 665         instruction->opcode == I_DD || instruction->opcode == I_DQ ||
 666         instruction->opcode == I_DT || instruction->opcode == I_DO ||
 667         instruction->opcode == I_DY) {
 668         extop *e;
 669         int32_t isize, osize, wsize;
 670
 671         isize = 0;
 672         wsize = idata_bytes(instruction->opcode);
 673
 674         list_for_each(e, instruction->eops) {
 675             int32_t align;
 676
 677             osize = 0;
 678             if (e->type == EOT_DB_NUMBER) {
 679                 osize = 1;
 680                 warn_overflow_const(e->offset, wsize);
 681             } else if (e->type == EOT_DB_STRING ||
 682                        e->type == EOT_DB_STRING_FREE)
 683                 osize = e->stringlen;
 684
 685             align = (-osize) % wsize;
 686             if (align < 0)
 687                 align += wsize;
 688             isize += osize + align;
 689         }
 690         return isize * instruction->times;
 691     }
 692
 693     if (instruction->opcode == I_INCBIN) {
 694         const char *fname = instruction->eops->stringval;
 695         FILE *fp;
 696         int64_t val = 0;
 697         size_t len;
 698
 699         fp = fopen(fname, "rb");
 700         if (!fp)
 701             error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
 702                   fname);
 703         else if (fseek(fp, 0L, SEEK_END) < 0)
 704             error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
 705                   fname);
 706         else {
 707             len = ftell(fp);
 708             if (instruction->eops->next) {
 709                 len -= instruction->eops->next->offset;
 710                 if (instruction->eops->next->next &&
 711                     len > (size_t)instruction->eops->next->next->offset) {
 712                     len = (size_t)instruction->eops->next->next->offset;
 713                 }
 714             }
 715             val = instruction->times * len;
 716         }
 717         if (fp)
 718             fclose(fp);
 719         return val;
 720     }
 721
 722     /* Check to see if we need an address-size prefix */
 723     add_asp(instruction, bits);
 724
 725     m = find_match(&temp, instruction, segment, offset, bits);
 726     if (m == MOK_GOOD) {
 727         /* we've matched an instruction. */
 728         int64_t isize;
 729         int j;
 730
 731         isize = calcsize(segment, offset, bits, instruction, temp);
 732         if (isize < 0)
 733             return -1;
 734         for (j = 0; j < MAXPREFIX; j++) {
 735             switch (instruction->prefixes[j]) {
 736             case P_A16:
 737                 if (bits != 16)
 738                     isize++;
 739                 break;
 740             case P_A32:
 741                 if (bits != 32)
 742                     isize++;
 743                 break;
 744             case P_O16:
 745                 if (bits != 16)
 746                     isize++;
 747                 break;
 748             case P_O32:
 749                 if (bits == 16)
 750                     isize++;
 751                 break;
 752             case P_A64:
 753             case P_O64:
 754             case P_none:
 755                 break;
 756             default:
 757                 isize++;
 758                 break;
 759             }
 760         }
 761         return isize * instruction->times;
 762     } else {
 763         return -1;                  /* didn't match any instruction */
 764     }
 765 }
 766
 767 static bool possible_sbyte(operand *o)
 768 {
 769     return o->wrt == NO_SEG && o->segment == NO_SEG &&
 770         !(o->opflags & OPFLAG_UNKNOWN) &&
 771         optimizing >= 0 && !(o->type & STRICT);
 772 }
 773
 774 /* check that opn[op]  is a signed byte of size 16 or 32 */
 775 static bool is_sbyte16(operand *o)
 776 {
 777     int16_t v;
 778
 779     if (!possible_sbyte(o))
 780         return false;
 781
 782     v = o->offset;
 783     return v >= -128 && v <= 127;
 784 }
 785
 786 static bool is_sbyte32(operand *o)
 787 {
 788     int32_t v;
 789
 790     if (!possible_sbyte(o))
 791         return false;
 792
 793     v = o->offset;
 794     return v >= -128 && v <= 127;
 795 }
 796
 797 static void bad_hle_warn(const insn * ins, uint8_t hleok)
 798 {
 799     enum prefixes rep_pfx = ins->prefixes[PPS_REP];
 800     enum whatwarn { w_none, w_lock, w_inval } ww;
 801     static const enum whatwarn warn[2][4] =
 802     {
 803         { w_inval, w_inval, w_none, w_lock }, /* XACQUIRE */
 804         { w_inval, w_none,  w_none, w_lock }, /* XRELEASE */
 805     };
 806     unsigned int n;
 807
 808     n = (unsigned int)rep_pfx - P_XACQUIRE;
 809     if (n > 1)
 810         return;                 /* Not XACQUIRE/XRELEASE */
 811
 812     ww = warn[n][hleok];
 813     if (!is_class(MEMORY, ins->oprs[0].type))
 814         ww = w_inval;           /* HLE requires operand 0 to be memory */
 815
 816     switch (ww) {
 817     case w_none:
 818         break;
 819
 820     case w_lock:
 821         if (ins->prefixes[PPS_LOCK] != P_LOCK) {
 822             errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 823                     "%s with this instruction requires lock",
 824                     prefix_name(rep_pfx));
 825         }
 826         break;
 827
 828     case w_inval:
 829         errfunc(ERR_WARNING | ERR_WARN_HLE | ERR_PASS2,
 830                 "%s invalid with this instruction",
 831                 prefix_name(rep_pfx));
 832         break;
 833     }
 834 }
 835
/*
 * case4(x) expands to the four consecutive case labels x, x+1, x+2 and
 * x+3.  The code-walking switches below use it for template codes whose
 * low two bits select an operand, so one label group covers all four
 * operand choices.  Note that the expansion ends without a trailing
 * colon: the colon written after case4(...) at the use site completes
 * the last label.
 */
#define case4(x) case (x): case (x)+1: case (x)+2: case (x)+3
 838
/*
 * calcsize: walk the code string of a matched instruction template and
 * work out how many bytes the instruction will occupy.
 *
 * segment, offset - accepted but deliberately unused in this function
 * bits            - current mode; compared against 16, 32 and 64 below
 * ins             - the instruction being sized.  It is also updated:
 *                   ins->rex is rebuilt from scratch, the VEX fields
 *                   (vexreg, vex_cm, vex_wlp) are filled in when the
 *                   template contains a 0260..0263/0270 code, and default
 *                   O16/O32, REP/REPNE and WAIT entries are stored into
 *                   ins->prefixes[] when the template asks for them and
 *                   no prefix is already recorded in that slot.
 * temp            - the matched template; temp->code is scanned up to its
 *                   terminating zero byte and temp->flags is tested for
 *                   IF_LOCK.
 *
 * Returns the accumulated length, or -1 when the instruction cannot be
 * encoded (wrong mode for the template, effective address of the wrong
 * kind, or an illegal high-register / REX / VEX combination).
 *
 * Bytes for the entries of ins->prefixes[] are not added here; this
 * function only counts what the template codes and the REX/VEX prefix
 * contribute.  Warnings about LOCK and HLE prefixes are issued on the way
 * out.
 */
static int64_t calcsize(int32_t segment, int64_t offset, int bits,
                        insn * ins, const struct itemplate *temp)
{
    const uint8_t *codes = temp->code;
    int64_t length = 0;
    uint8_t c;
    int rex_mask = ~0;          /* REX bits allowed to survive the scan */
    int op1, op2;
    struct operand *opx;
    uint8_t opex = 0;           /* operand-index extension set by 05..07 */
    enum ea_type eat;
    uint8_t hleok = 0;          /* low two bits of a 0265..0267 code, if seen */
    bool lockcheck = true;

    ins->rex = 0;               /* Ensure REX is reset */
    eat = EA_SCALAR;            /* Expect a scalar EA */

    /* An O64 operand-size prefix is expressed through REX.W */
    if (ins->prefixes[PPS_OSIZE] == P_O64)
        ins->rex |= REX_W;

    (void)segment;              /* Don't warn that this parameter is unused */
    (void)offset;               /* Don't warn that this parameter is unused */

    while (*codes) {
        c = *codes++;
        /*
         * Operand selectors: bits 0-1 of the code pick the first operand
         * and bits 3-4 the second; each index is raised by 4 when the
         * preceding 05..07 code set the matching opex bit.
         */
        op1 = (c & 3) + ((opex & 1) << 2);
        op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
        opx = &ins->oprs[op1];
        opex = 0;               /* For the next iteration */

        switch (c) {
        /* c literal bytes follow in the template: skip and count them */
        case 01:
        case 02:
        case 03:
        case 04:
            codes += c, length += c;
            break;

        /* Operand-index extension; applies to the next code only */
        case 05:
        case 06:
        case 07:
            opex = c;
            break;

        /*
         * One template byte follows and one byte is counted; the
         * operand may also contribute REX flags.
         */
        case4(010):
            ins->rex |=
                op_rexflags(opx, REX_B|REX_H|REX_P|REX_W);
            codes++, length++;
            break;

        /* One byte each */
        case4(014):
        case4(020):
        case4(024):
            length++;
            break;

        /* Two bytes */
        case4(030):
            length += 2;
            break;

        /*
         * Two or four bytes: decided by the operand's own size bits when
         * it has any, otherwise by the current mode.
         */
        case4(034):
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        /* Four bytes */
        case4(040):
            length += 4;
            break;

        /* As many bytes as the address size (held in bits) requires */
        case4(044):
            length += ins->addr_size >> 3;
            break;

        /* One byte */
        case4(050):
            length++;
            break;

        case4(054):
            length += 8; /* MOV reg64/imm */
            break;

        /* Two bytes */
        case4(060):
            length += 2;
            break;

        /* Two or four bytes, chosen exactly as for 034..037 */
        case4(064):
            if (opx->type & (BITS16 | BITS32 | BITS64))
                length += (opx->type & BITS16) ? 2 : 4;
            else
                length += (bits == 16) ? 2 : 4;
            break;

        /* Four bytes */
        case4(070):
            length += 4;
            break;

        /* Two bytes */
        case4(074):
            length += 2;
            break;

        /* One byte when the operand qualifies as a signed byte, else two */
        case4(0140):
            length += is_sbyte16(opx) ? 1 : 2;
            break;

        /* One template byte follows; one byte is counted */
        case4(0144):
            codes++;
            length++;
            break;

        /* One byte when the operand qualifies as a signed byte, else four */
        case4(0150):
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        /* One template byte follows; one byte is counted */
        case4(0154):
            codes++;
            length++;
            break;

        /* One template byte follows; one byte is counted */
        case 0172:
        case 0173:
            codes++;
            length++;
            break;

        /* One byte */
        case4(0174):
            length++;
            break;

        /* One byte when the operand qualifies as a signed byte, else four */
        case4(0250):
            length += is_sbyte32(opx) ? 1 : 4;
            break;

        /* Four bytes */
        case4(0254):
            length += 4;
            break;

        /*
         * VEX-style template with a register operand: remember the
         * register and the two parameter bytes that follow.  The size of
         * the prefix itself is added after the loop.
         */
        case4(0260):
            ins->rex |= REX_V;
            ins->vexreg = regval(opx);
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        /* Remember which HLE class (1..3) the template declares */
        case 0265:
        case 0266:
        case 0267:
            hleok = c & 3;
            break;

        /* As 0260..0263, but with the register field fixed at zero */
        case 0270:
            ins->rex |= REX_V;
            ins->vexreg = 0;
            ins->vex_cm = *codes++;
            ins->vex_wlp = *codes++;
            break;

        /* One byte */
        case4(0274):
            length++;
            break;

        /* No size contribution */
        case4(0300):
            break;

        /*
         * Rejected outright in 64-bit mode; otherwise costs one byte
         * unless the mode is 16-bit or has_prefix() finds P_A16 in the
         * address-size slot.
         */
        case 0310:
            if (bits == 64)
                return -1;
            length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
            break;

        /*
         * Costs one byte unless the mode is 32-bit or has_prefix() finds
         * P_A32 in the address-size slot.
         */
        case 0311:
            length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
            break;

        /* No size contribution */
        case 0312:
            break;

        /* Only acceptable in 64-bit mode with neither A16 nor A32 */
        case 0313:
            if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
                has_prefix(ins, PPS_ASIZE, P_A32))
                return -1;
            break;

        /* No size contribution */
        case4(0314):
            break;

        /*
         * Template wants P_O16 in the operand-size slot: keep it if it is
         * already there, warn if a different prefix occupies the slot,
         * and install it if the slot is empty.  Nothing is added to
         * length here.
         */
        case 0320:
        {
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O16)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O16;
            break;
        }

        /* Same handling as 0320, for P_O32 */
        case 0321:
        {
            enum prefixes pfx = ins->prefixes[PPS_OSIZE];
            if (pfx == P_O32)
                break;
            if (pfx != P_none)
                errfunc(ERR_WARNING | ERR_PASS2, "invalid operand size prefix");
            else
                ins->prefixes[PPS_OSIZE] = P_O32;
            break;
        }

        /* No size contribution */
        case 0322:
            break;

        /* REX.W will be stripped when rex_mask is applied after the loop */
        case 0323:
            rex_mask &= ~REX_W;
            break;

        /* Force REX.W */
        case 0324:
            ins->rex |= REX_W;
            break;

        /* Flag checked after the loop: REX_H must not be set with it */
        case 0325:
            ins->rex |= REX_NH;
            break;

        /* One template byte follows; one byte is counted */
        case 0330:
            codes++, length++;
            break;

        /* No size contribution */
        case 0331:
            break;

        /* One byte */
        case 0332:
        case 0333:
            length++;
            break;

        /* Flag consulted in the REX handling after the loop */
        case 0334:
            ins->rex |= REX_L;
            break;

        /* No size contribution */
        case 0335:
            break;

        /* Default the REP slot to P_REP when nothing is recorded there */
        case 0336:
            if (!ins->prefixes[PPS_REP])
                ins->prefixes[PPS_REP] = P_REP;
            break;

        /* Default the REP slot to P_REPNE when nothing is recorded there */
        case 0337:
            if (!ins->prefixes[PPS_REP])
                ins->prefixes[PPS_REP] = P_REPNE;
            break;

        /*
         * Space reservation: the byte count is the value of operand 0,
         * which therefore must not refer to a segment.
         */
        case 0340:
            if (ins->oprs[0].segment != NO_SEG)
                errfunc(ERR_NONFATAL, "attempt to reserve non-constant"
                        " quantity of BSS space");
            else
                length += ins->oprs[0].offset;
            break;

        /* Default the WAIT slot to P_WAIT when nothing is recorded there */
        case 0341:
            if (!ins->prefixes[PPS_WAIT])
                ins->prefixes[PPS_WAIT] = P_WAIT;
            break;

        /* One byte */
        case4(0344):
            length++;
            break;

        /* No size contribution */
        case 0360:
            break;

        /* One byte */
        case 0361:
        case 0362:
        case 0363:
            length++;
            break;

        /* No size contribution */
        case 0364:
        case 0365:
            break;

        /* One byte */
        case 0366:
        case 0367:
            length++;
            break;

        /* No size contribution */
        case 0370:
        case 0371:
        case 0372:
            break;

        /* One byte */
        case 0373:
            length++;
            break;

        /* From here on process_ea() must report an XMM VSIB address */
        case 0374:
            eat = EA_XMMVSIB;
            break;

        /* From here on process_ea() must report a YMM VSIB address */
        case 0375:
            eat = EA_YMMVSIB;
            break;

        /*
         * Effective-address codes: op2 names the operand holding the
         * address.  process_ea() supplies its encoded size and any REX
         * bits it needs.
         */
        case4(0100):
        case4(0110):
        case4(0120):
        case4(0130):
        case4(0200):
        case4(0204):
        case4(0210):
        case4(0214):
        case4(0220):
        case4(0224):
        case4(0230):
        case4(0234):
            {
                ea ea_data;
                int rfield;
                opflags_t rflags;
                struct operand *opy = &ins->oprs[op2];

                ea_data.rex = 0;           /* Ensure ea.REX is initially 0 */

                if (c <= 0177) {
                    /* pick rfield from operand b (opx) */
                    rflags = regflag(opx);
                    rfield = nasm_regvals[opx->basereg];
                } else {
                    /* rfield is a constant: the low three bits of the code */
                    rflags = 0;
                    rfield = c & 7;
                }
                /* The address kind has to match what the template expects */
                if (process_ea(opy, &ea_data, bits,ins->addr_size,
                               rfield, rflags) != eat) {
                    errfunc(ERR_NONFATAL, "invalid effective address");
                    return -1;
                } else {
                    ins->rex |= ea_data.rex;
                    length += ea_data.size;
                }
            }
            break;

        default:
            errfunc(ERR_PANIC, "internal instruction table corrupt"
                    ": instruction code \\%o (0x%02X) given", c, c);
            break;
        }
    }

    /* Drop any REX bits a 0323 code asked to suppress */
    ins->rex &= rex_mask;

    if (ins->rex & REX_NH) {
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "instruction cannot use high registers");
            return -1;
        }
        ins->rex &= ~REX_P;        /* Don't force REX prefix due to high reg */
    }

    if (ins->rex & REX_V) {
        /* REX bits that may not be set outside 64-bit mode */
        int bad32 = REX_R|REX_W|REX_X|REX_B;

        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in vex instruction");
            return -1;
        }
        /* Bits 4-5 of vex_wlp say how REX.W is to be treated */
        switch (ins->vex_wlp & 060) {
        case 000:
        case 040:
            ins->rex &= ~REX_W;
            break;
        case 020:
            ins->rex |= REX_W;
            bad32 &= ~REX_W;    /* forced W is tolerated outside 64-bit mode */
            break;
        case 060:
            /* Follow REX_W */
            break;
        }

        if (bits != 64 && ((ins->rex & bad32) || ins->vexreg > 7)) {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
        /* Three bytes unless vex_cm is 1 and none of W/X/B is set */
        if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)))
            length += 3;
        else
            length += 2;
    } else if (ins->rex & REX_REAL) {
        if (ins->rex & REX_H) {
            errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
            return -1;
        } else if (bits == 64) {
            /* One byte for the REX prefix */
            length++;
        } else if ((ins->rex & REX_L) &&
                   !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
                   cpu >= IF_X86_64) {
            /* LOCK-as-REX.R */
            assert_no_prefix(ins, PPS_LOCK);
            lockcheck = false;  /* Already errored, no need for warning */
            length++;
        } else {
            errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
            return -1;
        }
    }

    /*
     * A LOCK prefix draws a warning unless the template carries IF_LOCK
     * and operand 0 is a memory operand.
     */
    if (has_prefix(ins, PPS_LOCK, P_LOCK) && lockcheck &&
        (!(temp->flags & IF_LOCK) || !is_class(MEMORY, ins->oprs[0].type))) {
        errfunc(ERR_WARNING | ERR_WARN_LOCK | ERR_PASS2 ,
                "instruction is not lockable");
    }

    bad_hle_warn(ins, hleok);

    return length;
}
1260
/*
 * EMIT_REX: output the pending REX prefix byte, if there is one.
 *
 * Expands in a scope that provides `ins', `bits', `offset', `segment'
 * and out().  A byte is emitted only when all of the following hold:
 * REX_V is not set in ins->rex, at least one REX_REAL bit is set, and
 * bits is 64.  The byte consists of the REX_REAL bits of ins->rex
 * combined with REX_P.  Afterwards ins->rex is cleared, so a later
 * EMIT_REX() in the same instruction emits nothing, and offset is
 * advanced by one.
 *
 * The prefix is passed to out() through a byte-sized temporary, so
 * exactly one well-defined byte is handed over whatever the declared
 * type of ins->rex is.  The do { } while (0) wrapper makes the macro
 * behave as a single statement, so `if (x) EMIT_REX(); else ...' is
 * safe; always follow it with a semicolon.
 */
#define EMIT_REX()                                                      \
    do {                                                                \
        if (!(ins->rex & REX_V) && (ins->rex & REX_REAL) &&             \
            (bits == 64)) {                                             \
            uint8_t emit_rex_byte_ = (ins->rex & REX_REAL) | REX_P;     \
            out(offset, segment, &emit_rex_byte_, OUT_RAWDATA, 1,       \
                NO_SEG, NO_SEG);                                        \
            ins->rex = 0;                                               \
            offset += 1;                                                \
        }                                                               \
    } while (0)
1268
1269 static void gencode(int32_t segment, int64_t offset, int bits,
1270                     insn * ins, const struct itemplate *temp,
1271                     int64_t insn_end)
1272 {
1273     static const char condval[] = {   /* conditional opcodes */
1274         0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
1275         0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
1276         0x0, 0xA, 0xA, 0xB, 0x8, 0x4
1277     };
1278     uint8_t c;
1279     uint8_t bytes[4];
1280     int64_t size;
1281     int64_t data;
1282     int op1, op2;
1283     struct operand *opx;
1284     const uint8_t *codes = temp->code;
1285     uint8_t opex = 0;
1286     enum ea_type eat = EA_SCALAR;
1287
1288     while (*codes) {
1289         c = *codes++;
1290         op1 = (c & 3) + ((opex & 1) << 2);
1291         op2 = ((c >> 3) & 3) + ((opex & 2) << 1);
1292         opx = &ins->oprs[op1];
1293         opex = 0;                /* For the next iteration */
1294
1295         switch (c) {
1296         case 01:
1297         case 02:
1298         case 03:
1299         case 04:
1300             EMIT_REX();
1301             out(offset, segment, codes, OUT_RAWDATA, c, NO_SEG, NO_SEG);
1302             codes += c;
1303             offset += c;
1304             break;
1305
1306         case 05:
1307         case 06:
1308         case 07:
1309             opex = c;
1310             break;
1311
1312         case4(010):
1313             EMIT_REX();
1314             bytes[0] = *codes++ + (regval(opx) & 7);
1315             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1316             offset += 1;
1317             break;
1318
1319         case4(014):
1320             /*
1321              * The test for BITS8 and SBYTE here is intended to avoid
1322              * warning on optimizer actions due to SBYTE, while still
1323              * warn on explicit BYTE directives.  Also warn, obviously,
1324              * if the optimizer isn't enabled.
1325              */
1326             if (((opx->type & BITS8) ||
1327                  !(opx->type & temp->opd[op1] & BYTENESS)) &&
1328                 (opx->offset < -128 || opx->offset > 127)) {
1329                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1330                         "signed byte value exceeds bounds");
1331             }
1332             if (opx->segment != NO_SEG) {
1333                 data = opx->offset;
1334                 out(offset, segment, &data, OUT_ADDRESS, 1,
1335                     opx->segment, opx->wrt);
1336             } else {
1337                 bytes[0] = opx->offset;
1338                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1339                     NO_SEG);
1340             }
1341             offset += 1;
1342             break;
1343
1344         case4(020):
1345             if (opx->offset < -256 || opx->offset > 255) {
1346                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1347                         "byte value exceeds bounds");
1348             }
1349             if (opx->segment != NO_SEG) {
1350                 data = opx->offset;
1351                 out(offset, segment, &data, OUT_ADDRESS, 1,
1352                     opx->segment, opx->wrt);
1353             } else {
1354                 bytes[0] = opx->offset;
1355                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1356                     NO_SEG);
1357             }
1358             offset += 1;
1359             break;
1360
1361         case4(024):
1362             if (opx->offset < 0 || opx->offset > 255)
1363                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1364                         "unsigned byte value exceeds bounds");
1365             if (opx->segment != NO_SEG) {
1366                 data = opx->offset;
1367                 out(offset, segment, &data, OUT_ADDRESS, 1,
1368                     opx->segment, opx->wrt);
1369             } else {
1370                 bytes[0] = opx->offset;
1371                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1372                     NO_SEG);
1373             }
1374             offset += 1;
1375             break;
1376
1377         case4(030):
1378             warn_overflow_opd(opx, 2);
1379             data = opx->offset;
1380             out(offset, segment, &data, OUT_ADDRESS, 2,
1381                 opx->segment, opx->wrt);
1382             offset += 2;
1383             break;
1384
1385         case4(034):
1386             if (opx->type & (BITS16 | BITS32))
1387                 size = (opx->type & BITS16) ? 2 : 4;
1388             else
1389                 size = (bits == 16) ? 2 : 4;
1390             warn_overflow_opd(opx, size);
1391             data = opx->offset;
1392             out(offset, segment, &data, OUT_ADDRESS, size,
1393                 opx->segment, opx->wrt);
1394             offset += size;
1395             break;
1396
1397         case4(040):
1398             warn_overflow_opd(opx, 4);
1399             data = opx->offset;
1400             out(offset, segment, &data, OUT_ADDRESS, 4,
1401                 opx->segment, opx->wrt);
1402             offset += 4;
1403             break;
1404
1405         case4(044):
1406             data = opx->offset;
1407             size = ins->addr_size >> 3;
1408             warn_overflow_opd(opx, size);
1409             out(offset, segment, &data, OUT_ADDRESS, size,
1410                 opx->segment, opx->wrt);
1411             offset += size;
1412             break;
1413
1414         case4(050):
1415             if (opx->segment != segment) {
1416                 data = opx->offset;
1417                 out(offset, segment, &data,
1418                     OUT_REL1ADR, insn_end - offset,
1419                     opx->segment, opx->wrt);
1420             } else {
1421                 data = opx->offset - insn_end;
1422                 if (data > 127 || data < -128)
1423                     errfunc(ERR_NONFATAL, "short jump is out of range");
1424                 out(offset, segment, &data,
1425                     OUT_ADDRESS, 1, NO_SEG, NO_SEG);
1426             }
1427             offset += 1;
1428             break;
1429
1430         case4(054):
1431             data = (int64_t)opx->offset;
1432             out(offset, segment, &data, OUT_ADDRESS, 8,
1433                 opx->segment, opx->wrt);
1434             offset += 8;
1435             break;
1436
1437         case4(060):
1438             if (opx->segment != segment) {
1439                 data = opx->offset;
1440                 out(offset, segment, &data,
1441                     OUT_REL2ADR, insn_end - offset,
1442                     opx->segment, opx->wrt);
1443             } else {
1444                 data = opx->offset - insn_end;
1445                 out(offset, segment, &data,
1446                     OUT_ADDRESS, 2, NO_SEG, NO_SEG);
1447             }
1448             offset += 2;
1449             break;
1450
1451         case4(064):
1452             if (opx->type & (BITS16 | BITS32 | BITS64))
1453                 size = (opx->type & BITS16) ? 2 : 4;
1454             else
1455                 size = (bits == 16) ? 2 : 4;
1456             if (opx->segment != segment) {
1457                 data = opx->offset;
1458                 out(offset, segment, &data,
1459                     size == 2 ? OUT_REL2ADR : OUT_REL4ADR,
1460                     insn_end - offset, opx->segment, opx->wrt);
1461             } else {
1462                 data = opx->offset - insn_end;
1463                 out(offset, segment, &data,
1464                     OUT_ADDRESS, size, NO_SEG, NO_SEG);
1465             }
1466             offset += size;
1467             break;
1468
1469         case4(070):
1470             if (opx->segment != segment) {
1471                 data = opx->offset;
1472                 out(offset, segment, &data,
1473                     OUT_REL4ADR, insn_end - offset,
1474                     opx->segment, opx->wrt);
1475             } else {
1476                 data = opx->offset - insn_end;
1477                 out(offset, segment, &data,
1478                     OUT_ADDRESS, 4, NO_SEG, NO_SEG);
1479             }
1480             offset += 4;
1481             break;
1482
1483         case4(074):
1484             if (opx->segment == NO_SEG)
1485                 errfunc(ERR_NONFATAL, "value referenced by FAR is not"
1486                         " relocatable");
1487             data = 0;
1488             out(offset, segment, &data, OUT_ADDRESS, 2,
1489                 outfmt->segbase(1 + opx->segment),
1490                 opx->wrt);
1491             offset += 2;
1492             break;
1493
1494         case4(0140):
1495             data = opx->offset;
1496             warn_overflow_opd(opx, 2);
1497             if (is_sbyte16(opx)) {
1498                 bytes[0] = data;
1499                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1500                     NO_SEG);
1501                 offset++;
1502             } else {
1503                 out(offset, segment, &data, OUT_ADDRESS, 2,
1504                     opx->segment, opx->wrt);
1505                 offset += 2;
1506             }
1507             break;
1508
1509         case4(0144):
1510             EMIT_REX();
1511             bytes[0] = *codes++;
1512             if (is_sbyte16(opx))
1513                 bytes[0] |= 2;  /* s-bit */
1514             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1515             offset++;
1516             break;
1517
1518         case4(0150):
1519             data = opx->offset;
1520             warn_overflow_opd(opx, 4);
1521             if (is_sbyte32(opx)) {
1522                 bytes[0] = data;
1523                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1524                     NO_SEG);
1525                 offset++;
1526             } else {
1527                 out(offset, segment, &data, OUT_ADDRESS, 4,
1528                     opx->segment, opx->wrt);
1529                 offset += 4;
1530             }
1531             break;
1532
1533         case4(0154):
1534             EMIT_REX();
1535             bytes[0] = *codes++;
1536             if (is_sbyte32(opx))
1537                 bytes[0] |= 2;  /* s-bit */
1538             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1539             offset++;
1540             break;
1541
1542         case 0172:
1543             c = *codes++;
1544             opx = &ins->oprs[c >> 3];
1545             bytes[0] = nasm_regvals[opx->basereg] << 4;
1546             opx = &ins->oprs[c & 7];
1547             if (opx->segment != NO_SEG || opx->wrt != NO_SEG) {
1548                 errfunc(ERR_NONFATAL,
1549                         "non-absolute expression not permitted as argument %d",
1550                         c & 7);
1551             } else {
1552                 if (opx->offset & ~15) {
1553                     errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1554                             "four-bit argument exceeds bounds");
1555                 }
1556                 bytes[0] |= opx->offset & 15;
1557             }
1558             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1559             offset++;
1560             break;
1561
1562         case 0173:
1563             c = *codes++;
1564             opx = &ins->oprs[c >> 4];
1565             bytes[0] = nasm_regvals[opx->basereg] << 4;
1566             bytes[0] |= c & 15;
1567             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1568             offset++;
1569             break;
1570
1571         case4(0174):
1572             bytes[0] = nasm_regvals[opx->basereg] << 4;
1573             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1574             offset++;
1575             break;
1576
1577         case4(0250):
1578             data = opx->offset;
1579             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1580                 (int32_t)data != (int64_t)data) {
1581                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1582                         "signed dword immediate exceeds bounds");
1583             }
1584             if (is_sbyte32(opx)) {
1585                 bytes[0] = data;
1586                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1587                     NO_SEG);
1588                 offset++;
1589             } else {
1590                 out(offset, segment, &data, OUT_ADDRESS, 4,
1591                     opx->segment, opx->wrt);
1592                 offset += 4;
1593             }
1594             break;
1595
1596         case4(0254):
1597             data = opx->offset;
1598             if (opx->wrt == NO_SEG && opx->segment == NO_SEG &&
1599                 (int32_t)data != (int64_t)data) {
1600                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1601                         "signed dword immediate exceeds bounds");
1602             }
1603             out(offset, segment, &data, OUT_ADDRESS, 4,
1604                 opx->segment, opx->wrt);
1605             offset += 4;
1606             break;
1607
1608         case4(0260):
1609         case 0270:
1610             codes += 2;
1611             if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B))) {
1612                 bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
1613                 bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
1614                 bytes[2] = ((ins->rex & REX_W) << (7-3)) |
1615                     ((~ins->vexreg & 15)<< 3) | (ins->vex_wlp & 07);
1616                 out(offset, segment, &bytes, OUT_RAWDATA, 3, NO_SEG, NO_SEG);
1617                 offset += 3;
1618             } else {
1619                 bytes[0] = 0xc5;
1620                 bytes[1] = ((~ins->rex & REX_R) << (7-2)) |
1621                     ((~ins->vexreg & 15) << 3) | (ins->vex_wlp & 07);
1622                 out(offset, segment, &bytes, OUT_RAWDATA, 2, NO_SEG, NO_SEG);
1623                 offset += 2;
1624             }
1625             break;
1626
1627         case 0265:
1628         case 0266:
1629         case 0267:
1630             break;
1631
1632         case4(0274):
1633         {
1634             uint64_t uv, um;
1635             int s;
1636
1637             if (ins->rex & REX_W)
1638                 s = 64;
1639             else if (ins->prefixes[PPS_OSIZE] == P_O16)
1640                 s = 16;
1641             else if (ins->prefixes[PPS_OSIZE] == P_O32)
1642                 s = 32;
1643             else
1644                 s = bits;
1645
1646             um = (uint64_t)2 << (s-1);
1647             uv = opx->offset;
1648
1649             if (uv > 127 && uv < (uint64_t)-128 &&
1650                 (uv < um-128 || uv > um-1)) {
1651                 errfunc(ERR_WARNING | ERR_PASS2 | ERR_WARN_NOV,
1652                         "signed byte value exceeds bounds");
1653             }
1654             if (opx->segment != NO_SEG) {
1655                 data = uv;
1656                 out(offset, segment, &data, OUT_ADDRESS, 1,
1657                     opx->segment, opx->wrt);
1658             } else {
1659                 bytes[0] = uv;
1660                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG,
1661                     NO_SEG);
1662             }
1663             offset += 1;
1664             break;
1665         }
1666
1667         case4(0300):
1668             break;
1669
1670         case 0310:
1671             if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
1672                 *bytes = 0x67;
1673                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1674                 offset += 1;
1675             } else
1676                 offset += 0;
1677             break;
1678
1679         case 0311:
1680             if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
1681                 *bytes = 0x67;
1682                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1683                 offset += 1;
1684             } else
1685                 offset += 0;
1686             break;
1687
1688         case 0312:
1689             break;
1690
1691         case 0313:
1692             ins->rex = 0;
1693             break;
1694
1695         case4(0314):
1696             break;
1697
1698         case 0320:
1699         case 0321:
1700             break;
1701
1702         case 0322:
1703         case 0323:
1704             break;
1705
1706         case 0324:
1707             ins->rex |= REX_W;
1708             break;
1709
1710         case 0325:
1711             break;
1712
1713         case 0330:
1714             *bytes = *codes++ ^ condval[ins->condition];
1715             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1716             offset += 1;
1717             break;
1718
1719         case 0331:
1720             break;
1721
1722         case 0332:
1723         case 0333:
1724             *bytes = c - 0332 + 0xF2;
1725             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1726             offset += 1;
1727             break;
1728
1729         case 0334:
1730             if (ins->rex & REX_R) {
1731                 *bytes = 0xF0;
1732                 out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1733                 offset += 1;
1734             }
1735             ins->rex &= ~(REX_L|REX_R);
1736             break;
1737
1738         case 0335:
1739             break;
1740
1741         case 0336:
1742         case 0337:
1743             break;
1744
1745         case 0340:
1746             if (ins->oprs[0].segment != NO_SEG)
1747                 errfunc(ERR_PANIC, "non-constant BSS size in pass two");
1748             else {
1749                 int64_t size = ins->oprs[0].offset;
1750                 if (size > 0)
1751                     out(offset, segment, NULL,
1752                         OUT_RESERVE, size, NO_SEG, NO_SEG);
1753                 offset += size;
1754             }
1755             break;
1756
1757         case 0341:
1758             break;
1759
1760         case 0344:
1761         case 0345:
1762             bytes[0] = c & 1;
1763             switch (ins->oprs[0].basereg) {
1764             case R_CS:
1765                 bytes[0] += 0x0E;
1766                 break;
1767             case R_DS:
1768                 bytes[0] += 0x1E;
1769                 break;
1770             case R_ES:
1771                 bytes[0] += 0x06;
1772                 break;
1773             case R_SS:
1774                 bytes[0] += 0x16;
1775                 break;
1776             default:
1777                 errfunc(ERR_PANIC,
1778                         "bizarre 8086 segment register received");
1779             }
1780             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1781             offset++;
1782             break;
1783
1784         case 0346:
1785         case 0347:
1786             bytes[0] = c & 1;
1787             switch (ins->oprs[0].basereg) {
1788             case R_FS:
1789                 bytes[0] += 0xA0;
1790                 break;
1791             case R_GS:
1792                 bytes[0] += 0xA8;
1793                 break;
1794             default:
1795                 errfunc(ERR_PANIC,
1796                         "bizarre 386 segment register received");
1797             }
1798             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1799             offset++;
1800             break;
1801
1802         case 0360:
1803             break;
1804
1805         case 0361:
1806             bytes[0] = 0x66;
1807             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1808             offset += 1;
1809             break;
1810
1811         case 0362:
1812         case 0363:
1813             bytes[0] = c - 0362 + 0xf2;
1814             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1815             offset += 1;
1816             break;
1817
1818         case 0364:
1819         case 0365:
1820             break;
1821
1822         case 0366:
1823         case 0367:
1824             *bytes = c - 0366 + 0x66;
1825             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1826             offset += 1;
1827             break;
1828
1829         case 0370:
1830         case 0371:
1831             break;
1832
1833         case 0373:
1834             *bytes = bits == 16 ? 3 : 5;
1835             out(offset, segment, bytes, OUT_RAWDATA, 1, NO_SEG, NO_SEG);
1836             offset += 1;
1837             break;
1838
1839         case 0374:
1840             eat = EA_XMMVSIB;
1841             break;
1842
1843         case 0375:
1844             eat = EA_YMMVSIB;
1845             break;
1846
1847         case4(0100):
1848         case4(0110):
1849         case4(0120):
1850         case4(0130):
1851         case4(0200):
1852         case4(0204):
1853         case4(0210):
1854         case4(0214):
1855         case4(0220):
1856         case4(0224):
1857         case4(0230):
1858         case4(0234):
1859             {
1860                 ea ea_data;
1861                 int rfield;
1862                 opflags_t rflags;
1863                 uint8_t *p;
1864                 int32_t s;
1865                 struct operand *opy = &ins->oprs[op2];
1866
1867                 if (c <= 0177) {
1868                     /* pick rfield from operand b (opx) */
1869                     rflags = regflag(opx);
1870                     rfield = nasm_regvals[opx->basereg];
1871                 } else {
1872                     /* rfield is constant */
1873                     rflags = 0;
1874                     rfield = c & 7;
1875                 }
1876
1877                 if (process_ea(opy, &ea_data, bits, ins->addr_size,
1878                                rfield, rflags) != eat)
1879                     errfunc(ERR_NONFATAL, "invalid effective address");
1880
1881                 p = bytes;
1882                 *p++ = ea_data.modrm;
1883                 if (ea_data.sib_present)
1884                     *p++ = ea_data.sib;
1885
1886                 s = p - bytes;
1887                 out(offset, segment, bytes, OUT_RAWDATA, s, NO_SEG, NO_SEG);
1888
1889                 /*
1890                  * Make sure the address gets the right offset in case
1891                  * the line breaks in the .lst file (BR 1197827)
1892                  */
1893                 offset += s;
1894                 s = 0;
1895
1896                 switch (ea_data.bytes) {
1897                 case 0:
1898                     break;
1899                 case 1:
1900                 case 2:
1901                 case 4:
1902                 case 8:
1903                     data = opy->offset;
1904                     s += ea_data.bytes;
1905                     if (ea_data.rip) {
1906                         if (opy->segment == segment) {
1907                             data -= insn_end;
1908                             if (overflow_signed(data, ea_data.bytes))
1909                                 warn_overflow(ERR_PASS2, ea_data.bytes);
1910                             out(offset, segment, &data, OUT_ADDRESS,
1911                                 ea_data.bytes, NO_SEG, NO_SEG);
1912                         } else {
1913                             /* overflow check in output/linker? */
1914                             out(offset, segment, &data,        OUT_REL4ADR,
1915                                 insn_end - offset, opy->segment, opy->wrt);
1916                         }
1917                     } else {
1918                         if (overflow_general(opy->offset, ins->addr_size >> 3) ||
1919                             signed_bits(opy->offset, ins->addr_size) !=
1920                             signed_bits(opy->offset, ea_data.bytes * 8))
1921                             warn_overflow(ERR_PASS2, ea_data.bytes);
1922
1923                         out(offset, segment, &data, OUT_ADDRESS,
1924                             ea_data.bytes, opy->segment, opy->wrt);
1925                     }
1926                     break;
1927                 default:
1928                     /* Impossible! */
1929                     errfunc(ERR_PANIC,
1930                             "Invalid amount of bytes (%d) for offset?!",
1931                             ea_data.bytes);
1932                     break;
1933                 }
1934                 offset += s;
1935             }
1936             break;
1937
1938         default:
1939             errfunc(ERR_PANIC, "internal instruction table corrupt"
1940                     ": instruction code \\%o (0x%02X) given", c, c);
1941             break;
1942         }
1943     }
1944 }
1945
1946 static opflags_t regflag(const operand * o)
1947 {
1948     if (!is_register(o->basereg))
1949         errfunc(ERR_PANIC, "invalid operand passed to regflag()");
1950     return nasm_reg_flags[o->basereg];
1951 }
1952
1953 static int32_t regval(const operand * o)
1954 {
1955     if (!is_register(o->basereg))
1956         errfunc(ERR_PANIC, "invalid operand passed to regval()");
1957     return nasm_regvals[o->basereg];
1958 }
1959
1960 static int op_rexflags(const operand * o, int mask)
1961 {
1962     opflags_t flags;
1963     int val;
1964
1965     if (!is_register(o->basereg))
1966         errfunc(ERR_PANIC, "invalid operand passed to op_rexflags()");
1967
1968     flags = nasm_reg_flags[o->basereg];
1969     val = nasm_regvals[o->basereg];
1970
1971     return rexflags(val, flags, mask);
1972 }
1973
1974 static int rexflags(int val, opflags_t flags, int mask)
1975 {
1976     int rex = 0;
1977
1978     if (val >= 8)
1979         rex |= REX_B|REX_X|REX_R;
1980     if (flags & BITS64)
1981         rex |= REX_W;
1982     if (!(REG_HIGH & ~flags))                   /* AH, CH, DH, BH */
1983         rex |= REX_H;
1984     else if (!(REG8 & ~flags) && val >= 4)      /* SPL, BPL, SIL, DIL */
1985         rex |= REX_P;
1986
1987     return rex & mask;
1988 }
1989
1990 static enum match_result find_match(const struct itemplate **tempp,
1991                                     insn *instruction,
1992                                     int32_t segment, int64_t offset, int bits)
1993 {
1994     const struct itemplate *temp;
1995     enum match_result m, merr;
1996     opflags_t xsizeflags[MAX_OPERANDS];
1997     bool opsizemissing = false;
1998     int i;
1999
2000     for (i = 0; i < instruction->operands; i++)
2001         xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
2002
2003     merr = MERR_INVALOP;
2004
2005     for (temp = nasm_instructions[instruction->opcode];
2006          temp->opcode != I_none; temp++) {
2007         m = matches(temp, instruction, bits);
2008         if (m == MOK_JUMP) {
2009             if (jmp_match(segment, offset, bits, instruction, temp))
2010                 m = MOK_GOOD;
2011             else
2012                 m = MERR_INVALOP;
2013         } else if (m == MERR_OPSIZEMISSING &&
2014                    (temp->flags & IF_SMASK) != IF_SX) {
2015             /*
2016              * Missing operand size and a candidate for fuzzy matching...
2017              */
2018             for (i = 0; i < temp->operands; i++) {
2019                 if ((temp->opd[i] & SAME_AS) == 0)
2020                     xsizeflags[i] |= temp->opd[i] & SIZE_MASK;
2021             }
2022             opsizemissing = true;
2023         }
2024         if (m > merr)
2025             merr = m;
2026         if (merr == MOK_GOOD)
2027             goto done;
2028     }
2029
2030     /* No match, but see if we can get a fuzzy operand size match... */
2031     if (!opsizemissing)
2032         goto done;
2033
2034     for (i = 0; i < instruction->operands; i++) {
2035         /*
2036          * We ignore extrinsic operand sizes on registers, so we should
2037          * never try to fuzzy-match on them.  This also resolves the case
2038          * when we have e.g. "xmmrm128" in two different positions.
2039          */
2040         if (is_class(REGISTER, instruction->oprs[i].type))
2041             continue;
2042
2043         /* This tests if xsizeflags[i] has more than one bit set */
2044         if ((xsizeflags[i] & (xsizeflags[i]-1)))
2045             goto done;                /* No luck */
2046
2047         instruction->oprs[i].type |= xsizeflags[i]; /* Set the size */
2048     }
2049
2050     /* Try matching again... */
2051     for (temp = nasm_instructions[instruction->opcode];
2052          temp->opcode != I_none; temp++) {
2053         m = matches(temp, instruction, bits);
2054         if (m == MOK_JUMP) {
2055             if (jmp_match(segment, offset, bits, instruction, temp))
2056                 m = MOK_GOOD;
2057             else
2058                 m = MERR_INVALOP;
2059         }
2060         if (m > merr)
2061             merr = m;
2062         if (merr == MOK_GOOD)
2063             goto done;
2064     }
2065
2066 done:
2067     *tempp = temp;
2068     return merr;
2069 }
2070
2071 static enum match_result matches(const struct itemplate *itemp,
2072                                  insn *instruction, int bits)
2073 {
2074     int i, size[MAX_OPERANDS], asize, oprs;
2075     bool opsizemissing = false;
2076
2077     /*
2078      * Check the opcode
2079      */
2080     if (itemp->opcode != instruction->opcode)
2081         return MERR_INVALOP;
2082
2083     /*
2084      * Count the operands
2085      */
2086     if (itemp->operands != instruction->operands)
2087         return MERR_INVALOP;
2088
2089     /*
2090      * Is it legal?
2091      */
2092     if (!(optimizing > 0) && (itemp->flags & IF_OPT))
2093         return MERR_INVALOP;
2094
2095     /*
2096      * Check that no spurious colons or TOs are present
2097      */
2098     for (i = 0; i < itemp->operands; i++)
2099         if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON | TO))
2100             return MERR_INVALOP;
2101
2102     /*
2103      * Process size flags
2104      */
2105     switch (itemp->flags & IF_SMASK) {
2106     case IF_SB:
2107         asize = BITS8;
2108         break;
2109     case IF_SW:
2110         asize = BITS16;
2111         break;
2112     case IF_SD:
2113         asize = BITS32;
2114         break;
2115     case IF_SQ:
2116         asize = BITS64;
2117         break;
2118     case IF_SO:
2119         asize = BITS128;
2120         break;
2121     case IF_SY:
2122         asize = BITS256;
2123         break;
2124     case IF_SZ:
2125         switch (bits) {
2126         case 16:
2127             asize = BITS16;
2128             break;
2129         case 32:
2130             asize = BITS32;
2131             break;
2132         case 64:
2133             asize = BITS64;
2134             break;
2135         default:
2136             asize = 0;
2137             break;
2138         }
2139         break;
2140     default:
2141         asize = 0;
2142         break;
2143     }
2144
2145     if (itemp->flags & IF_ARMASK) {
2146         /* S- flags only apply to a specific operand */
2147         i = ((itemp->flags & IF_ARMASK) >> IF_ARSHFT) - 1;
2148         memset(size, 0, sizeof size);
2149         size[i] = asize;
2150     } else {
2151         /* S- flags apply to all operands */
2152         for (i = 0; i < MAX_OPERANDS; i++)
2153             size[i] = asize;
2154     }
2155
2156     /*
2157      * Check that the operand flags all match up,
2158      * it's a bit tricky so lets be verbose:
2159      *
2160      * 1) Find out the size of operand. If instruction
2161      *    doesn't have one specified -- we're trying to
2162      *    guess it either from template (IF_S* flag) or
2163      *    from code bits.
2164      *
2165      * 2) If template operand (i) has SAME_AS flag [used for registers only]
2166      *    (ie the same operand as was specified somewhere in template, and
2167      *    this referred operand index is being achieved via ~SAME_AS)
2168      *    we are to be sure that both registers (in template and instruction)
2169      *    do exactly match.
2170      *
2171      * 3) If template operand do not match the instruction OR
2172      *    template has an operand size specified AND this size differ
2173      *    from which instruction has (perhaps we got it from code bits)
2174      *    we are:
2175      *      a)  Check that only size of instruction and operand is differ
2176      *          other characteristics do match
2177      *      b)  Perhaps it's a register specified in instruction so
2178      *          for such a case we just mark that operand as "size
2179      *          missing" and this will turn on fuzzy operand size
2180      *          logic facility (handled by a caller)
2181      */
2182     for (i = 0; i < itemp->operands; i++) {
2183         opflags_t type = instruction->oprs[i].type;
2184         if (!(type & SIZE_MASK))
2185             type |= size[i];
2186
2187         if (itemp->opd[i] & SAME_AS) {
2188             int j = itemp->opd[i] & ~SAME_AS;
2189             if (type != instruction->oprs[j].type ||
2190                 instruction->oprs[i].basereg != instruction->oprs[j].basereg)
2191                 return MERR_INVALOP;
2192         } else if (itemp->opd[i] & ~type ||
2193             ((itemp->opd[i] & SIZE_MASK) &&
2194              ((itemp->opd[i] ^ type) & SIZE_MASK))) {
2195             if ((itemp->opd[i] & ~type & ~SIZE_MASK) || (type & SIZE_MASK)) {
2196                 return MERR_INVALOP;
2197             } else if (!is_class(REGISTER, type)) {
2198                 /*
2199                  * Note: we don't honor extrinsic operand sizes for registers,
2200                  * so "missing operand size" for a register should be
2201                  * considered a wildcard match rather than an error.
2202                  */
2203                 opsizemissing = true;
2204             }
2205         }
2206     }
2207
2208     if (opsizemissing)
2209         return MERR_OPSIZEMISSING;
2210
2211     /*
2212      * Check operand sizes
2213      */
2214     if (itemp->flags & (IF_SM | IF_SM2)) {
2215         oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
2216         for (i = 0; i < oprs; i++) {
2217             asize = itemp->opd[i] & SIZE_MASK;
2218             if (asize) {
2219                 for (i = 0; i < oprs; i++)
2220                     size[i] = asize;
2221                 break;
2222             }
2223         }
2224     } else {
2225         oprs = itemp->operands;
2226     }
2227
2228     for (i = 0; i < itemp->operands; i++) {
2229         if (!(itemp->opd[i] & SIZE_MASK) &&
2230             (instruction->oprs[i].type & SIZE_MASK & ~size[i]))
2231             return MERR_OPSIZEMISMATCH;
2232     }
2233
2234     /*
2235      * Check template is okay at the set cpu level
2236      */
2237     if (((itemp->flags & IF_PLEVEL) > cpu))
2238         return MERR_BADCPU;
2239
2240     /*
2241      * Verify the appropriate long mode flag.
2242      */
2243     if ((itemp->flags & (bits == 64 ? IF_NOLONG : IF_LONG)))
2244         return MERR_BADMODE;
2245
2246     /*
2247      * If we have a HLE prefix, look for the NOHLE flag
2248      */
2249     if ((itemp->flags & IF_NOHLE) &&
2250         (has_prefix(instruction, PPS_REP, P_XACQUIRE) ||
2251          has_prefix(instruction, PPS_REP, P_XRELEASE)))
2252         return MERR_BADHLE;
2253
2254     /*
2255      * Check if special handling needed for Jumps
2256      */
2257     if ((itemp->code[0] & ~1) == 0370)
2258         return MOK_JUMP;
2259
2260     return MOK_GOOD;
2261 }
2262
2263 static enum ea_type process_ea(operand *input, ea *output, int bits,
2264                                int addrbits, int rfield, opflags_t rflags)
2265 {
2266     bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
2267
2268     output->type    = EA_SCALAR;
2269     output->rip     = false;
2270
2271     /* REX flags for the rfield operand */
2272     output->rex     |= rexflags(rfield, rflags, REX_R | REX_P | REX_W | REX_H);
2273
2274     if (is_class(REGISTER, input->type)) {
2275         /*
2276          * It's a direct register.
2277          */
2278         opflags_t f;
2279
2280         if (!is_register(input->basereg))
2281             goto err;
2282
2283         f = regflag(input);
2284
2285         if (!is_class(REG_EA, f))
2286             goto err;
2287
2288         output->rex         |= op_rexflags(input, REX_B | REX_P | REX_W | REX_H);
2289         output->sib_present = false;    /* no SIB necessary */
2290         output->bytes       = 0;        /* no offset necessary either */
2291         output->modrm       = GEN_MODRM(3, rfield, nasm_regvals[input->basereg]);
2292     } else {
2293         /*
2294          * It's a memory reference.
2295          */
2296         if (input->basereg == -1 &&
2297             (input->indexreg == -1 || input->scale == 0)) {
2298             /*
2299              * It's a pure offset.
2300              */
2301             if (bits == 64 && ((input->type & IP_REL) == IP_REL) &&
2302                 input->segment == NO_SEG) {
2303                 nasm_error(ERR_WARNING | ERR_PASS1, "absolute address can not be RIP-relative");
2304                 input->type &= ~IP_REL;
2305                 input->type |= MEMORY;
2306             }
2307
2308             if (input->eaflags & EAF_BYTEOFFS ||
2309                 (input->eaflags & EAF_WORDOFFS &&
2310                  input->disp_size != (addrbits != 16 ? 32 : 16))) {
2311                 nasm_error(ERR_WARNING | ERR_PASS1, "displacement size ignored on absolute address");
2312             }
2313
2314             if (bits == 64 && (~input->type & IP_REL)) {
2315                 output->sib_present = true;
2316                 output->sib         = GEN_SIB(0, 4, 5);
2317                 output->bytes       = 4;
2318                 output->modrm       = GEN_MODRM(0, rfield, 4);
2319                 output->rip         = false;
2320             } else {
2321                 output->sib_present = false;
2322                 output->bytes       = (addrbits != 16 ? 4 : 2);
2323                 output->modrm       = GEN_MODRM(0, rfield, (addrbits != 16 ? 5 : 6));
2324                 output->rip         = bits == 64;
2325             }
2326         } else {
2327             /*
2328              * It's an indirection.
2329              */
2330             int i = input->indexreg, b = input->basereg, s = input->scale;
2331             int32_t seg = input->segment;
2332             int hb = input->hintbase, ht = input->hinttype;
2333             int t, it, bt;              /* register numbers */
2334             opflags_t x, ix, bx;        /* register flags */
2335
2336             if (s == 0)
2337                 i = -1;         /* make this easy, at least */
2338
2339             if (is_register(i)) {
2340                 it = nasm_regvals[i];
2341                 ix = nasm_reg_flags[i];
2342             } else {
2343                 it = -1;
2344                 ix = 0;
2345             }
2346
2347             if (is_register(b)) {
2348                 bt = nasm_regvals[b];
2349                 bx = nasm_reg_flags[b];
2350             } else {
2351                 bt = -1;
2352                 bx = 0;
2353             }
2354
2355             /* if either one are a vector register... */
2356             if ((ix|bx) & (XMMREG|YMMREG) & ~REG_EA) {
2357                 int32_t sok = BITS32 | BITS64;
2358                 int32_t o = input->offset;
2359                 int mod, scale, index, base;
2360
2361                 /*
2362                  * For a vector SIB, one has to be a vector and the other,
2363                  * if present, a GPR.  The vector must be the index operand.
2364                  */
2365                 if (it == -1 || (bx & (XMMREG|YMMREG) & ~REG_EA)) {
2366                     if (s == 0)
2367                         s = 1;
2368                     else if (s != 1)
2369                         goto err;
2370
2371                     t = bt, bt = it, it = t;
2372                     x = bx, bx = ix, ix = x;
2373                 }
2374
2375                 if (bt != -1) {
2376                     if (REG_GPR & ~bx)
2377                         goto err;
2378                     if (!(REG64 & ~bx) || !(REG32 & ~bx))
2379                         sok &= bx;
2380                     else
2381                         goto err;
2382                 }
2383
2384                 /*
2385                  * While we're here, ensure the user didn't specify
2386                  * WORD or QWORD
2387                  */
2388                 if (input->disp_size == 16 || input->disp_size == 64)
2389                     goto err;
2390
2391                 if (addrbits == 16 ||
2392                     (addrbits == 32 && !(sok & BITS32)) ||
2393                     (addrbits == 64 && !(sok & BITS64)))
2394                     goto err;
2395
2396                 output->type = (ix & YMMREG & ~REG_EA)
2397                     ? EA_YMMVSIB : EA_XMMVSIB;
2398
2399                 output->rex |= rexflags(it, ix, REX_X);
2400                 output->rex |= rexflags(bt, bx, REX_B);
2401
2402                 index = it & 7; /* it is known to be != -1 */
2403
2404                 switch (s) {
2405                 case 1:
2406                     scale = 0;
2407                     break;
2408                 case 2:
2409                     scale = 1;
2410                     break;
2411                 case 4:
2412                     scale = 2;
2413                     break;
2414                 case 8:
2415                     scale = 3;
2416                     break;
2417                 default:   /* then what the smeg is it? */
2418                     goto err;    /* panic */
2419                 }
2420
2421                 if (bt == -1) {
2422                     base = 5;
2423                     mod = 0;
2424                 } else {
2425                     base = (bt & 7);
2426                     if (base != REG_NUM_EBP && o == 0 &&
2427                         seg == NO_SEG && !forw_ref &&
2428                         !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2429                         mod = 0;
2430                     else if (input->eaflags & EAF_BYTEOFFS ||
2431                              (o >= -128 && o <= 127 &&
2432                               seg == NO_SEG && !forw_ref &&
2433                               !(input->eaflags & EAF_WORDOFFS)))
2434                         mod = 1;
2435                     else
2436                         mod = 2;
2437                 }
2438
2439                 output->sib_present = true;
2440                 output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2441                 output->modrm       = GEN_MODRM(mod, rfield, 4);
2442                 output->sib         = GEN_SIB(scale, index, base);
2443             } else if ((ix|bx) & (BITS32|BITS64)) {
2444                 /*
2445                  * it must be a 32/64-bit memory reference. Firstly we have
2446                  * to check that all registers involved are type E/Rxx.
2447                  */
2448                 int32_t sok = BITS32 | BITS64;
2449                 int32_t o = input->offset;
2450
2451                 if (it != -1) {
2452                     if (!(REG64 & ~ix) || !(REG32 & ~ix))
2453                         sok &= ix;
2454                     else
2455                         goto err;
2456                 }
2457
2458                 if (bt != -1) {
2459                     if (REG_GPR & ~bx)
2460                         goto err; /* Invalid register */
2461                     if (~sok & bx & SIZE_MASK)
2462                         goto err; /* Invalid size */
2463                     sok &= bx;
2464                 }
2465
2466                 /*
2467                  * While we're here, ensure the user didn't specify
2468                  * WORD or QWORD
2469                  */
2470                 if (input->disp_size == 16 || input->disp_size == 64)
2471                     goto err;
2472
2473                 if (addrbits == 16 ||
2474                     (addrbits == 32 && !(sok & BITS32)) ||
2475                     (addrbits == 64 && !(sok & BITS64)))
2476                     goto err;
2477
2478                 /* now reorganize base/index */
2479                 if (s == 1 && bt != it && bt != -1 && it != -1 &&
2480                     ((hb == b && ht == EAH_NOTBASE) ||
2481                      (hb == i && ht == EAH_MAKEBASE))) {
2482                     /* swap if hints say so */
2483                     t = bt, bt = it, it = t;
2484                     x = bx, bx = ix, ix = x;
2485                 }
2486                 if (bt == it)     /* convert EAX+2*EAX to 3*EAX */
2487                     bt = -1, bx = 0, s++;
2488                 if (bt == -1 && s == 1 && !(hb == it && ht == EAH_NOTBASE)) {
2489                     /* make single reg base, unless hint */
2490                     bt = it, bx = ix, it = -1, ix = 0;
2491                 }
2492                 if (((s == 2 && it != REG_NUM_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
2493                       s == 3 || s == 5 || s == 9) && bt == -1)
2494                     bt = it, bx = ix, s--; /* convert 3*EAX to EAX+2*EAX */
2495                 if (it == -1 && (bt & 7) != REG_NUM_ESP &&
2496                     (input->eaflags & EAF_TIMESTWO))
2497                     it = bt, ix = bx, bt = -1, bx = 0, s = 1;
2498                 /* convert [NOSPLIT EAX] to sib format with 0x0 displacement */
2499                 if (s == 1 && it == REG_NUM_ESP) {
2500                     /* swap ESP into base if scale is 1 */
2501                     t = it, it = bt, bt = t;
2502                     x = ix, ix = bx, bx = x;
2503                 }
2504                 if (it == REG_NUM_ESP ||
2505                     (s != 1 && s != 2 && s != 4 && s != 8 && it != -1))
2506                     goto err;        /* wrong, for various reasons */
2507
2508                 output->rex |= rexflags(it, ix, REX_X);
2509                 output->rex |= rexflags(bt, bx, REX_B);
2510
2511                 if (it == -1 && (bt & 7) != REG_NUM_ESP) {
2512                     /* no SIB needed */
2513                     int mod, rm;
2514
2515                     if (bt == -1) {
2516                         rm = 5;
2517                         mod = 0;
2518                     } else {
2519                         rm = (bt & 7);
2520                         if (rm != REG_NUM_EBP && o == 0 &&
2521                             seg == NO_SEG && !forw_ref &&
2522                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2523                             mod = 0;
2524                         else if (input->eaflags & EAF_BYTEOFFS ||
2525                                  (o >= -128 && o <= 127 &&
2526                                   seg == NO_SEG && !forw_ref &&
2527                                   !(input->eaflags & EAF_WORDOFFS)))
2528                             mod = 1;
2529                         else
2530                             mod = 2;
2531                     }
2532
2533                     output->sib_present = false;
2534                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2535                     output->modrm       = GEN_MODRM(mod, rfield, rm);
2536                 } else {
2537                     /* we need a SIB */
2538                     int mod, scale, index, base;
2539
2540                     if (it == -1)
2541                         index = 4, s = 1;
2542                     else
2543                         index = (it & 7);
2544
2545                     switch (s) {
2546                     case 1:
2547                         scale = 0;
2548                         break;
2549                     case 2:
2550                         scale = 1;
2551                         break;
2552                     case 4:
2553                         scale = 2;
2554                         break;
2555                     case 8:
2556                         scale = 3;
2557                         break;
2558                     default:   /* then what the smeg is it? */
2559                         goto err;    /* panic */
2560                     }
2561
2562                     if (bt == -1) {
2563                         base = 5;
2564                         mod = 0;
2565                     } else {
2566                         base = (bt & 7);
2567                         if (base != REG_NUM_EBP && o == 0 &&
2568                             seg == NO_SEG && !forw_ref &&
2569                             !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2570                             mod = 0;
2571                         else if (input->eaflags & EAF_BYTEOFFS ||
2572                                  (o >= -128 && o <= 127 &&
2573                                   seg == NO_SEG && !forw_ref &&
2574                                   !(input->eaflags & EAF_WORDOFFS)))
2575                             mod = 1;
2576                         else
2577                             mod = 2;
2578                     }
2579
2580                     output->sib_present = true;
2581                     output->bytes       = (bt == -1 || mod == 2 ? 4 : mod);
2582                     output->modrm       = GEN_MODRM(mod, rfield, 4);
2583                     output->sib         = GEN_SIB(scale, index, base);
2584                 }
2585             } else {            /* it's 16-bit */
2586                 int mod, rm;
2587                 int16_t o = input->offset;
2588
2589                 /* check for 64-bit long mode */
2590                 if (addrbits == 64)
2591                     goto err;
2592
2593                 /* check all registers are BX, BP, SI or DI */
2594                 if ((b != -1 && b != R_BP && b != R_BX && b != R_SI && b != R_DI) ||
2595                     (i != -1 && i != R_BP && i != R_BX && i != R_SI && i != R_DI))
2596                     goto err;
2597
2598                 /* ensure the user didn't specify DWORD/QWORD */
2599                 if (input->disp_size == 32 || input->disp_size == 64)
2600                     goto err;
2601
2602                 if (s != 1 && i != -1)
2603                     goto err;        /* no can do, in 16-bit EA */
2604                 if (b == -1 && i != -1) {
2605                     int tmp = b;
2606                     b = i;
2607                     i = tmp;
2608                 }               /* swap */
2609                 if ((b == R_SI || b == R_DI) && i != -1) {
2610                     int tmp = b;
2611                     b = i;
2612                     i = tmp;
2613                 }
2614                 /* have BX/BP as base, SI/DI index */
2615                 if (b == i)
2616                     goto err;        /* shouldn't ever happen, in theory */
2617                 if (i != -1 && b != -1 &&
2618                     (i == R_BP || i == R_BX || b == R_SI || b == R_DI))
2619                     goto err;        /* invalid combinations */
2620                 if (b == -1)            /* pure offset: handled above */
2621                     goto err;        /* so if it gets to here, panic! */
2622
2623                 rm = -1;
2624                 if (i != -1)
2625                     switch (i * 256 + b) {
2626                     case R_SI * 256 + R_BX:
2627                         rm = 0;
2628                         break;
2629                     case R_DI * 256 + R_BX:
2630                         rm = 1;
2631                         break;
2632                     case R_SI * 256 + R_BP:
2633                         rm = 2;
2634                         break;
2635                     case R_DI * 256 + R_BP:
2636                         rm = 3;
2637                         break;
2638                 } else
2639                     switch (b) {
2640                     case R_SI:
2641                         rm = 4;
2642                         break;
2643                     case R_DI:
2644                         rm = 5;
2645                         break;
2646                     case R_BP:
2647                         rm = 6;
2648                         break;
2649                     case R_BX:
2650                         rm = 7;
2651                         break;
2652                     }
2653                 if (rm == -1)           /* can't happen, in theory */
2654                     goto err;        /* so panic if it does */
2655
2656                 if (o == 0 && seg == NO_SEG && !forw_ref && rm != 6 &&
2657                     !(input->eaflags & (EAF_BYTEOFFS | EAF_WORDOFFS)))
2658                     mod = 0;
2659                 else if (input->eaflags & EAF_BYTEOFFS ||
2660                          (o >= -128 && o <= 127 && seg == NO_SEG &&
2661                           !forw_ref && !(input->eaflags & EAF_WORDOFFS)))
2662                     mod = 1;
2663                 else
2664                     mod = 2;
2665
2666                 output->sib_present = false;    /* no SIB - it's 16-bit */
2667                 output->bytes       = mod;      /* bytes of offset needed */
2668                 output->modrm       = GEN_MODRM(mod, rfield, rm);
2669             }
2670         }
2671     }
2672
2673     output->size = 1 + output->sib_present + output->bytes;
2674     return output->type;
2675
2676 err:
2677     return output->type = EA_INVALID;
2678 }
2679
2680 static void add_asp(insn *ins, int addrbits)
2681 {
2682     int j, valid;
2683     int defdisp;
2684
2685     valid = (addrbits == 64) ? 64|32 : 32|16;
2686
2687     switch (ins->prefixes[PPS_ASIZE]) {
2688     case P_A16:
2689         valid &= 16;
2690         break;
2691     case P_A32:
2692         valid &= 32;
2693         break;
2694     case P_A64:
2695         valid &= 64;
2696         break;
2697     case P_ASP:
2698         valid &= (addrbits == 32) ? 16 : 32;
2699         break;
2700     default:
2701         break;
2702     }
2703
2704     for (j = 0; j < ins->operands; j++) {
2705         if (is_class(MEMORY, ins->oprs[j].type)) {
2706             opflags_t i, b;
2707
2708             /* Verify as Register */
2709             if (!is_register(ins->oprs[j].indexreg))
2710                 i = 0;
2711             else
2712                 i = nasm_reg_flags[ins->oprs[j].indexreg];
2713
2714             /* Verify as Register */
2715             if (!is_register(ins->oprs[j].basereg))
2716                 b = 0;
2717             else
2718                 b = nasm_reg_flags[ins->oprs[j].basereg];
2719
2720             if (ins->oprs[j].scale == 0)
2721                 i = 0;
2722
2723             if (!i && !b) {
2724                 int ds = ins->oprs[j].disp_size;
2725                 if ((addrbits != 64 && ds > 8) ||
2726                     (addrbits == 64 && ds == 16))
2727                     valid &= ds;
2728             } else {
2729                 if (!(REG16 & ~b))
2730                     valid &= 16;
2731                 if (!(REG32 & ~b))
2732                     valid &= 32;
2733                 if (!(REG64 & ~b))
2734                     valid &= 64;
2735
2736                 if (!(REG16 & ~i))
2737                     valid &= 16;
2738                 if (!(REG32 & ~i))
2739                     valid &= 32;
2740                 if (!(REG64 & ~i))
2741                     valid &= 64;
2742             }
2743         }
2744     }
2745
2746     if (valid & addrbits) {
2747         ins->addr_size = addrbits;
2748     } else if (valid & ((addrbits == 32) ? 16 : 32)) {
2749         /* Add an address size prefix */
2750         ins->prefixes[PPS_ASIZE] = (addrbits == 32) ? P_A16 : P_A32;;
2751         ins->addr_size = (addrbits == 32) ? 16 : 32;
2752     } else {
2753         /* Impossible... */
2754         errfunc(ERR_NONFATAL, "impossible combination of address sizes");
2755         ins->addr_size = addrbits; /* Error recovery */
2756     }
2757
2758     defdisp = ins->addr_size == 16 ? 16 : 32;
2759
2760     for (j = 0; j < ins->operands; j++) {
2761         if (!(MEM_OFFS & ~ins->oprs[j].type) &&
2762             (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) != ins->addr_size) {
2763             /*
2764              * mem_offs sizes must match the address size; if not,
2765              * strip the MEM_OFFS bit and match only EA instructions
2766              */
2767             ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
2768         }
2769     }
2770 }